diff --git a/docker/docker-compose-dev-essentials.yaml b/docker/docker-compose-dev-essentials.yaml index a0c231f66..6ccf2d227 100644 --- a/docker/docker-compose-dev-essentials.yaml +++ b/docker/docker-compose-dev-essentials.yaml @@ -28,6 +28,31 @@ services: labels: - traefik.enable=false + es: + image: "docker.elastic.co/elasticsearch/elasticsearch:8.14.1" + container_name: unstract-es + restart: unless-stopped + ports: + - "9200:9200" + environment: + node.name: es + discovery.seed_hosts: es + cluster.initial_master_nodes: es + cluster.name: unstract-metrics + bootstrap.memory_lock: "true" + xpack.security.enabled: "false" + ES_JAVA_OPTS: -Xms256m -Xmx256m + volumes: + - es_data:/usr/share/elasticsearch/data + ulimits: + memlock: + soft: -1 + hard: -1 + labels: + - traefik.enable=false + profiles: + - unstract-metrics + minio: image: 'minio/minio:latest' container_name: unstract-minio @@ -121,8 +146,8 @@ services: - ./essentials.env volumes: - flipt_data: minio_data: postgres_data: qdrant_data: redis_data: + es_data: diff --git a/unstract/metrics/README.md b/unstract/metrics/README.md new file mode 100644 index 000000000..98c246b01 --- /dev/null +++ b/unstract/metrics/README.md @@ -0,0 +1,9 @@ +# Unstract Metrics Aggregator + +Helps collect metrics from Unstract and its adapters and pushes them to Elasticsearch. + +Run `elasticsearch` with the compose profile `unstract-metrics`. + +```shell +VERSION= docker compose -f docker-compose.yaml --profile unstract-metrics up -d +``` diff --git a/unstract/metrics/pdm.lock b/unstract/metrics/pdm.lock new file mode 100644 index 000000000..b2503de9e --- /dev/null +++ b/unstract/metrics/pdm.lock @@ -0,0 +1,507 @@ +# This file is @generated by PDM. +# It is not intended for manual editing. 
+ +[metadata] +groups = ["default", "test"] +strategy = ["cross_platform", "inherit_metadata"] +lock_version = "4.4.1" +content_hash = "sha256:c0e045089db5754a1f14ae9f74ed29b3dfbef9b795b5e5ef5773d1801e3f7cd4" + +[[package]] +name = "certifi" +version = "2024.7.4" +requires_python = ">=3.6" +summary = "Python package for providing Mozilla's CA Bundle." +groups = ["default"] +files = [ + {file = "certifi-2024.7.4-py3-none-any.whl", hash = "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90"}, + {file = "certifi-2024.7.4.tar.gz", hash = "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b"}, +] + +[[package]] +name = "chardet" +version = "5.2.0" +requires_python = ">=3.7" +summary = "Universal encoding detector for Python 3" +groups = ["test"] +files = [ + {file = "chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970"}, + {file = "chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7"}, +] + +[[package]] +name = "colorama" +version = "0.4.6" +requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +summary = "Cross-platform colored terminal text." 
+groups = ["test"] +marker = "sys_platform == \"win32\"" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "coverage" +version = "7.5.4" +requires_python = ">=3.8" +summary = "Code coverage measurement for Python" +groups = ["test"] +files = [ + {file = "coverage-7.5.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6cfb5a4f556bb51aba274588200a46e4dd6b505fb1a5f8c5ae408222eb416f99"}, + {file = "coverage-7.5.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2174e7c23e0a454ffe12267a10732c273243b4f2d50d07544a91198f05c48f47"}, + {file = "coverage-7.5.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2214ee920787d85db1b6a0bd9da5f8503ccc8fcd5814d90796c2f2493a2f4d2e"}, + {file = "coverage-7.5.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1137f46adb28e3813dec8c01fefadcb8c614f33576f672962e323b5128d9a68d"}, + {file = "coverage-7.5.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b385d49609f8e9efc885790a5a0e89f2e3ae042cdf12958b6034cc442de428d3"}, + {file = "coverage-7.5.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b4a474f799456e0eb46d78ab07303286a84a3140e9700b9e154cfebc8f527016"}, + {file = "coverage-7.5.4-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5cd64adedf3be66f8ccee418473c2916492d53cbafbfcff851cbec5a8454b136"}, + {file = "coverage-7.5.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e564c2cf45d2f44a9da56f4e3a26b2236504a496eb4cb0ca7221cd4cc7a9aca9"}, + {file = "coverage-7.5.4-cp310-cp310-win32.whl", hash = "sha256:7076b4b3a5f6d2b5d7f1185fde25b1e54eb66e647a1dfef0e2c2bfaf9b4c88c8"}, + {file = "coverage-7.5.4-cp310-cp310-win_amd64.whl", hash = 
"sha256:018a12985185038a5b2bcafab04ab833a9a0f2c59995b3cec07e10074c78635f"}, + {file = "coverage-7.5.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:db14f552ac38f10758ad14dd7b983dbab424e731588d300c7db25b6f89e335b5"}, + {file = "coverage-7.5.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3257fdd8e574805f27bb5342b77bc65578e98cbc004a92232106344053f319ba"}, + {file = "coverage-7.5.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a6612c99081d8d6134005b1354191e103ec9705d7ba2754e848211ac8cacc6b"}, + {file = "coverage-7.5.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d45d3cbd94159c468b9b8c5a556e3f6b81a8d1af2a92b77320e887c3e7a5d080"}, + {file = "coverage-7.5.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed550e7442f278af76d9d65af48069f1fb84c9f745ae249c1a183c1e9d1b025c"}, + {file = "coverage-7.5.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7a892be37ca35eb5019ec85402c3371b0f7cda5ab5056023a7f13da0961e60da"}, + {file = "coverage-7.5.4-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8192794d120167e2a64721d88dbd688584675e86e15d0569599257566dec9bf0"}, + {file = "coverage-7.5.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:820bc841faa502e727a48311948e0461132a9c8baa42f6b2b84a29ced24cc078"}, + {file = "coverage-7.5.4-cp311-cp311-win32.whl", hash = "sha256:6aae5cce399a0f065da65c7bb1e8abd5c7a3043da9dceb429ebe1b289bc07806"}, + {file = "coverage-7.5.4-cp311-cp311-win_amd64.whl", hash = "sha256:d2e344d6adc8ef81c5a233d3a57b3c7d5181f40e79e05e1c143da143ccb6377d"}, + {file = "coverage-7.5.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b95c3a8cb0463ba9f77383d0fa8c9194cf91f64445a63fc26fb2327e1e1eb088"}, + {file = "coverage-7.5.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3d7564cc09dd91b5a6001754a5b3c6ecc4aba6323baf33a12bd751036c998be4"}, + {file = 
"coverage-7.5.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44da56a2589b684813f86d07597fdf8a9c6ce77f58976727329272f5a01f99f7"}, + {file = "coverage-7.5.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e16f3d6b491c48c5ae726308e6ab1e18ee830b4cdd6913f2d7f77354b33f91c8"}, + {file = "coverage-7.5.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dbc5958cb471e5a5af41b0ddaea96a37e74ed289535e8deca404811f6cb0bc3d"}, + {file = "coverage-7.5.4-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a04e990a2a41740b02d6182b498ee9796cf60eefe40cf859b016650147908029"}, + {file = "coverage-7.5.4-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:ddbd2f9713a79e8e7242d7c51f1929611e991d855f414ca9996c20e44a895f7c"}, + {file = "coverage-7.5.4-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:b1ccf5e728ccf83acd313c89f07c22d70d6c375a9c6f339233dcf792094bcbf7"}, + {file = "coverage-7.5.4-cp39-cp39-win32.whl", hash = "sha256:56b4eafa21c6c175b3ede004ca12c653a88b6f922494b023aeb1e836df953ace"}, + {file = "coverage-7.5.4-cp39-cp39-win_amd64.whl", hash = "sha256:65e528e2e921ba8fd67d9055e6b9f9e34b21ebd6768ae1c1723f4ea6ace1234d"}, + {file = "coverage-7.5.4-pp38.pp39.pp310-none-any.whl", hash = "sha256:79b356f3dd5b26f3ad23b35c75dbdaf1f9e2450b6bcefc6d0825ea0aa3f86ca5"}, + {file = "coverage-7.5.4.tar.gz", hash = "sha256:a44963520b069e12789d0faea4e9fdb1e410cdc4aab89d94f7f55cbb7fef0353"}, +] + +[[package]] +name = "coverage" +version = "7.5.4" +extras = ["toml"] +requires_python = ">=3.8" +summary = "Code coverage measurement for Python" +groups = ["test"] +dependencies = [ + "coverage==7.5.4", + "tomli; python_full_version <= \"3.11.0a6\"", +] +files = [ + {file = "coverage-7.5.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6cfb5a4f556bb51aba274588200a46e4dd6b505fb1a5f8c5ae408222eb416f99"}, + {file = 
"coverage-7.5.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2174e7c23e0a454ffe12267a10732c273243b4f2d50d07544a91198f05c48f47"}, + {file = "coverage-7.5.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2214ee920787d85db1b6a0bd9da5f8503ccc8fcd5814d90796c2f2493a2f4d2e"}, + {file = "coverage-7.5.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1137f46adb28e3813dec8c01fefadcb8c614f33576f672962e323b5128d9a68d"}, + {file = "coverage-7.5.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b385d49609f8e9efc885790a5a0e89f2e3ae042cdf12958b6034cc442de428d3"}, + {file = "coverage-7.5.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b4a474f799456e0eb46d78ab07303286a84a3140e9700b9e154cfebc8f527016"}, + {file = "coverage-7.5.4-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5cd64adedf3be66f8ccee418473c2916492d53cbafbfcff851cbec5a8454b136"}, + {file = "coverage-7.5.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e564c2cf45d2f44a9da56f4e3a26b2236504a496eb4cb0ca7221cd4cc7a9aca9"}, + {file = "coverage-7.5.4-cp310-cp310-win32.whl", hash = "sha256:7076b4b3a5f6d2b5d7f1185fde25b1e54eb66e647a1dfef0e2c2bfaf9b4c88c8"}, + {file = "coverage-7.5.4-cp310-cp310-win_amd64.whl", hash = "sha256:018a12985185038a5b2bcafab04ab833a9a0f2c59995b3cec07e10074c78635f"}, + {file = "coverage-7.5.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:db14f552ac38f10758ad14dd7b983dbab424e731588d300c7db25b6f89e335b5"}, + {file = "coverage-7.5.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3257fdd8e574805f27bb5342b77bc65578e98cbc004a92232106344053f319ba"}, + {file = "coverage-7.5.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a6612c99081d8d6134005b1354191e103ec9705d7ba2754e848211ac8cacc6b"}, + {file = 
"coverage-7.5.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d45d3cbd94159c468b9b8c5a556e3f6b81a8d1af2a92b77320e887c3e7a5d080"}, + {file = "coverage-7.5.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed550e7442f278af76d9d65af48069f1fb84c9f745ae249c1a183c1e9d1b025c"}, + {file = "coverage-7.5.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7a892be37ca35eb5019ec85402c3371b0f7cda5ab5056023a7f13da0961e60da"}, + {file = "coverage-7.5.4-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8192794d120167e2a64721d88dbd688584675e86e15d0569599257566dec9bf0"}, + {file = "coverage-7.5.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:820bc841faa502e727a48311948e0461132a9c8baa42f6b2b84a29ced24cc078"}, + {file = "coverage-7.5.4-cp311-cp311-win32.whl", hash = "sha256:6aae5cce399a0f065da65c7bb1e8abd5c7a3043da9dceb429ebe1b289bc07806"}, + {file = "coverage-7.5.4-cp311-cp311-win_amd64.whl", hash = "sha256:d2e344d6adc8ef81c5a233d3a57b3c7d5181f40e79e05e1c143da143ccb6377d"}, + {file = "coverage-7.5.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b95c3a8cb0463ba9f77383d0fa8c9194cf91f64445a63fc26fb2327e1e1eb088"}, + {file = "coverage-7.5.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3d7564cc09dd91b5a6001754a5b3c6ecc4aba6323baf33a12bd751036c998be4"}, + {file = "coverage-7.5.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44da56a2589b684813f86d07597fdf8a9c6ce77f58976727329272f5a01f99f7"}, + {file = "coverage-7.5.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e16f3d6b491c48c5ae726308e6ab1e18ee830b4cdd6913f2d7f77354b33f91c8"}, + {file = "coverage-7.5.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dbc5958cb471e5a5af41b0ddaea96a37e74ed289535e8deca404811f6cb0bc3d"}, + {file = 
"coverage-7.5.4-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a04e990a2a41740b02d6182b498ee9796cf60eefe40cf859b016650147908029"}, + {file = "coverage-7.5.4-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:ddbd2f9713a79e8e7242d7c51f1929611e991d855f414ca9996c20e44a895f7c"}, + {file = "coverage-7.5.4-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:b1ccf5e728ccf83acd313c89f07c22d70d6c375a9c6f339233dcf792094bcbf7"}, + {file = "coverage-7.5.4-cp39-cp39-win32.whl", hash = "sha256:56b4eafa21c6c175b3ede004ca12c653a88b6f922494b023aeb1e836df953ace"}, + {file = "coverage-7.5.4-cp39-cp39-win_amd64.whl", hash = "sha256:65e528e2e921ba8fd67d9055e6b9f9e34b21ebd6768ae1c1723f4ea6ace1234d"}, + {file = "coverage-7.5.4-pp38.pp39.pp310-none-any.whl", hash = "sha256:79b356f3dd5b26f3ad23b35c75dbdaf1f9e2450b6bcefc6d0825ea0aa3f86ca5"}, + {file = "coverage-7.5.4.tar.gz", hash = "sha256:a44963520b069e12789d0faea4e9fdb1e410cdc4aab89d94f7f55cbb7fef0353"}, +] + +[[package]] +name = "dataproperty" +version = "1.0.1" +requires_python = ">=3.7" +summary = "Python library for extract property from data." 
+groups = ["test"] +dependencies = [ + "mbstrdecoder<2,>=1.0.0", + "typepy[datetime]<2,>=1.2.0", +] +files = [ + {file = "DataProperty-1.0.1-py3-none-any.whl", hash = "sha256:0b8b07d4fb6453fcf975b53d35dea41f3cfd69c9d79b5010c3cf224ff0407a7a"}, + {file = "DataProperty-1.0.1.tar.gz", hash = "sha256:723e5729fa6e885e127a771a983ee1e0e34bb141aca4ffe1f0bfa7cde34650a4"}, +] + +[[package]] +name = "elastic-transport" +version = "8.13.1" +requires_python = ">=3.7" +summary = "Transport classes and utilities shared among Python Elastic client libraries" +groups = ["default"] +dependencies = [ + "certifi", + "urllib3<3,>=1.26.2", +] +files = [ + {file = "elastic_transport-8.13.1-py3-none-any.whl", hash = "sha256:5d4bb6b8e9d74a9c16de274e91a5caf65a3a8d12876f1e99152975e15b2746fe"}, + {file = "elastic_transport-8.13.1.tar.gz", hash = "sha256:16339d392b4bbe86ad00b4bdeecff10edf516d32bc6c16053846625f2c6ea250"}, +] + +[[package]] +name = "elasticsearch" +version = "8.14.0" +requires_python = ">=3.7" +summary = "Python client for Elasticsearch" +groups = ["default"] +dependencies = [ + "elastic-transport<9,>=8.13", +] +files = [ + {file = "elasticsearch-8.14.0-py3-none-any.whl", hash = "sha256:cef8ef70a81af027f3da74a4f7d9296b390c636903088439087b8262a468c130"}, + {file = "elasticsearch-8.14.0.tar.gz", hash = "sha256:aa2490029dd96f4015b333c1827aa21fd6c0a4d223b00dfb0fe933b8d09a511b"}, +] + +[[package]] +name = "elasticsearch-dsl" +version = "8.14.0" +requires_python = ">=3.8" +summary = "Python client for Elasticsearch" +groups = ["default"] +dependencies = [ + "elasticsearch<9.0.0,>=8.0.0", + "python-dateutil", + "typing-extensions", +] +files = [ + {file = "elasticsearch-dsl-8.14.0.tar.gz", hash = "sha256:326c6dccf32f1ff3d4c84889388590778444ba18f770adee52f24721cb97c4c7"}, + {file = "elasticsearch_dsl-8.14.0-py3-none-any.whl", hash = "sha256:99c5cbda28a37eec05ac78e82fdc87e8641eb83956ea63a817bf0e9da3d688dd"}, +] + +[[package]] +name = "exceptiongroup" +version = "1.2.1" +requires_python = 
">=3.7" +summary = "Backport of PEP 654 (exception groups)" +groups = ["test"] +marker = "python_version < \"3.11\"" +files = [ + {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"}, + {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"}, +] + +[[package]] +name = "iniconfig" +version = "2.0.0" +requires_python = ">=3.7" +summary = "brain-dead simple config-ini parsing" +groups = ["test"] +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + +[[package]] +name = "mbstrdecoder" +version = "1.1.3" +requires_python = ">=3.7" +summary = "mbstrdecoder is a Python library for multi-byte character string decoder" +groups = ["test"] +dependencies = [ + "chardet<6,>=3.0.4", +] +files = [ + {file = "mbstrdecoder-1.1.3-py3-none-any.whl", hash = "sha256:d66c1ed3f2dc4e7c5d87cd44a75be10bc5af4250f95b38bbaedd7851308ce938"}, + {file = "mbstrdecoder-1.1.3.tar.gz", hash = "sha256:dcfd2c759322eb44fe193a9e0b1b86c5b87f3ec5ea8e1bb43b3e9ae423f1e8fe"}, +] + +[[package]] +name = "packaging" +version = "24.1" +requires_python = ">=3.8" +summary = "Core utilities for Python packages" +groups = ["test"] +files = [ + {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"}, + {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, +] + +[[package]] +name = "pathvalidate" +version = "3.2.0" +requires_python = ">=3.7" +summary = "pathvalidate is a Python library to sanitize/validate a string such as filenames/file-paths/etc." 
+groups = ["test"] +files = [ + {file = "pathvalidate-3.2.0-py3-none-any.whl", hash = "sha256:cc593caa6299b22b37f228148257997e2fa850eea2daf7e4cc9205cef6908dee"}, + {file = "pathvalidate-3.2.0.tar.gz", hash = "sha256:5e8378cf6712bff67fbe7a8307d99fa8c1a0cb28aa477056f8fc374f0dff24ad"}, +] + +[[package]] +name = "pluggy" +version = "1.5.0" +requires_python = ">=3.8" +summary = "plugin and hook calling mechanisms for python" +groups = ["test"] +files = [ + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, +] + +[[package]] +name = "pytablewriter" +version = "1.2.0" +requires_python = ">=3.7" +summary = "pytablewriter is a Python library to write a table in various formats: AsciiDoc / CSV / Elasticsearch / HTML / JavaScript / JSON / LaTeX / LDJSON / LTSV / Markdown / MediaWiki / NumPy / Excel / Pandas / Python / reStructuredText / SQLite / TOML / TSV / YAML." 
+groups = ["test"] +dependencies = [ + "DataProperty<2,>=1.0.1", + "mbstrdecoder<2,>=1.0.0", + "pathvalidate<4,>=2.3.0", + "setuptools>=38.3.0", + "tabledata<2,>=1.3.1", + "tcolorpy<1,>=0.0.5", + "typepy[datetime]<2,>=1.3.2", +] +files = [ + {file = "pytablewriter-1.2.0-py3-none-any.whl", hash = "sha256:4a30e2bb4bf5bc1069b1d2b2bc41947577c4517ab0875b23a5b194d296f543d8"}, + {file = "pytablewriter-1.2.0.tar.gz", hash = "sha256:0204a4bb684a22140d640f2599f09e137bcdc18b3dd49426f4a555016e246b46"}, +] + +[[package]] +name = "pytest" +version = "8.2.2" +requires_python = ">=3.8" +summary = "pytest: simple powerful testing with Python" +groups = ["test"] +dependencies = [ + "colorama; sys_platform == \"win32\"", + "exceptiongroup>=1.0.0rc8; python_version < \"3.11\"", + "iniconfig", + "packaging", + "pluggy<2.0,>=1.5", + "tomli>=1; python_version < \"3.11\"", +] +files = [ + {file = "pytest-8.2.2-py3-none-any.whl", hash = "sha256:c434598117762e2bd304e526244f67bf66bbd7b5d6cf22138be51ff661980343"}, + {file = "pytest-8.2.2.tar.gz", hash = "sha256:de4bb8104e201939ccdc688b27a89a7be2079b22e2bd2b07f806b6ba71117977"}, +] + +[[package]] +name = "pytest-cov" +version = "5.0.0" +requires_python = ">=3.8" +summary = "Pytest plugin for measuring coverage." 
+groups = ["test"] +dependencies = [ + "coverage[toml]>=5.2.1", + "pytest>=4.6", +] +files = [ + {file = "pytest-cov-5.0.0.tar.gz", hash = "sha256:5837b58e9f6ebd335b0f8060eecce69b662415b16dc503883a02f45dfeb14857"}, + {file = "pytest_cov-5.0.0-py3-none-any.whl", hash = "sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652"}, +] + +[[package]] +name = "pytest-dotenv" +version = "0.5.2" +summary = "A py.test plugin that parses environment files before running tests" +groups = ["test"] +dependencies = [ + "pytest>=5.0.0", + "python-dotenv>=0.9.1", +] +files = [ + {file = "pytest-dotenv-0.5.2.tar.gz", hash = "sha256:2dc6c3ac6d8764c71c6d2804e902d0ff810fa19692e95fe138aefc9b1aa73732"}, + {file = "pytest_dotenv-0.5.2-py3-none-any.whl", hash = "sha256:40a2cece120a213898afaa5407673f6bd924b1fa7eafce6bda0e8abffe2f710f"}, +] + +[[package]] +name = "pytest-md-report" +version = "0.6.2" +requires_python = ">=3.7" +summary = "A pytest plugin to generate test outcomes reports with markdown table format." 
+groups = ["test"] +dependencies = [ + "pytablewriter<2,>=1.2.0", + "pytest!=6.0.0,<9,>=3.3.2", + "tcolorpy<1,>=0.0.5", + "typepy<2,>=1.1.1", +] +files = [ + {file = "pytest_md_report-0.6.2-py3-none-any.whl", hash = "sha256:66e27efa5c155c87eb4700d60876e61a85c13361448c4031fda964c43e63c9b9"}, + {file = "pytest_md_report-0.6.2.tar.gz", hash = "sha256:5e96c655ebc9b5c3c7b78bf7c5382c1f68056e96904430252790f8737de5ce99"}, +] + +[[package]] +name = "pytest-mock" +version = "3.14.0" +requires_python = ">=3.8" +summary = "Thin-wrapper around the mock package for easier use with pytest" +groups = ["test"] +dependencies = [ + "pytest>=6.2.5", +] +files = [ + {file = "pytest-mock-3.14.0.tar.gz", hash = "sha256:2719255a1efeceadbc056d6bf3df3d1c5015530fb40cf347c0f9afac88410bd0"}, + {file = "pytest_mock-3.14.0-py3-none-any.whl", hash = "sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f"}, +] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +summary = "Extensions to the standard Python datetime module" +groups = ["default", "test"] +dependencies = [ + "six>=1.5", +] +files = [ + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, +] + +[[package]] +name = "python-dotenv" +version = "1.0.1" +requires_python = ">=3.8" +summary = "Read key-value pairs from a .env file and set them as environment variables" +groups = ["test"] +files = [ + {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, + {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, +] + +[[package]] +name = "pytz" +version = "2024.1" +summary = "World timezone definitions, 
modern and historical" +groups = ["test"] +files = [ + {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"}, + {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, +] + +[[package]] +name = "setuptools" +version = "70.2.0" +requires_python = ">=3.8" +summary = "Easily download, build, install, upgrade, and uninstall Python packages" +groups = ["test"] +files = [ + {file = "setuptools-70.2.0-py3-none-any.whl", hash = "sha256:b8b8060bb426838fbe942479c90296ce976249451118ef566a5a0b7d8b78fb05"}, + {file = "setuptools-70.2.0.tar.gz", hash = "sha256:bd63e505105011b25c3c11f753f7e3b8465ea739efddaccef8f0efac2137bac1"}, +] + +[[package]] +name = "six" +version = "1.16.0" +requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +summary = "Python 2 and 3 compatibility utilities" +groups = ["default", "test"] +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[[package]] +name = "tabledata" +version = "1.3.3" +requires_python = ">=3.7" +summary = "tabledata is a Python library to represent tabular data. Used for pytablewriter/pytablereader/SimpleSQLite/etc." +groups = ["test"] +dependencies = [ + "DataProperty<2,>=1.0.1", + "typepy<2,>=1.2.0", +] +files = [ + {file = "tabledata-1.3.3-py3-none-any.whl", hash = "sha256:4abad1c996d8607e23b045b44dc0c5f061668f3c37585302c5f6c84c93a89962"}, + {file = "tabledata-1.3.3.tar.gz", hash = "sha256:c90daaba9a408e4397934b3ff2f6c06797d5289676420bf520c741ad43e6ff91"}, +] + +[[package]] +name = "tcolorpy" +version = "0.1.6" +requires_python = ">=3.7" +summary = "tcolopy is a Python library to apply true color for terminal text." 
+groups = ["test"] +files = [ + {file = "tcolorpy-0.1.6-py3-none-any.whl", hash = "sha256:8c15cb3167f30b0a433d72297e9d68667c825bd9e2af41c8dd7dfbd3d7f7e207"}, + {file = "tcolorpy-0.1.6.tar.gz", hash = "sha256:8cea0bf5f8cf03f77528a9acfbf312df935573892ba5ea3b2516e61fa54de9a5"}, +] + +[[package]] +name = "tomli" +version = "2.0.1" +requires_python = ">=3.7" +summary = "A lil' TOML parser" +groups = ["test"] +marker = "python_version < \"3.11\"" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + +[[package]] +name = "typepy" +version = "1.3.2" +requires_python = ">=3.7" +summary = "typepy is a Python library for variable type checker/validator/converter at a run time." +groups = ["test"] +dependencies = [ + "mbstrdecoder<2,>=1.0.0", +] +files = [ + {file = "typepy-1.3.2-py3-none-any.whl", hash = "sha256:d5d1022a424132622993800f1d2cd16cfdb691ac4e3b9c325f0fcb37799db1ae"}, + {file = "typepy-1.3.2.tar.gz", hash = "sha256:b69fd48b9f50cdb3809906eef36b855b3134ff66c8893a4f8580abddb0b39517"}, +] + +[[package]] +name = "typepy" +version = "1.3.2" +extras = ["datetime"] +requires_python = ">=3.7" +summary = "typepy is a Python library for variable type checker/validator/converter at a run time." 
+groups = ["test"] +dependencies = [ + "packaging", + "python-dateutil<3.0.0,>=2.8.0", + "pytz>=2018.9", + "typepy==1.3.2", +] +files = [ + {file = "typepy-1.3.2-py3-none-any.whl", hash = "sha256:d5d1022a424132622993800f1d2cd16cfdb691ac4e3b9c325f0fcb37799db1ae"}, + {file = "typepy-1.3.2.tar.gz", hash = "sha256:b69fd48b9f50cdb3809906eef36b855b3134ff66c8893a4f8580abddb0b39517"}, +] + +[[package]] +name = "typing-extensions" +version = "4.12.2" +requires_python = ">=3.8" +summary = "Backported and Experimental Type Hints for Python 3.8+" +groups = ["default"] +files = [ + {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, + {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, +] + +[[package]] +name = "urllib3" +version = "2.2.2" +requires_python = ">=3.8" +summary = "HTTP library with thread-safe connection pooling, file post, and more." 
+groups = ["default"] +files = [ + {file = "urllib3-2.2.2-py3-none-any.whl", hash = "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472"}, + {file = "urllib3-2.2.2.tar.gz", hash = "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168"}, +] diff --git a/unstract/metrics/pyproject.toml b/unstract/metrics/pyproject.toml new file mode 100644 index 000000000..5df7e2941 --- /dev/null +++ b/unstract/metrics/pyproject.toml @@ -0,0 +1,38 @@ +[build-system] +requires = ["pdm-backend"] +build-backend = "pdm.backend" + +[project] +name = "unstract-metrics" +version = "0.0.1" +description = "Helps with collection of metrics from Unstract's adapters" +authors = [{ name = "Zipstack Inc.", email = "devsupport@zipstack.com" }] +dependencies = ["elasticsearch-dsl~=8.14.0"] +# <3.11.1 due to resolution error from Unstract SDK +requires-python = ">=3.9,<3.11.1" +readme = "README.md" +classifiers = ["Programming Language :: Python"] + +[tool.pdm.dev-dependencies] +test = [ + "pytest>=8.2.2", + "pytest-mock>=3.14.0", + "pytest-dotenv>=0.5.2", + "pytest-cov>=5.0.0", + "pytest-md-report>=0.6.2", +] + +[tool.pdm.build] +includes = ["src"] +package-dir = "src" + +[tool.pytest.ini_options] +env_files = ["tests/.env"] +addopts = "-s" +log_level = "INFO" +log_cli = true + +[tool.pdm.scripts] +test.cmd = "pytest -s -v" +test.env_file = "tests/.env" +test.help = "Runs pytests for Unstract Metrics" diff --git a/unstract/metrics/src/unstract/metrics/__init__.py b/unstract/metrics/src/unstract/metrics/__init__.py new file mode 100644 index 000000000..2d6c206d6 --- /dev/null +++ b/unstract/metrics/src/unstract/metrics/__init__.py @@ -0,0 +1,20 @@ +import os + +from elasticsearch_dsl import connections + +from .metrics import MetricsAggregator, capture_metrics # noqa: F401 + +ES_URL = os.getenv("ES_URL") +ES_CLOUD_ID = os.getenv("ES_CLOUD_ID") +ES_API_KEY = os.getenv("ES_API_KEY") +if not (ES_URL or (ES_CLOUD_ID and ES_API_KEY)): + raise ValueError( + "Either env
ES_URL or ES_CLOUD_ID and ES_API_KEY " + "is required to import unstract-metrics" + ) + +# Pass exactly one of hosts / cloud_id; ES_URL wins when both are configured +if ES_URL: + connections.create_connection(hosts=[ES_URL], api_key=ES_API_KEY) +else: + connections.create_connection(cloud_id=ES_CLOUD_ID, api_key=ES_API_KEY) diff --git a/unstract/metrics/src/unstract/metrics/constants.py b/unstract/metrics/src/unstract/metrics/constants.py new file mode 100644 index 000000000..e29fe7d4e --- /dev/null +++ b/unstract/metrics/src/unstract/metrics/constants.py @@ -0,0 +1,6 @@ +class MetricsConstants: + DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%f%z" + + +class MetricsEnv: + COLLECT_UNSTRACT_METRICS = "COLLECT_UNSTRACT_METRICS" diff --git a/unstract/metrics/src/unstract/metrics/metrics.py b/unstract/metrics/src/unstract/metrics/metrics.py new file mode 100644 index 000000000..4952db09e --- /dev/null +++ b/unstract/metrics/src/unstract/metrics/metrics.py @@ -0,0 +1,63 @@ +import functools +import json +import logging +import os +from datetime import datetime +from typing import Optional +from uuid import uuid4 + +from unstract.metrics.constants import MetricsConstants, MetricsEnv +from unstract.metrics.models.metrics import Metrics + +logger = logging.getLogger(__name__) + + +class MetricsAggregator: + + def __init__(self, index_to_clone: Optional[str] = None) -> None: + # TODO: Create index with dynamic templates through a separate command + if not Metrics._index.exists(): + Metrics.init(index=index_to_clone) + + def add_metrics(self, metrics, index: str = "unstract-metrics-0"): + metrics_doc = Metrics(**metrics) + metrics_doc.save(index=index) + + def query_metrics(self, run_id: str, index: str = "unstract-metrics-0"): + s = Metrics.search(index=index).query("match", run_id=run_id) + response = s.execute() + return response.to_dict() + + +def capture_metrics(index="unstract-metrics-0", **metric_kwargs): + def decorator(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + + if ( + os.getenv(MetricsEnv.COLLECT_UNSTRACT_METRICS, "False").lower() + == "false" + ): + return func(*args, **kwargs) + + logger.debug(
+ f"Collecting metrics with kwargs: {json.dumps(metric_kwargs, indent=2)}" + ) + metrics = Metrics(**metric_kwargs) + if not metrics.run_id: + metrics.run_id = uuid4() + metrics.start_time = datetime.now().astimezone().strftime( + MetricsConstants.DATETIME_FORMAT + ) + try: + result = func(*args, **kwargs) + finally: + metrics.end_time = datetime.now().astimezone().strftime( + MetricsConstants.DATETIME_FORMAT + ) + metrics.save(index=index) + return result + + return wrapper + + return decorator diff --git a/unstract/metrics/src/unstract/metrics/models/log.py b/unstract/metrics/src/unstract/metrics/models/log.py new file mode 100644 index 000000000..1b19e7679 --- /dev/null +++ b/unstract/metrics/src/unstract/metrics/models/log.py @@ -0,0 +1,7 @@ +from elasticsearch_dsl import Date, InnerDoc, Keyword, Text + + +class Log(InnerDoc): + level = Keyword() + time = Date() + message = Text() diff --git a/unstract/metrics/src/unstract/metrics/models/metrics.py b/unstract/metrics/src/unstract/metrics/models/metrics.py new file mode 100644 index 000000000..59abb8959 --- /dev/null +++ b/unstract/metrics/src/unstract/metrics/models/metrics.py @@ -0,0 +1,100 @@ +import logging + +from elasticsearch_dsl import Date, Document, Keyword, Nested, Text + +from .operation import ( + EmbeddingOperation, + LLMOperation, + Operation, + VectorDBOperation, + X2TextOperation, +) + +logger = logging.getLogger(__name__) + + +class Metrics(Document): + org_id = Keyword(required=True) + run_id = Keyword() + start_time = Date(required=True) + end_time = Date() + owner = Keyword() + agent = Keyword() # TODO: Enum - WF | API | PS + agent_name = Text() + agent_id = Keyword() + status = Keyword() # TODO: Make enum + api_key = Text() + operations = Nested(Operation) + + class Index: + name = "unstract-metrics-*" + settings = {"number_of_replicas": 0, "number_of_shards": 1} + + def save( + self, + using=None, + index=None, + validate=True, + skip_empty=True, + return_doc_meta=False, + **kwargs, + ): + self.meta.id =
@classmethod
def create_index(cls):
    """Initialise the index and register per-sub-process dynamic templates.

    Calls ``cls.init()`` and then sends one ``put_mapping`` request holding a
    dynamic template for each sub-process (LLM, VECTORDB, EMBEDDING, X2TEXT).
    Each template takes its ``properties`` from the matching operation
    class's mapping.
    """
    cls.init()

    # (template name, value of "match", operation class supplying properties).
    # Each template gets its own name; the X2TEXT entry previously reused
    # "embedding_template".
    sub_process_templates = (
        ("llm_template", "LLM", LLMOperation),
        ("vectordb_template", "VECTORDB", VectorDBOperation),
        ("embedding_template", "EMBEDDING", EmbeddingOperation),
        ("x2text_template", "X2TEXT", X2TextOperation),
    )

    # Add dynamic templates for sub_process specific mappings
    dynamic_templates = [
        {
            template_name: {
                "path_match": "operations.sub_process",
                "match_mapping_type": "string",
                "mapping": {
                    "type": "nested",
                    "properties": operation._doc_type.mapping.properties.to_dict(),
                },
                "match": sub_process,
            }
        }
        for template_name, sub_process, operation in sub_process_templates
    ]
    cls._index.put_mapping(body={"dynamic_templates": dynamic_templates})
def _adapter_metadata(**extra_properties):
    """Build the ``adapter_metadata`` object field for an operation.

    The three properties every adapter declares come first, followed by
    ``extra_properties`` in the order given.
    """
    return Object(
        properties={
            "adapter_instance_id": Keyword(),
            "type": Keyword(),
            "name": Text(),
            **extra_properties,
        }
    )


class LLMOperation(InnerDoc):
    """Fields recorded for an LLM sub-process operation."""

    prompt = Text()
    generated_response = Text()
    adapter_metadata = _adapter_metadata(
        model=Text(),
        max_retries=Integer(doc_values=False),
        max_output_tokens=Integer(doc_values=False),
    )
    metrics = Object(
        properties=dict(
            input_tokens=Integer(),
            output_tokens=Integer(),
            latency=Float(),
            input_tokens_cost=Float(),
            output_tokens_cost=Float(),
            total_cost=Float(),
        )
    )


class VectorDBOperation(InnerDoc):
    """Fields recorded for a vector DB sub-process operation."""

    doc_id = Keyword()
    retrieved_docs = Keyword(multi=True)
    adapter_metadata = _adapter_metadata(dimension=Integer(doc_values=False))
    metrics = Object(
        properties=dict(operation=Keyword(), count=Integer(), latency=Float())
    )


class EmbeddingOperation(InnerDoc):
    """Fields recorded for an embedding sub-process operation."""

    adapter_metadata = _adapter_metadata(model=Text(), embed_batch_size=Integer())
    metrics = Object(
        properties=dict(tokens=Integer(), latency=Float(), cost=Float())
    )


class X2TextOperation(InnerDoc):
    """Fields recorded for an X2Text sub-process operation."""

    adapter_metadata = _adapter_metadata(mode=Text())
    metrics = Object(
        properties=dict(pages_extracted=Integer(), latency=Float())
    )
@pytest.fixture(scope="module")
def es_client():
    """Yield an Elasticsearch client and drop the test index afterwards.

    The host is read from the ``ES_URL`` environment variable, falling back
    to ``http://localhost:9200`` when it is not set. On teardown the index
    named ``TEST_INDEX_NAME`` is deleted (400/404 responses are ignored) and
    the client is closed.
    """
    import os

    client = Elasticsearch(hosts=[os.getenv("ES_URL", "http://localhost:9200")])
    yield client
    try:
        client.options(ignore_status=[400, 404]).indices.delete(
            index=TEST_INDEX_NAME
        )
    finally:
        # Release the client's connections even if the delete call fails.
        client.close()


@pytest.fixture
def metrics_agg():
    """Yield a fresh ``MetricsAggregator`` for each test."""
    yield MetricsAggregator()
0.01, + "total_cost": 0.015 + } + }, + { + "operation_id": "20584e6a-6898-41c3-b32a-fe721f8c3a98", + "process": "VECTORDB", + "sub_process": "VECTORDB", + "context": "test_file", + "status": "SUCCESS", + "start_time": "2024-07-02T12:00:00", + "end_time": "2024-07-02T12:01:00", + "doc_id": "doc_123", + "retrieved_docs": ["node_id1", "node_id2"], + "adapter_metadata": { + "adapter_instance_id": "12584e6a-6898-41c3-b32a-fe721f8c3a98", + "type": "Postgres", + "name": "PG1", + "dimension": 1536 + }, + "metrics": { + "operation": "QUERY", + "count": 5, + "latency": 0.5 + } + }, + { + "operation_id": "10584e6a-6898-41c3-a32a-fe721f8c3a98", + "process": "EMBEDDING", + "sub_process": "EMBEDDING", + "context": "test_file", + "status": "SUCCESS", + "start_time": "2024-07-02T12:00:00", + "end_time": "2024-07-02T12:01:00", + "adapter_metadata": { + "adapter_instance_id": "10584e6a-2898-41c3-b32a-fe721f8c3a98", + "type": "OpenAI", + "name": "OAI Embedding", + "model": "text-embedding-ada-002", + "deployment_name": "Pandora-one-text-embedding-ada-002", + "embed_batch_size": 5 + }, + "metrics": { + "tokens": 15, + "latency": 0.5, + "cost": 0.02 + } + }, + { + "operation_id": "10584e6a-2898-41c3-b32a-fe721f8c3a92", + "process": "X2TEXT", + "sub_process": "X2TEXT", + "context": "test_file", + "status": "SUCCESS", + "start_time": "2024-07-02T12:00:00", + "end_time": "2024-07-02T12:01:00", + "adapter_metadata": { + "adapter_instance_id": "10584e6a-2898-41c3-b32a-fe721f8c3a91", + "type": "LLMWhisperer", + "name": "line_printer LLMW", + "processing_mode": "text", + "output_mode": "line_printer" + }, + "metrics": { + "pages_extracted": 10, + "latency": 0.5 + } + } + ], + "user_feedback": [ + { + "type": "RATING", + "rating": 5.0, + "feedback": "Great!" 
# Top-level document fields compared against the seed data, in check order.
_TOP_LEVEL_FIELDS = (
    "org_id",
    "run_id",
    "project_id",
    "start_time",
    "end_time",
    "owner",
    "agent",
    "agent_name",
    "agent_id",
    "status",
    "api_key",
)


def _load_seed(input_file):
    """Return the JSON content of ``input_file``."""
    with open(input_file) as file:
        return json.load(file)


def _assert_top_level_fields(doc, expected):
    """Assert that every field in ``_TOP_LEVEL_FIELDS`` matches ``expected``."""
    for field in _TOP_LEVEL_FIELDS:
        assert doc[field] == expected[field]


@pytest.mark.parametrize("input_file", SEED_DATA)
def test_add_metrics(metrics_agg: MetricsAggregator, mocker, es_client, input_file):
    """Save the seed document through ``add_metrics`` and read it back."""
    seed = _load_seed(input_file)

    spy = mocker.patch.object(
        metrics_agg, "add_metrics", wraps=metrics_agg.add_metrics
    )
    metrics_agg.add_metrics(metrics=seed, index=TEST_INDEX_NAME)
    spy.assert_called_once_with(metrics=seed, index=TEST_INDEX_NAME)

    # Refresh so the document just saved is visible to the search below.
    es_client.indices.refresh(index=TEST_INDEX_NAME)
    search_result = es_client.search(
        index=TEST_INDEX_NAME, body={"query": {"match_all": {}}}
    )

    # At least one record must be in the index before inspecting the first hit.
    assert search_result["hits"]["total"]["value"] > 0
    _assert_top_level_fields(search_result["hits"]["hits"][0]["_source"], seed)


@pytest.mark.parametrize("input_file", SEED_DATA)
def test_query_metrics(metrics_agg: MetricsAggregator, mocker, es_client, input_file):
    """Index the seed document directly and fetch it with ``query_metrics``."""
    seed = _load_seed(input_file)

    es_client.index(index=TEST_INDEX_NAME, body=seed, refresh=True)

    response = metrics_agg.query_metrics(
        run_id=seed["run_id"], index=TEST_INDEX_NAME
    )

    hits = response["hits"]["hits"]
    assert len(hits) > 0
    _assert_top_level_fields(hits[0]["_source"], seed)


@pytest.mark.parametrize("input_file", SEED_DATA)
def test_metrics_capture(metrics_agg: MetricsAggregator, mocker, es_client, input_file):
    """Call a function decorated with ``capture_metrics`` built from the seed."""
    seed = _load_seed(input_file)

    @capture_metrics(index=TEST_INDEX_NAME, **seed)
    def waited_add(a, b):
        total = a + b
        time.sleep(1)
        return total

    waited_add(2, 3)
    # TODO: Make assertions