From 12881d045ca20bd4a3d353f9cf6b2acaaf17af5d Mon Sep 17 00:00:00 2001 From: danyalxahid-askui Date: Mon, 18 Aug 2025 14:53:58 +0200 Subject: [PATCH 01/12] feat(excel): add support for Excel file processing in VisionAgent - Introduced `ExcelSource` class to handle Excel files as input sources. - Updated `AgentBase` and related APIs to accept `ExcelSource` alongside existing image and PDF sources. - Implemented error handling for unsupported Excel processing in specific models. - Added tests for Excel file handling and processing. - Created utility functions for converting Excel content to markdown format. - Added dummy Excel file for testing purposes. --- pdm.lock | 406 ++++++++++++++++++++- pyproject.toml | 84 ++--- src/askui/agent_base.py | 14 +- src/askui/models/anthropic/messages_api.py | 7 +- src/askui/models/askui/google_genai_api.py | 11 + src/askui/models/askui/inference_api.py | 7 +- src/askui/models/openrouter/model.py | 7 +- src/askui/models/ui_tars_ep/ui_tars_api.py | 5 +- src/askui/utils/excel_utils.py | 42 +++ src/askui/utils/markdown_utils.py | 24 ++ src/askui/utils/source_utils.py | 20 +- tests/conftest.py | 12 + tests/e2e/agent/test_get.py | 41 +++ tests/fixtures/excel/dummy.xlsx | Bin 0 -> 10175 bytes 14 files changed, 611 insertions(+), 69 deletions(-) create mode 100644 src/askui/utils/excel_utils.py create mode 100644 src/askui/utils/markdown_utils.py create mode 100644 tests/fixtures/excel/dummy.xlsx diff --git a/pdm.lock b/pdm.lock index e4020b63..dc05c4e4 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "all", "android", "chat", "dev", "mcp", "pynput", "test", "web"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:d6c650ba146a47cd38d87783aa405fd730daf80a573ce3d618dd2fa7b190db43" +content_hash = "sha256:8809525501dc5a4160128323355483d98a968d1237bc67a87503882d18e06a55" [[metadata.targets]] requires_python = ">=3.10" @@ -108,6 +108,21 @@ files = [ {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, ] +[[package]] +name = "beautifulsoup4" +version = "4.13.4" +requires_python = ">=3.7.0" +summary = "Screen-scraping library" +groups = ["default"] +dependencies = [ + "soupsieve>1.2", + "typing-extensions>=4.0.0", +] +files = [ + {file = "beautifulsoup4-4.13.4-py3-none-any.whl", hash = "sha256:9bbbb14bfde9d79f38b8cd5f8c7c85f4b8f2523190ebed90e950a8dea4cb1c4b"}, + {file = "beautifulsoup4-4.13.4.tar.gz", hash = "sha256:dbb3c4e1ceae6aefebdaf2423247260cd062430a410e38c66f2baa50a8437195"}, +] + [[package]] name = "black" version = "25.1.0" @@ -294,7 +309,7 @@ name = "click" version = "8.1.8" requires_python = ">=3.7" summary = "Composable command line interface toolkit" -groups = ["all", "chat", "dev", "mcp"] +groups = ["default", "all", "chat", "dev", "mcp"] dependencies = [ "colorama; platform_system == \"Windows\"", "importlib-metadata; python_version < \"3.8\"", @@ -316,6 +331,21 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "coloredlogs" +version = "15.0.1" +requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +summary = "Colored terminal output for Python's logging module" +groups = ["default"] +marker = "python_version > \"3.9\"" +dependencies = [ + "humanfriendly>=9.1", +] +files = [ + {file = "coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934"}, + {file = "coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0"}, +] + [[package]] name = "coverage" version = "7.8.0" @@ -536,6 +566,17 @@ files = [ {file = "datamodel_code_generator-0.31.2.tar.gz", hash = "sha256:47887b8aa6fd69865e07e2893c1e76e34dae753b9a97f1020357af8337bc4cdb"}, ] +[[package]] +name = "defusedxml" +version = "0.7.1" +requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +summary = "XML bomb protection for Python stdlib modules" +groups = ["default"] +files = [ + {file = "defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61"}, + {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, +] + [[package]] name = "distro" version = "1.9.0" @@ -596,6 +637,17 @@ files = [ {file = "email_validator-2.2.0.tar.gz", hash = "sha256:cb690f344c617a714f22e66ae771445a1ceb46821152df8e165c5f9a364582b7"}, ] +[[package]] +name = "et-xmlfile" +version = "2.0.0" +requires_python = ">=3.8" +summary = "An implementation of lxml.xmlfile for the standard library" +groups = ["default"] +files = [ + {file = "et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa"}, + {file = "et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54"}, +] + [[package]] name = "evdev" version = "1.9.2" @@ -689,6 +741,17 @@ files = [ {file = "filetype-1.2.0.tar.gz", hash = "sha256:66b56cd6474bf41d8c54660347d37afcc3f7d1970648de365c102ef77548aadb"}, ] +[[package]] +name = "flatbuffers" +version = "25.2.10" +summary = "The FlatBuffers serialization format for Python" +groups = ["default"] +marker = "python_version > \"3.9\"" +files = [ + {file = "flatbuffers-25.2.10-py2.py3-none-any.whl", hash = "sha256:ebba5f4d5ea615af3f7fd70fc310636fbb2bbd1f566ac0a23d98dd412de50051"}, + {file = "flatbuffers-25.2.10.tar.gz", hash = "sha256:97e451377a41262f8d9bd4295cc836133415cc03d8cb966410a4af92eb00d26e"}, +] + [[package]] name = "fsspec" version = "2025.3.2" @@ -1014,6 +1077,23 @@ files = [ {file = "huggingface_hub-0.30.1.tar.gz", hash = "sha256:f379e8b8d0791295602538856638460ae3cf679c7f304201eb80fb98c771950e"}, ] +[[package]] +name = "humanfriendly" +version = "10.0" +requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +summary = "Human friendly output for text interfaces using Python" +groups = ["default"] +marker = "python_version > \"3.9\"" +dependencies = [ + "monotonic; python_version == \"2.7\"", + "pyreadline3; sys_platform == \"win32\" and python_version >= \"3.8\"", + "pyreadline; sys_platform == \"win32\" and python_version < \"3.8\"", +] +files = [ + {file = "humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477"}, + {file = "humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc"}, +] + [[package]] name = "idna" version = "3.10" @@ -1180,6 +1260,29 @@ files = [ {file = "jsonschema_specifications-2025.4.1.tar.gz", hash = "sha256:630159c9f4dbea161a6a2205c3011cc4f18ff381b189fff48bb39b9bf26ae608"}, ] +[[package]] +name = "magika" +version = "0.6.2" +requires_python = ">=3.8" +summary = "A tool to determine the content type of a file with deep learning" +groups = ["default"] +dependencies = [ + "click>=8.1.7", + "numpy>=1.24; python_version < \"3.12\"", + "numpy>=1.26; python_version >= \"3.12\" and python_version < \"3.13\"", + "numpy>=2.1.0; python_version >= \"3.13\"", + "onnxruntime<1.20.0,>=1.17.0; python_version <= \"3.9\"", + "onnxruntime>=1.17.0; python_version > \"3.9\"", + "python-dotenv>=1.0.1", +] +files = [ + {file = "magika-0.6.2-py3-none-any.whl", hash = "sha256:5ef72fbc07723029b3684ef81454bc224ac5f60986aa0fc5a28f4456eebcb5b2"}, + {file = "magika-0.6.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:9109309328a1553886c8ff36c2ee9a5e9cfd36893ad81b65bf61a57debdd9d0e"}, + {file = "magika-0.6.2-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:57cd1d64897634d15de552bd6b3ae9c6ff6ead9c60d384dc46497c08288e4559"}, + {file = "magika-0.6.2-py3-none-win_amd64.whl", hash = "sha256:711f427a633e0182737dcc2074748004842f870643585813503ff2553b973b9f"}, + {file = "magika-0.6.2.tar.gz", hash = "sha256:37eb6ae8020f6e68f231bc06052c0a0cbe8e6fa27492db345e8dc867dbceb067"}, +] + [[package]] name = "markdown-it-py" version = "3.0.0" @@ -1194,6 +1297,58 @@ files = [ {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, ] +[[package]] +name = "markdownify" +version = "1.2.0" +summary = "Convert HTML to markdown." +groups = ["default"] +dependencies = [ + "beautifulsoup4<5,>=4.9", + "six<2,>=1.15", +] +files = [ + {file = "markdownify-1.2.0-py3-none-any.whl", hash = "sha256:48e150a1c4993d4d50f282f725c0111bd9eb25645d41fa2f543708fd44161351"}, + {file = "markdownify-1.2.0.tar.gz", hash = "sha256:f6c367c54eb24ee953921804dfe6d6575c5e5b42c643955e7242034435de634c"}, +] + +[[package]] +name = "markitdown" +version = "0.1.2" +requires_python = ">=3.10" +summary = "Utility tool for converting various files to Markdown" +groups = ["default"] +dependencies = [ + "beautifulsoup4", + "charset-normalizer", + "defusedxml", + "magika~=0.6.1", + "markdownify", + "requests", +] +files = [ + {file = "markitdown-0.1.2-py3-none-any.whl", hash = "sha256:4881f0768794ffccb52d09dd86498813a6896ba9639b4fc15512817f56ed9d74"}, + {file = "markitdown-0.1.2.tar.gz", hash = "sha256:85fe108a92bd18f317e75a36cf567a6fa812072612a898abf8c156d5d74c13c4"}, +] + +[[package]] +name = "markitdown" +version = "0.1.2" +extras = ["xls", "xlsx"] +requires_python = ">=3.10" +summary = "Utility tool for converting various files to Markdown" +groups = ["default"] +dependencies = [ + "markitdown==0.1.2", + "openpyxl", + "pandas", + "pandas", + "xlrd", +] +files = [ + {file = "markitdown-0.1.2-py3-none-any.whl", hash = "sha256:4881f0768794ffccb52d09dd86498813a6896ba9639b4fc15512817f56ed9d74"}, + {file = "markitdown-0.1.2.tar.gz", hash = "sha256:85fe108a92bd18f317e75a36cf567a6fa812072612a898abf8c156d5d74c13c4"}, +] + [[package]] name = "markupsafe" version = "3.0.2" @@ -1300,6 +1455,17 @@ files = [ {file = "more_itertools-10.7.0.tar.gz", hash = "sha256:9fddd5403be01a94b204faadcff459ec3568cf110265d3c54323e1e866ad29d3"}, ] +[[package]] +name = "mpmath" +version = "1.3.0" +summary = "Python library for arbitrary-precision floating-point arithmetic" +groups = ["default"] +marker = "python_version > \"3.9\"" +files = [ + {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, + {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, +] + [[package]] name = "mss" version = "10.0.0" @@ -1362,6 +1528,106 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] +[[package]] +name = "numpy" +version = "2.2.6" +requires_python = ">=3.10" +summary = "Fundamental package for array computing in Python" +groups = ["default"] +files = [ + {file = "numpy-2.2.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b412caa66f72040e6d268491a59f2c43bf03eb6c96dd8f0307829feb7fa2b6fb"}, + {file = "numpy-2.2.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e41fd67c52b86603a91c1a505ebaef50b3314de0213461c7a6e99c9a3beff90"}, + {file = "numpy-2.2.6-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:37e990a01ae6ec7fe7fa1c26c55ecb672dd98b19c3d0e1d1f326fa13cb38d163"}, + {file = "numpy-2.2.6-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:5a6429d4be8ca66d889b7cf70f536a397dc45ba6faeb5f8c5427935d9592e9cf"}, + {file = "numpy-2.2.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:efd28d4e9cd7d7a8d39074a4d44c63eda73401580c5c76acda2ce969e0a38e83"}, + {file = "numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc7b73d02efb0e18c000e9ad8b83480dfcd5dfd11065997ed4c6747470ae8915"}, + {file = "numpy-2.2.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:74d4531beb257d2c3f4b261bfb0fc09e0f9ebb8842d82a7b4209415896adc680"}, + {file = "numpy-2.2.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8fc377d995680230e83241d8a96def29f204b5782f371c532579b4f20607a289"}, + {file = "numpy-2.2.6-cp310-cp310-win32.whl", hash = "sha256:b093dd74e50a8cba3e873868d9e93a85b78e0daf2e98c6797566ad8044e8363d"}, + {file = "numpy-2.2.6-cp310-cp310-win_amd64.whl", hash = "sha256:f0fd6321b839904e15c46e0d257fdd101dd7f530fe03fd6359c1ea63738703f3"}, + {file = "numpy-2.2.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f9f1adb22318e121c5c69a09142811a201ef17ab257a1e66ca3025065b7f53ae"}, + {file = "numpy-2.2.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c820a93b0255bc360f53eca31a0e676fd1101f673dda8da93454a12e23fc5f7a"}, + {file = "numpy-2.2.6-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3d70692235e759f260c3d837193090014aebdf026dfd167834bcba43e30c2a42"}, + {file = "numpy-2.2.6-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:481b49095335f8eed42e39e8041327c05b0f6f4780488f61286ed3c01368d491"}, + {file = "numpy-2.2.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b64d8d4d17135e00c8e346e0a738deb17e754230d7e0810ac5012750bbd85a5a"}, + {file = "numpy-2.2.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba10f8411898fc418a521833e014a77d3ca01c15b0c6cdcce6a0d2897e6dbbdf"}, + {file = "numpy-2.2.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bd48227a919f1bafbdda0583705e547892342c26fb127219d60a5c36882609d1"}, + {file = "numpy-2.2.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9551a499bf125c1d4f9e250377c1ee2eddd02e01eac6644c080162c0c51778ab"}, + {file = "numpy-2.2.6-cp311-cp311-win32.whl", hash = "sha256:0678000bb9ac1475cd454c6b8c799206af8107e310843532b04d49649c717a47"}, + {file = "numpy-2.2.6-cp311-cp311-win_amd64.whl", hash = "sha256:e8213002e427c69c45a52bbd94163084025f533a55a59d6f9c5b820774ef3303"}, + {file = "numpy-2.2.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41c5a21f4a04fa86436124d388f6ed60a9343a6f767fced1a8a71c3fbca038ff"}, + {file = "numpy-2.2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de749064336d37e340f640b05f24e9e3dd678c57318c7289d222a8a2f543e90c"}, + {file = "numpy-2.2.6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:894b3a42502226a1cac872f840030665f33326fc3dac8e57c607905773cdcde3"}, + {file = "numpy-2.2.6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:71594f7c51a18e728451bb50cc60a3ce4e6538822731b2933209a1f3614e9282"}, + {file = "numpy-2.2.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2618db89be1b4e05f7a1a847a9c1c0abd63e63a1607d892dd54668dd92faf87"}, + {file = "numpy-2.2.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd83c01228a688733f1ded5201c678f0c53ecc1006ffbc404db9f7a899ac6249"}, + {file = "numpy-2.2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:37c0ca431f82cd5fa716eca9506aefcabc247fb27ba69c5062a6d3ade8cf8f49"}, + {file = "numpy-2.2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fe27749d33bb772c80dcd84ae7e8df2adc920ae8297400dabec45f0dedb3f6de"}, + {file = "numpy-2.2.6-cp312-cp312-win32.whl", hash = "sha256:4eeaae00d789f66c7a25ac5f34b71a7035bb474e679f410e5e1a94deb24cf2d4"}, + {file = "numpy-2.2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c1f9540be57940698ed329904db803cf7a402f3fc200bfe599334c9bd84a40b2"}, + {file = "numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84"}, + {file = "numpy-2.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b"}, + {file = "numpy-2.2.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d"}, + {file = "numpy-2.2.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566"}, + {file = "numpy-2.2.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f"}, + {file = "numpy-2.2.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f"}, + {file = "numpy-2.2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868"}, + {file = "numpy-2.2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d"}, + {file = "numpy-2.2.6-cp313-cp313-win32.whl", hash = "sha256:5beb72339d9d4fa36522fc63802f469b13cdbe4fdab4a288f0c441b74272ebfd"}, + {file = "numpy-2.2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b0544343a702fa80c95ad5d3d608ea3599dd54d4632df855e4c8d24eb6ecfa1c"}, + {file = "numpy-2.2.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6"}, + {file = "numpy-2.2.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda"}, + {file = "numpy-2.2.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40"}, + {file = "numpy-2.2.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8"}, + {file = "numpy-2.2.6-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f"}, + {file = "numpy-2.2.6-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa"}, + {file = "numpy-2.2.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571"}, + {file = "numpy-2.2.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1"}, + {file = "numpy-2.2.6-cp313-cp313t-win32.whl", hash = "sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff"}, + {file = "numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06"}, + {file = "numpy-2.2.6-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0b605b275d7bd0c640cad4e5d30fa701a8d59302e127e5f79138ad62762c3e3d"}, + {file = "numpy-2.2.6-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:7befc596a7dc9da8a337f79802ee8adb30a552a94f792b9c9d18c840055907db"}, + {file = "numpy-2.2.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce47521a4754c8f4593837384bd3424880629f718d87c5d44f8ed763edd63543"}, + {file = "numpy-2.2.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d042d24c90c41b54fd506da306759e06e568864df8ec17ccc17e9e884634fd00"}, + {file = "numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd"}, +] + +[[package]] +name = "onnxruntime" +version = "1.22.1" +requires_python = ">=3.10" +summary = "ONNX Runtime is a runtime accelerator for Machine Learning models" +groups = ["default"] +marker = "python_version > \"3.9\"" +dependencies = [ + "coloredlogs", + "flatbuffers", + "numpy>=1.21.6", + "packaging", + "protobuf", + "sympy", +] +files = [ + {file = "onnxruntime-1.22.1-cp310-cp310-macosx_13_0_universal2.whl", hash = "sha256:80e7f51da1f5201c1379b8d6ef6170505cd800e40da216290f5e06be01aadf95"}, + {file = "onnxruntime-1.22.1-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b89ddfdbbdaf7e3a59515dee657f6515601d55cb21a0f0f48c81aefc54ff1b73"}, + {file = "onnxruntime-1.22.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bddc75868bcf6f9ed76858a632f65f7b1846bdcefc6d637b1e359c2c68609964"}, + {file = "onnxruntime-1.22.1-cp310-cp310-win_amd64.whl", hash = "sha256:01e2f21b2793eb0c8642d2be3cee34cc7d96b85f45f6615e4e220424158877ce"}, + {file = "onnxruntime-1.22.1-cp311-cp311-macosx_13_0_universal2.whl", hash = "sha256:f4581bccb786da68725d8eac7c63a8f31a89116b8761ff8b4989dc58b61d49a0"}, + {file = "onnxruntime-1.22.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7ae7526cf10f93454beb0f751e78e5cb7619e3b92f9fc3bd51aa6f3b7a8977e5"}, + {file = "onnxruntime-1.22.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f6effa1299ac549a05c784d50292e3378dbbf010346ded67400193b09ddc2f04"}, + {file = "onnxruntime-1.22.1-cp311-cp311-win_amd64.whl", hash = "sha256:f28a42bb322b4ca6d255531bb334a2b3e21f172e37c1741bd5e66bc4b7b61f03"}, + {file = "onnxruntime-1.22.1-cp312-cp312-macosx_13_0_universal2.whl", hash = "sha256:a938d11c0dc811badf78e435daa3899d9af38abee950d87f3ab7430eb5b3cf5a"}, + {file = "onnxruntime-1.22.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:984cea2a02fcc5dfea44ade9aca9fe0f7a8a2cd6f77c258fc4388238618f3928"}, + {file = "onnxruntime-1.22.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2d39a530aff1ec8d02e365f35e503193991417788641b184f5b1e8c9a6d5ce8d"}, + {file = "onnxruntime-1.22.1-cp312-cp312-win_amd64.whl", hash = "sha256:6a64291d57ea966a245f749eb970f4fa05a64d26672e05a83fdb5db6b7d62f87"}, + {file = "onnxruntime-1.22.1-cp313-cp313-macosx_13_0_universal2.whl", hash = "sha256:d29c7d87b6cbed8fecfd09dca471832384d12a69e1ab873e5effbb94adc3e966"}, + {file = "onnxruntime-1.22.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:460487d83b7056ba98f1f7bac80287224c31d8149b15712b0d6f5078fcc33d0f"}, + {file = "onnxruntime-1.22.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b0c37070268ba4e02a1a9d28560cd00cd1e94f0d4f275cbef283854f861a65fa"}, + {file = "onnxruntime-1.22.1-cp313-cp313-win_amd64.whl", hash = "sha256:70980d729145a36a05f74b573435531f55ef9503bcda81fc6c3d6b9306199982"}, + {file = "onnxruntime-1.22.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:33a7980bbc4b7f446bac26c3785652fe8730ed02617d765399e89ac7d44e0f7d"}, + {file = "onnxruntime-1.22.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6e7e823624b015ea879d976cbef8bfaed2f7e2cc233d7506860a76dd37f8f381"}, +] + [[package]] name = "openai" version = "1.85.0" @@ -1397,6 +1663,20 @@ files = [ {file = "openapi_pydantic-0.5.1.tar.gz", hash = "sha256:ff6835af6bde7a459fb93eb93bb92b8749b754fc6e51b2f1590a19dc3005ee0d"}, ] +[[package]] +name = "openpyxl" +version = "3.1.5" +requires_python = ">=3.8" +summary = "A Python library to read/write Excel 2010 xlsx/xlsm files" +groups = ["default"] +dependencies = [ + "et-xmlfile", +] +files = [ + {file = "openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2"}, + {file = "openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050"}, +] + [[package]] name = "packaging" version = "24.2" @@ -1408,6 +1688,58 @@ files = [ {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, ] +[[package]] +name = "pandas" +version = "2.3.1" +requires_python = ">=3.9" +summary = "Powerful data structures for data analysis, time series, and statistics" +groups = ["default"] +dependencies = [ + "numpy>=1.22.4; python_version < \"3.11\"", + "numpy>=1.23.2; python_version == \"3.11\"", + "numpy>=1.26.0; python_version >= \"3.12\"", + "python-dateutil>=2.8.2", + "pytz>=2020.1", + "tzdata>=2022.7", +] +files = [ + {file = "pandas-2.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:22c2e866f7209ebc3a8f08d75766566aae02bcc91d196935a1d9e59c7b990ac9"}, + {file = "pandas-2.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3583d348546201aff730c8c47e49bc159833f971c2899d6097bce68b9112a4f1"}, + {file = "pandas-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f951fbb702dacd390561e0ea45cdd8ecfa7fb56935eb3dd78e306c19104b9b0"}, + {file = "pandas-2.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd05b72ec02ebfb993569b4931b2e16fbb4d6ad6ce80224a3ee838387d83a191"}, + {file = "pandas-2.3.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:1b916a627919a247d865aed068eb65eb91a344b13f5b57ab9f610b7716c92de1"}, + {file = "pandas-2.3.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:fe67dc676818c186d5a3d5425250e40f179c2a89145df477dd82945eaea89e97"}, + {file = "pandas-2.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:2eb789ae0274672acbd3c575b0598d213345660120a257b47b5dafdc618aec83"}, + {file = "pandas-2.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2b0540963d83431f5ce8870ea02a7430adca100cec8a050f0811f8e31035541b"}, + {file = "pandas-2.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fe7317f578c6a153912bd2292f02e40c1d8f253e93c599e82620c7f69755c74f"}, + {file = "pandas-2.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6723a27ad7b244c0c79d8e7007092d7c8f0f11305770e2f4cd778b3ad5f9f85"}, + {file = "pandas-2.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3462c3735fe19f2638f2c3a40bd94ec2dc5ba13abbb032dd2fa1f540a075509d"}, + {file = "pandas-2.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:98bcc8b5bf7afed22cc753a28bc4d9e26e078e777066bc53fac7904ddef9a678"}, + {file = "pandas-2.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4d544806b485ddf29e52d75b1f559142514e60ef58a832f74fb38e48d757b299"}, + {file = "pandas-2.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:b3cd4273d3cb3707b6fffd217204c52ed92859533e31dc03b7c5008aa933aaab"}, + {file = "pandas-2.3.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:689968e841136f9e542020698ee1c4fbe9caa2ed2213ae2388dc7b81721510d3"}, + {file = "pandas-2.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:025e92411c16cbe5bb2a4abc99732a6b132f439b8aab23a59fa593eb00704232"}, + {file = "pandas-2.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b7ff55f31c4fcb3e316e8f7fa194566b286d6ac430afec0d461163312c5841e"}, + {file = "pandas-2.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7dcb79bf373a47d2a40cf7232928eb7540155abbc460925c2c96d2d30b006eb4"}, + {file = "pandas-2.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:56a342b231e8862c96bdb6ab97170e203ce511f4d0429589c8ede1ee8ece48b8"}, + {file = "pandas-2.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ca7ed14832bce68baef331f4d7f294411bed8efd032f8109d690df45e00c4679"}, + {file = "pandas-2.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:ac942bfd0aca577bef61f2bc8da8147c4ef6879965ef883d8e8d5d2dc3e744b8"}, + {file = "pandas-2.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9026bd4a80108fac2239294a15ef9003c4ee191a0f64b90f170b40cfb7cf2d22"}, + {file = "pandas-2.3.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6de8547d4fdb12421e2d047a2c446c623ff4c11f47fddb6b9169eb98ffba485a"}, + {file = "pandas-2.3.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:782647ddc63c83133b2506912cc6b108140a38a37292102aaa19c81c83db2928"}, + {file = "pandas-2.3.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ba6aff74075311fc88504b1db890187a3cd0f887a5b10f5525f8e2ef55bfdb9"}, + {file = "pandas-2.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e5635178b387bd2ba4ac040f82bc2ef6e6b500483975c4ebacd34bec945fda12"}, + {file = "pandas-2.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6f3bf5ec947526106399a9e1d26d40ee2b259c66422efdf4de63c848492d91bb"}, + {file = "pandas-2.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:1c78cf43c8fde236342a1cb2c34bcff89564a7bfed7e474ed2fffa6aed03a956"}, + {file = "pandas-2.3.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:8dfc17328e8da77be3cf9f47509e5637ba8f137148ed0e9b5241e1baf526e20a"}, + {file = "pandas-2.3.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:ec6c851509364c59a5344458ab935e6451b31b818be467eb24b0fe89bd05b6b9"}, + {file = "pandas-2.3.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:911580460fc4884d9b05254b38a6bfadddfcc6aaef856fb5859e7ca202e45275"}, + {file = "pandas-2.3.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f4d6feeba91744872a600e6edbbd5b033005b431d5ae8379abee5bcfa479fab"}, + {file = "pandas-2.3.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fe37e757f462d31a9cd7580236a82f353f5713a80e059a29753cf938c6775d96"}, + {file = "pandas-2.3.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5db9637dbc24b631ff3707269ae4559bce4b7fd75c1c4d7e13f40edc42df4444"}, + {file = "pandas-2.3.1.tar.gz", hash = "sha256:0a95b9ac964fe83ce317827f80304d37388ea77616b1425f0ae41c9d2d0d7bb2"}, +] + [[package]] name = "pathspec" version = "0.12.1" @@ -1905,6 +2237,18 @@ files = [ {file = "pyperclip-1.9.0.tar.gz", hash = "sha256:b7de0142ddc81bfc5c7507eea19da920b92252b548b96186caf94a5e2527d310"}, ] +[[package]] +name = "pyreadline3" +version = "3.5.4" +requires_python = ">=3.8" +summary = "A python implementation of GNU readline." +groups = ["default"] +marker = "sys_platform == \"win32\" and python_version > \"3.9\"" +files = [ + {file = "pyreadline3-3.5.4-py3-none-any.whl", hash = "sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6"}, + {file = "pyreadline3-3.5.4.tar.gz", hash = "sha256:8d57d53039a1c75adba8e50dd3d992b28143480816187ea5efbd5c78e6c885b7"}, +] + [[package]] name = "pytest" version = "8.3.5" @@ -2032,6 +2376,16 @@ files = [ {file = "python_xlib-0.33-py2.py3-none-any.whl", hash = "sha256:c3534038d42e0df2f1392a1b30a15a4ff5fdc2b86cfa94f072bf11b10a164398"}, ] +[[package]] +name = "pytz" +version = "2025.2" +summary = "World timezone definitions, modern and historical" +groups = ["default"] +files = [ + {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, + {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, +] + [[package]] name = "pywin32" version = "311" @@ -2385,6 +2739,17 @@ files = [ {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, ] +[[package]] +name = "soupsieve" +version = "2.7" +requires_python = ">=3.8" +summary = "A modern CSS selector implementation for Beautiful Soup." +groups = ["default"] +files = [ + {file = "soupsieve-2.7-py3-none-any.whl", hash = "sha256:6e60cc5c1ffaf1cebcc12e8188320b72071e922c2e897f737cadce79ad5d30c4"}, + {file = "soupsieve-2.7.tar.gz", hash = "sha256:ad282f9b6926286d2ead4750552c8a6142bc4c783fd66b0293547c8fe6ae126a"}, +] + [[package]] name = "sse-starlette" version = "2.4.1" @@ -2414,6 +2779,21 @@ files = [ {file = "starlette-0.46.2.tar.gz", hash = "sha256:7f7361f34eed179294600af672f565727419830b54b7b084efe44bb82d2fccd5"}, ] +[[package]] +name = "sympy" +version = "1.14.0" +requires_python = ">=3.9" +summary = "Computer algebra system (CAS) in Python" +groups = ["default"] +marker = "python_version > \"3.9\"" +dependencies = [ + "mpmath<1.4,>=1.1.0", +] +files = [ + {file = "sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5"}, + {file = "sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517"}, +] + [[package]] name = "tenacity" version = "9.1.2" @@ -2590,6 +2970,17 @@ files = [ {file = "typing_inspection-0.4.0.tar.gz", hash = "sha256:9765c87de36671694a67904bf2c96e395be9c6439bb6c87b5142569dcdd65122"}, ] +[[package]] +name = "tzdata" +version = "2025.2" +requires_python = ">=2" +summary = "Provider of IANA time zone data" +groups = ["default"] +files = [ + {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, + {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, +] + [[package]] name = "urllib3" version = "2.3.0" @@ -2689,3 +3080,14 @@ files = [ {file = "winregistry-2.1.0-py3-none-any.whl", hash = "sha256:7591bc93ba5513b389a0234dfa665ac0752e964bddf44757c266a3b754c941e1"}, {file = "winregistry-2.1.0.tar.gz", hash = "sha256:370c2872f9cf9a512ed344039efae2a2943eb36355bc867336ff049e0f9d1db4"}, ] + +[[package]] +name = "xlrd" +version = "2.0.2" +requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +summary = "Library for developers to extract data from Microsoft Excel (tm) .xls spreadsheet files" +groups = ["default"] +files = [ + {file = "xlrd-2.0.2-py2.py3-none-any.whl", hash = "sha256:ea762c3d29f4cca48d82df517b6d89fbce4db3107f9d78713e48cd321d5c9aa9"}, + {file = "xlrd-2.0.2.tar.gz", hash = "sha256:08b5e25de58f21ce71dc7db3b3b8106c1fa776f3024c54e45b45b374e89234c9"}, +] diff --git a/pyproject.toml b/pyproject.toml index 04295678..cdd645f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,9 +1,7 @@ [project] name = "askui" description = "Automate computer tasks in Python" -authors = [ - {name = "askui GmbH", email = "info@askui.com"}, -] +authors = [{ name = "askui GmbH", email = "info@askui.com" }] dependencies = [ "anthropic>=0.54.0", "fastapi>=0.115.12", @@ -26,10 +24,11 @@ dependencies = [ "protobuf>=6.31.1", "google-genai>=1.20.0", "filetype>=1.2.0", + "markitdown[xls,xlsx]>=0.1.2", ] requires-python = ">=3.10" readme = "README.md" -license = {text = "MIT"} +license = { text = "MIT" } dynamic = ["version"] [build-system] @@ -60,11 +59,7 @@ typecheck = "mypy" "typecheck:all" = "mypy ." "chat:api" = "uvicorn askui.chat.api.app:app --reload --port 9261" "mcp:dev" = "mcp dev src/askui/mcp/__init__.py" -"qa:fix" = { composite = [ - "typecheck:all", - "format", - "lint:fix", -] } +"qa:fix" = { composite = ["typecheck:all", "format", "lint:fix"] } "grpc:gen" = "bash scripts/grpc-gen.sh" "json:gen" = "datamodel-codegen --output-model-type pydantic_v2.BaseModel --input src/askui/tools/askui/askui_ui_controller_grpc/json_schema/ --input-file-type jsonschema --output src/askui/tools/askui/askui_ui_controller_grpc/generated/" @@ -86,10 +81,7 @@ test = [ "types-pynput>=1.8.1.20250318", "playwright>=1.41.0", ] -dev = [ - "datamodel-code-generator>=0.31.2", - "grpcio-tools>=1.73.1", -] +dev = ["datamodel-code-generator>=0.31.2", "grpcio-tools>=1.73.1"] [tool.pytest.ini_options] @@ -123,7 +115,7 @@ exclude = [ "dist", "node_modules", "venv", - "src/askui/tools/askui/askui_ui_controller_grpc/generated" + "src/askui/tools/askui/askui_ui_controller_grpc/generated", ] # Same as Black. @@ -142,26 +134,26 @@ line-ending = "auto" # Enable all rules select = ["ALL"] ignore = [ - "ANN", # Type annotations - handled by mypy + "ANN", # Type annotations - handled by mypy "COM812", # Unused import - "D", # Documentation - we'll handle this separately - "ERA", # Commented out code - "FBT", # Boolean trap - "ICN", # Import conventions - "ISC", # Implicit string concatenation - "N", # Naming - "PGH", # PyGithub - "PL", # Pylint - "PT", # Pytest - "Q", # Quotes - "RUF", # Ruff-specific rules - "S", # Bandit - "SIM", # Simplify - "T", # Pycodestyle - "TID", # isort - "UP", # Pyupgrade - "W", # Pycodestyle - "YTT", # flake8-2020 + "D", # Documentation - we'll handle this separately + "ERA", # Commented out code + "FBT", # Boolean trap + "ICN", # Import conventions + "ISC", # Implicit string concatenation + "N", # Naming + "PGH", # PyGithub + "PL", # Pylint + "PT", # Pytest + "Q", # Quotes + "RUF", # Ruff-specific rules + "S", # Bandit + "SIM", # Simplify + "T", # Pycodestyle + "TID", # isort + "UP", # Pyupgrade + "W", # Pycodestyle + "YTT", # flake8-2020 ] # Allow autofix for all enabled rules @@ -194,7 +186,9 @@ dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" "tests/e2e/agent/test_locate_with_relations.py" = ["E501"] "tests/unit/locators/test_locators.py" = ["E501"] "tests/unit/locators/serializers/test_askui_locator_serializer.py" = ["E501"] -"tests/unit/locators/serializers/test_locator_string_representation.py" = ["E501"] +"tests/unit/locators/serializers/test_locator_string_representation.py" = [ + "E501", +] "tests/unit/utils/test_image_utils.py" = ["E501"] [tool.ruff.lint.flake8-quotes] @@ -208,20 +202,8 @@ known-third-party = ["pytest", "mypy"] [project.optional-dependencies] all = ["askui[android,chat,mcp,pynput,web]"] -android = [ - "pure-python-adb>=0.3.0.dev0" -] -chat = [ - "askui[android,pynput,web]", - "uvicorn>=0.34.3", -] -mcp = [ - "fastmcp>=2.3.4", -] -pynput = [ - "mss>=10.0.0", - "pynput>=1.8.1", -] -web = [ - "playwright>=1.41.0", -] +android = ["pure-python-adb>=0.3.0.dev0"] +chat = ["askui[android,pynput,web]", "uvicorn>=0.34.3"] +mcp = ["fastmcp>=2.3.4"] +pynput = ["mss>=10.0.0", "pynput>=1.8.1"] +web = ["playwright>=1.41.0"] diff --git a/src/askui/agent_base.py b/src/askui/agent_base.py index a270b6f7..46d82cf4 100644 --- a/src/askui/agent_base.py +++ b/src/askui/agent_base.py @@ -16,6 +16,7 @@ from askui.models.shared.tools import Tool from askui.tools.agent_os import AgentOs from askui.tools.android.agent_os import AndroidAgentOs +from askui.utils.excel_utils import Excel from askui.utils.image_utils import ImageSource, Img from askui.utils.pdf_utils import Pdf from askui.utils.source_utils import load_image_source, load_source @@ -193,7 +194,7 @@ def get( query: Annotated[str, Field(min_length=1)], response_schema: None = None, model: str | None = None, - source: Optional[Img | Pdf] = None, + source: Optional[Img | Pdf | Excel] = None, ) -> str: ... @overload def get( @@ -201,7 +202,7 @@ def get( query: Annotated[str, Field(min_length=1)], response_schema: Type[ResponseSchema], model: str | None = None, - source: Optional[Img | Pdf] = None, + source: Optional[Img | Pdf | Excel] = None, ) -> ResponseSchema: ... @telemetry.record_call(exclude={"query", "source", "response_schema"}) @@ -211,7 +212,7 @@ def get( query: Annotated[str, Field(min_length=1)], response_schema: Type[ResponseSchema] | None = None, model: str | None = None, - source: Optional[Img | Pdf] = None, + source: Optional[Img | Pdf | Excel] = None, ) -> ResponseSchema | str: """ Retrieves information from an image or PDF based on the provided `query`. @@ -220,9 +221,10 @@ def get( Args: query (str): The query describing what information to retrieve. - source (Img | Pdf | None, optional): The source to extract information from. - Can be a path to a PDF file, a path to an image file, a PIL Image - object or a data URL. Defaults to a screenshot of the current screen. + source (Img | Pdf | Excel | None, optional): The source to extract + information from. Can be a path to a PDF file, a path to an image file, + a path to an Excel file, a PIL Image object or a data URL. Defaults to a + screenshot of the current screen. response_schema (Type[ResponseSchema] | None, optional): A Pydantic model class that defines the response schema. If not provided, returns a string. diff --git a/src/askui/models/anthropic/messages_api.py b/src/askui/models/anthropic/messages_api.py index b92e9f9c..78646aca 100644 --- a/src/askui/models/anthropic/messages_api.py +++ b/src/askui/models/anthropic/messages_api.py @@ -42,6 +42,7 @@ from askui.models.shared.tools import ToolCollection from askui.models.types.response_schemas import ResponseSchema from askui.utils.dict_utils import IdentityDefaultDict +from askui.utils.excel_utils import ExcelSource from askui.utils.image_utils import ( ImageSource, image_to_base64, @@ -242,8 +243,10 @@ def get( response_schema: Type[ResponseSchema] | None, model_choice: str, ) -> ResponseSchema | str: - if isinstance(source, PdfSource): - err_msg = f"PDF processing is not supported for the model {model_choice}" + if isinstance(source, (PdfSource, ExcelSource)): + err_msg = ( + f"PDF or Excel processing is not supported for the model {model_choice}" + ) raise NotImplementedError(err_msg) try: if response_schema is not None: diff --git a/src/askui/models/askui/google_genai_api.py b/src/askui/models/askui/google_genai_api.py index 8d691023..f598963b 100644 --- a/src/askui/models/askui/google_genai_api.py +++ b/src/askui/models/askui/google_genai_api.py @@ -21,6 +21,7 @@ from askui.models.models import GetModel, ModelName from askui.models.shared.prompts import SYSTEM_PROMPT_GET from askui.models.types.response_schemas import ResponseSchema, to_response_schema +from askui.utils.excel_utils import ExcelSource from askui.utils.http_utils import parse_retry_after_header from askui.utils.image_utils import ImageSource from askui.utils.source_utils import Source @@ -185,6 +186,16 @@ def _create_genai_part_from_source(self, source: Source) -> genai_types.Part: data=data, mime_type="image/png", ) + if isinstance(source, ExcelSource): + with source.reader as r: + data = r.read() + if len(data) > MAX_FILE_SIZE_BYTES: + _err_msg = ( + "Excel file size exceeds the limit of " + f"{MAX_FILE_SIZE_BYTES} bytes." + ) + raise ValueError(_err_msg) + return genai_types.Part.from_text(text=data.decode()) with source.reader as r: data = r.read() if len(data) > MAX_FILE_SIZE_BYTES: diff --git a/src/askui/models/askui/inference_api.py b/src/askui/models/askui/inference_api.py index 231ae093..94ca0530 100644 --- a/src/askui/models/askui/inference_api.py +++ b/src/askui/models/askui/inference_api.py @@ -26,6 +26,7 @@ from askui.models.shared.settings import MessageSettings from askui.models.shared.tools import ToolCollection from askui.models.types.response_schemas import ResponseSchema +from askui.utils.excel_utils import ExcelSource from askui.utils.image_utils import ImageSource from askui.utils.pdf_utils import PdfSource from askui.utils.source_utils import Source @@ -205,8 +206,10 @@ def get( response_schema: Type[ResponseSchema] | None, model_choice: str, ) -> ResponseSchema | str: - if isinstance(source, PdfSource): - err_msg = f"PDF processing is not supported for the model {model_choice}" + if isinstance(source, (PdfSource, ExcelSource)): + err_msg = ( + f"PDF or Excel processing is not supported for the model {model_choice}" + ) raise NotImplementedError(err_msg) json: dict[str, Any] = { "image": source.to_data_url(), diff --git a/src/askui/models/openrouter/model.py b/src/askui/models/openrouter/model.py index a5a6882c..fa64563f 100644 --- a/src/askui/models/openrouter/model.py +++ b/src/askui/models/openrouter/model.py @@ -10,6 +10,7 @@ from askui.models.models import GetModel from askui.models.shared.prompts import SYSTEM_PROMPT_GET from askui.models.types.response_schemas import ResponseSchema, to_response_schema +from askui.utils.excel_utils import ExcelSource from askui.utils.pdf_utils import PdfSource from askui.utils.source_utils import Source @@ -174,8 +175,10 @@ def get( response_schema: Type[ResponseSchema] | None, model_choice: str, ) -> ResponseSchema | str: - if isinstance(source, PdfSource): - err_msg = f"PDF processing is not supported for the model {model_choice}" + if isinstance(source, (PdfSource, ExcelSource)): + err_msg = ( + f"PDF or Excel processing is not supported for the model {model_choice}" + ) raise NotImplementedError(err_msg) response = self._predict( image_url=source.to_data_url(), diff --git a/src/askui/models/ui_tars_ep/ui_tars_api.py b/src/askui/models/ui_tars_ep/ui_tars_api.py index 1eec36bd..bc843ef1 100644 --- a/src/askui/models/ui_tars_ep/ui_tars_api.py +++ b/src/askui/models/ui_tars_ep/ui_tars_api.py @@ -24,6 +24,7 @@ from askui.models.shared.tools import Tool from askui.models.types.response_schemas import ResponseSchema from askui.reporting import Reporter +from askui.utils.excel_utils import ExcelSource from askui.utils.image_utils import ImageSource, image_to_base64 from askui.utils.pdf_utils import PdfSource from askui.utils.source_utils import Source @@ -188,8 +189,8 @@ def get( response_schema: Type[ResponseSchema] | None, model_choice: str, ) -> ResponseSchema | str: - if isinstance(source, PdfSource): - err_msg = f"PDF processing is not supported for the model {model_choice}" + if isinstance(source, (PdfSource, ExcelSource)): + err_msg = f"PDF and Excel processing is not supported for the model {model_choice}" raise NotImplementedError(err_msg) if response_schema is not None: error_msg = f'Response schema is not supported for model "{model_choice}"' diff --git a/src/askui/utils/excel_utils.py b/src/askui/utils/excel_utils.py new file mode 100644 index 00000000..f9aeb24a --- /dev/null +++ b/src/askui/utils/excel_utils.py @@ -0,0 +1,42 @@ +from io import BytesIO +from pathlib import Path +from typing import Union + +from pydantic import ConfigDict, RootModel + +from askui.utils.markdown_utils import convert_to_markdown + +Excel = Union[str, Path] +"""Type of the input Excel for `askui.VisionAgent.get()`, etc. + +Accepts: +- Relative or absolute file path (`str` or `pathlib.Path`) +""" + + +class ExcelSource(RootModel): + """Represents an Excel source that can be read as markdown. + + The class can be initialized with: + - A file path (str or pathlib.Path) + + Attributes: + root (bytes): The underlying Excel bytes. + + Args: + root (Excel): The Excel source to load from. + """ + + model_config = ConfigDict(arbitrary_types_allowed=True) + root: bytes | Path + + @property + def reader(self) -> BytesIO: + markdown_content = convert_to_markdown(self.root) + return BytesIO(markdown_content.encode()) + + +__all__ = [ + "Excel", + "ExcelSource", +] diff --git a/src/askui/utils/markdown_utils.py b/src/askui/utils/markdown_utils.py new file mode 100644 index 00000000..6eb523de --- /dev/null +++ b/src/askui/utils/markdown_utils.py @@ -0,0 +1,24 @@ +from io import BytesIO +from pathlib import Path +from typing import BinaryIO + +from markitdown import MarkItDown + +_MARKDOWN_CONVERTER = MarkItDown() + + +def convert_to_markdown(source: Path | bytes | BinaryIO) -> str: + """Converts a source to markdown text. + + Args: + source (Path | bytes | BinaryIO): The source to convert. + + Returns: + str: The markdown representation of the source. + """ + if isinstance(source, bytes): + bytes_source = BytesIO(source) + result = _MARKDOWN_CONVERTER.convert(bytes_source) + return result.text_content + result = _MARKDOWN_CONVERTER.convert(source) + return result.text_content diff --git a/src/askui/utils/source_utils.py b/src/askui/utils/source_utils.py index 619f134a..ebc91511 100644 --- a/src/askui/utils/source_utils.py +++ b/src/askui/utils/source_utils.py @@ -9,15 +9,20 @@ from filetype import guess # type: ignore[import-untyped] from PIL import Image as PILImage +from askui.utils.excel_utils import ExcelSource from askui.utils.image_utils import ImageSource from askui.utils.pdf_utils import PdfSource -Source = Union[ImageSource, PdfSource] +Source = Union[ImageSource, PdfSource, ExcelSource] _DATA_URL_WITH_MIMETYPE_RE = re.compile(r"^data:([^;,]+)([^,]*)?,(.*)$", re.DOTALL) _SupportedImageMimeTypes = Literal["image/png", "image/jpeg", "image/gif", "image/webp"] -_SupportedApplicationMimeTypes = Literal["application/pdf"] +_SupportedApplicationMimeTypes = Literal[ + "application/pdf", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "application/vnd.ms-excel", +] _SupportedMimeTypes = _SupportedImageMimeTypes | _SupportedApplicationMimeTypes _SUPPORTED_MIME_TYPES: list[_SupportedMimeTypes] = [ @@ -26,6 +31,8 @@ "image/gif", "image/webp", "application/pdf", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "application/vnd.ms-excel", ] @@ -49,6 +56,13 @@ def is_supported(self) -> bool: def is_pdf(self) -> bool: return self.mime == "application/pdf" + @property + def is_excel(self) -> bool: + return self.mime in [ + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "application/vnd.ms-excel", + ] + @property def is_image(self) -> bool: if self.mime: @@ -133,6 +147,8 @@ def load_source(source: Union[str, Path, PILImage.Image]) -> Source: raise ValueError(msg) if source_analysis.is_pdf: return PdfSource(source_analysis.content) + if source_analysis.is_excel: + return ExcelSource(source_analysis.content) if source_analysis.is_image: return ImageSource( PILImage.open( diff --git a/tests/conftest.py b/tests/conftest.py index 72b7cba4..7483002d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -40,6 +40,18 @@ def path_fixtures_dummy_pdf(path_fixtures_pdf: pathlib.Path) -> pathlib.Path: return path_fixtures_pdf / "dummy.pdf" +@pytest.fixture +def path_fixtures_excel(path_fixtures: pathlib.Path) -> pathlib.Path: + """Fixture providing the path to the excel directory.""" + return path_fixtures / "excel" + + +@pytest.fixture +def path_fixtures_dummy_excel(path_fixtures_excel: pathlib.Path) -> pathlib.Path: + """Fixture providing the path to the dummy excel.""" + return path_fixtures_excel / "dummy.xlsx" + + @pytest.fixture def github_login_screenshot(path_fixtures_screenshots: pathlib.Path) -> Image.Image: """Fixture providing the GitHub login screenshot.""" diff --git a/tests/e2e/agent/test_get.py b/tests/e2e/agent/test_get.py index 7977efdf..ed13ab14 100644 --- a/tests/e2e/agent/test_get.py +++ b/tests/e2e/agent/test_get.py @@ -125,6 +125,47 @@ def test_get_with_pdf_too_large_with_default_model( ) +def test_get_with_xlsx_with_non_gemini_model_raises_not_implemented( + vision_agent: VisionAgent, path_fixtures_dummy_excel: pathlib.Path +) -> None: + with pytest.raises(NotImplementedError): + vision_agent.get( + "What is in the xlsx?", + source=path_fixtures_dummy_excel, + model=ModelName.ANTHROPIC__CLAUDE__3_5__SONNET__20241022, + ) + + +@pytest.mark.parametrize( + "model", + [ + ModelName.ASKUI__GEMINI__2_5__FLASH, + ModelName.ASKUI__GEMINI__2_5__PRO, + ], +) +def test_get_with_xlsx_with_gemini_model( + vision_agent: VisionAgent, model: str, path_fixtures_dummy_excel: pathlib.Path +) -> None: + response = vision_agent.get( + "What is in the salary of Doe?", + source=path_fixtures_dummy_excel, + model=model, + ) + assert isinstance(response, str) + assert "20000" in response.lower() + + +def test_get_with_xlsx_with_default_model_with_chart_data( + vision_agent: VisionAgent, path_fixtures_dummy_excel: pathlib.Path +) -> None: + response = vision_agent.get( + "What does the chart show?", + source=path_fixtures_dummy_excel, + ) + assert isinstance(response, str) + assert "count of names" in response.lower() + + def test_get_with_model_composition_should_use_default_model( agent_toolbox_mock: AgentToolbox, askui_facade: ModelFacade, diff --git a/tests/fixtures/excel/dummy.xlsx b/tests/fixtures/excel/dummy.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..9dedc474e87ca32ed63b9fe3af97a6ba223bf480 GIT binary patch literal 10175 zcmeHtWmFw)vNamq-R2^U>|MV6o+>NG%EFVcTjS6|x5FC}__NqiSgdZ4hawZ!Zle2CzakPhK zz72BohE61xA4*5d9YSx6A-=sLQ|=-`aCcdhwL(*?D6T+CgWn6Xt!&T;T72aL<;GNk zX0|DPlu4HBg0FGBosB$VCS|}!XGO6cLO^b-GFY}J&TYvjoK@FS@9yapE{|tb^p&_$ zMrhaOSTUe(PlD-OtCl4?dmaQU26f9EpEsIo!#$}5<7St(@>v9W$a5z?91=yqv86j@X^(S%SDkubv>;IItqX1IA#-&&`X+8DnVmeX8h~B9ARczOS|W zPAJl>$}K3Z?ILENxsP!PTwVqS_FJ8;3ko1kM0yS8QWBK*y{KKKZ#+(>=n;wwDP0?MqV;|)I9P&S|%@TN7i4koNm7Tnl`5??R%0xUbXuQ${?g%TxozcXDzRU)yv z46&-JY$4bJBS@?IhoRrlQ7j8X+Fr)y%QnN8VM+yYOR!6%S7$;&62V>2uYkFEZf)GT z61m*SCNL;-g73+0GAECJ@CEg#64^ziE1W2Vd4-DOyN`;+PI_S-9jd=Mdd-HPn%Wo= zy|=Hqe0j08SJ9i}S(Py{cDXAUr|`CjYy+Vi=s$_=LaBOSwy=DLi-5YUOI>d2@xfA4 zovrqa^+&Hr?NQ`=)Vltfu$ToCk+ZT>jPX`eewQ3)c*V&h`dZ`MGzg?hx}pQ zb3~fM%nh8_vR~lhV9?5;>bs~uhC9Vh89KBpR>hGP1@eJ)`8ygepDA76ZgI`1POBdk zcP4Y}m|ZrZtJXdYqh8qzBKJj+iJx5g@v)<|4H(oBxgZZSuFMkEz<>kW~0ujg&M_02; zJ0mH4%?HPIgMYfvquA6pq0_T~*$qzgqMe~u#I+}|luElP#z@@h>)UEmNTRp>JvCh4 zR~Jg@dO0d-=q*aM7mEhU$*XnlT?wOa<_mqtb})DD{P&FaZ(RBGK8^gJEB`lK$@<{R z*nX=yAgWk%#;u?YiIU>#I2Hr6SQsXB2!|kn!=PfUSOSWkw=4l57%T(S>NP?4(l1Hkaje3pI$uE~ zt@L%>F0FS>RHEChb}UK~h{H+_LY}p8=ATAWG1*YyBd@@HZY0B{Y;jQy^tUlCL&uYp zESB)uOT*iqiK6^5pi(Tfp|y^ST<_uLKaWjttZIh!T*L@X$#M|dG7obJf)j1j!@Z)u zyk9zDibg2QnQ?4=CYC|5PcO-~;UH^Lr_huzOc7})EZU}SV3Hv~;fMN^(J|nBrUc?E zuG6{RxN&{1g=|`6v=-@gp{7YevPH0yLJE9RQCxbQl<-Uq%-j@*^t=Ji`p4<^sl+p2 zB8~tuilDN8brO0g0(9dp%v)W2<gL;W6JFe^M@_H#N(ZuMVg`RL9lBcBZo!Of%ZCtEW^|4iz2AB`yfm2W2H zgb7#H1=08LqZL=wG*+j!pEcypyIu`OvWhyWNyfCBQmv&IAYr*}L)^PSpY@YY+1(8S{|y9=p!Is-%izApjii-02MFP$iEg;0VeUfMdX6yiZq`bTqDix%6UkwW?S|%+|k(0!gG+knrfpQ>T8eq(_XwL~@z^oNs=5R=bkF=YQ zX9s6s`Xmus%0THi(Mr+{lozpLP4y$X8kPi%dMJ`xvI=+z0t?!0;Zs;qw`bm z^9~aW+eGsTG1ui-ZanPB2vfc(ys!e|K0qK0PHEI2 zQ;d5b6n)&4E58d5y8k9TekSEuE$b{&%mA;}8<_U%AfD8j$&YgNa^;QYN!K+wK@@o4 zkl$HuJtHG1HO||{>A4yX+LIwc^XKo6HFA(tpNeIYkVZ=7+cCYMy}aIaaSG)#2p*8i z2FQ%~<2j^z$nazj=NJySf<;k@Wmh_em1a_6bbQY@MpuE)hAst8P+Pnnlzf&X>i>b1 zOJAr)R8_dJ8mO#b)0c*62^Dh&KT=4J{UsSRwc$&3iS^tj|DA&WX&yQT9J>5~YhRaO zb*P|^l@uW88Rsc-6nmEQCRTMkOMH6;?dys#>aL+qTt1dPlIr)DsOKw#Rr3YLi!1eF zm(Tdz!!e(5Aqr$#_0-|a8KCV*>JxOO_Be=@8HzUTFero<)fg7fvW9za=n$)C*I~AH zrovlaDbHqlmqYO`aeZ%BH!(J_uIW0EP7UmpX#x&b4zIp#Ppe?L3y-}zv7m+&tgO5} zl$Y1*wTzkA7u}5o&3Qw?C^zTr-Xy^t5nQ#%6=k}tOvB^iI zj%rj0Qz0OSg(zUcsrpF+1gR3V_5&4$A+Nb|Qu$&yS{kp)LCE?QtB zFli254RBr=U8hjLp(YVG2dwcyii@lBCuz53^IM*`2#bEOEeilJU}k*cSWl!1+onSN zfWSaTC5fx=07K@3Z`##TL=giTy2LZU_h2T>r#}OPFwDW-S_)P^RsTTwU3{};wsSfttfE_jE>7}*ypl-N8KB>{1i9&WWHaLwHJZ?!0QVfiL?YH6$MXn*Z0faG zkT}>fosEiy3FOqvk&rC4N>OpHc_}~H(ttd7rKy3wB;69!14 z<42bU9mm>Ol7WDQtxXyuvioM+XHfB@lJJ!13n8K!(Jex2AWKnck?R)$qg#X$v~jz9 z^-zXSnBrm;8tFYI66BWGKqz!wMwacSPfBidW6C^^C+m5J&!t$b7v^4kt=2xcmDHX) zZJ-#(dREkVQDvhXGx`m9N|2`kIFVT^NA!p`7w2|~lU%38^x`C!#e54H{t9~I#3Ng- z1<9j&@|`cIG`u5FJSRL}xNy)ApFm;DBmtwst#?^Yg;zM#LLA-W1ujC0Ubhg}sDnsa z=?t1oW~JodwOy_$ET!TV)qG(wEJd{0khDJ92d1nW5TLYuFF}cbvkJM}?Eu9HQW)vI z=G!ga(#Sn-8Y;M<#3r0Q>hM!|{nSav>WE=61?qDRPX2ObKmy$@{jBy@My#D%XG!vM zX{bmRi&=JdHp?W}Nj6Os8$MW4E9$rnmp8bYf{+8o;`gszk_BPWgEc%oTT^&Uhp$a@ z1KvnW9SGXFK~>^St@Atkd%?Uh*R7Uq<_2@!r+6>7ee1hLJWd*L%Q;|b@gjQ_Y_uLx zySj+I0^RqFh-F~PC~ra<(}1?Dl*VHJ++!3kEKJA#WDU`6<V6P0!Q%x;GvTQbg;sK0ScpxCPC(`5Fhn?C;=<6#coCP8uR z>E3DWq>at`N|dGRD`#qOC9q(G?-``r8719oY-h6PrVWNAfmtdv=-g9PmOEP#Rr{5Z z4hiGw>4zxynL|lt)@=y_46Nb#?lvcJuz7}Hza-Ikxl^^nT6&b-It{^LcGHCGX6Y{T-k8Ej2V#{qkd?J1s=uHa z4;sZUl+DE4R7VXJ6?;vu?+g3Ysr{`B_}i!i|AdXO!W9g(!`I)o(Tfd*pF+g)V$e4P z1jly5lcCYDV&k|7C0t^!^R+9=xr7NvkYHJgM;oZK#;(M+&Zaka2}HhUBdzD-GWP2Y z@xuto1~GvuP*H&?wBdn%sI)tHhL93A~$_Z;cRJU-I{Rz1`^~{UH78H)wMx`L)sbtWY`< zh)#pbV;jfl7+Ib|PUx8|d~DvO$omkXD2;L)3@R3uLFblhZ+yBF{N^=C6YyG%d8)OO zaF=Dd_A0e+Eoxbq(O?%#UqPuG0<%^=5bn9%ZWdv=wTm;|?)E$_48yDXb_yH-8nB+0q=k=nMf!BE<0y2~< z-Z_c0yT*7l=9M!;^OrSW;wHQM33qT3MbY5a{IXN?t+9HyrZ|22r9742qey*+{kj!> z+d`*sGtY#9n@s%UAaX0^22n93WwRfhN&u)ai_6C+_BmuWLp@x|N9a~&m zy|Y)urypfX2cs#bq~Cg9?Oi7|WcCrMu1799_WPf@$2+@;0J9jovTIjIb6mUeNT9mx z%7YuYV)ZA3(sH7^_E>cw6;bc-s;iZDM>|tE6n|4AYp%}pBot}IoRE+ zbNBLxQIyb&b|9j^7x*pNv4>1Gq%h4Oa`Kym8gLWCO5^ZxF0DQCvjezfC`DKIrSj9X z{WnXtY&x-&04u$6s}@wJJQR1UmFxW)R|txb`PGC3jT%tPkN_7NyV1LBe<4TLDwVpo zK`LU9g}wXE*ngY}(y?tZTU_Xk%R8Ki3&eq)OeUkBjPr zdi%sie{;9q8>_$eV~Q7mwT=$s7bE5+S}wtbHC*vDzvM?^eh?jeT4}eW!BZ+W^Qio8;ET_B zADhyq+`wKb_2+8Z)SndEpkqmE_&#Ni=jif%M;bgpRF?^{kq3hz2BJ|CHp?;@!_74t z7I@BJ*6H&laYLCadb1Q=MU!sd50DYy)Kc~wiLe2p>lW90RoKD~MhIOfD$XoX7^Y}e zwIAPIO47pn$kl5E+g)YWI*I8O)eQ{ai+4kMwCqihHWcJ?6v0sR495l3T~&U20tuZf z7h9%#?bOfNgDdR6gF0IeNmHQ^ijzfECWy4Wb3Pas<@0Dj87;wrJ0cZ|l^f_X7L#z7 z?6YF0F+!N*IUp7q9qqJe;MVs|H^c2=;ql5wjvaBlEkatfrLAiC%3xrPKb;R=$!Z|1 z$rH8I=>fhEIsRTyXw!|@U^-PR&oW8=Kvj;rcr<_ea~*a+E6sSBJ~&m7B9?1})SQ;h z;xVsh>XO(^J3F=}l%Au@qL3ca<3^x*pGJL%d}53Urov|1$Q0b=p$q)Anal|U_q z(DxV%H%wbiiMJ|t93_|582zePxXqZa^4OXoYm?2-FP1GqG2jqmL?G7hOAz=?I)c_M zZ%*>2iQVa{0!dOC)v+!o-;|fm%*#vx7AOH-IA0e5z@(wt63u|pa1SWpw@z0CJiR59 zc!IKhqP3ezLB1|*L2)Q=F<&`(bY+8cu;Mv zB^$GqT8WEuD{G>KOA9Z;%Ze)5Y4fT)*lgh?v9=(xp2)@1tk0@dY(}aO=x+L4M3SNu zU^ZdwiZEu@v0f`J@SF|jqu`&mt@xg0Pc#@a&ms~Kv_GZ%hD1?THM`YP5p}u*=zZvl zc5_hi@5#m-?eA(Gl%FAXuN3H8Tbn<~te^!eA0Tc(vF`cj6HSEY;S@eFOjKzMywgk> zu_$JHC}s%0^*UmtJTc*Wuo{=oJTw;fKSe`K=rT;vb*=EVz+#T!spjH(c@i2EgcW|E z`yfmoWQ%}(0LtBu=h#%elNk;xg=2Ljq5XxZi^eEV%1^99n zX(*{0hE#2vs<2@W&>_wKNpaFk^w@e~MA)5%Mn-2dm?R5g8xZS$3G-0G26dTX+HnvSu=oh zY-h6MS8yBM=IytFG7fsS*y+I7Ck{#pz`K>yi8d^pt0(KNTAjObG0XkdSy;QC2Ns6; z&J(r_vkrPmWt@@MxM=}5G3W=5{Xks86>^EP{Mcd^4(IlS&bEh8IKqgwY9CqZ2TlQV zh1vIzoRgUZd*6`Wc(j66oblE)g(k(W`9AnKc2l<-0haE|FdAbELWes(Z2H?-1OCqk z_EXP!{Ma74*Txv3*X?%_`HRoIK{MTDsT{HB+{ubPRdOh#S*1q$GQvq5NBadFY>uP} zOtLCpyjX4p*i0L_&xn3>puRGJ##Bc}EV8Ss4EH`?=>LMI1~9AKOA!Ny6rWah+Ev{H zWG9+xkf_6#DtMC}zetfVDv4*YC$_B1)7qk>#KZrZJ=_1pC2^CUxC>8S%do5n8?~wq ztcrl|hI($ovEXnfFOry1D{nd0m<77;Gb!BnXzKDFMcl((#Pt-!=m4h16}&H5xjXUY zlLUD3%?gqT#7bfNvW;2K{bv@_r6pF-dJ zZ_NNJ;QO6-#{SlS2zQ_O>AZim`$EV*Kvd|@cLK5TnWBweSY`bloLHt`ivesT~n;>R($ZOvj%=J1O*L7>M|PLPB6zxT4t`^ z1S?^Inih0@U+-t)PHTK0iB0gH+L?~!PzdxVh+EghypDZx!8Ck6-%KFykx)1S;r;}Q&*0M zr0DRvn5O1H3>W}Q@}lepOkiwDd8RBTS&alOEprKkbdgAV&*&vQ#EWZbo}Q@^VQymN z10yRrL5wGv5o9?rCYo*Hfopl{#UdsUDPu;+ZylwOsX+1kzTpF|Ot&l5>^rgp%QS$~ zkE_IV(h}91P+^LLheOn>GX6LU)9Yme3N!9Xr83e(6f|_GBH3eL2;oReFy9ZKV8O(? zF;WPI<7Iav)^9GUH%F{;APJE6ntl@N{x0n>f12wHfB5m4?`GEr4#s8#yEN z`4i#sl=pW8g}eWGhwv{iVEl>jc({2#O!&3F-zjN#2!A;n{?AcF?z-&1KzY#G{zQ2^ z_4(bFh&%871zun-_1h5+voq*tOu+IPXE)a$8Fa=C;nPASPy`IY8rn6JnmM0 z54GsK+q|>rUwXAa5gr#~_g&4em5lcj;h#OupO-!^3+}76U+aeOpYHg7wcDQ;KhBBw zmBX)9O#E>1&*%Ctb;O@Ij}y*)2K}`TNPf$we_r^Q1Aq7ZEa_inCwUp@JK6vP!?^pR My&D00li&aLKTjH-4gdfE literal 0 HcmV?d00001 From 9cb5eb37cb8fb065b7b96effefc1713e853e1805 Mon Sep 17 00:00:00 2001 From: danyalxahid-askui Date: Mon, 18 Aug 2025 22:31:16 +0200 Subject: [PATCH 02/12] chore(toml): reformatting `pyproject.toml` --- pyproject.toml | 83 +++++++++++++++++++++++++++++++------------------- 1 file changed, 51 insertions(+), 32 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index cdd645f6..2d4e517b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,9 @@ [project] name = "askui" description = "Automate computer tasks in Python" -authors = [{ name = "askui GmbH", email = "info@askui.com" }] +authors = [ + {name = "askui GmbH", email = "info@askui.com"}, +] dependencies = [ "anthropic>=0.54.0", "fastapi>=0.115.12", @@ -28,7 +30,7 @@ dependencies = [ ] requires-python = ">=3.10" readme = "README.md" -license = { text = "MIT" } +license = {text = "MIT"} dynamic = ["version"] [build-system] @@ -59,7 +61,11 @@ typecheck = "mypy" "typecheck:all" = "mypy ." "chat:api" = "uvicorn askui.chat.api.app:app --reload --port 9261" "mcp:dev" = "mcp dev src/askui/mcp/__init__.py" -"qa:fix" = { composite = ["typecheck:all", "format", "lint:fix"] } +"qa:fix" = { composite = [ + "typecheck:all", + "format", + "lint:fix", +] } "grpc:gen" = "bash scripts/grpc-gen.sh" "json:gen" = "datamodel-codegen --output-model-type pydantic_v2.BaseModel --input src/askui/tools/askui/askui_ui_controller_grpc/json_schema/ --input-file-type jsonschema --output src/askui/tools/askui/askui_ui_controller_grpc/generated/" @@ -81,7 +87,10 @@ test = [ "types-pynput>=1.8.1.20250318", "playwright>=1.41.0", ] -dev = ["datamodel-code-generator>=0.31.2", "grpcio-tools>=1.73.1"] +dev = [ + "datamodel-code-generator>=0.31.2", + "grpcio-tools>=1.73.1", +] [tool.pytest.ini_options] @@ -115,7 +124,7 @@ exclude = [ "dist", "node_modules", "venv", - "src/askui/tools/askui/askui_ui_controller_grpc/generated", + "src/askui/tools/askui/askui_ui_controller_grpc/generated" ] # Same as Black. @@ -134,26 +143,26 @@ line-ending = "auto" # Enable all rules select = ["ALL"] ignore = [ - "ANN", # Type annotations - handled by mypy + "ANN", # Type annotations - handled by mypy "COM812", # Unused import - "D", # Documentation - we'll handle this separately - "ERA", # Commented out code - "FBT", # Boolean trap - "ICN", # Import conventions - "ISC", # Implicit string concatenation - "N", # Naming - "PGH", # PyGithub - "PL", # Pylint - "PT", # Pytest - "Q", # Quotes - "RUF", # Ruff-specific rules - "S", # Bandit - "SIM", # Simplify - "T", # Pycodestyle - "TID", # isort - "UP", # Pyupgrade - "W", # Pycodestyle - "YTT", # flake8-2020 + "D", # Documentation - we'll handle this separately + "ERA", # Commented out code + "FBT", # Boolean trap + "ICN", # Import conventions + "ISC", # Implicit string concatenation + "N", # Naming + "PGH", # PyGithub + "PL", # Pylint + "PT", # Pytest + "Q", # Quotes + "RUF", # Ruff-specific rules + "S", # Bandit + "SIM", # Simplify + "T", # Pycodestyle + "TID", # isort + "UP", # Pyupgrade + "W", # Pycodestyle + "YTT", # flake8-2020 ] # Allow autofix for all enabled rules @@ -186,9 +195,7 @@ dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" "tests/e2e/agent/test_locate_with_relations.py" = ["E501"] "tests/unit/locators/test_locators.py" = ["E501"] "tests/unit/locators/serializers/test_askui_locator_serializer.py" = ["E501"] -"tests/unit/locators/serializers/test_locator_string_representation.py" = [ - "E501", -] +"tests/unit/locators/serializers/test_locator_string_representation.py" = ["E501"] "tests/unit/utils/test_image_utils.py" = ["E501"] [tool.ruff.lint.flake8-quotes] @@ -202,8 +209,20 @@ known-third-party = ["pytest", "mypy"] [project.optional-dependencies] all = ["askui[android,chat,mcp,pynput,web]"] -android = ["pure-python-adb>=0.3.0.dev0"] -chat = ["askui[android,pynput,web]", "uvicorn>=0.34.3"] -mcp = ["fastmcp>=2.3.4"] -pynput = ["mss>=10.0.0", "pynput>=1.8.1"] -web = ["playwright>=1.41.0"] +android = [ + "pure-python-adb>=0.3.0.dev0" +] +chat = [ + "askui[android,pynput,web]", + "uvicorn>=0.34.3", +] +mcp = [ + "fastmcp>=2.3.4", +] +pynput = [ + "mss>=10.0.0", + "pynput>=1.8.1", +] +web = [ + "playwright>=1.41.0", +] \ No newline at end of file From e285cb98af7643bd5f12d1a10cada4af776560e8 Mon Sep 17 00:00:00 2001 From: danyalxahid-askui Date: Wed, 20 Aug 2025 13:26:02 +0200 Subject: [PATCH 03/12] fix(excel): update type hint for `root` attribute in `ExcelSource` class - Changed the type hint for the `root` attribute from `bytes` to `bytes | Path` to accurately reflect that it can be either the underlying Excel bytes or a file path. --- src/askui/utils/excel_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/askui/utils/excel_utils.py b/src/askui/utils/excel_utils.py index f9aeb24a..30e134ab 100644 --- a/src/askui/utils/excel_utils.py +++ b/src/askui/utils/excel_utils.py @@ -21,7 +21,7 @@ class ExcelSource(RootModel): - A file path (str or pathlib.Path) Attributes: - root (bytes): The underlying Excel bytes. + root (bytes | Path): The underlying Excel bytes or file path. Args: root (Excel): The Excel source to load from. From e5d9d541d255bb550f5474785fc8eb8d1dfd6faa Mon Sep 17 00:00:00 2001 From: danyalxahid-askui Date: Wed, 20 Aug 2025 14:00:52 +0200 Subject: [PATCH 04/12] feat(excel): rename `ExcelSource` to `OfficeDocumentSource` and update related references - Renamed the `ExcelSource` class to `OfficeDocumentSource` to better reflect its functionality for handling various office document types. - Updated all references to `ExcelSource` across the codebase to `OfficeDocumentSource`. - Adjusted error messages to specify "Office Document" processing instead of just "Excel". - Enhanced the `Source` type to include `OfficeDocumentSource` for broader compatibility. --- pdm.lock | 107 ++++++++++++++++++++- pyproject.toml | 2 +- src/askui/models/anthropic/messages_api.py | 8 +- src/askui/models/askui/google_genai_api.py | 6 +- src/askui/models/askui/inference_api.py | 8 +- src/askui/models/openrouter/model.py | 8 +- src/askui/models/ui_tars_ep/ui_tars_api.py | 4 +- src/askui/utils/excel_utils.py | 4 +- src/askui/utils/source_utils.py | 16 ++- 9 files changed, 133 insertions(+), 30 deletions(-) diff --git a/pdm.lock b/pdm.lock index dc05c4e4..1b361717 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "all", "android", "chat", "dev", "mcp", "pynput", "test", "web"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:8809525501dc5a4160128323355483d98a968d1237bc67a87503882d18e06a55" +content_hash = "sha256:beb091cad08638d0d09be80ec10830745be0024dbe05a33bbd111a865950bba4" [[metadata.targets]] requires_python = ">=3.10" @@ -319,6 +319,17 @@ files = [ {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, ] +[[package]] +name = "cobble" +version = "0.1.4" +requires_python = ">=3.5" +summary = "Create data objects" +groups = ["default"] +files = [ + {file = "cobble-0.1.4-py3-none-any.whl", hash = "sha256:36c91b1655e599fd428e2b95fdd5f0da1ca2e9f1abb0bc871dec21a0e78a2b44"}, + {file = "cobble-0.1.4.tar.gz", hash = "sha256:de38be1539992c8a06e569630717c485a5f91be2192c461ea2b220607dfa78aa"}, +] + [[package]] name = "colorama" version = "0.4.6" @@ -1260,6 +1271,82 @@ files = [ {file = "jsonschema_specifications-2025.4.1.tar.gz", hash = "sha256:630159c9f4dbea161a6a2205c3011cc4f18ff381b189fff48bb39b9bf26ae608"}, ] +[[package]] +name = "lxml" +version = "6.0.0" +requires_python = ">=3.8" +summary = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." +groups = ["default"] +files = [ + {file = "lxml-6.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:35bc626eec405f745199200ccb5c6b36f202675d204aa29bb52e27ba2b71dea8"}, + {file = "lxml-6.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:246b40f8a4aec341cbbf52617cad8ab7c888d944bfe12a6abd2b1f6cfb6f6082"}, + {file = "lxml-6.0.0-cp310-cp310-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:2793a627e95d119e9f1e19720730472f5543a6d84c50ea33313ce328d870f2dd"}, + {file = "lxml-6.0.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:46b9ed911f36bfeb6338e0b482e7fe7c27d362c52fde29f221fddbc9ee2227e7"}, + {file = "lxml-6.0.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2b4790b558bee331a933e08883c423f65bbcd07e278f91b2272489e31ab1e2b4"}, + {file = "lxml-6.0.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e2030956cf4886b10be9a0285c6802e078ec2391e1dd7ff3eb509c2c95a69b76"}, + {file = "lxml-6.0.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4d23854ecf381ab1facc8f353dcd9adeddef3652268ee75297c1164c987c11dc"}, + {file = "lxml-6.0.0-cp310-cp310-manylinux_2_31_armv7l.whl", hash = "sha256:43fe5af2d590bf4691531b1d9a2495d7aab2090547eaacd224a3afec95706d76"}, + {file = "lxml-6.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:74e748012f8c19b47f7d6321ac929a9a94ee92ef12bc4298c47e8b7219b26541"}, + {file = "lxml-6.0.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:43cfbb7db02b30ad3926e8fceaef260ba2fb7df787e38fa2df890c1ca7966c3b"}, + {file = "lxml-6.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:34190a1ec4f1e84af256495436b2d196529c3f2094f0af80202947567fdbf2e7"}, + {file = "lxml-6.0.0-cp310-cp310-win32.whl", hash = "sha256:5967fe415b1920a3877a4195e9a2b779249630ee49ece22021c690320ff07452"}, + {file = "lxml-6.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:f3389924581d9a770c6caa4df4e74b606180869043b9073e2cec324bad6e306e"}, + {file = "lxml-6.0.0-cp310-cp310-win_arm64.whl", hash = "sha256:522fe7abb41309e9543b0d9b8b434f2b630c5fdaf6482bee642b34c8c70079c8"}, + {file = "lxml-6.0.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4ee56288d0df919e4aac43b539dd0e34bb55d6a12a6562038e8d6f3ed07f9e36"}, + {file = "lxml-6.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b8dd6dd0e9c1992613ccda2bcb74fc9d49159dbe0f0ca4753f37527749885c25"}, + {file = "lxml-6.0.0-cp311-cp311-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:d7ae472f74afcc47320238b5dbfd363aba111a525943c8a34a1b657c6be934c3"}, + {file = "lxml-6.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5592401cdf3dc682194727c1ddaa8aa0f3ddc57ca64fd03226a430b955eab6f6"}, + {file = "lxml-6.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:58ffd35bd5425c3c3b9692d078bf7ab851441434531a7e517c4984d5634cd65b"}, + {file = "lxml-6.0.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f720a14aa102a38907c6d5030e3d66b3b680c3e6f6bc95473931ea3c00c59967"}, + {file = "lxml-6.0.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c2a5e8d207311a0170aca0eb6b160af91adc29ec121832e4ac151a57743a1e1e"}, + {file = "lxml-6.0.0-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:2dd1cc3ea7e60bfb31ff32cafe07e24839df573a5e7c2d33304082a5019bcd58"}, + {file = "lxml-6.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2cfcf84f1defed7e5798ef4f88aa25fcc52d279be731ce904789aa7ccfb7e8d2"}, + {file = "lxml-6.0.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:a52a4704811e2623b0324a18d41ad4b9fabf43ce5ff99b14e40a520e2190c851"}, + {file = "lxml-6.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c16304bba98f48a28ae10e32a8e75c349dd742c45156f297e16eeb1ba9287a1f"}, + {file = "lxml-6.0.0-cp311-cp311-win32.whl", hash = "sha256:f8d19565ae3eb956d84da3ef367aa7def14a2735d05bd275cd54c0301f0d0d6c"}, + {file = "lxml-6.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:b2d71cdefda9424adff9a3607ba5bbfc60ee972d73c21c7e3c19e71037574816"}, + {file = "lxml-6.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:8a2e76efbf8772add72d002d67a4c3d0958638696f541734304c7f28217a9cab"}, + {file = "lxml-6.0.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:78718d8454a6e928470d511bf8ac93f469283a45c354995f7d19e77292f26108"}, + {file = "lxml-6.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:84ef591495ffd3f9dcabffd6391db7bb70d7230b5c35ef5148354a134f56f2be"}, + {file = "lxml-6.0.0-cp312-cp312-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:2930aa001a3776c3e2601cb8e0a15d21b8270528d89cc308be4843ade546b9ab"}, + {file = "lxml-6.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:219e0431ea8006e15005767f0351e3f7f9143e793e58519dc97fe9e07fae5563"}, + {file = "lxml-6.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bd5913b4972681ffc9718bc2d4c53cde39ef81415e1671ff93e9aa30b46595e7"}, + {file = "lxml-6.0.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:390240baeb9f415a82eefc2e13285016f9c8b5ad71ec80574ae8fa9605093cd7"}, + {file = "lxml-6.0.0-cp312-cp312-manylinux_2_27_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d6e200909a119626744dd81bae409fc44134389e03fbf1d68ed2a55a2fb10991"}, + {file = "lxml-6.0.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ca50bd612438258a91b5b3788c6621c1f05c8c478e7951899f492be42defc0da"}, + {file = "lxml-6.0.0-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:c24b8efd9c0f62bad0439283c2c795ef916c5a6b75f03c17799775c7ae3c0c9e"}, + {file = "lxml-6.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:afd27d8629ae94c5d863e32ab0e1d5590371d296b87dae0a751fb22bf3685741"}, + {file = "lxml-6.0.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:54c4855eabd9fc29707d30141be99e5cd1102e7d2258d2892314cf4c110726c3"}, + {file = "lxml-6.0.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c907516d49f77f6cd8ead1322198bdfd902003c3c330c77a1c5f3cc32a0e4d16"}, + {file = "lxml-6.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:36531f81c8214e293097cd2b7873f178997dae33d3667caaae8bdfb9666b76c0"}, + {file = "lxml-6.0.0-cp312-cp312-win32.whl", hash = "sha256:690b20e3388a7ec98e899fd54c924e50ba6693874aa65ef9cb53de7f7de9d64a"}, + {file = "lxml-6.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:310b719b695b3dd442cdfbbe64936b2f2e231bb91d998e99e6f0daf991a3eba3"}, + {file = "lxml-6.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:8cb26f51c82d77483cdcd2b4a53cda55bbee29b3c2f3ddeb47182a2a9064e4eb"}, + {file = "lxml-6.0.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6da7cd4f405fd7db56e51e96bff0865b9853ae70df0e6720624049da76bde2da"}, + {file = "lxml-6.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b34339898bb556a2351a1830f88f751679f343eabf9cf05841c95b165152c9e7"}, + {file = "lxml-6.0.0-cp313-cp313-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:51a5e4c61a4541bd1cd3ba74766d0c9b6c12d6a1a4964ef60026832aac8e79b3"}, + {file = "lxml-6.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d18a25b19ca7307045581b18b3ec9ead2b1db5ccd8719c291f0cd0a5cec6cb81"}, + {file = "lxml-6.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d4f0c66df4386b75d2ab1e20a489f30dc7fd9a06a896d64980541506086be1f1"}, + {file = "lxml-6.0.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9f4b481b6cc3a897adb4279216695150bbe7a44c03daba3c894f49d2037e0a24"}, + {file = "lxml-6.0.0-cp313-cp313-manylinux_2_27_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8a78d6c9168f5bcb20971bf3329c2b83078611fbe1f807baadc64afc70523b3a"}, + {file = "lxml-6.0.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ae06fbab4f1bb7db4f7c8ca9897dc8db4447d1a2b9bee78474ad403437bcc29"}, + {file = "lxml-6.0.0-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:1fa377b827ca2023244a06554c6e7dc6828a10aaf74ca41965c5d8a4925aebb4"}, + {file = "lxml-6.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1676b56d48048a62ef77a250428d1f31f610763636e0784ba67a9740823988ca"}, + {file = "lxml-6.0.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:0e32698462aacc5c1cf6bdfebc9c781821b7e74c79f13e5ffc8bfe27c42b1abf"}, + {file = "lxml-6.0.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4d6036c3a296707357efb375cfc24bb64cd955b9ec731abf11ebb1e40063949f"}, + {file = "lxml-6.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7488a43033c958637b1a08cddc9188eb06d3ad36582cebc7d4815980b47e27ef"}, + {file = "lxml-6.0.0-cp313-cp313-win32.whl", hash = "sha256:5fcd7d3b1d8ecb91445bd71b9c88bdbeae528fefee4f379895becfc72298d181"}, + {file = "lxml-6.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:2f34687222b78fff795feeb799a7d44eca2477c3d9d3a46ce17d51a4f383e32e"}, + {file = "lxml-6.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:21db1ec5525780fd07251636eb5f7acb84003e9382c72c18c542a87c416ade03"}, + {file = "lxml-6.0.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:dbdd7679a6f4f08152818043dbb39491d1af3332128b3752c3ec5cebc0011a72"}, + {file = "lxml-6.0.0-pp310-pypy310_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:40442e2a4456e9910875ac12951476d36c0870dcb38a68719f8c4686609897c4"}, + {file = "lxml-6.0.0-pp310-pypy310_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:db0efd6bae1c4730b9c863fc4f5f3c0fa3e8f05cae2c44ae141cb9dfc7d091dc"}, + {file = "lxml-6.0.0-pp310-pypy310_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9ab542c91f5a47aaa58abdd8ea84b498e8e49fe4b883d67800017757a3eb78e8"}, + {file = "lxml-6.0.0-pp310-pypy310_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:013090383863b72c62a702d07678b658fa2567aa58d373d963cca245b017e065"}, + {file = "lxml-6.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c86df1c9af35d903d2b52d22ea3e66db8058d21dc0f59842ca5deb0595921141"}, + {file = "lxml-6.0.0.tar.gz", hash = "sha256:032e65120339d44cdc3efc326c9f660f5f7205f3a535c1fdbf898b29ea01fb72"}, +] + [[package]] name = "magika" version = "0.6.2" @@ -1283,6 +1370,20 @@ files = [ {file = "magika-0.6.2.tar.gz", hash = "sha256:37eb6ae8020f6e68f231bc06052c0a0cbe8e6fa27492db345e8dc867dbceb067"}, ] +[[package]] +name = "mammoth" +version = "1.10.0" +requires_python = ">=3.7" +summary = "Convert Word documents from docx to simple and clean HTML and Markdown" +groups = ["default"] +dependencies = [ + "cobble<0.2,>=0.1.3", +] +files = [ + {file = "mammoth-1.10.0-py2.py3-none-any.whl", hash = "sha256:a1c87d5b98ca30230394267f98614b58b14b50f8031dc33ac9a535c6ab04eb99"}, + {file = "mammoth-1.10.0.tar.gz", hash = "sha256:cb6fbba41ccf8b5502859c457177d87a833fef0e0b1d4e6fd23ec372fe892c30"}, +] + [[package]] name = "markdown-it-py" version = "3.0.0" @@ -1333,11 +1434,13 @@ files = [ [[package]] name = "markitdown" version = "0.1.2" -extras = ["xls", "xlsx"] +extras = ["docx", "xls", "xlsx"] requires_python = ">=3.10" summary = "Utility tool for converting various files to Markdown" groups = ["default"] dependencies = [ + "lxml", + "mammoth", "markitdown==0.1.2", "openpyxl", "pandas", diff --git a/pyproject.toml b/pyproject.toml index 2d4e517b..dd883241 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ dependencies = [ "protobuf>=6.31.1", "google-genai>=1.20.0", "filetype>=1.2.0", - "markitdown[xls,xlsx]>=0.1.2", + "markitdown[xls,xlsx,docx]>=0.1.2", ] requires-python = ">=3.10" readme = "README.md" diff --git a/src/askui/models/anthropic/messages_api.py b/src/askui/models/anthropic/messages_api.py index 78646aca..59259dbc 100644 --- a/src/askui/models/anthropic/messages_api.py +++ b/src/askui/models/anthropic/messages_api.py @@ -42,7 +42,7 @@ from askui.models.shared.tools import ToolCollection from askui.models.types.response_schemas import ResponseSchema from askui.utils.dict_utils import IdentityDefaultDict -from askui.utils.excel_utils import ExcelSource +from askui.utils.excel_utils import OfficeDocumentSource from askui.utils.image_utils import ( ImageSource, image_to_base64, @@ -243,10 +243,8 @@ def get( response_schema: Type[ResponseSchema] | None, model_choice: str, ) -> ResponseSchema | str: - if isinstance(source, (PdfSource, ExcelSource)): - err_msg = ( - f"PDF or Excel processing is not supported for the model {model_choice}" - ) + if isinstance(source, (PdfSource, OfficeDocumentSource)): + err_msg = f"PDF or Office Document processing is not supported for the model {model_choice}" raise NotImplementedError(err_msg) try: if response_schema is not None: diff --git a/src/askui/models/askui/google_genai_api.py b/src/askui/models/askui/google_genai_api.py index f598963b..178c04bd 100644 --- a/src/askui/models/askui/google_genai_api.py +++ b/src/askui/models/askui/google_genai_api.py @@ -21,7 +21,7 @@ from askui.models.models import GetModel, ModelName from askui.models.shared.prompts import SYSTEM_PROMPT_GET from askui.models.types.response_schemas import ResponseSchema, to_response_schema -from askui.utils.excel_utils import ExcelSource +from askui.utils.excel_utils import OfficeDocumentSource from askui.utils.http_utils import parse_retry_after_header from askui.utils.image_utils import ImageSource from askui.utils.source_utils import Source @@ -186,12 +186,12 @@ def _create_genai_part_from_source(self, source: Source) -> genai_types.Part: data=data, mime_type="image/png", ) - if isinstance(source, ExcelSource): + if isinstance(source, OfficeDocumentSource): with source.reader as r: data = r.read() if len(data) > MAX_FILE_SIZE_BYTES: _err_msg = ( - "Excel file size exceeds the limit of " + "Office document file size exceeds the limit of " f"{MAX_FILE_SIZE_BYTES} bytes." ) raise ValueError(_err_msg) diff --git a/src/askui/models/askui/inference_api.py b/src/askui/models/askui/inference_api.py index 94ca0530..b6a7bcd3 100644 --- a/src/askui/models/askui/inference_api.py +++ b/src/askui/models/askui/inference_api.py @@ -26,7 +26,7 @@ from askui.models.shared.settings import MessageSettings from askui.models.shared.tools import ToolCollection from askui.models.types.response_schemas import ResponseSchema -from askui.utils.excel_utils import ExcelSource +from askui.utils.excel_utils import OfficeDocumentSource from askui.utils.image_utils import ImageSource from askui.utils.pdf_utils import PdfSource from askui.utils.source_utils import Source @@ -206,10 +206,8 @@ def get( response_schema: Type[ResponseSchema] | None, model_choice: str, ) -> ResponseSchema | str: - if isinstance(source, (PdfSource, ExcelSource)): - err_msg = ( - f"PDF or Excel processing is not supported for the model {model_choice}" - ) + if isinstance(source, (PdfSource, OfficeDocumentSource)): + err_msg = f"PDF or Office Document processing is not supported for the model {model_choice}" raise NotImplementedError(err_msg) json: dict[str, Any] = { "image": source.to_data_url(), diff --git a/src/askui/models/openrouter/model.py b/src/askui/models/openrouter/model.py index fa64563f..86690382 100644 --- a/src/askui/models/openrouter/model.py +++ b/src/askui/models/openrouter/model.py @@ -10,7 +10,7 @@ from askui.models.models import GetModel from askui.models.shared.prompts import SYSTEM_PROMPT_GET from askui.models.types.response_schemas import ResponseSchema, to_response_schema -from askui.utils.excel_utils import ExcelSource +from askui.utils.excel_utils import OfficeDocumentSource from askui.utils.pdf_utils import PdfSource from askui.utils.source_utils import Source @@ -175,10 +175,8 @@ def get( response_schema: Type[ResponseSchema] | None, model_choice: str, ) -> ResponseSchema | str: - if isinstance(source, (PdfSource, ExcelSource)): - err_msg = ( - f"PDF or Excel processing is not supported for the model {model_choice}" - ) + if isinstance(source, (PdfSource, OfficeDocumentSource)): + err_msg = f"PDF or Office Document processing is not supported for the model {model_choice}" raise NotImplementedError(err_msg) response = self._predict( image_url=source.to_data_url(), diff --git a/src/askui/models/ui_tars_ep/ui_tars_api.py b/src/askui/models/ui_tars_ep/ui_tars_api.py index bc843ef1..1f6b2df2 100644 --- a/src/askui/models/ui_tars_ep/ui_tars_api.py +++ b/src/askui/models/ui_tars_ep/ui_tars_api.py @@ -24,7 +24,7 @@ from askui.models.shared.tools import Tool from askui.models.types.response_schemas import ResponseSchema from askui.reporting import Reporter -from askui.utils.excel_utils import ExcelSource +from askui.utils.excel_utils import OfficeDocumentSource from askui.utils.image_utils import ImageSource, image_to_base64 from askui.utils.pdf_utils import PdfSource from askui.utils.source_utils import Source @@ -189,7 +189,7 @@ def get( response_schema: Type[ResponseSchema] | None, model_choice: str, ) -> ResponseSchema | str: - if isinstance(source, (PdfSource, ExcelSource)): + if isinstance(source, (PdfSource, OfficeDocumentSource)): err_msg = f"PDF and Excel processing is not supported for the model {model_choice}" raise NotImplementedError(err_msg) if response_schema is not None: diff --git a/src/askui/utils/excel_utils.py b/src/askui/utils/excel_utils.py index 30e134ab..7cb2b377 100644 --- a/src/askui/utils/excel_utils.py +++ b/src/askui/utils/excel_utils.py @@ -14,7 +14,7 @@ """ -class ExcelSource(RootModel): +class OfficeDocumentSource(RootModel): """Represents an Excel source that can be read as markdown. The class can be initialized with: @@ -38,5 +38,5 @@ def reader(self) -> BytesIO: __all__ = [ "Excel", - "ExcelSource", + "OfficeDocumentSource", ] diff --git a/src/askui/utils/source_utils.py b/src/askui/utils/source_utils.py index ebc91511..ef29a309 100644 --- a/src/askui/utils/source_utils.py +++ b/src/askui/utils/source_utils.py @@ -9,11 +9,11 @@ from filetype import guess # type: ignore[import-untyped] from PIL import Image as PILImage -from askui.utils.excel_utils import ExcelSource +from askui.utils.excel_utils import OfficeDocumentSource from askui.utils.image_utils import ImageSource from askui.utils.pdf_utils import PdfSource -Source = Union[ImageSource, PdfSource, ExcelSource] +Source = Union[ImageSource, PdfSource, OfficeDocumentSource] _DATA_URL_WITH_MIMETYPE_RE = re.compile(r"^data:([^;,]+)([^,]*)?,(.*)$", re.DOTALL) @@ -22,6 +22,8 @@ "application/pdf", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "application/vnd.ms-excel", + "application/msword", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", ] _SupportedMimeTypes = _SupportedImageMimeTypes | _SupportedApplicationMimeTypes @@ -33,6 +35,8 @@ "application/pdf", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "application/vnd.ms-excel", + "application/msword", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", ] @@ -57,10 +61,12 @@ def is_pdf(self) -> bool: return self.mime == "application/pdf" @property - def is_excel(self) -> bool: + def is_supported_office_document(self) -> bool: return self.mime in [ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "application/vnd.ms-excel", + "application/msword", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", ] @property @@ -147,8 +153,8 @@ def load_source(source: Union[str, Path, PILImage.Image]) -> Source: raise ValueError(msg) if source_analysis.is_pdf: return PdfSource(source_analysis.content) - if source_analysis.is_excel: - return ExcelSource(source_analysis.content) + if source_analysis.is_supported_office_document: + return OfficeDocumentSource(source_analysis.content) if source_analysis.is_image: return ImageSource( PILImage.open( From 023062b487c6a53b6f2465ae5969533bc67d484c Mon Sep 17 00:00:00 2001 From: danyalxahid-askui Date: Wed, 20 Aug 2025 14:42:07 +0200 Subject: [PATCH 05/12] refactor: update type hints for image and source handling - Replaced occurrences of `Img` with `InputSource` in the codebase to standardize the type used for image, file and data uri inputs across various functions and classes. - Updated the `screenshot` parameter in methods to use `InputSource | None` instead of `Img | None` for better clarity and consistency. - Removed the `Excel` and `Pdf` type definitions from their respective modules, consolidating input types under `InputSource`. - Enhanced the `__all__` exports to include the new `InputSource` type for better accessibility. --- .cursorrules | 4 ++-- src/askui/__init__.py | 5 +++-- src/askui/agent_base.py | 29 +++++++++++++---------------- src/askui/utils/excel_utils.py | 9 --------- src/askui/utils/image_utils.py | 11 ----------- src/askui/utils/pdf_utils.py | 9 --------- src/askui/utils/source_utils.py | 11 ++++++++++- 7 files changed, 28 insertions(+), 50 deletions(-) diff --git a/.cursorrules b/.cursorrules index 31bd6766..70cb37ea 100644 --- a/.cursorrules +++ b/.cursorrules @@ -40,7 +40,7 @@ def locate( self, locator: str | Locator, - screenshot: Img | None = None, + screenshot: InputSource | None = None, model: ModelComposition | str | None = None, ) -> Point: """ @@ -48,7 +48,7 @@ Args: locator (str | Locator): The identifier or description of the element to locate. - screenshot (Img | None, optional): The screenshot to use for locating the + screenshot (InputSource | None, optional): The screenshot to use for locating the element. Can be a path to an image file, a PIL Image object or a data URL. If `None`, takes a screenshot of the currently selected screen. model (ModelComposition | str | None, optional): The composition or name of diff --git a/src/askui/__init__.py b/src/askui/__init__.py index a1b59264..0022419f 100644 --- a/src/askui/__init__.py +++ b/src/askui/__init__.py @@ -37,7 +37,8 @@ from .models.types.response_schemas import ResponseSchema, ResponseSchemaBase from .retry import ConfigurableRetry, Retry from .tools import ModifierKey, PcKey -from .utils.image_utils import ImageSource, Img +from .utils.image_utils import ImageSource +from .utils.source_utils import InputSource try: from .android_agent import AndroidVisionAgent @@ -67,7 +68,7 @@ "GetModel", "ImageBlockParam", "ImageSource", - "Img", + "InputSource", "LocateModel", "Locator", "MessageParam", diff --git a/src/askui/agent_base.py b/src/askui/agent_base.py index 46d82cf4..b28311af 100644 --- a/src/askui/agent_base.py +++ b/src/askui/agent_base.py @@ -16,10 +16,8 @@ from askui.models.shared.tools import Tool from askui.tools.agent_os import AgentOs from askui.tools.android.agent_os import AndroidAgentOs -from askui.utils.excel_utils import Excel -from askui.utils.image_utils import ImageSource, Img -from askui.utils.pdf_utils import Pdf -from askui.utils.source_utils import load_image_source, load_source +from askui.utils.image_utils import ImageSource +from askui.utils.source_utils import InputSource, load_image_source, load_source from .logger import configure_logging, logger from .models import ModelComposition @@ -194,7 +192,7 @@ def get( query: Annotated[str, Field(min_length=1)], response_schema: None = None, model: str | None = None, - source: Optional[Img | Pdf | Excel] = None, + source: Optional[InputSource] = None, ) -> str: ... @overload def get( @@ -202,7 +200,7 @@ def get( query: Annotated[str, Field(min_length=1)], response_schema: Type[ResponseSchema], model: str | None = None, - source: Optional[Img | Pdf | Excel] = None, + source: Optional[InputSource] = None, ) -> ResponseSchema: ... @telemetry.record_call(exclude={"query", "source", "response_schema"}) @@ -212,7 +210,7 @@ def get( query: Annotated[str, Field(min_length=1)], response_schema: Type[ResponseSchema] | None = None, model: str | None = None, - source: Optional[Img | Pdf | Excel] = None, + source: Optional[InputSource] = None, ) -> ResponseSchema | str: """ Retrieves information from an image or PDF based on the provided `query`. @@ -221,10 +219,9 @@ def get( Args: query (str): The query describing what information to retrieve. - source (Img | Pdf | Excel | None, optional): The source to extract - information from. Can be a path to a PDF file, a path to an image file, - a path to an Excel file, a PIL Image object or a data URL. Defaults to a - screenshot of the current screen. + source (InputSource | None, optional): The source to extract information from. + Can be a path to an image file, a PIL Image object or a data URL. + Defaults to a screenshot of the current screen. response_schema (Type[ResponseSchema] | None, optional): A Pydantic model class that defines the response schema. If not provided, returns a string. @@ -359,7 +356,7 @@ class LinkedListNode(ResponseSchemaBase): def _locate( self, locator: str | Locator, - screenshot: Optional[Img] = None, + screenshot: Optional[InputSource] = None, model: ModelComposition | str | None = None, ) -> PointList: def locate_with_screenshot() -> PointList: @@ -382,7 +379,7 @@ def locate_with_screenshot() -> PointList: def locate( self, locator: str | Locator, - screenshot: Optional[Img] = None, + screenshot: Optional[InputSource] = None, model: ModelComposition | str | None = None, ) -> Point: """ @@ -391,7 +388,7 @@ def locate( Args: locator (str | Locator): The identifier or description of the element to locate. - screenshot (Img | None, optional): The screenshot to use for locating the + screenshot (InputSource | None, optional): The screenshot to use for locating the element. Can be a path to an image file, a PIL Image object or a data URL. If `None`, takes a screenshot of the currently selected display. model (ModelComposition | str | None, optional): The composition or name @@ -421,7 +418,7 @@ def locate( def locate_all( self, locator: str | Locator, - screenshot: Optional[Img] = None, + screenshot: Optional[InputSource] = None, model: ModelComposition | str | None = None, ) -> PointList: """ @@ -433,7 +430,7 @@ def locate_all( Args: locator (str | Locator): The identifier or description of the element to locate. - screenshot (Img | None, optional): The screenshot to use for locating the + screenshot (InputSource | None, optional): The screenshot to use for locating the element. Can be a path to an image file, a PIL Image object or a data URL. If `None`, takes a screenshot of the currently selected display. model (ModelComposition | str | None, optional): The composition or name diff --git a/src/askui/utils/excel_utils.py b/src/askui/utils/excel_utils.py index 7cb2b377..bc79e2d7 100644 --- a/src/askui/utils/excel_utils.py +++ b/src/askui/utils/excel_utils.py @@ -1,18 +1,10 @@ from io import BytesIO from pathlib import Path -from typing import Union from pydantic import ConfigDict, RootModel from askui.utils.markdown_utils import convert_to_markdown -Excel = Union[str, Path] -"""Type of the input Excel for `askui.VisionAgent.get()`, etc. - -Accepts: -- Relative or absolute file path (`str` or `pathlib.Path`) -""" - class OfficeDocumentSource(RootModel): """Represents an Excel source that can be read as markdown. @@ -37,6 +29,5 @@ def reader(self) -> BytesIO: __all__ = [ - "Excel", "OfficeDocumentSource", ] diff --git a/src/askui/utils/image_utils.py b/src/askui/utils/image_utils.py index 4f166579..44b8f8ee 100644 --- a/src/askui/utils/image_utils.py +++ b/src/askui/utils/image_utils.py @@ -310,16 +310,6 @@ def scale_coordinates( return result -Img = Union[str, Path, PILImage.Image] -"""Type of the input images for `askui.VisionAgent.get()`, `askui.VisionAgent.locate()`, etc. - -Accepts: -- `PIL.Image.Image` -- Relative or absolute file path (`str` or `pathlib.Path`) -- Data URL (e.g., `"data:image/png;base64,..."`) -""" - - class ImageSource(RootModel): """A class that represents an image source and provides methods to convert it to different formats. @@ -375,5 +365,4 @@ def to_bytes(self) -> bytes: "scale_coordinates", "ScalingResults", "ImageSource", - "Img", ] diff --git a/src/askui/utils/pdf_utils.py b/src/askui/utils/pdf_utils.py index 2df0246d..65a3170e 100644 --- a/src/askui/utils/pdf_utils.py +++ b/src/askui/utils/pdf_utils.py @@ -1,16 +1,8 @@ from io import BufferedReader, BytesIO from pathlib import Path -from typing import Union from pydantic import ConfigDict, RootModel -Pdf = Union[str, Path] -"""Type of the input PDFs for `askui.VisionAgent.get()`, etc. - -Accepts: -- Relative or absolute file path (`str` or `pathlib.Path`) -""" - class PdfSource(RootModel): """A class that represents a PDF source. @@ -38,5 +30,4 @@ def reader(self) -> BufferedReader | BytesIO: __all__ = [ "PdfSource", - "Pdf", ] diff --git a/src/askui/utils/source_utils.py b/src/askui/utils/source_utils.py index ef29a309..06a3afbb 100644 --- a/src/askui/utils/source_utils.py +++ b/src/askui/utils/source_utils.py @@ -13,6 +13,15 @@ from askui.utils.image_utils import ImageSource from askui.utils.pdf_utils import PdfSource +InputSource = Union[str, Path, PILImage.Image] +"""Type of the input images for `askui.VisionAgent.get()`, `askui.VisionAgent.locate()`, etc. + +Accepts: +- `PIL.Image.Image` +- Relative or absolute file path (`str` or `pathlib.Path`) +- Data URL (e.g., `"data:image/png;base64,..."`) +""" + Source = Union[ImageSource, PdfSource, OfficeDocumentSource] _DATA_URL_WITH_MIMETYPE_RE = re.compile(r"^data:([^;,]+)([^,]*)?,(.*)$", re.DOTALL) @@ -189,4 +198,4 @@ def load_image_source(source: Union[str, Path, PILImage.Image]) -> ImageSource: return result -__all__ = ["Source", "load_source", "load_image_source"] +__all__ = ["Source", "load_source", "load_image_source", "InputSource"] From cc816060a9fff958dd291c1ba2a232376e681f35 Mon Sep 17 00:00:00 2001 From: danyalxahid-askui Date: Wed, 20 Aug 2025 14:53:02 +0200 Subject: [PATCH 06/12] refactor: enhance documentation and error messages for source handling - Updated docstrings for `source` and `screenshot` parameters in `AgentBase` class to clarify the types of input sources accepted, including image, PDF, and office document files. - Improved error messages in `AnthropicMessagesApi`, `AskUiInferenceApi`, and `OpenRouterModel` classes to provide clearer context regarding unsupported PDF and office document processing. - Reformatted docstring for `InputSource` type to enhance readability. --- src/askui/agent_base.py | 21 ++++++++++++--------- src/askui/models/anthropic/messages_api.py | 5 ++++- src/askui/models/askui/inference_api.py | 5 ++++- src/askui/models/openrouter/model.py | 5 ++++- src/askui/utils/source_utils.py | 4 +++- 5 files changed, 27 insertions(+), 13 deletions(-) diff --git a/src/askui/agent_base.py b/src/askui/agent_base.py index b28311af..cb2b15ca 100644 --- a/src/askui/agent_base.py +++ b/src/askui/agent_base.py @@ -219,9 +219,10 @@ def get( Args: query (str): The query describing what information to retrieve. - source (InputSource | None, optional): The source to extract information from. - Can be a path to an image file, a PIL Image object or a data URL. - Defaults to a screenshot of the current screen. + source (InputSource | None, optional): The source to extract information + from. Can be a path to an image, PDF, or office document file, + a PIL Image object or a data URL. Defaults to a screenshot of the + current screen. response_schema (Type[ResponseSchema] | None, optional): A Pydantic model class that defines the response schema. If not provided, returns a string. @@ -388,9 +389,10 @@ def locate( Args: locator (str | Locator): The identifier or description of the element to locate. - screenshot (InputSource | None, optional): The screenshot to use for locating the - element. Can be a path to an image file, a PIL Image object or a data - URL. If `None`, takes a screenshot of the currently selected display. + screenshot (InputSource | None, optional): The screenshot to use for + locating the element. Can be a path to an image file, a PIL Image object + or a data URL. If `None`, takes a screenshot of the currently + selected display. model (ModelComposition | str | None, optional): The composition or name of the model(s) to be used for locating the element using the `locator`. @@ -430,9 +432,10 @@ def locate_all( Args: locator (str | Locator): The identifier or description of the element to locate. - screenshot (InputSource | None, optional): The screenshot to use for locating the - element. Can be a path to an image file, a PIL Image object or a data - URL. If `None`, takes a screenshot of the currently selected display. + screenshot (InputSource | None, optional): The screenshot to use for + locating the element. Can be a path to an image file, a PIL Image object + or a data URL. If `None`, takes a screenshot of the currently + selected display. model (ModelComposition | str | None, optional): The composition or name of the model(s) to be used for locating the element using the `locator`. diff --git a/src/askui/models/anthropic/messages_api.py b/src/askui/models/anthropic/messages_api.py index 59259dbc..7f1ce775 100644 --- a/src/askui/models/anthropic/messages_api.py +++ b/src/askui/models/anthropic/messages_api.py @@ -244,7 +244,10 @@ def get( model_choice: str, ) -> ResponseSchema | str: if isinstance(source, (PdfSource, OfficeDocumentSource)): - err_msg = f"PDF or Office Document processing is not supported for the model {model_choice}" + err_msg = ( + f"PDF or Office Document processing is not supported for the model: " + f"{model_choice}" + ) raise NotImplementedError(err_msg) try: if response_schema is not None: diff --git a/src/askui/models/askui/inference_api.py b/src/askui/models/askui/inference_api.py index b6a7bcd3..cd3b1cc7 100644 --- a/src/askui/models/askui/inference_api.py +++ b/src/askui/models/askui/inference_api.py @@ -207,7 +207,10 @@ def get( model_choice: str, ) -> ResponseSchema | str: if isinstance(source, (PdfSource, OfficeDocumentSource)): - err_msg = f"PDF or Office Document processing is not supported for the model {model_choice}" + err_msg = ( + f"PDF or Office Document processing is not supported for the model: " + f"{model_choice}" + ) raise NotImplementedError(err_msg) json: dict[str, Any] = { "image": source.to_data_url(), diff --git a/src/askui/models/openrouter/model.py b/src/askui/models/openrouter/model.py index 86690382..60db2099 100644 --- a/src/askui/models/openrouter/model.py +++ b/src/askui/models/openrouter/model.py @@ -176,7 +176,10 @@ def get( model_choice: str, ) -> ResponseSchema | str: if isinstance(source, (PdfSource, OfficeDocumentSource)): - err_msg = f"PDF or Office Document processing is not supported for the model {model_choice}" + err_msg = ( + f"PDF or Office Document processing is not supported for the model: " + f"{model_choice}" + ) raise NotImplementedError(err_msg) response = self._predict( image_url=source.to_data_url(), diff --git a/src/askui/utils/source_utils.py b/src/askui/utils/source_utils.py index 06a3afbb..97733ba0 100644 --- a/src/askui/utils/source_utils.py +++ b/src/askui/utils/source_utils.py @@ -14,7 +14,9 @@ from askui.utils.pdf_utils import PdfSource InputSource = Union[str, Path, PILImage.Image] -"""Type of the input images for `askui.VisionAgent.get()`, `askui.VisionAgent.locate()`, etc. +""" +Type of the input images for `askui.VisionAgent.get()`, `askui.VisionAgent.locate()`, +etc. Accepts: - `PIL.Image.Image` From cfe4c65131392140b65895cc1a375ce49e37b3fc Mon Sep 17 00:00:00 2001 From: danyalxahid-askui Date: Wed, 20 Aug 2025 14:59:42 +0200 Subject: [PATCH 07/12] refactor: remove redundant file size check in `AskUiGoogleGenAiApi` - Eliminated the file size validation for `OfficeDocumentSource` in the `read()` method, as it was deemed unnecessary for the current implementation. --- src/askui/models/askui/google_genai_api.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/askui/models/askui/google_genai_api.py b/src/askui/models/askui/google_genai_api.py index 178c04bd..726c0ada 100644 --- a/src/askui/models/askui/google_genai_api.py +++ b/src/askui/models/askui/google_genai_api.py @@ -189,12 +189,6 @@ def _create_genai_part_from_source(self, source: Source) -> genai_types.Part: if isinstance(source, OfficeDocumentSource): with source.reader as r: data = r.read() - if len(data) > MAX_FILE_SIZE_BYTES: - _err_msg = ( - "Office document file size exceeds the limit of " - f"{MAX_FILE_SIZE_BYTES} bytes." - ) - raise ValueError(_err_msg) return genai_types.Part.from_text(text=data.decode()) with source.reader as r: data = r.read() From f006857798e9d60cbce4d8b738da443fa6b12170 Mon Sep 17 00:00:00 2001 From: danyalxahid-askui Date: Wed, 20 Aug 2025 15:07:13 +0200 Subject: [PATCH 08/12] refactor(tests): update test case for chart data response in `test_get.py` - Modified the test case `test_get_with_xlsx_with_default_model_with_chart_data` to change the query from "What does the chart show?" to "What is the salary of John?". - Updated the assertion to check for "10000" in the response instead of "count of names" to reflect the new query context. --- tests/e2e/agent/test_get.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/e2e/agent/test_get.py b/tests/e2e/agent/test_get.py index ed13ab14..50ab0798 100644 --- a/tests/e2e/agent/test_get.py +++ b/tests/e2e/agent/test_get.py @@ -159,11 +159,11 @@ def test_get_with_xlsx_with_default_model_with_chart_data( vision_agent: VisionAgent, path_fixtures_dummy_excel: pathlib.Path ) -> None: response = vision_agent.get( - "What does the chart show?", + "What is the salary of John?", source=path_fixtures_dummy_excel, ) assert isinstance(response, str) - assert "count of names" in response.lower() + assert "10000" in response.lower() def test_get_with_model_composition_should_use_default_model( From bb37c853adbf12f8f65fd27b1ff3342e5581dc7d Mon Sep 17 00:00:00 2001 From: danyalxahid-askui Date: Wed, 20 Aug 2025 15:14:17 +0200 Subject: [PATCH 09/12] feat(tests): add fixtures and test for document processing - Introduced new pytest fixtures `path_fixtures_docs` and `path_fixtures_dummy_doc` to provide paths for the docs directory and a dummy document, respectively. - Added a test case `test_get_with_docs_with_default_model` to verify the response from the `VisionAgent` when querying with a dummy document. - Included a dummy document `dummy.docx` for testing purposes. --- tests/conftest.py | 12 ++++++++++++ tests/e2e/agent/test_get.py | 11 +++++++++++ tests/fixtures/docs/dummy.docx | Bin 0 -> 6500 bytes 3 files changed, 23 insertions(+) create mode 100644 tests/fixtures/docs/dummy.docx diff --git a/tests/conftest.py b/tests/conftest.py index 7483002d..6e386721 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -52,6 +52,18 @@ def path_fixtures_dummy_excel(path_fixtures_excel: pathlib.Path) -> pathlib.Path return path_fixtures_excel / "dummy.xlsx" +@pytest.fixture +def path_fixtures_docs(path_fixtures: pathlib.Path) -> pathlib.Path: + """Fixture providing the path to the docs directory.""" + return path_fixtures / "docs" + + +@pytest.fixture +def path_fixtures_dummy_doc(path_fixtures_docs: pathlib.Path) -> pathlib.Path: + """Fixture providing the path to the dummy doc.""" + return path_fixtures_docs / "dummy.docx" + + @pytest.fixture def github_login_screenshot(path_fixtures_screenshots: pathlib.Path) -> Image.Image: """Fixture providing the GitHub login screenshot.""" diff --git a/tests/e2e/agent/test_get.py b/tests/e2e/agent/test_get.py index 50ab0798..39f56241 100644 --- a/tests/e2e/agent/test_get.py +++ b/tests/e2e/agent/test_get.py @@ -166,6 +166,17 @@ def test_get_with_xlsx_with_default_model_with_chart_data( assert "10000" in response.lower() +def test_get_with_docs_with_default_model( + vision_agent: VisionAgent, path_fixtures_dummy_doc: pathlib.Path +) -> None: + response = vision_agent.get( + "At what time in 24h format does the person sleeps?", + source=path_fixtures_dummy_doc, + ) + assert isinstance(response, str) + assert "22:00" in response.lower() + + def test_get_with_model_composition_should_use_default_model( agent_toolbox_mock: AgentToolbox, askui_facade: ModelFacade, diff --git a/tests/fixtures/docs/dummy.docx b/tests/fixtures/docs/dummy.docx new file mode 100644 index 0000000000000000000000000000000000000000..f5132090de76af6dd7a0e0ec244070c35a386f58 GIT binary patch literal 6500 zcmaJ_1yqz<*B)ZXp@wdTloSwA0VJeLnxR`dq`Og4q(fOq|G#(7TC-+m%|7Qn`|SPfXFqDnD5yjL3=9l_G!Kyh;1>hme0H^UFu7~vYz;Ma zu&^=ba)8S!h5szzu zBd=|{V;1fi(Zo`13kpATQtHl51l1v4VZO~>-8ym>WI)-FycZ>R6iZdlFwoWEmA)xe zBJ|2T&~KDWosX&hR3L<2-m$bb{PmhJ>U}igv;7dg96)UlwMI6FTLp>6*>k>-71jc7 zW+vd*94Sy-1}t^vOd~pRNKWmkm?hAq&ojOl#7Q)D70V;VC5r z=@i68Y4YNvMp@uhIi9!nVJgfvN{=sAgj8E+!&sWChnhl{o`iJuGc!FF5u-PWejv$eW@p#S8GA3&gh(n5TZ@@%m@N!zZuYVQZ)X`TB7GM{ z#Rf2O0jM$ntOfhLzH{fUP{4dok^q(ZNT3Y>&t^-h-LbQf)lJ{Bv~@@ADvt1}vUNyu z=QBDKF&q;xXA6>=GCIafyAkqiBmf{84FFL4$15iI-xWKWIyv23u;b4gj-PW#xw+x= zjJ?IN<_<_r3U^yQ46!5Yxb_XnMA5HXIGj=C$4MN(AMY(LLMQ&}qYwM!0N35J^i<9W z8A>);+~{C34!8tZQ+<8$wC8k4^H}P^L zlZ_^ocka+%KH4AETz28I1%FJ%n3H?W_iPteBQcbX5az`-L|z0Cy4r$7qy{;LCxdZb zOxMXcP691+x|Nh=UfX{)FsFhmibDKpc-&O#+f4`U2yqL@xLX8EC8W2}NgE&Ohf&a) z3Zk%OG545q*l30#C6vFzH4>4r^oqxE;!^_HITrP zl)H&UbLZquSXuxU(J~1SIF&N`zUn$cfPtUX_3IfRM-Fv^|i&2>-Cb;Du~;eUonJ z7&2njni9wWfIP;(rQe?jV`gjP^!ORn%JgUc#VOm&{edtTpXOr2KPUhPB**CHou*Cz zgwfF$M?nzNuG+h%TT8Q&bnNVkRF?~&zVwcri4^TV$&7@_Rg-EJMS4)xlVn~N-|Hq0 z8pUN4GTeTOH!<1!s}`rGb0$IjZ7k|GUI@L1h4@e83e4k17ZWBi2?K*5)2H(jV(v{IY3!{sk{S_Y zv3(FEbHEMI^2BAhZ_gKbk2Q=MyX zP^b1^Q3vl&)NyjRGX04;^9egPv)n|l%8XC_4&SR{K%VEn`x@M!TG(K*!?n0b27NOf zzaug8ab%bjf~lqOW)jy2pY}qu$H&_X%2oPw8CKpc1(q|P-2YU8mPk*6-;k>JK7DV5 zc~XKc14-ufa#D5IctgVvr-~${>hN+Zm@iu%PqJR7hL`0WI7kr9eOM6D;MgVuQo7$Y z9wAu__Iaf{DEUSx1U@@kx^ku+@$4Qufm1TWFf;%)#wzpmKrAN->e7$AeSE`ZGcH#U z6Aywd9EMHxMTnbCy4_SV>|)%>TvuYUj1x*N-&KdR2j;{tA4?&S*UP{mL?Q(}W34{s z+TsY2&I|NIQ}=L6YWEq0GV|c;Lscw4NSJOrQj3ss%I=Uf=< zn{+~Bg`gvV+}qN}AMLk6C}>lVaQh0XWSo9zUa4wRfcwkS(UI=)5F_ey%cv86uMXo< zMpc_cXV9w|+#$*3MRZONSEOoEA9d>!FI#Q0qn`dKD%c>Nhp)-d@UswGr~Xo#axTQ0 zQ>v(YfB37Q_V0oq!=7UEG~6`3w3Vio1Jj5-@PuP>NYV_O;m+qPgn_UCh4W)8^nqd=Nkw<@s%G}78U>m| z_<9Zo1sj?8<}=*z zeiC9(&+#>O0FE22veD|8qyeF%XpBXmPoPzvTM|&l^B>h@J$i@ntmqmyUdz9Riosu$ zQNwa8i^3`-ZBAMu5GM9LXJ5!H1`{Q@mQ0fhJwxCmF2XQOJF6kcNx!%F5&g$eLPN;n zt{`@u#E7@Aq^H&alB8z=@?1a@DgnZ;5XmF~pO>ao@f+bG&buxursn>0KO%Jnd|jQK z!(!h_N4)+uexRhV{-N(X`PHD!es3>K>ph%rHp3`q@_v2peqVU#7-KmpQ9ig_f0%YM-qr5T@b{R0;4^e z-*~I$znf1XfPE3+1J5T00y@=9zIOJ4ACNcO(*c%rKIk{|l2^z~EP~&jznwUKue2)D zp5F1Xvr;`Ntr97)q6q(3!Mz1dLn8mauD=ZU6Tasa?h7b!JB1#hu~|c5YT-6oM#=+~ z`7{hGc&_UM<%Fx*UeYIxliW;AG>Dgbadx$Gb+jYjEvz`xgYy^M62FV>>ar%Wwm|w_ z?y1bs2u8j$nSdw>kRwdW$RX?S{ZBDjir~V2BgQgce&LVzTT{N&4azKFi!PL<32AsM zz2KD(l8D2!Nlj}jw(g2s{w_U1sZ&KvYRJM+@zp~i#;Gg?42M76E-`X>KCCrS!tjG@ zdhQ{yCg3nA}RlOQyy%(S&CUSb-ytQuj?wKTMH+^uXeP^6FC`Ri^ zu@HQN39<>pBOCvu0}tazKd9W~aig;H2#pc1O}L-l^aFScCQ*Q*t%IAuaNGdOKaUvL zH~;t}YCj_tJ|cqK&J99aB(w|e6;U1V!?zpUN*u|-N z{SJ)_^ZnO-Zao#U7n1ZOA zd-djFB%x6VeONxvy}FD1e2?m)cH}kAOSd-b$nq{Ev1 zF`HhM^_Mo@cv5#MsGGnteLEI{zTDd%&(F4V+oPlgDxcU*5^9fXWg9N{fdLj(`ED|A z&=b@GWtsp+h8W(B$=5W|^&L_4U6pZ3lDJ5$=8Rf73PcimOaj$qa~rL(oaw2Yx;w{2 zps5VwZ}2%`-NZunElV2xpiOdqvsm?OW!irfK0?o~?-hvGvEHLW`RJ9!>jfHu>x*f&!M z2g{MBAX<2z2}Y4sT8maZiGy6Fd9*(OnrP&R7fCjXJA%d#TLm2R*einE*rd;?YXc8j z6kS}oTAJ;V>s3@a8X*Pw^+R;4f^Hhs(Ptrb zp=-UVIBn!T(?~;k3GKUk0%PNvr*PhyBIehhqV%rgjnfu_yR4!K;(~uLzI+BL4Zubn z?pCln#?)G}ffM4dn1GS>%5fQ$>RuKFRmZxzJjgY6tXY3}H_oryGVJ2qLHfN_yj7(h zowHZhenZ6~C?2JY8ywlAZR~dYA&yAUyrMm^nAj043;+ z<^+@onobtaf05cvd4TX_W*YA4s|K>ng?;Q?{tBXgSE5zBtKZw9I7iWDf%!@u`G?7@ z1vmC`&04{y)|eOe!^>KZ+Cu!UNo~XgAyJlu1KfH2G^$3qo%b%cZ0;R*oRb;DnDZvG z#vHDsIk(S8qsQQGmVNzJ`l2h(Wjk#)X)_>{Vbmax`$yY{1uq{Or9VVnb*WNlvRopm z=DF@i1$*3~W?>~MQ_UW;km5V6#J~dpN?bIDL z#4S@R)~H?(^{qj&J!_Hg7SRWN@ZOQAh~=;ihyBUH^3(a5(zdDHgVGvO} zf*A!wE{IDGH@z5YI(27!v>~z=%4t@~^Vm`&Ld(CcbM}>1t~hB~Ho6wRbbecxU)(H1 z!TGjX!!%2dR5FT|SUCBez6 z=@tIrE6ZQ(7W8$nG#n-Y*a;-}w3@bMM2&0AFT+}_^GA&3=`!;*7wg#H6zeMO!7a+y&S9*LHJ0l-`|H8%_(_n46HB&OD;Bd zya2ovfO#=I#nu&WQW+fxlzJI%1dYEmg+HYs%F+>?wj z6<&XL|5Z)-`}WYAhyCgPH{tAe|6ixQTg~M!)4R#8KeqoeT$>A+~{>!j&{_Fk^T>AHwekEVGWb!YQyW#A=P|M#Pe=Vb1ZuXaP-$di*@qcG( mzc2A?a@^M2UzUEe;{Q{9YRbTy&;kIMH!rE1u>wST`|UqIq!y Date: Wed, 20 Aug 2025 15:27:45 +0200 Subject: [PATCH 10/12] feat(tests): add response schema validation for xlsx file - Updated the test case `test_get_with_xlsx_with_gemini_model` to change the query to "What is the salary of Doe?" for clarity. - Introduced a new test case `test_get_with_xlsx_with_gemini_model_with_response_schema` to validate the response structure using the `SalaryResponse` schema. - Added `Salary` and `SalaryResponse` classes to define the expected response format. - Included assertions to verify the correctness of the salary data returned for multiple individuals. --- tests/e2e/agent/test_get.py | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/tests/e2e/agent/test_get.py b/tests/e2e/agent/test_get.py index 39f56241..0ce389b2 100644 --- a/tests/e2e/agent/test_get.py +++ b/tests/e2e/agent/test_get.py @@ -147,7 +147,7 @@ def test_get_with_xlsx_with_gemini_model( vision_agent: VisionAgent, model: str, path_fixtures_dummy_excel: pathlib.Path ) -> None: response = vision_agent.get( - "What is in the salary of Doe?", + "What is the salary of Doe?", source=path_fixtures_dummy_excel, model=model, ) @@ -155,6 +155,40 @@ def test_get_with_xlsx_with_gemini_model( assert "20000" in response.lower() +class Salary(ResponseSchemaBase): + salary: int + name: str + + +class SalaryResponse(ResponseSchemaBase): + salaries: list[Salary] + + +@pytest.mark.parametrize( + "model", + [ + ModelName.ASKUI__GEMINI__2_5__FLASH, + ModelName.ASKUI__GEMINI__2_5__PRO, + ], +) +def test_get_with_xlsx_with_gemini_model_with_response_schema( + vision_agent: VisionAgent, model: str, path_fixtures_dummy_excel: pathlib.Path +) -> None: + response = vision_agent.get( + "What is the salary of Everyone?", + source=path_fixtures_dummy_excel, + model=model, + response_schema=SalaryResponse, + ) + assert isinstance(response, SalaryResponse) + # sort salaries by name for easier assertion + response.salaries.sort(key=lambda x: x.name) + assert response.salaries[0].name == "Doe" + assert response.salaries[0].salary == 20000 + assert response.salaries[1].name == "John" + assert response.salaries[1].salary == 10000 + + def test_get_with_xlsx_with_default_model_with_chart_data( vision_agent: VisionAgent, path_fixtures_dummy_excel: pathlib.Path ) -> None: From a7ed0889e19a3da5fce751266ccb16133ac5d34a Mon Sep 17 00:00:00 2001 From: danyalxahid-askui Date: Wed, 20 Aug 2025 16:03:13 +0200 Subject: [PATCH 11/12] docs(README): add section on document processing with `markitdown` - Introduced a new section detailing the use of the `markitdown` library for extracting data from documents like Docs and Excel files. - Highlighted key features of `markitdown`, including LLM-friendly output, inclusion of sheet names, enhanced image descriptions, no local inference requirements, optional dependencies, and Microsoft maintenance. --- README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/README.md b/README.md index c8c19368..0ec0175d 100644 --- a/README.md +++ b/README.md @@ -756,6 +756,17 @@ with VisionAgent() as agent: as we try different models under the hood with your schema to see which one works best. - PDF processing is only supported for Gemini models hosted on AskUI and for PDFs up to 20MB. +### 📄 Document Processing with `markitdown` + +When extracting data from documents like Docs or Excel files, we use the `markitdown` library to convert them into markdown format. We chose `markitdown` over other tools for several reasons: + +- **LLM-Friendly Output:** The markdown output is optimized for token usage, which is efficient for subsequent processing with large language models. +- **Includes Sheet Names:** When converting Excel files, the name of the sheet is included in the generated markdown, providing better context. +- **Enhanced Image Descriptions:** It can use an OpenAI client (`llm_client` and `llm_model`) to generate more descriptive captions for images within documents. +- **No Local Inference:** No model inference is performed on the client machine, which means no need to install and maintain heavy packages like `torch`. +- **Optional Dependencies:** It allows for optional imports, meaning you only need to install the dependencies for the file types you are working with. This reduces the number of packages to manage. +- **Microsoft Maintained:** Being maintained by Microsoft, it offers robust support for converting Office documents. + ## What is AskUI Vision Agent? **AskUI Vision Agent** is a versatile AI powered framework that enables you to automate computer tasks in Python. From 104e62b922efb4977cd4e3e0e6c4a44635aa4d1b Mon Sep 17 00:00:00 2001 From: danyalxahid-askui Date: Wed, 20 Aug 2025 16:17:09 +0200 Subject: [PATCH 12/12] docs(source_utils): improve docstring for `InputSource` type - Clarified the description of the `InputSource` type to specify that it includes both images and files for `askui.VisionAgent.get()` and images for `askui.VisionAgent.locate()`. --- src/askui/utils/source_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/askui/utils/source_utils.py b/src/askui/utils/source_utils.py index 97733ba0..f937acea 100644 --- a/src/askui/utils/source_utils.py +++ b/src/askui/utils/source_utils.py @@ -15,8 +15,8 @@ InputSource = Union[str, Path, PILImage.Image] """ -Type of the input images for `askui.VisionAgent.get()`, `askui.VisionAgent.locate()`, -etc. +Type of the input images and files for `askui.VisionAgent.get()` and images for +`askui.VisionAgent.locate()`, etc. Accepts: - `PIL.Image.Image`