diff --git a/.cursorrules b/.cursorrules index 31bd6766..70cb37ea 100644 --- a/.cursorrules +++ b/.cursorrules @@ -40,7 +40,7 @@ def locate( self, locator: str | Locator, - screenshot: Img | None = None, + screenshot: InputSource | None = None, model: ModelComposition | str | None = None, ) -> Point: """ @@ -48,7 +48,7 @@ Args: locator (str | Locator): The identifier or description of the element to locate. - screenshot (Img | None, optional): The screenshot to use for locating the + screenshot (InputSource | None, optional): The screenshot to use for locating the element. Can be a path to an image file, a PIL Image object or a data URL. If `None`, takes a screenshot of the currently selected screen. model (ModelComposition | str | None, optional): The composition or name of diff --git a/README.md b/README.md index c8c19368..0ec0175d 100644 --- a/README.md +++ b/README.md @@ -756,6 +756,17 @@ with VisionAgent() as agent: as we try different models under the hood with your schema to see which one works best. - PDF processing is only supported for Gemini models hosted on AskUI and for PDFs up to 20MB. +### 📄 Document Processing with `markitdown` + +When extracting data from documents such as Word (`.docx`) or Excel (`.xls`, `.xlsx`) files, we use the `markitdown` library to convert them into Markdown. We chose `markitdown` over other tools for several reasons: + +- **LLM-Friendly Output:** The markdown output is optimized for token usage, which is efficient for subsequent processing with large language models. +- **Includes Sheet Names:** When converting Excel files, the name of the sheet is included in the generated markdown, providing better context. +- **Enhanced Image Descriptions:** It can use an OpenAI client (`llm_client` and `llm_model`) to generate more descriptive captions for images within documents. +- **No Local Inference:** No model inference is performed on the client machine, so there is no need to install and maintain heavy packages like `torch`. 
+- **Optional Dependencies:** It allows for optional imports, meaning you only need to install the dependencies for the file types you are working with. This reduces the number of packages to manage. +- **Microsoft Maintained:** Being maintained by Microsoft, it offers robust support for converting Office documents. + ## What is AskUI Vision Agent? **AskUI Vision Agent** is a versatile AI powered framework that enables you to automate computer tasks in Python. diff --git a/pdm.lock b/pdm.lock index e4020b63..1b361717 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "all", "android", "chat", "dev", "mcp", "pynput", "test", "web"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:d6c650ba146a47cd38d87783aa405fd730daf80a573ce3d618dd2fa7b190db43" +content_hash = "sha256:beb091cad08638d0d09be80ec10830745be0024dbe05a33bbd111a865950bba4" [[metadata.targets]] requires_python = ">=3.10" @@ -108,6 +108,21 @@ files = [ {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, ] +[[package]] +name = "beautifulsoup4" +version = "4.13.4" +requires_python = ">=3.7.0" +summary = "Screen-scraping library" +groups = ["default"] +dependencies = [ + "soupsieve>1.2", + "typing-extensions>=4.0.0", +] +files = [ + {file = "beautifulsoup4-4.13.4-py3-none-any.whl", hash = "sha256:9bbbb14bfde9d79f38b8cd5f8c7c85f4b8f2523190ebed90e950a8dea4cb1c4b"}, + {file = "beautifulsoup4-4.13.4.tar.gz", hash = "sha256:dbb3c4e1ceae6aefebdaf2423247260cd062430a410e38c66f2baa50a8437195"}, +] + [[package]] name = "black" version = "25.1.0" @@ -294,7 +309,7 @@ name = "click" version = "8.1.8" requires_python = ">=3.7" summary = "Composable command line interface toolkit" -groups = ["all", "chat", "dev", "mcp"] +groups = ["default", "all", "chat", "dev", "mcp"] dependencies = [ "colorama; platform_system == \"Windows\"", "importlib-metadata; python_version < \"3.8\"", @@ -304,6 +319,17 @@ files = [ 
{file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, ] +[[package]] +name = "cobble" +version = "0.1.4" +requires_python = ">=3.5" +summary = "Create data objects" +groups = ["default"] +files = [ + {file = "cobble-0.1.4-py3-none-any.whl", hash = "sha256:36c91b1655e599fd428e2b95fdd5f0da1ca2e9f1abb0bc871dec21a0e78a2b44"}, + {file = "cobble-0.1.4.tar.gz", hash = "sha256:de38be1539992c8a06e569630717c485a5f91be2192c461ea2b220607dfa78aa"}, +] + [[package]] name = "colorama" version = "0.4.6" @@ -316,6 +342,21 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "coloredlogs" +version = "15.0.1" +requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +summary = "Colored terminal output for Python's logging module" +groups = ["default"] +marker = "python_version > \"3.9\"" +dependencies = [ + "humanfriendly>=9.1", +] +files = [ + {file = "coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934"}, + {file = "coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0"}, +] + [[package]] name = "coverage" version = "7.8.0" @@ -536,6 +577,17 @@ files = [ {file = "datamodel_code_generator-0.31.2.tar.gz", hash = "sha256:47887b8aa6fd69865e07e2893c1e76e34dae753b9a97f1020357af8337bc4cdb"}, ] +[[package]] +name = "defusedxml" +version = "0.7.1" +requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +summary = "XML bomb protection for Python stdlib modules" +groups = ["default"] +files = [ + {file = "defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61"}, + {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, +] + [[package]] name = "distro" version = 
"1.9.0" @@ -596,6 +648,17 @@ files = [ {file = "email_validator-2.2.0.tar.gz", hash = "sha256:cb690f344c617a714f22e66ae771445a1ceb46821152df8e165c5f9a364582b7"}, ] +[[package]] +name = "et-xmlfile" +version = "2.0.0" +requires_python = ">=3.8" +summary = "An implementation of lxml.xmlfile for the standard library" +groups = ["default"] +files = [ + {file = "et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa"}, + {file = "et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54"}, +] + [[package]] name = "evdev" version = "1.9.2" @@ -689,6 +752,17 @@ files = [ {file = "filetype-1.2.0.tar.gz", hash = "sha256:66b56cd6474bf41d8c54660347d37afcc3f7d1970648de365c102ef77548aadb"}, ] +[[package]] +name = "flatbuffers" +version = "25.2.10" +summary = "The FlatBuffers serialization format for Python" +groups = ["default"] +marker = "python_version > \"3.9\"" +files = [ + {file = "flatbuffers-25.2.10-py2.py3-none-any.whl", hash = "sha256:ebba5f4d5ea615af3f7fd70fc310636fbb2bbd1f566ac0a23d98dd412de50051"}, + {file = "flatbuffers-25.2.10.tar.gz", hash = "sha256:97e451377a41262f8d9bd4295cc836133415cc03d8cb966410a4af92eb00d26e"}, +] + [[package]] name = "fsspec" version = "2025.3.2" @@ -1014,6 +1088,23 @@ files = [ {file = "huggingface_hub-0.30.1.tar.gz", hash = "sha256:f379e8b8d0791295602538856638460ae3cf679c7f304201eb80fb98c771950e"}, ] +[[package]] +name = "humanfriendly" +version = "10.0" +requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +summary = "Human friendly output for text interfaces using Python" +groups = ["default"] +marker = "python_version > \"3.9\"" +dependencies = [ + "monotonic; python_version == \"2.7\"", + "pyreadline3; sys_platform == \"win32\" and python_version >= \"3.8\"", + "pyreadline; sys_platform == \"win32\" and python_version < \"3.8\"", +] +files = [ + {file = "humanfriendly-10.0-py2.py3-none-any.whl", hash = 
"sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477"}, + {file = "humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc"}, +] + [[package]] name = "idna" version = "3.10" @@ -1180,6 +1271,119 @@ files = [ {file = "jsonschema_specifications-2025.4.1.tar.gz", hash = "sha256:630159c9f4dbea161a6a2205c3011cc4f18ff381b189fff48bb39b9bf26ae608"}, ] +[[package]] +name = "lxml" +version = "6.0.0" +requires_python = ">=3.8" +summary = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." +groups = ["default"] +files = [ + {file = "lxml-6.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:35bc626eec405f745199200ccb5c6b36f202675d204aa29bb52e27ba2b71dea8"}, + {file = "lxml-6.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:246b40f8a4aec341cbbf52617cad8ab7c888d944bfe12a6abd2b1f6cfb6f6082"}, + {file = "lxml-6.0.0-cp310-cp310-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:2793a627e95d119e9f1e19720730472f5543a6d84c50ea33313ce328d870f2dd"}, + {file = "lxml-6.0.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:46b9ed911f36bfeb6338e0b482e7fe7c27d362c52fde29f221fddbc9ee2227e7"}, + {file = "lxml-6.0.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2b4790b558bee331a933e08883c423f65bbcd07e278f91b2272489e31ab1e2b4"}, + {file = "lxml-6.0.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e2030956cf4886b10be9a0285c6802e078ec2391e1dd7ff3eb509c2c95a69b76"}, + {file = "lxml-6.0.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4d23854ecf381ab1facc8f353dcd9adeddef3652268ee75297c1164c987c11dc"}, + {file = "lxml-6.0.0-cp310-cp310-manylinux_2_31_armv7l.whl", hash = "sha256:43fe5af2d590bf4691531b1d9a2495d7aab2090547eaacd224a3afec95706d76"}, + {file = 
"lxml-6.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:74e748012f8c19b47f7d6321ac929a9a94ee92ef12bc4298c47e8b7219b26541"}, + {file = "lxml-6.0.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:43cfbb7db02b30ad3926e8fceaef260ba2fb7df787e38fa2df890c1ca7966c3b"}, + {file = "lxml-6.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:34190a1ec4f1e84af256495436b2d196529c3f2094f0af80202947567fdbf2e7"}, + {file = "lxml-6.0.0-cp310-cp310-win32.whl", hash = "sha256:5967fe415b1920a3877a4195e9a2b779249630ee49ece22021c690320ff07452"}, + {file = "lxml-6.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:f3389924581d9a770c6caa4df4e74b606180869043b9073e2cec324bad6e306e"}, + {file = "lxml-6.0.0-cp310-cp310-win_arm64.whl", hash = "sha256:522fe7abb41309e9543b0d9b8b434f2b630c5fdaf6482bee642b34c8c70079c8"}, + {file = "lxml-6.0.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4ee56288d0df919e4aac43b539dd0e34bb55d6a12a6562038e8d6f3ed07f9e36"}, + {file = "lxml-6.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b8dd6dd0e9c1992613ccda2bcb74fc9d49159dbe0f0ca4753f37527749885c25"}, + {file = "lxml-6.0.0-cp311-cp311-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:d7ae472f74afcc47320238b5dbfd363aba111a525943c8a34a1b657c6be934c3"}, + {file = "lxml-6.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5592401cdf3dc682194727c1ddaa8aa0f3ddc57ca64fd03226a430b955eab6f6"}, + {file = "lxml-6.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:58ffd35bd5425c3c3b9692d078bf7ab851441434531a7e517c4984d5634cd65b"}, + {file = "lxml-6.0.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f720a14aa102a38907c6d5030e3d66b3b680c3e6f6bc95473931ea3c00c59967"}, + {file = "lxml-6.0.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c2a5e8d207311a0170aca0eb6b160af91adc29ec121832e4ac151a57743a1e1e"}, + {file = 
"lxml-6.0.0-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:2dd1cc3ea7e60bfb31ff32cafe07e24839df573a5e7c2d33304082a5019bcd58"}, + {file = "lxml-6.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2cfcf84f1defed7e5798ef4f88aa25fcc52d279be731ce904789aa7ccfb7e8d2"}, + {file = "lxml-6.0.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:a52a4704811e2623b0324a18d41ad4b9fabf43ce5ff99b14e40a520e2190c851"}, + {file = "lxml-6.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c16304bba98f48a28ae10e32a8e75c349dd742c45156f297e16eeb1ba9287a1f"}, + {file = "lxml-6.0.0-cp311-cp311-win32.whl", hash = "sha256:f8d19565ae3eb956d84da3ef367aa7def14a2735d05bd275cd54c0301f0d0d6c"}, + {file = "lxml-6.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:b2d71cdefda9424adff9a3607ba5bbfc60ee972d73c21c7e3c19e71037574816"}, + {file = "lxml-6.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:8a2e76efbf8772add72d002d67a4c3d0958638696f541734304c7f28217a9cab"}, + {file = "lxml-6.0.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:78718d8454a6e928470d511bf8ac93f469283a45c354995f7d19e77292f26108"}, + {file = "lxml-6.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:84ef591495ffd3f9dcabffd6391db7bb70d7230b5c35ef5148354a134f56f2be"}, + {file = "lxml-6.0.0-cp312-cp312-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:2930aa001a3776c3e2601cb8e0a15d21b8270528d89cc308be4843ade546b9ab"}, + {file = "lxml-6.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:219e0431ea8006e15005767f0351e3f7f9143e793e58519dc97fe9e07fae5563"}, + {file = "lxml-6.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bd5913b4972681ffc9718bc2d4c53cde39ef81415e1671ff93e9aa30b46595e7"}, + {file = "lxml-6.0.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:390240baeb9f415a82eefc2e13285016f9c8b5ad71ec80574ae8fa9605093cd7"}, + {file = 
"lxml-6.0.0-cp312-cp312-manylinux_2_27_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d6e200909a119626744dd81bae409fc44134389e03fbf1d68ed2a55a2fb10991"}, + {file = "lxml-6.0.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ca50bd612438258a91b5b3788c6621c1f05c8c478e7951899f492be42defc0da"}, + {file = "lxml-6.0.0-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:c24b8efd9c0f62bad0439283c2c795ef916c5a6b75f03c17799775c7ae3c0c9e"}, + {file = "lxml-6.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:afd27d8629ae94c5d863e32ab0e1d5590371d296b87dae0a751fb22bf3685741"}, + {file = "lxml-6.0.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:54c4855eabd9fc29707d30141be99e5cd1102e7d2258d2892314cf4c110726c3"}, + {file = "lxml-6.0.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c907516d49f77f6cd8ead1322198bdfd902003c3c330c77a1c5f3cc32a0e4d16"}, + {file = "lxml-6.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:36531f81c8214e293097cd2b7873f178997dae33d3667caaae8bdfb9666b76c0"}, + {file = "lxml-6.0.0-cp312-cp312-win32.whl", hash = "sha256:690b20e3388a7ec98e899fd54c924e50ba6693874aa65ef9cb53de7f7de9d64a"}, + {file = "lxml-6.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:310b719b695b3dd442cdfbbe64936b2f2e231bb91d998e99e6f0daf991a3eba3"}, + {file = "lxml-6.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:8cb26f51c82d77483cdcd2b4a53cda55bbee29b3c2f3ddeb47182a2a9064e4eb"}, + {file = "lxml-6.0.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6da7cd4f405fd7db56e51e96bff0865b9853ae70df0e6720624049da76bde2da"}, + {file = "lxml-6.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b34339898bb556a2351a1830f88f751679f343eabf9cf05841c95b165152c9e7"}, + {file = "lxml-6.0.0-cp313-cp313-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:51a5e4c61a4541bd1cd3ba74766d0c9b6c12d6a1a4964ef60026832aac8e79b3"}, + {file = 
"lxml-6.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d18a25b19ca7307045581b18b3ec9ead2b1db5ccd8719c291f0cd0a5cec6cb81"}, + {file = "lxml-6.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d4f0c66df4386b75d2ab1e20a489f30dc7fd9a06a896d64980541506086be1f1"}, + {file = "lxml-6.0.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9f4b481b6cc3a897adb4279216695150bbe7a44c03daba3c894f49d2037e0a24"}, + {file = "lxml-6.0.0-cp313-cp313-manylinux_2_27_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8a78d6c9168f5bcb20971bf3329c2b83078611fbe1f807baadc64afc70523b3a"}, + {file = "lxml-6.0.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ae06fbab4f1bb7db4f7c8ca9897dc8db4447d1a2b9bee78474ad403437bcc29"}, + {file = "lxml-6.0.0-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:1fa377b827ca2023244a06554c6e7dc6828a10aaf74ca41965c5d8a4925aebb4"}, + {file = "lxml-6.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1676b56d48048a62ef77a250428d1f31f610763636e0784ba67a9740823988ca"}, + {file = "lxml-6.0.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:0e32698462aacc5c1cf6bdfebc9c781821b7e74c79f13e5ffc8bfe27c42b1abf"}, + {file = "lxml-6.0.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4d6036c3a296707357efb375cfc24bb64cd955b9ec731abf11ebb1e40063949f"}, + {file = "lxml-6.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7488a43033c958637b1a08cddc9188eb06d3ad36582cebc7d4815980b47e27ef"}, + {file = "lxml-6.0.0-cp313-cp313-win32.whl", hash = "sha256:5fcd7d3b1d8ecb91445bd71b9c88bdbeae528fefee4f379895becfc72298d181"}, + {file = "lxml-6.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:2f34687222b78fff795feeb799a7d44eca2477c3d9d3a46ce17d51a4f383e32e"}, + {file = "lxml-6.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:21db1ec5525780fd07251636eb5f7acb84003e9382c72c18c542a87c416ade03"}, + {file = 
"lxml-6.0.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:dbdd7679a6f4f08152818043dbb39491d1af3332128b3752c3ec5cebc0011a72"}, + {file = "lxml-6.0.0-pp310-pypy310_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:40442e2a4456e9910875ac12951476d36c0870dcb38a68719f8c4686609897c4"}, + {file = "lxml-6.0.0-pp310-pypy310_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:db0efd6bae1c4730b9c863fc4f5f3c0fa3e8f05cae2c44ae141cb9dfc7d091dc"}, + {file = "lxml-6.0.0-pp310-pypy310_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9ab542c91f5a47aaa58abdd8ea84b498e8e49fe4b883d67800017757a3eb78e8"}, + {file = "lxml-6.0.0-pp310-pypy310_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:013090383863b72c62a702d07678b658fa2567aa58d373d963cca245b017e065"}, + {file = "lxml-6.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c86df1c9af35d903d2b52d22ea3e66db8058d21dc0f59842ca5deb0595921141"}, + {file = "lxml-6.0.0.tar.gz", hash = "sha256:032e65120339d44cdc3efc326c9f660f5f7205f3a535c1fdbf898b29ea01fb72"}, +] + +[[package]] +name = "magika" +version = "0.6.2" +requires_python = ">=3.8" +summary = "A tool to determine the content type of a file with deep learning" +groups = ["default"] +dependencies = [ + "click>=8.1.7", + "numpy>=1.24; python_version < \"3.12\"", + "numpy>=1.26; python_version >= \"3.12\" and python_version < \"3.13\"", + "numpy>=2.1.0; python_version >= \"3.13\"", + "onnxruntime<1.20.0,>=1.17.0; python_version <= \"3.9\"", + "onnxruntime>=1.17.0; python_version > \"3.9\"", + "python-dotenv>=1.0.1", +] +files = [ + {file = "magika-0.6.2-py3-none-any.whl", hash = "sha256:5ef72fbc07723029b3684ef81454bc224ac5f60986aa0fc5a28f4456eebcb5b2"}, + {file = "magika-0.6.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:9109309328a1553886c8ff36c2ee9a5e9cfd36893ad81b65bf61a57debdd9d0e"}, + {file = "magika-0.6.2-py3-none-manylinux_2_28_x86_64.whl", hash = 
"sha256:57cd1d64897634d15de552bd6b3ae9c6ff6ead9c60d384dc46497c08288e4559"}, + {file = "magika-0.6.2-py3-none-win_amd64.whl", hash = "sha256:711f427a633e0182737dcc2074748004842f870643585813503ff2553b973b9f"}, + {file = "magika-0.6.2.tar.gz", hash = "sha256:37eb6ae8020f6e68f231bc06052c0a0cbe8e6fa27492db345e8dc867dbceb067"}, +] + +[[package]] +name = "mammoth" +version = "1.10.0" +requires_python = ">=3.7" +summary = "Convert Word documents from docx to simple and clean HTML and Markdown" +groups = ["default"] +dependencies = [ + "cobble<0.2,>=0.1.3", +] +files = [ + {file = "mammoth-1.10.0-py2.py3-none-any.whl", hash = "sha256:a1c87d5b98ca30230394267f98614b58b14b50f8031dc33ac9a535c6ab04eb99"}, + {file = "mammoth-1.10.0.tar.gz", hash = "sha256:cb6fbba41ccf8b5502859c457177d87a833fef0e0b1d4e6fd23ec372fe892c30"}, +] + [[package]] name = "markdown-it-py" version = "3.0.0" @@ -1194,6 +1398,60 @@ files = [ {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, ] +[[package]] +name = "markdownify" +version = "1.2.0" +summary = "Convert HTML to markdown." 
+groups = ["default"] +dependencies = [ + "beautifulsoup4<5,>=4.9", + "six<2,>=1.15", +] +files = [ + {file = "markdownify-1.2.0-py3-none-any.whl", hash = "sha256:48e150a1c4993d4d50f282f725c0111bd9eb25645d41fa2f543708fd44161351"}, + {file = "markdownify-1.2.0.tar.gz", hash = "sha256:f6c367c54eb24ee953921804dfe6d6575c5e5b42c643955e7242034435de634c"}, +] + +[[package]] +name = "markitdown" +version = "0.1.2" +requires_python = ">=3.10" +summary = "Utility tool for converting various files to Markdown" +groups = ["default"] +dependencies = [ + "beautifulsoup4", + "charset-normalizer", + "defusedxml", + "magika~=0.6.1", + "markdownify", + "requests", +] +files = [ + {file = "markitdown-0.1.2-py3-none-any.whl", hash = "sha256:4881f0768794ffccb52d09dd86498813a6896ba9639b4fc15512817f56ed9d74"}, + {file = "markitdown-0.1.2.tar.gz", hash = "sha256:85fe108a92bd18f317e75a36cf567a6fa812072612a898abf8c156d5d74c13c4"}, +] + +[[package]] +name = "markitdown" +version = "0.1.2" +extras = ["docx", "xls", "xlsx"] +requires_python = ">=3.10" +summary = "Utility tool for converting various files to Markdown" +groups = ["default"] +dependencies = [ + "lxml", + "mammoth", + "markitdown==0.1.2", + "openpyxl", + "pandas", + "pandas", + "xlrd", +] +files = [ + {file = "markitdown-0.1.2-py3-none-any.whl", hash = "sha256:4881f0768794ffccb52d09dd86498813a6896ba9639b4fc15512817f56ed9d74"}, + {file = "markitdown-0.1.2.tar.gz", hash = "sha256:85fe108a92bd18f317e75a36cf567a6fa812072612a898abf8c156d5d74c13c4"}, +] + [[package]] name = "markupsafe" version = "3.0.2" @@ -1300,6 +1558,17 @@ files = [ {file = "more_itertools-10.7.0.tar.gz", hash = "sha256:9fddd5403be01a94b204faadcff459ec3568cf110265d3c54323e1e866ad29d3"}, ] +[[package]] +name = "mpmath" +version = "1.3.0" +summary = "Python library for arbitrary-precision floating-point arithmetic" +groups = ["default"] +marker = "python_version > \"3.9\"" +files = [ + {file = "mpmath-1.3.0-py3-none-any.whl", hash = 
"sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, + {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, +] + [[package]] name = "mss" version = "10.0.0" @@ -1362,6 +1631,106 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] +[[package]] +name = "numpy" +version = "2.2.6" +requires_python = ">=3.10" +summary = "Fundamental package for array computing in Python" +groups = ["default"] +files = [ + {file = "numpy-2.2.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b412caa66f72040e6d268491a59f2c43bf03eb6c96dd8f0307829feb7fa2b6fb"}, + {file = "numpy-2.2.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e41fd67c52b86603a91c1a505ebaef50b3314de0213461c7a6e99c9a3beff90"}, + {file = "numpy-2.2.6-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:37e990a01ae6ec7fe7fa1c26c55ecb672dd98b19c3d0e1d1f326fa13cb38d163"}, + {file = "numpy-2.2.6-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:5a6429d4be8ca66d889b7cf70f536a397dc45ba6faeb5f8c5427935d9592e9cf"}, + {file = "numpy-2.2.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:efd28d4e9cd7d7a8d39074a4d44c63eda73401580c5c76acda2ce969e0a38e83"}, + {file = "numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc7b73d02efb0e18c000e9ad8b83480dfcd5dfd11065997ed4c6747470ae8915"}, + {file = "numpy-2.2.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:74d4531beb257d2c3f4b261bfb0fc09e0f9ebb8842d82a7b4209415896adc680"}, + {file = "numpy-2.2.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8fc377d995680230e83241d8a96def29f204b5782f371c532579b4f20607a289"}, + {file = "numpy-2.2.6-cp310-cp310-win32.whl", hash = "sha256:b093dd74e50a8cba3e873868d9e93a85b78e0daf2e98c6797566ad8044e8363d"}, + {file = "numpy-2.2.6-cp310-cp310-win_amd64.whl", hash = 
"sha256:f0fd6321b839904e15c46e0d257fdd101dd7f530fe03fd6359c1ea63738703f3"}, + {file = "numpy-2.2.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f9f1adb22318e121c5c69a09142811a201ef17ab257a1e66ca3025065b7f53ae"}, + {file = "numpy-2.2.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c820a93b0255bc360f53eca31a0e676fd1101f673dda8da93454a12e23fc5f7a"}, + {file = "numpy-2.2.6-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3d70692235e759f260c3d837193090014aebdf026dfd167834bcba43e30c2a42"}, + {file = "numpy-2.2.6-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:481b49095335f8eed42e39e8041327c05b0f6f4780488f61286ed3c01368d491"}, + {file = "numpy-2.2.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b64d8d4d17135e00c8e346e0a738deb17e754230d7e0810ac5012750bbd85a5a"}, + {file = "numpy-2.2.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba10f8411898fc418a521833e014a77d3ca01c15b0c6cdcce6a0d2897e6dbbdf"}, + {file = "numpy-2.2.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bd48227a919f1bafbdda0583705e547892342c26fb127219d60a5c36882609d1"}, + {file = "numpy-2.2.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9551a499bf125c1d4f9e250377c1ee2eddd02e01eac6644c080162c0c51778ab"}, + {file = "numpy-2.2.6-cp311-cp311-win32.whl", hash = "sha256:0678000bb9ac1475cd454c6b8c799206af8107e310843532b04d49649c717a47"}, + {file = "numpy-2.2.6-cp311-cp311-win_amd64.whl", hash = "sha256:e8213002e427c69c45a52bbd94163084025f533a55a59d6f9c5b820774ef3303"}, + {file = "numpy-2.2.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41c5a21f4a04fa86436124d388f6ed60a9343a6f767fced1a8a71c3fbca038ff"}, + {file = "numpy-2.2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de749064336d37e340f640b05f24e9e3dd678c57318c7289d222a8a2f543e90c"}, + {file = "numpy-2.2.6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:894b3a42502226a1cac872f840030665f33326fc3dac8e57c607905773cdcde3"}, + {file = 
"numpy-2.2.6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:71594f7c51a18e728451bb50cc60a3ce4e6538822731b2933209a1f3614e9282"}, + {file = "numpy-2.2.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2618db89be1b4e05f7a1a847a9c1c0abd63e63a1607d892dd54668dd92faf87"}, + {file = "numpy-2.2.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd83c01228a688733f1ded5201c678f0c53ecc1006ffbc404db9f7a899ac6249"}, + {file = "numpy-2.2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:37c0ca431f82cd5fa716eca9506aefcabc247fb27ba69c5062a6d3ade8cf8f49"}, + {file = "numpy-2.2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fe27749d33bb772c80dcd84ae7e8df2adc920ae8297400dabec45f0dedb3f6de"}, + {file = "numpy-2.2.6-cp312-cp312-win32.whl", hash = "sha256:4eeaae00d789f66c7a25ac5f34b71a7035bb474e679f410e5e1a94deb24cf2d4"}, + {file = "numpy-2.2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c1f9540be57940698ed329904db803cf7a402f3fc200bfe599334c9bd84a40b2"}, + {file = "numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84"}, + {file = "numpy-2.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b"}, + {file = "numpy-2.2.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d"}, + {file = "numpy-2.2.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566"}, + {file = "numpy-2.2.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f"}, + {file = "numpy-2.2.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f"}, + {file = "numpy-2.2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash 
= "sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868"}, + {file = "numpy-2.2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d"}, + {file = "numpy-2.2.6-cp313-cp313-win32.whl", hash = "sha256:5beb72339d9d4fa36522fc63802f469b13cdbe4fdab4a288f0c441b74272ebfd"}, + {file = "numpy-2.2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b0544343a702fa80c95ad5d3d608ea3599dd54d4632df855e4c8d24eb6ecfa1c"}, + {file = "numpy-2.2.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6"}, + {file = "numpy-2.2.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda"}, + {file = "numpy-2.2.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40"}, + {file = "numpy-2.2.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8"}, + {file = "numpy-2.2.6-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f"}, + {file = "numpy-2.2.6-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa"}, + {file = "numpy-2.2.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571"}, + {file = "numpy-2.2.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1"}, + {file = "numpy-2.2.6-cp313-cp313t-win32.whl", hash = "sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff"}, + {file = "numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06"}, + {file = 
"numpy-2.2.6-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0b605b275d7bd0c640cad4e5d30fa701a8d59302e127e5f79138ad62762c3e3d"}, + {file = "numpy-2.2.6-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:7befc596a7dc9da8a337f79802ee8adb30a552a94f792b9c9d18c840055907db"}, + {file = "numpy-2.2.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce47521a4754c8f4593837384bd3424880629f718d87c5d44f8ed763edd63543"}, + {file = "numpy-2.2.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d042d24c90c41b54fd506da306759e06e568864df8ec17ccc17e9e884634fd00"}, + {file = "numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd"}, +] + +[[package]] +name = "onnxruntime" +version = "1.22.1" +requires_python = ">=3.10" +summary = "ONNX Runtime is a runtime accelerator for Machine Learning models" +groups = ["default"] +marker = "python_version > \"3.9\"" +dependencies = [ + "coloredlogs", + "flatbuffers", + "numpy>=1.21.6", + "packaging", + "protobuf", + "sympy", +] +files = [ + {file = "onnxruntime-1.22.1-cp310-cp310-macosx_13_0_universal2.whl", hash = "sha256:80e7f51da1f5201c1379b8d6ef6170505cd800e40da216290f5e06be01aadf95"}, + {file = "onnxruntime-1.22.1-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b89ddfdbbdaf7e3a59515dee657f6515601d55cb21a0f0f48c81aefc54ff1b73"}, + {file = "onnxruntime-1.22.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bddc75868bcf6f9ed76858a632f65f7b1846bdcefc6d637b1e359c2c68609964"}, + {file = "onnxruntime-1.22.1-cp310-cp310-win_amd64.whl", hash = "sha256:01e2f21b2793eb0c8642d2be3cee34cc7d96b85f45f6615e4e220424158877ce"}, + {file = "onnxruntime-1.22.1-cp311-cp311-macosx_13_0_universal2.whl", hash = "sha256:f4581bccb786da68725d8eac7c63a8f31a89116b8761ff8b4989dc58b61d49a0"}, + {file = "onnxruntime-1.22.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:7ae7526cf10f93454beb0f751e78e5cb7619e3b92f9fc3bd51aa6f3b7a8977e5"}, + {file = "onnxruntime-1.22.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f6effa1299ac549a05c784d50292e3378dbbf010346ded67400193b09ddc2f04"}, + {file = "onnxruntime-1.22.1-cp311-cp311-win_amd64.whl", hash = "sha256:f28a42bb322b4ca6d255531bb334a2b3e21f172e37c1741bd5e66bc4b7b61f03"}, + {file = "onnxruntime-1.22.1-cp312-cp312-macosx_13_0_universal2.whl", hash = "sha256:a938d11c0dc811badf78e435daa3899d9af38abee950d87f3ab7430eb5b3cf5a"}, + {file = "onnxruntime-1.22.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:984cea2a02fcc5dfea44ade9aca9fe0f7a8a2cd6f77c258fc4388238618f3928"}, + {file = "onnxruntime-1.22.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2d39a530aff1ec8d02e365f35e503193991417788641b184f5b1e8c9a6d5ce8d"}, + {file = "onnxruntime-1.22.1-cp312-cp312-win_amd64.whl", hash = "sha256:6a64291d57ea966a245f749eb970f4fa05a64d26672e05a83fdb5db6b7d62f87"}, + {file = "onnxruntime-1.22.1-cp313-cp313-macosx_13_0_universal2.whl", hash = "sha256:d29c7d87b6cbed8fecfd09dca471832384d12a69e1ab873e5effbb94adc3e966"}, + {file = "onnxruntime-1.22.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:460487d83b7056ba98f1f7bac80287224c31d8149b15712b0d6f5078fcc33d0f"}, + {file = "onnxruntime-1.22.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b0c37070268ba4e02a1a9d28560cd00cd1e94f0d4f275cbef283854f861a65fa"}, + {file = "onnxruntime-1.22.1-cp313-cp313-win_amd64.whl", hash = "sha256:70980d729145a36a05f74b573435531f55ef9503bcda81fc6c3d6b9306199982"}, + {file = "onnxruntime-1.22.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:33a7980bbc4b7f446bac26c3785652fe8730ed02617d765399e89ac7d44e0f7d"}, + {file = "onnxruntime-1.22.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:6e7e823624b015ea879d976cbef8bfaed2f7e2cc233d7506860a76dd37f8f381"}, +] + [[package]] name = "openai" version = "1.85.0" @@ -1397,6 +1766,20 @@ files = [ {file = "openapi_pydantic-0.5.1.tar.gz", hash = "sha256:ff6835af6bde7a459fb93eb93bb92b8749b754fc6e51b2f1590a19dc3005ee0d"}, ] +[[package]] +name = "openpyxl" +version = "3.1.5" +requires_python = ">=3.8" +summary = "A Python library to read/write Excel 2010 xlsx/xlsm files" +groups = ["default"] +dependencies = [ + "et-xmlfile", +] +files = [ + {file = "openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2"}, + {file = "openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050"}, +] + [[package]] name = "packaging" version = "24.2" @@ -1408,6 +1791,58 @@ files = [ {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, ] +[[package]] +name = "pandas" +version = "2.3.1" +requires_python = ">=3.9" +summary = "Powerful data structures for data analysis, time series, and statistics" +groups = ["default"] +dependencies = [ + "numpy>=1.22.4; python_version < \"3.11\"", + "numpy>=1.23.2; python_version == \"3.11\"", + "numpy>=1.26.0; python_version >= \"3.12\"", + "python-dateutil>=2.8.2", + "pytz>=2020.1", + "tzdata>=2022.7", +] +files = [ + {file = "pandas-2.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:22c2e866f7209ebc3a8f08d75766566aae02bcc91d196935a1d9e59c7b990ac9"}, + {file = "pandas-2.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3583d348546201aff730c8c47e49bc159833f971c2899d6097bce68b9112a4f1"}, + {file = "pandas-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f951fbb702dacd390561e0ea45cdd8ecfa7fb56935eb3dd78e306c19104b9b0"}, + {file = "pandas-2.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:cd05b72ec02ebfb993569b4931b2e16fbb4d6ad6ce80224a3ee838387d83a191"}, + {file = "pandas-2.3.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:1b916a627919a247d865aed068eb65eb91a344b13f5b57ab9f610b7716c92de1"}, + {file = "pandas-2.3.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:fe67dc676818c186d5a3d5425250e40f179c2a89145df477dd82945eaea89e97"}, + {file = "pandas-2.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:2eb789ae0274672acbd3c575b0598d213345660120a257b47b5dafdc618aec83"}, + {file = "pandas-2.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2b0540963d83431f5ce8870ea02a7430adca100cec8a050f0811f8e31035541b"}, + {file = "pandas-2.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fe7317f578c6a153912bd2292f02e40c1d8f253e93c599e82620c7f69755c74f"}, + {file = "pandas-2.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6723a27ad7b244c0c79d8e7007092d7c8f0f11305770e2f4cd778b3ad5f9f85"}, + {file = "pandas-2.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3462c3735fe19f2638f2c3a40bd94ec2dc5ba13abbb032dd2fa1f540a075509d"}, + {file = "pandas-2.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:98bcc8b5bf7afed22cc753a28bc4d9e26e078e777066bc53fac7904ddef9a678"}, + {file = "pandas-2.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4d544806b485ddf29e52d75b1f559142514e60ef58a832f74fb38e48d757b299"}, + {file = "pandas-2.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:b3cd4273d3cb3707b6fffd217204c52ed92859533e31dc03b7c5008aa933aaab"}, + {file = "pandas-2.3.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:689968e841136f9e542020698ee1c4fbe9caa2ed2213ae2388dc7b81721510d3"}, + {file = "pandas-2.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:025e92411c16cbe5bb2a4abc99732a6b132f439b8aab23a59fa593eb00704232"}, + {file = "pandas-2.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:9b7ff55f31c4fcb3e316e8f7fa194566b286d6ac430afec0d461163312c5841e"}, + {file = "pandas-2.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7dcb79bf373a47d2a40cf7232928eb7540155abbc460925c2c96d2d30b006eb4"}, + {file = "pandas-2.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:56a342b231e8862c96bdb6ab97170e203ce511f4d0429589c8ede1ee8ece48b8"}, + {file = "pandas-2.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ca7ed14832bce68baef331f4d7f294411bed8efd032f8109d690df45e00c4679"}, + {file = "pandas-2.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:ac942bfd0aca577bef61f2bc8da8147c4ef6879965ef883d8e8d5d2dc3e744b8"}, + {file = "pandas-2.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9026bd4a80108fac2239294a15ef9003c4ee191a0f64b90f170b40cfb7cf2d22"}, + {file = "pandas-2.3.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6de8547d4fdb12421e2d047a2c446c623ff4c11f47fddb6b9169eb98ffba485a"}, + {file = "pandas-2.3.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:782647ddc63c83133b2506912cc6b108140a38a37292102aaa19c81c83db2928"}, + {file = "pandas-2.3.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ba6aff74075311fc88504b1db890187a3cd0f887a5b10f5525f8e2ef55bfdb9"}, + {file = "pandas-2.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e5635178b387bd2ba4ac040f82bc2ef6e6b500483975c4ebacd34bec945fda12"}, + {file = "pandas-2.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6f3bf5ec947526106399a9e1d26d40ee2b259c66422efdf4de63c848492d91bb"}, + {file = "pandas-2.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:1c78cf43c8fde236342a1cb2c34bcff89564a7bfed7e474ed2fffa6aed03a956"}, + {file = "pandas-2.3.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:8dfc17328e8da77be3cf9f47509e5637ba8f137148ed0e9b5241e1baf526e20a"}, + {file = "pandas-2.3.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = 
"sha256:ec6c851509364c59a5344458ab935e6451b31b818be467eb24b0fe89bd05b6b9"}, + {file = "pandas-2.3.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:911580460fc4884d9b05254b38a6bfadddfcc6aaef856fb5859e7ca202e45275"}, + {file = "pandas-2.3.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f4d6feeba91744872a600e6edbbd5b033005b431d5ae8379abee5bcfa479fab"}, + {file = "pandas-2.3.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fe37e757f462d31a9cd7580236a82f353f5713a80e059a29753cf938c6775d96"}, + {file = "pandas-2.3.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5db9637dbc24b631ff3707269ae4559bce4b7fd75c1c4d7e13f40edc42df4444"}, + {file = "pandas-2.3.1.tar.gz", hash = "sha256:0a95b9ac964fe83ce317827f80304d37388ea77616b1425f0ae41c9d2d0d7bb2"}, +] + [[package]] name = "pathspec" version = "0.12.1" @@ -1905,6 +2340,18 @@ files = [ {file = "pyperclip-1.9.0.tar.gz", hash = "sha256:b7de0142ddc81bfc5c7507eea19da920b92252b548b96186caf94a5e2527d310"}, ] +[[package]] +name = "pyreadline3" +version = "3.5.4" +requires_python = ">=3.8" +summary = "A python implementation of GNU readline." 
+groups = ["default"] +marker = "sys_platform == \"win32\" and python_version > \"3.9\"" +files = [ + {file = "pyreadline3-3.5.4-py3-none-any.whl", hash = "sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6"}, + {file = "pyreadline3-3.5.4.tar.gz", hash = "sha256:8d57d53039a1c75adba8e50dd3d992b28143480816187ea5efbd5c78e6c885b7"}, +] + [[package]] name = "pytest" version = "8.3.5" @@ -2032,6 +2479,16 @@ files = [ {file = "python_xlib-0.33-py2.py3-none-any.whl", hash = "sha256:c3534038d42e0df2f1392a1b30a15a4ff5fdc2b86cfa94f072bf11b10a164398"}, ] +[[package]] +name = "pytz" +version = "2025.2" +summary = "World timezone definitions, modern and historical" +groups = ["default"] +files = [ + {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, + {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, +] + [[package]] name = "pywin32" version = "311" @@ -2385,6 +2842,17 @@ files = [ {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, ] +[[package]] +name = "soupsieve" +version = "2.7" +requires_python = ">=3.8" +summary = "A modern CSS selector implementation for Beautiful Soup." 
+groups = ["default"] +files = [ + {file = "soupsieve-2.7-py3-none-any.whl", hash = "sha256:6e60cc5c1ffaf1cebcc12e8188320b72071e922c2e897f737cadce79ad5d30c4"}, + {file = "soupsieve-2.7.tar.gz", hash = "sha256:ad282f9b6926286d2ead4750552c8a6142bc4c783fd66b0293547c8fe6ae126a"}, +] + [[package]] name = "sse-starlette" version = "2.4.1" @@ -2414,6 +2882,21 @@ files = [ {file = "starlette-0.46.2.tar.gz", hash = "sha256:7f7361f34eed179294600af672f565727419830b54b7b084efe44bb82d2fccd5"}, ] +[[package]] +name = "sympy" +version = "1.14.0" +requires_python = ">=3.9" +summary = "Computer algebra system (CAS) in Python" +groups = ["default"] +marker = "python_version > \"3.9\"" +dependencies = [ + "mpmath<1.4,>=1.1.0", +] +files = [ + {file = "sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5"}, + {file = "sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517"}, +] + [[package]] name = "tenacity" version = "9.1.2" @@ -2590,6 +3073,17 @@ files = [ {file = "typing_inspection-0.4.0.tar.gz", hash = "sha256:9765c87de36671694a67904bf2c96e395be9c6439bb6c87b5142569dcdd65122"}, ] +[[package]] +name = "tzdata" +version = "2025.2" +requires_python = ">=2" +summary = "Provider of IANA time zone data" +groups = ["default"] +files = [ + {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, + {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, +] + [[package]] name = "urllib3" version = "2.3.0" @@ -2689,3 +3183,14 @@ files = [ {file = "winregistry-2.1.0-py3-none-any.whl", hash = "sha256:7591bc93ba5513b389a0234dfa665ac0752e964bddf44757c266a3b754c941e1"}, {file = "winregistry-2.1.0.tar.gz", hash = "sha256:370c2872f9cf9a512ed344039efae2a2943eb36355bc867336ff049e0f9d1db4"}, ] + +[[package]] +name = "xlrd" +version = "2.0.2" +requires_python = 
"!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +summary = "Library for developers to extract data from Microsoft Excel (tm) .xls spreadsheet files" +groups = ["default"] +files = [ + {file = "xlrd-2.0.2-py2.py3-none-any.whl", hash = "sha256:ea762c3d29f4cca48d82df517b6d89fbce4db3107f9d78713e48cd321d5c9aa9"}, + {file = "xlrd-2.0.2.tar.gz", hash = "sha256:08b5e25de58f21ce71dc7db3b3b8106c1fa776f3024c54e45b45b374e89234c9"}, +] diff --git a/pyproject.toml b/pyproject.toml index 04295678..dd883241 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ dependencies = [ "protobuf>=6.31.1", "google-genai>=1.20.0", "filetype>=1.2.0", + "markitdown[xls,xlsx,docx]>=0.1.2", ] requires-python = ">=3.10" readme = "README.md" @@ -224,4 +225,4 @@ pynput = [ ] web = [ "playwright>=1.41.0", -] +] \ No newline at end of file diff --git a/src/askui/__init__.py b/src/askui/__init__.py index a1b59264..0022419f 100644 --- a/src/askui/__init__.py +++ b/src/askui/__init__.py @@ -37,7 +37,8 @@ from .models.types.response_schemas import ResponseSchema, ResponseSchemaBase from .retry import ConfigurableRetry, Retry from .tools import ModifierKey, PcKey -from .utils.image_utils import ImageSource, Img +from .utils.image_utils import ImageSource +from .utils.source_utils import InputSource try: from .android_agent import AndroidVisionAgent @@ -67,7 +68,7 @@ "GetModel", "ImageBlockParam", "ImageSource", - "Img", + "InputSource", "LocateModel", "Locator", "MessageParam", diff --git a/src/askui/agent_base.py b/src/askui/agent_base.py index a270b6f7..cb2b15ca 100644 --- a/src/askui/agent_base.py +++ b/src/askui/agent_base.py @@ -16,9 +16,8 @@ from askui.models.shared.tools import Tool from askui.tools.agent_os import AgentOs from askui.tools.android.agent_os import AndroidAgentOs -from askui.utils.image_utils import ImageSource, Img -from askui.utils.pdf_utils import Pdf -from askui.utils.source_utils import load_image_source, load_source +from askui.utils.image_utils import 
ImageSource +from askui.utils.source_utils import InputSource, load_image_source, load_source from .logger import configure_logging, logger from .models import ModelComposition @@ -193,7 +192,7 @@ def get( query: Annotated[str, Field(min_length=1)], response_schema: None = None, model: str | None = None, - source: Optional[Img | Pdf] = None, + source: Optional[InputSource] = None, ) -> str: ... @overload def get( @@ -201,7 +200,7 @@ def get( query: Annotated[str, Field(min_length=1)], response_schema: Type[ResponseSchema], model: str | None = None, - source: Optional[Img | Pdf] = None, + source: Optional[InputSource] = None, ) -> ResponseSchema: ... @telemetry.record_call(exclude={"query", "source", "response_schema"}) @@ -211,7 +210,7 @@ def get( query: Annotated[str, Field(min_length=1)], response_schema: Type[ResponseSchema] | None = None, model: str | None = None, - source: Optional[Img | Pdf] = None, + source: Optional[InputSource] = None, ) -> ResponseSchema | str: """ Retrieves information from an image or PDF based on the provided `query`. @@ -220,9 +219,10 @@ def get( Args: query (str): The query describing what information to retrieve. - source (Img | Pdf | None, optional): The source to extract information from. - Can be a path to a PDF file, a path to an image file, a PIL Image - object or a data URL. Defaults to a screenshot of the current screen. + source (InputSource | None, optional): The source to extract information + from. Can be a path to an image, PDF, or office document file, + a PIL Image object or a data URL. Defaults to a screenshot of the + current screen. response_schema (Type[ResponseSchema] | None, optional): A Pydantic model class that defines the response schema. If not provided, returns a string. 
@@ -357,7 +357,7 @@ class LinkedListNode(ResponseSchemaBase): def _locate( self, locator: str | Locator, - screenshot: Optional[Img] = None, + screenshot: Optional[InputSource] = None, model: ModelComposition | str | None = None, ) -> PointList: def locate_with_screenshot() -> PointList: @@ -380,7 +380,7 @@ def locate_with_screenshot() -> PointList: def locate( self, locator: str | Locator, - screenshot: Optional[Img] = None, + screenshot: Optional[InputSource] = None, model: ModelComposition | str | None = None, ) -> Point: """ @@ -389,9 +389,10 @@ def locate( Args: locator (str | Locator): The identifier or description of the element to locate. - screenshot (Img | None, optional): The screenshot to use for locating the - element. Can be a path to an image file, a PIL Image object or a data - URL. If `None`, takes a screenshot of the currently selected display. + screenshot (InputSource | None, optional): The screenshot to use for + locating the element. Can be a path to an image file, a PIL Image object + or a data URL. If `None`, takes a screenshot of the currently + selected display. model (ModelComposition | str | None, optional): The composition or name of the model(s) to be used for locating the element using the `locator`. @@ -419,7 +420,7 @@ def locate( def locate_all( self, locator: str | Locator, - screenshot: Optional[Img] = None, + screenshot: Optional[InputSource] = None, model: ModelComposition | str | None = None, ) -> PointList: """ @@ -431,9 +432,10 @@ def locate_all( Args: locator (str | Locator): The identifier or description of the element to locate. - screenshot (Img | None, optional): The screenshot to use for locating the - element. Can be a path to an image file, a PIL Image object or a data - URL. If `None`, takes a screenshot of the currently selected display. + screenshot (InputSource | None, optional): The screenshot to use for + locating the element. Can be a path to an image file, a PIL Image object + or a data URL. 
If `None`, takes a screenshot of the currently + selected display. model (ModelComposition | str | None, optional): The composition or name of the model(s) to be used for locating the element using the `locator`. diff --git a/src/askui/models/anthropic/messages_api.py b/src/askui/models/anthropic/messages_api.py index b92e9f9c..7f1ce775 100644 --- a/src/askui/models/anthropic/messages_api.py +++ b/src/askui/models/anthropic/messages_api.py @@ -42,6 +42,7 @@ from askui.models.shared.tools import ToolCollection from askui.models.types.response_schemas import ResponseSchema from askui.utils.dict_utils import IdentityDefaultDict +from askui.utils.excel_utils import OfficeDocumentSource from askui.utils.image_utils import ( ImageSource, image_to_base64, @@ -242,8 +243,11 @@ def get( response_schema: Type[ResponseSchema] | None, model_choice: str, ) -> ResponseSchema | str: - if isinstance(source, PdfSource): - err_msg = f"PDF processing is not supported for the model {model_choice}" + if isinstance(source, (PdfSource, OfficeDocumentSource)): + err_msg = ( + f"PDF or Office Document processing is not supported for the model: " + f"{model_choice}" + ) raise NotImplementedError(err_msg) try: if response_schema is not None: diff --git a/src/askui/models/askui/google_genai_api.py b/src/askui/models/askui/google_genai_api.py index 8d691023..726c0ada 100644 --- a/src/askui/models/askui/google_genai_api.py +++ b/src/askui/models/askui/google_genai_api.py @@ -21,6 +21,7 @@ from askui.models.models import GetModel, ModelName from askui.models.shared.prompts import SYSTEM_PROMPT_GET from askui.models.types.response_schemas import ResponseSchema, to_response_schema +from askui.utils.excel_utils import OfficeDocumentSource from askui.utils.http_utils import parse_retry_after_header from askui.utils.image_utils import ImageSource from askui.utils.source_utils import Source @@ -185,6 +186,10 @@ def _create_genai_part_from_source(self, source: Source) -> genai_types.Part: data=data, 
mime_type="image/png", ) + if isinstance(source, OfficeDocumentSource): + with source.reader as r: + data = r.read() + return genai_types.Part.from_text(text=data.decode()) with source.reader as r: data = r.read() if len(data) > MAX_FILE_SIZE_BYTES: diff --git a/src/askui/models/askui/inference_api.py b/src/askui/models/askui/inference_api.py index 231ae093..cd3b1cc7 100644 --- a/src/askui/models/askui/inference_api.py +++ b/src/askui/models/askui/inference_api.py @@ -26,6 +26,7 @@ from askui.models.shared.settings import MessageSettings from askui.models.shared.tools import ToolCollection from askui.models.types.response_schemas import ResponseSchema +from askui.utils.excel_utils import OfficeDocumentSource from askui.utils.image_utils import ImageSource from askui.utils.pdf_utils import PdfSource from askui.utils.source_utils import Source @@ -205,8 +206,11 @@ def get( response_schema: Type[ResponseSchema] | None, model_choice: str, ) -> ResponseSchema | str: - if isinstance(source, PdfSource): - err_msg = f"PDF processing is not supported for the model {model_choice}" + if isinstance(source, (PdfSource, OfficeDocumentSource)): + err_msg = ( + f"PDF or Office Document processing is not supported for the model: " + f"{model_choice}" + ) raise NotImplementedError(err_msg) json: dict[str, Any] = { "image": source.to_data_url(), diff --git a/src/askui/models/openrouter/model.py b/src/askui/models/openrouter/model.py index a5a6882c..60db2099 100644 --- a/src/askui/models/openrouter/model.py +++ b/src/askui/models/openrouter/model.py @@ -10,6 +10,7 @@ from askui.models.models import GetModel from askui.models.shared.prompts import SYSTEM_PROMPT_GET from askui.models.types.response_schemas import ResponseSchema, to_response_schema +from askui.utils.excel_utils import OfficeDocumentSource from askui.utils.pdf_utils import PdfSource from askui.utils.source_utils import Source @@ -174,8 +175,11 @@ def get( response_schema: Type[ResponseSchema] | None, model_choice: str, ) 
-> ResponseSchema | str: - if isinstance(source, PdfSource): - err_msg = f"PDF processing is not supported for the model {model_choice}" + if isinstance(source, (PdfSource, OfficeDocumentSource)): + err_msg = ( + f"PDF or Office Document processing is not supported for the model: " + f"{model_choice}" + ) raise NotImplementedError(err_msg) response = self._predict( image_url=source.to_data_url(), diff --git a/src/askui/models/ui_tars_ep/ui_tars_api.py b/src/askui/models/ui_tars_ep/ui_tars_api.py index 1eec36bd..1f6b2df2 100644 --- a/src/askui/models/ui_tars_ep/ui_tars_api.py +++ b/src/askui/models/ui_tars_ep/ui_tars_api.py @@ -24,6 +24,7 @@ from askui.models.shared.tools import Tool from askui.models.types.response_schemas import ResponseSchema from askui.reporting import Reporter +from askui.utils.excel_utils import OfficeDocumentSource from askui.utils.image_utils import ImageSource, image_to_base64 from askui.utils.pdf_utils import PdfSource from askui.utils.source_utils import Source @@ -188,8 +189,8 @@ def get( response_schema: Type[ResponseSchema] | None, model_choice: str, ) -> ResponseSchema | str: - if isinstance(source, PdfSource): - err_msg = f"PDF processing is not supported for the model {model_choice}" + if isinstance(source, (PdfSource, OfficeDocumentSource)): + err_msg = f"PDF and Excel processing is not supported for the model {model_choice}" raise NotImplementedError(err_msg) if response_schema is not None: error_msg = f'Response schema is not supported for model "{model_choice}"' diff --git a/src/askui/utils/excel_utils.py b/src/askui/utils/excel_utils.py new file mode 100644 index 00000000..bc79e2d7 --- /dev/null +++ b/src/askui/utils/excel_utils.py @@ -0,0 +1,33 @@ +from io import BytesIO +from pathlib import Path + +from pydantic import ConfigDict, RootModel + +from askui.utils.markdown_utils import convert_to_markdown + + +class OfficeDocumentSource(RootModel): + """Represents an Excel source that can be read as markdown. 
+ + The class can be initialized with: + - A file path (str or pathlib.Path) + + Attributes: + root (bytes | Path): The underlying Excel bytes or file path. + + Args: + root (Excel): The Excel source to load from. + """ + + model_config = ConfigDict(arbitrary_types_allowed=True) + root: bytes | Path + + @property + def reader(self) -> BytesIO: + markdown_content = convert_to_markdown(self.root) + return BytesIO(markdown_content.encode()) + + +__all__ = [ + "OfficeDocumentSource", +] diff --git a/src/askui/utils/image_utils.py b/src/askui/utils/image_utils.py index 4f166579..44b8f8ee 100644 --- a/src/askui/utils/image_utils.py +++ b/src/askui/utils/image_utils.py @@ -310,16 +310,6 @@ def scale_coordinates( return result -Img = Union[str, Path, PILImage.Image] -"""Type of the input images for `askui.VisionAgent.get()`, `askui.VisionAgent.locate()`, etc. - -Accepts: -- `PIL.Image.Image` -- Relative or absolute file path (`str` or `pathlib.Path`) -- Data URL (e.g., `"data:image/png;base64,..."`) -""" - - class ImageSource(RootModel): """A class that represents an image source and provides methods to convert it to different formats. @@ -375,5 +365,4 @@ def to_bytes(self) -> bytes: "scale_coordinates", "ScalingResults", "ImageSource", - "Img", ] diff --git a/src/askui/utils/markdown_utils.py b/src/askui/utils/markdown_utils.py new file mode 100644 index 00000000..6eb523de --- /dev/null +++ b/src/askui/utils/markdown_utils.py @@ -0,0 +1,24 @@ +from io import BytesIO +from pathlib import Path +from typing import BinaryIO + +from markitdown import MarkItDown + +_MARKDOWN_CONVERTER = MarkItDown() + + +def convert_to_markdown(source: Path | bytes | BinaryIO) -> str: + """Converts a source to markdown text. + + Args: + source (Path | bytes | BinaryIO): The source to convert. + + Returns: + str: The markdown representation of the source. 
+ """ + if isinstance(source, bytes): + bytes_source = BytesIO(source) + result = _MARKDOWN_CONVERTER.convert(bytes_source) + return result.text_content + result = _MARKDOWN_CONVERTER.convert(source) + return result.text_content diff --git a/src/askui/utils/pdf_utils.py b/src/askui/utils/pdf_utils.py index 2df0246d..65a3170e 100644 --- a/src/askui/utils/pdf_utils.py +++ b/src/askui/utils/pdf_utils.py @@ -1,16 +1,8 @@ from io import BufferedReader, BytesIO from pathlib import Path -from typing import Union from pydantic import ConfigDict, RootModel -Pdf = Union[str, Path] -"""Type of the input PDFs for `askui.VisionAgent.get()`, etc. - -Accepts: -- Relative or absolute file path (`str` or `pathlib.Path`) -""" - class PdfSource(RootModel): """A class that represents a PDF source. @@ -38,5 +30,4 @@ def reader(self) -> BufferedReader | BytesIO: __all__ = [ "PdfSource", - "Pdf", ] diff --git a/src/askui/utils/source_utils.py b/src/askui/utils/source_utils.py index 619f134a..f937acea 100644 --- a/src/askui/utils/source_utils.py +++ b/src/askui/utils/source_utils.py @@ -9,15 +9,33 @@ from filetype import guess # type: ignore[import-untyped] from PIL import Image as PILImage +from askui.utils.excel_utils import OfficeDocumentSource from askui.utils.image_utils import ImageSource from askui.utils.pdf_utils import PdfSource -Source = Union[ImageSource, PdfSource] +InputSource = Union[str, Path, PILImage.Image] +""" +Type of the input images and files for `askui.VisionAgent.get()` and images for +`askui.VisionAgent.locate()`, etc. 
+ +Accepts: +- `PIL.Image.Image` +- Relative or absolute file path (`str` or `pathlib.Path`) +- Data URL (e.g., `"data:image/png;base64,..."`) +""" + +Source = Union[ImageSource, PdfSource, OfficeDocumentSource] _DATA_URL_WITH_MIMETYPE_RE = re.compile(r"^data:([^;,]+)([^,]*)?,(.*)$", re.DOTALL) _SupportedImageMimeTypes = Literal["image/png", "image/jpeg", "image/gif", "image/webp"] -_SupportedApplicationMimeTypes = Literal["application/pdf"] +_SupportedApplicationMimeTypes = Literal[ + "application/pdf", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "application/vnd.ms-excel", + "application/msword", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", +] _SupportedMimeTypes = _SupportedImageMimeTypes | _SupportedApplicationMimeTypes _SUPPORTED_MIME_TYPES: list[_SupportedMimeTypes] = [ @@ -26,6 +44,10 @@ "image/gif", "image/webp", "application/pdf", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "application/vnd.ms-excel", + "application/msword", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", ] @@ -49,6 +71,15 @@ def is_supported(self) -> bool: def is_pdf(self) -> bool: return self.mime == "application/pdf" + @property + def is_supported_office_document(self) -> bool: + return self.mime in [ + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "application/vnd.ms-excel", + "application/msword", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + ] + @property def is_image(self) -> bool: if self.mime: @@ -133,6 +164,8 @@ def load_source(source: Union[str, Path, PILImage.Image]) -> Source: raise ValueError(msg) if source_analysis.is_pdf: return PdfSource(source_analysis.content) + if source_analysis.is_supported_office_document: + return OfficeDocumentSource(source_analysis.content) if source_analysis.is_image: return ImageSource( PILImage.open( @@ -167,4 +200,4 @@ def load_image_source(source: Union[str, Path, 
PILImage.Image]) -> ImageSource: return result -__all__ = ["Source", "load_source", "load_image_source"] +__all__ = ["Source", "load_source", "load_image_source", "InputSource"] diff --git a/tests/conftest.py b/tests/conftest.py index 72b7cba4..6e386721 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -40,6 +40,30 @@ def path_fixtures_dummy_pdf(path_fixtures_pdf: pathlib.Path) -> pathlib.Path: return path_fixtures_pdf / "dummy.pdf" +@pytest.fixture +def path_fixtures_excel(path_fixtures: pathlib.Path) -> pathlib.Path: + """Fixture providing the path to the excel directory.""" + return path_fixtures / "excel" + + +@pytest.fixture +def path_fixtures_dummy_excel(path_fixtures_excel: pathlib.Path) -> pathlib.Path: + """Fixture providing the path to the dummy excel.""" + return path_fixtures_excel / "dummy.xlsx" + + +@pytest.fixture +def path_fixtures_docs(path_fixtures: pathlib.Path) -> pathlib.Path: + """Fixture providing the path to the docs directory.""" + return path_fixtures / "docs" + + +@pytest.fixture +def path_fixtures_dummy_doc(path_fixtures_docs: pathlib.Path) -> pathlib.Path: + """Fixture providing the path to the dummy doc.""" + return path_fixtures_docs / "dummy.docx" + + @pytest.fixture def github_login_screenshot(path_fixtures_screenshots: pathlib.Path) -> Image.Image: """Fixture providing the GitHub login screenshot.""" diff --git a/tests/e2e/agent/test_get.py b/tests/e2e/agent/test_get.py index 7977efdf..0ce389b2 100644 --- a/tests/e2e/agent/test_get.py +++ b/tests/e2e/agent/test_get.py @@ -125,6 +125,92 @@ def test_get_with_pdf_too_large_with_default_model( ) +def test_get_with_xlsx_with_non_gemini_model_raises_not_implemented( + vision_agent: VisionAgent, path_fixtures_dummy_excel: pathlib.Path +) -> None: + with pytest.raises(NotImplementedError): + vision_agent.get( + "What is in the xlsx?", + source=path_fixtures_dummy_excel, + model=ModelName.ANTHROPIC__CLAUDE__3_5__SONNET__20241022, + ) + + +@pytest.mark.parametrize( + "model", + [ + 
ModelName.ASKUI__GEMINI__2_5__FLASH, + ModelName.ASKUI__GEMINI__2_5__PRO, + ], +) +def test_get_with_xlsx_with_gemini_model( + vision_agent: VisionAgent, model: str, path_fixtures_dummy_excel: pathlib.Path +) -> None: + response = vision_agent.get( + "What is the salary of Doe?", + source=path_fixtures_dummy_excel, + model=model, + ) + assert isinstance(response, str) + assert "20000" in response.lower() + + +class Salary(ResponseSchemaBase): + salary: int + name: str + + +class SalaryResponse(ResponseSchemaBase): + salaries: list[Salary] + + +@pytest.mark.parametrize( + "model", + [ + ModelName.ASKUI__GEMINI__2_5__FLASH, + ModelName.ASKUI__GEMINI__2_5__PRO, + ], +) +def test_get_with_xlsx_with_gemini_model_with_response_schema( + vision_agent: VisionAgent, model: str, path_fixtures_dummy_excel: pathlib.Path +) -> None: + response = vision_agent.get( + "What is the salary of Everyone?", + source=path_fixtures_dummy_excel, + model=model, + response_schema=SalaryResponse, + ) + assert isinstance(response, SalaryResponse) + # sort salaries by name for easier assertion + response.salaries.sort(key=lambda x: x.name) + assert response.salaries[0].name == "Doe" + assert response.salaries[0].salary == 20000 + assert response.salaries[1].name == "John" + assert response.salaries[1].salary == 10000 + + +def test_get_with_xlsx_with_default_model_with_chart_data( + vision_agent: VisionAgent, path_fixtures_dummy_excel: pathlib.Path +) -> None: + response = vision_agent.get( + "What is the salary of John?", + source=path_fixtures_dummy_excel, + ) + assert isinstance(response, str) + assert "10000" in response.lower() + + +def test_get_with_docs_with_default_model( + vision_agent: VisionAgent, path_fixtures_dummy_doc: pathlib.Path +) -> None: + response = vision_agent.get( + "At what time in 24h format does the person sleeps?", + source=path_fixtures_dummy_doc, + ) + assert isinstance(response, str) + assert "22:00" in response.lower() + + def 
test_get_with_model_composition_should_use_default_model( agent_toolbox_mock: AgentToolbox, askui_facade: ModelFacade, diff --git a/tests/fixtures/docs/dummy.docx b/tests/fixtures/docs/dummy.docx new file mode 100644 index 00000000..f5132090 Binary files /dev/null and b/tests/fixtures/docs/dummy.docx differ diff --git a/tests/fixtures/excel/dummy.xlsx b/tests/fixtures/excel/dummy.xlsx new file mode 100644 index 00000000..9dedc474 Binary files /dev/null and b/tests/fixtures/excel/dummy.xlsx differ