diff --git a/.tool-versions b/.tool-versions index 97da906..e111eee 100644 --- a/.tool-versions +++ b/.tool-versions @@ -1,3 +1,3 @@ -python 3.11.14 -poetry 2.3.3 +python 3.12.12 +poetry 2.4.1 java liberica-1.8.0 diff --git a/poetry.lock b/poetry.lock index 9f84732..4c417f2 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.3.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.4.1 and should not be changed by hand. [[package]] name = "argcomplete" @@ -950,65 +950,65 @@ toml = ["tomli ; python_full_version <= \"3.11.0a6\""] [[package]] name = "cryptography" -version = "47.0.0" +version = "48.0.1" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." optional = false -python-versions = "!=3.9.0,!=3.9.1,>=3.8" +python-versions = "!=3.9.0,!=3.9.1,>=3.9" groups = ["dev", "test"] files = [ - {file = "cryptography-47.0.0-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:160ad728f128972d362e714054f6ba0067cab7fb350c5202a9ae8ae4ce3ef1a0"}, - {file = "cryptography-47.0.0-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b9a8943e359b7615db1a3ba587994618e094ff3d6fa5a390c73d079ce18b3973"}, - {file = "cryptography-47.0.0-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f5c15764f261394b22aef6b00252f5195f46f2ca300bec57149474e2538b31f8"}, - {file = "cryptography-47.0.0-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:9c59ab0e0fa3a180a5a9c59f3a5abe3ef90d474bc56d7fadfbe80359491b615b"}, - {file = "cryptography-47.0.0-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:34b4358b925a5ea3e14384ca781a2c0ef7ac219b57bb9eacc4457078e2b19f92"}, - {file = "cryptography-47.0.0-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:0024b87d47ae2399165a6bfb20d24888881eeab83ae2566d62467c5ff0030ce7"}, - {file = "cryptography-47.0.0-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:1e47422b5557bb82d3fff997e8d92cff4e28b9789576984f08c248d2b3535d93"}, - {file = "cryptography-47.0.0-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:6f29f36582e6151d9686235e586dd35bb67491f024767d10b842e520dc6a07ac"}, - {file = "cryptography-47.0.0-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:a9b761f012a943b7de0e828843c5688d0de94a0578d44d6c85a1bae32f87791f"}, - {file = "cryptography-47.0.0-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:4e1de79e047e25d6e9f8cea71c86b4a53aced64134f0f003bbcbf3655fd172c8"}, - {file = "cryptography-47.0.0-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ef6b3634087f18d2155b1e8ce264e5345a753da2c5fa9815e7d41315c90f8318"}, - {file = "cryptography-47.0.0-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:11dbb9f50a0f1bb9757b3d8c27c1101780efb8f0bdecfb12439c22a74d64c001"}, - {file = "cryptography-47.0.0-cp311-abi3-win32.whl", hash = "sha256:7fda2f02c9015db3f42bb8a22324a454516ed10a8c29ca6ece6cdbb5efe2a203"}, - {file = "cryptography-47.0.0-cp311-abi3-win_amd64.whl", hash = "sha256:f5c3296dab66202f1b18a91fa266be93d6aa0c2806ea3d67762c69f60adc71aa"}, - {file = "cryptography-47.0.0-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:be12cb6a204f77ed968bcefe68086eb061695b540a3dd05edac507a3111b25f0"}, - {file = "cryptography-47.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2ebd84adf0728c039a3be2700289378e1c164afc6748df1a5ed456767bef9ba7"}, - {file = "cryptography-47.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7f68d6fbc7fbbcfb0939fea72c3b96a9f9a6edfc0e1b1d29778a2066030418b1"}, - {file = "cryptography-47.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:6651d32eff255423503aa276739da98c30f26c40cbeffcc6048e0d54ef704c0c"}, - {file = "cryptography-47.0.0-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:3fb8fa48075fad7193f2e5496135c6a76ac4b2aa5a38433df0a539296b377829"}, - {file = "cryptography-47.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:11438c7518132d95f354fa01a4aa2f806d172a061a7bed18cf18cbdacdb204d7"}, - {file = "cryptography-47.0.0-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:8c1a736bbb3288005796c3f7ccb9453360d7fed483b13b9f468aea5171432923"}, - {file = "cryptography-47.0.0-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:f1557695e5c2b86e204f6ce9470497848634100787935ab7adc5397c54abd7ab"}, - {file = "cryptography-47.0.0-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:f9a034b642b960767fb343766ae5ba6ad653f2e890ddd82955aef288ffea8736"}, - {file = "cryptography-47.0.0-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:b1c76fca783aa7698eb21eb14f9c4aa09452248ee54a627d125025a43f83e7a7"}, - {file = "cryptography-47.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:4f7722c97826770bab8ae92959a2e7b20a5e9e9bf4deae68fd86c3ca457bab52"}, - {file = "cryptography-47.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:09f6d7bf6724f8db8b32f11eccf23efc8e759924bc5603800335cf8859a3ddbd"}, - {file = "cryptography-47.0.0-cp314-cp314t-win32.whl", hash = "sha256:6eebcaf0df1d21ce1f90605c9b432dd2c4f4ab665ac29a40d5e3fc68f51b5e63"}, - {file = "cryptography-47.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:51c9313e90bd1690ec5a75ed047c27c0b8e6c570029712943d6116ef9a90620b"}, - {file = "cryptography-47.0.0-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:14432c8a9bcb37009784f9594a62fae211a2ae9543e96c92b2a8e4c3cd5cd0c4"}, - {file = "cryptography-47.0.0-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:07efe86201817e7d3c18781ca9770bc0db04e1e48c994be384e4602bc38f8f27"}, - {file = "cryptography-47.0.0-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2b45761c6ec22b7c726d6a829558777e32d0f1c8be7c3f3480f9c912d5ee8a10"}, - {file = "cryptography-47.0.0-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:edd4da498015da5b9f26d38d3bfc2e90257bfa9cbed1f6767c282a0025ae649b"}, - {file = "cryptography-47.0.0-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:9af828c0d5a65c70ec729cd7495a4bf1a67ecb66417b8f02ff125ab8a6326a74"}, - {file = "cryptography-47.0.0-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:256d07c78a04d6b276f5df935a9923275f53bd1522f214447fdf365494e2d515"}, - {file = "cryptography-47.0.0-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:5d0e362ff51041b0c0d219cc7d6924d7b8996f57ce5712bdcef71eb3c65a59cc"}, - {file = "cryptography-47.0.0-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:1581aef4219f7ca2849d0250edaa3866212fb74bf5667284f46aa92f9e65c1ca"}, - {file = "cryptography-47.0.0-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:a49a3eb5341b9503fa3000a9a0db033161db90d47285291f53c2a9d2cd1b7f76"}, - {file = "cryptography-47.0.0-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:2207a498b03275d0051589e326b79d4cf59985c99031b05bb292ac52631c37fe"}, - {file = "cryptography-47.0.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:7a02675e2fabd0c0fc04c868b8781863cbf1967691543c22f5470500ff840b31"}, - {file = "cryptography-47.0.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:80887c5cbd1774683cb126f0ab4184567f080071d5acf62205acb354b4b753b7"}, - {file = "cryptography-47.0.0-cp38-abi3-win32.whl", hash = "sha256:ed67ea4e0cfb5faa5bc7ecb6e2b8838f3807a03758eec239d6c21c8769355310"}, - {file = "cryptography-47.0.0-cp38-abi3-win_amd64.whl", hash = "sha256:835d2d7f47cdc53b3224e90810fb1d36ca94ea29cc1801fb4c1bc43876735769"}, - {file = "cryptography-47.0.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:7f1207974a904e005f762869996cf620e9bf79ecb4622f148550bb48e0eb35a7"}, - {file = "cryptography-47.0.0-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:1a405c08857258c11016777e11c02bacbe7ef596faf259305d282272a3a05cbe"}, - {file = "cryptography-47.0.0-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:20fdbe3e38fb67c385d233c89371fa27f9909f6ebca1cecc20c13518dae65475"}, - {file = "cryptography-47.0.0-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:f7db373287273d8af1414cf95dc4118b13ffdc62be521997b0f2b270771fef50"}, - {file = "cryptography-47.0.0-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:9fe6b7c64926c765f9dff301f9c1b867febcda5768868ca084e18589113732ab"}, - {file = "cryptography-47.0.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:cffbba3392df0fa8629bb7f43454ee2925059ee158e23c54620b9063912b86c8"}, - {file = "cryptography-47.0.0.tar.gz", hash = "sha256:9f8e55fe4e63613a5e1cc5819030f27b97742d720203a087802ce4ce9ceb52bb"}, + {file = "cryptography-48.0.1-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:3e4a1a3232eef2e6c732827d5722db29a0cc8b27af2a4d865b094cf954be9ca1"}, + {file = "cryptography-48.0.1-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:32143b24adb918f078134e1e230f1eb8cc04886b92c28b5f0041aaf3e5699225"}, + {file = "cryptography-48.0.1-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f0d27a5696721ef7a672b8c810f6aded391058e0b9486e63e6d93baf765da691"}, + {file = "cryptography-48.0.1-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:eb86ce1af36fe65041b6db9a8bb064ee621a7e5fded0f80d475ec243477cd242"}, + {file = "cryptography-48.0.1-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:b024e784ad6c077ee0147b35ea9cbfc1e34e1fd4c1dcca214c2794d73a12df08"}, + {file = "cryptography-48.0.1-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3752f2dbc8f07a30aad2932c986cea495b03bb554887828225da104f732852b6"}, + {file = "cryptography-48.0.1-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:bd81490cd5801d755cf97bb68ac191f14b708470b1c7cf4580f669b9c9264cd8"}, + {file = "cryptography-48.0.1-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:66fd0771e7b9c6dcd44cf1120690d2338d16d72795cf40cae2786a39eba65429"}, + {file = "cryptography-48.0.1-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:3fd2ca57062b241c856670b073487d2e86c4637937ca5601e48f97bf8e11fc8f"}, + {file = "cryptography-48.0.1-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:0ee6ea481db1ab889cba043ec1eda17bb9c1ea79db6722f779c3667f9f70322f"}, + {file = "cryptography-48.0.1-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:f2ceef93cb096aa3c4cc4b5c94ca6131f9196d28c64d6111533402a9b2054d41"}, + {file = "cryptography-48.0.1-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:9bd3f92d76217892b15df84ca256c2c113d386fdda7a7d8691aeeced976507c6"}, + {file = "cryptography-48.0.1-cp311-abi3-win32.whl", hash = "sha256:b9a32b876490d66c8bcc9963ef220199569748434ab01a9d6aaeabf88e7f5158"}, + {file = "cryptography-48.0.1-cp311-abi3-win_amd64.whl", hash = "sha256:39489bfca54c7a1f6b297efcd8bc608ab92d16c4ca631b0cad4da46724588b24"}, + {file = "cryptography-48.0.1-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:f817adc181390bd54f2f700107a7419040fb7c1bdf2fc26f36551a06a68c3345"}, + {file = "cryptography-48.0.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d5d30989c6917b478b5817902e85fddaea2261efa8648383d965381ccb9e1ac4"}, + {file = "cryptography-48.0.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:df637c05205ea7c1d7fbcbe54bbfea648a52951155f997af13d895d0ecc96991"}, + {file = "cryptography-48.0.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:869c3b8a53bfe27147832df48b32adadf558249d50e76cb3769d40e986b13265"}, + {file = "cryptography-48.0.1-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:e361afba8918070d376df76f408a4f67fec0ee9cff81a99e48fe9a233ef59e17"}, + {file = "cryptography-48.0.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:d069066deead00ac7f090be101be875a06855908f7ec004c27b8fefb4acfb411"}, + {file = "cryptography-48.0.1-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:09f73a725d582cef64b91281a322cd798d14a33b2b6f2b7ad9531dc336d84c02"}, + {file = "cryptography-48.0.1-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:15254441469dd6bf027039453288e2072124f8b6603563f5d759e1c9b69273fa"}, + {file = "cryptography-48.0.1-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:8ace4507d1e6533c125f4fac754f8bb8b6a74c08e92179dabd7e16571a3efbf3"}, + {file = "cryptography-48.0.1-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:b4e391975f038e66432328639620a4aff2d307513b004f1ca06d6225bced815c"}, + {file = "cryptography-48.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:42fcd8e26fe555d9b3577a135f5091fefa0aa4e99129c23fb56787a1bd4ada72"}, + {file = "cryptography-48.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c1400da5e32a43253392277eac7490a60e497d810a63dd5608d71bbd7af507c9"}, + {file = "cryptography-48.0.1-cp314-cp314t-win32.whl", hash = "sha256:0df56b056bc17c1b7d6821dfa65216e62bd232d8ab05eb3db44e71d235651471"}, + {file = "cryptography-48.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:9de21387aa95e2a895823d0745b430bed4f33503ba9ab5e0b5311f33e37d66d2"}, + {file = "cryptography-48.0.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:4fdc69f8e4316bcf0c8c8ec1f26f285d12e8142d88d96c876a59a03be3f6ae67"}, + {file = "cryptography-48.0.1-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:48fe40804d4caa2288f24e70ca8c64c42dd826da0ad7e4f1b41b2128d679e6c8"}, + {file = "cryptography-48.0.1-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:86be3b1b0b6bf09482fb50a979c508d2950ed95f5621ec77f4e385962006b83a"}, + {file = "cryptography-48.0.1-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:4ab0a343c807bbcd90c971cd1ecf072937cd01847a9e002bef88fb47ac6be577"}, + {file = "cryptography-48.0.1-cp39-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:9621de99d2da096006b629979efd8ae7eb2d8b822488d0c89ee4000c306c59b1"}, + {file = "cryptography-48.0.1-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:88c852a0ae366e262e5a1744b685e6a433dc8788dd2a277e418bf4904203609d"}, + {file = "cryptography-48.0.1-cp39-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:43c5835e2cb98c8733d86f57d6fc879b613f5c3478607281c3e36daffc6dd8a6"}, + {file = "cryptography-48.0.1-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:fe0180af5bf9236518a087e35bf2d9a347d5f5f51e63c579d683ddff424e3d46"}, + {file = "cryptography-48.0.1-cp39-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:b7a2d1a937a738a881737cec135a38bb61470589b17515b9f73f571d0ae10401"}, + {file = "cryptography-48.0.1-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:b74ca3b8e5ecdd833bf6a002ca41b4793bb27fb8f1c06ffaf2643c9e9140e31b"}, + {file = "cryptography-48.0.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:2c37f2461406063b417837f5f3daab668652acd82423efcd7f0a9f04be972de1"}, + {file = "cryptography-48.0.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:86fe77abb1bd87afb251d4d02ada7ecf53a32cee9b67d976abb2e45a13297475"}, + {file = "cryptography-48.0.1-cp39-abi3-win32.whl", hash = "sha256:6b2c0c3e6ccf3ade7750f836ef3ee36eea250cc467d45c256895573ac08cc6f1"}, + {file = "cryptography-48.0.1-cp39-abi3-win_amd64.whl", hash = "sha256:9a49ca6c81417f6a5edb50375a60cccdd70fa0a91a5211829dbea74eba94d2ac"}, + {file = "cryptography-48.0.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:08a597acce1ff37f347400087776599e2348a3a8bc53b44120e463cd274efe4a"}, + {file = "cryptography-48.0.1-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:735824ec41b7f74a7c45fb1591349333e4c696cb6c044e5f46356e560143e4cd"}, + {file = "cryptography-48.0.1-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:92a46e1d638daa264ba2971c0b0489c9409787943efae4d60ffda3d091ef832c"}, + {file = "cryptography-48.0.1-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:7e234ac052af99f2700826a5c29ea99d9c1b1f80341cde62d11c8154dc8e0bd9"}, + {file = "cryptography-48.0.1-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:33842cf0888951cef5bc7ac724ab844a42044c1727b967b7f8997289a0464f92"}, + {file = "cryptography-48.0.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:6184ca7b174f28d7c703f1290d4b297217c45355f77a98f67e9b7f14549ac54a"}, + {file = "cryptography-48.0.1.tar.gz", hash = "sha256:266f4ee051abb2f725b74ef8072b521ce1feacf685a3364fa6a6b45548db791a"}, ] [package.dependencies] -cffi = {version = ">=2.0.0", markers = "python_full_version >= \"3.9.0\" and platform_python_implementation != \"PyPy\""} +cffi = {version = ">=2.0.0", markers = "platform_python_implementation != \"PyPy\""} typing-extensions = {version = ">=4.13.2", markers = "python_full_version < \"3.11.0\""} [package.extras] @@ -2118,8 +2118,8 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.22.4", markers = "python_version < \"3.11\""}, - {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, {version = ">=1.23.2", markers = "python_version == \"3.11\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -2506,8 +2506,8 @@ astroid = ">=3.3.8,<=3.4.0.dev0" colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} dill = [ {version = ">=0.2", markers = "python_version < \"3.11\""}, - {version = ">=0.3.7", markers = "python_version >= \"3.12\""}, {version = ">=0.3.6", markers = "python_version == \"3.11\""}, + {version = ">=0.3.7", markers = "python_version >= \"3.12\""}, ] isort = ">=4.2.5,<5.13 || >5.13,<7" mccabe = ">=0.6,<0.8" @@ -3347,4 +3347,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.13" -content-hash = "0983208c21c55de6aaa08cd631a72b69fa9456c039bebff07092e5413bc6edec" +content-hash = "1ffa0b3df5f9c75bed670f80d681ee8f4dbc42f0b86cf89e48189ef0eacf6be5" diff --git a/pyproject.toml b/pyproject.toml index ea33699..8818863 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,6 +68,7 @@ optional = true behave = "1.3.3" coverage = "7.11.0" moto = {extras = ["s3"], version = "4.2.14"} +cryptography = "48.0.1" # dependency of `moto` requests = "2.33.0" # dependency of `moto` Werkzeug = "3.1.6" pytest = "9.0.3" diff --git a/src/dve/core_engine/backends/base/reader.py b/src/dve/core_engine/backends/base/reader.py index e9ed9e8..ae0e99f 100644 --- a/src/dve/core_engine/backends/base/reader.py +++ b/src/dve/core_engine/backends/base/reader.py @@ -119,10 +119,7 @@ def read_to_entity_type( """ if entity_name == Iterator[dict[str, Any]]: return self.read_to_py_iterator( - resource, - entity_name, - schema, # type: ignore - all_model_fields + resource, entity_name, schema, all_model_fields # type: ignore ) self.raise_if_not_sensible_file(resource, entity_name) @@ -133,11 +130,7 @@ def read_to_entity_type( raise ReaderLacksEntityTypeSupport(entity_type=entity_type) from err return reader_func( - self, - resource, - entity_name, - schema, - all_model_fields=all_model_fields # type: ignore + self, resource, entity_name, schema, all_model_fields=all_model_fields # type: ignore ) def add_record_index(self, entity: EntityType, **kwargs) -> EntityType: diff --git a/src/dve/core_engine/backends/base/rules.py b/src/dve/core_engine/backends/base/rules.py index b66b3ae..9b6b4fe 100644 --- a/src/dve/core_engine/backends/base/rules.py +++ b/src/dve/core_engine/backends/base/rules.py @@ -681,3 +681,13 @@ def read_parquet(self, path: URI, **kwargs) -> EntityType: def write_parquet(self, entity: EntityType, target_location: URI, **kwargs) -> URI: """Method to write parquet files""" raise NotImplementedError() + + def filter_data_contract_record_rejections( + self, + working_directory: URI, + entity: EntityType, + entity_name: EntityName, + **kwargs, + ): + """Method to filter out record rejection errors from the data contract for a given entity""" + raise NotImplementedError() diff --git a/src/dve/core_engine/backends/exceptions.py b/src/dve/core_engine/backends/exceptions.py index c72f252..bb58516 100644 --- a/src/dve/core_engine/backends/exceptions.py +++ b/src/dve/core_engine/backends/exceptions.py @@ -37,10 +37,7 @@ class UnableToParseCSVError(MessageBearingError): """An error raised when unable to parse a CSV file""" def __init__( - self, - entity_name: str, - field_check_error_message: str, - field_check_error_code: str + self, entity_name: str, field_check_error_message: str, field_check_error_code: str ): super().__init__( messages=[ diff --git a/src/dve/core_engine/backends/implementations/duckdb/duckdb_helpers.py b/src/dve/core_engine/backends/implementations/duckdb/duckdb_helpers.py index 627822b..65e20af 100644 --- a/src/dve/core_engine/backends/implementations/duckdb/duckdb_helpers.py +++ b/src/dve/core_engine/backends/implementations/duckdb/duckdb_helpers.py @@ -18,9 +18,10 @@ from pydantic import BaseModel from typing_extensions import Annotated, get_args, get_origin, get_type_hints +from dve.common.error_utils import get_feedback_errors_uri from dve.core_engine.backends.base.utilities import _get_non_heterogenous_type from dve.core_engine.constants import RECORD_INDEX_COLUMN_NAME -from dve.core_engine.type_hints import URI +from dve.core_engine.type_hints import URI, EntityName from dve.parser.file_handling.service import LocalFilesystemImplementation, _get_implementation @@ -100,7 +101,7 @@ def table_exists(connection: DuckDBPyConnection, table_name: str) -> bool: def relation_is_empty(relation: DuckDBPyRelation) -> bool: """Check if a duckdb relation is empty""" - if relation.limit(1).count("*"): + if relation.limit(1).shape[0] > 0: return False return True @@ -256,6 +257,48 @@ def duckdb_write_parquet(cls): return cls +def _ddb_filter_contract_errors( + self, + working_directory: URI, + entity: DuckDBPyRelation, + entity_name: EntityName, +) -> DuckDBPyRelation: + contract_error_location = get_feedback_errors_uri(working_directory, "data_contract") + if not Path(contract_error_location).exists(): + return entity + relevant_record_rejection_codes_rel = ( + self._connection.read_json( + contract_error_location, + columns={ + "RecordIndex": "INTEGER", + "FailureType": "STRING", + "Status": "STRING", + "Entity": "STRING", + }, + ) + .filter( + f"FailureType == 'record' AND Status != 'informational' AND Entity = '{entity_name}'" + ) # pylint: disable=C0301 + .select("RecordIndex") + .distinct() + .order("RecordIndex asc") + ) + + if relation_is_empty(relevant_record_rejection_codes_rel): + return entity + + filtered_entity = entity.join( + relevant_record_rejection_codes_rel, condition="__record_index__ == RecordIndex", how="anti" + ) + return filtered_entity + + +def ddb_filter_contract_errors(cls): + """Class decorator to filter out records that failed casting and have record rejection scope""" + cls.filter_data_contract_record_rejections = _ddb_filter_contract_errors + return cls + + @staticmethod # type: ignore def _duckdb_get_entity_count(entity: DuckDBPyRelation) -> int: """Method to obtain entity count from a persisted parquet entity""" diff --git a/src/dve/core_engine/backends/implementations/duckdb/readers/csv.py b/src/dve/core_engine/backends/implementations/duckdb/readers/csv.py index 717ced2..3cd931d 100644 --- a/src/dve/core_engine/backends/implementations/duckdb/readers/csv.py +++ b/src/dve/core_engine/backends/implementations/duckdb/readers/csv.py @@ -71,7 +71,7 @@ def __init__( quote_char=quotechar, field_check=field_check, field_check_error_code=field_check_error_code, - field_check_error_message=field_check_error_message + field_check_error_message=field_check_error_message, ) def read_to_py_iterator( @@ -254,7 +254,7 @@ def read_to_relation( # pylint: disable=unused-argument resource=resource, entity_name=entity_name, schema=schema, - all_model_fields=all_model_fields + all_model_fields=all_model_fields, ) entity = entity.select(StarExpression(exclude=[RECORD_INDEX_COLUMN_NAME])).distinct() no_records = entity.shape[0] diff --git a/src/dve/core_engine/backends/implementations/duckdb/rules.py b/src/dve/core_engine/backends/implementations/duckdb/rules.py index debb8fe..dc73dad 100644 --- a/src/dve/core_engine/backends/implementations/duckdb/rules.py +++ b/src/dve/core_engine/backends/implementations/duckdb/rules.py @@ -22,6 +22,7 @@ from dve.core_engine.backends.exceptions import ConstraintError from dve.core_engine.backends.implementations.duckdb.duckdb_helpers import ( DDBStruct, + ddb_filter_contract_errors, duckdb_read_parquet, duckdb_record_index, duckdb_rel_to_dictionaries, @@ -61,6 +62,7 @@ @duckdb_record_index @duckdb_write_parquet @duckdb_read_parquet +@ddb_filter_contract_errors class DuckDBStepImplementations(BaseStepImplementations[DuckDBPyRelation]): """An implementation of transformation steps in duckdb.""" diff --git a/src/dve/core_engine/backends/implementations/spark/readers/csv.py b/src/dve/core_engine/backends/implementations/spark/readers/csv.py index c983cdf..2df30c5 100644 --- a/src/dve/core_engine/backends/implementations/spark/readers/csv.py +++ b/src/dve/core_engine/backends/implementations/spark/readers/csv.py @@ -9,13 +9,13 @@ from pyspark.sql.types import StructType from dve.core_engine.backends.base.reader import read_function -from dve.core_engine.backends.readers.csv import CSVFileReader from dve.core_engine.backends.exceptions import EmptyFileError from dve.core_engine.backends.implementations.spark.spark_helpers import ( get_type_from_annotation, spark_record_index, spark_write_parquet, ) +from dve.core_engine.backends.readers.csv import CSVFileReader from dve.core_engine.type_hints import URI, EntityName from dve.parser.file_handling import get_content_length diff --git a/src/dve/core_engine/backends/implementations/spark/rules.py b/src/dve/core_engine/backends/implementations/spark/rules.py index 307e71a..825ee15 100644 --- a/src/dve/core_engine/backends/implementations/spark/rules.py +++ b/src/dve/core_engine/backends/implementations/spark/rules.py @@ -14,6 +14,7 @@ create_udf, get_all_registered_udfs, object_to_spark_literal, + spark_filter_contract_errors, spark_read_parquet, spark_record_index, spark_write_parquet, @@ -53,6 +54,7 @@ @spark_record_index @spark_write_parquet @spark_read_parquet +@spark_filter_contract_errors class SparkStepImplementations(BaseStepImplementations[DataFrame]): """An implementation of transformation steps in Apache Spark.""" diff --git a/src/dve/core_engine/backends/implementations/spark/spark_helpers.py b/src/dve/core_engine/backends/implementations/spark/spark_helpers.py index ced985a..c8e949d 100644 --- a/src/dve/core_engine/backends/implementations/spark/spark_helpers.py +++ b/src/dve/core_engine/backends/implementations/spark/spark_helpers.py @@ -12,6 +12,7 @@ from dataclasses import dataclass, is_dataclass from decimal import Decimal from functools import wraps +from pathlib import Path from typing import Any, ClassVar, Optional, TypeVar, Union, overload from delta.exceptions import ConcurrentAppendException, DeltaConcurrentModificationException @@ -25,9 +26,10 @@ from pyspark.sql.types import LongType, StructField, StructType from typing_extensions import Annotated, Protocol, TypedDict, get_args, get_origin, get_type_hints +from dve.common.error_utils import get_feedback_errors_uri from dve.core_engine.backends.base.utilities import _get_non_heterogenous_type from dve.core_engine.constants import RECORD_INDEX_COLUMN_NAME -from dve.core_engine.type_hints import URI +from dve.core_engine.type_hints import URI, EntityName # It would be really nice if there was a more parameterisable # way of doing this. @@ -365,6 +367,53 @@ def spark_write_parquet(cls): return cls +def _spark_filter_contract_errors( + self, + working_directory: URI, + entity: DataFrame, + entity_name: EntityName, +) -> DataFrame: + contract_error_location = get_feedback_errors_uri(working_directory, "data_contract") + if not Path(contract_error_location).exists(): + return entity + + relevant_record_rejections_codes_df = ( + self.spark_session.read.json( + path=contract_error_location, + schema=st.StructType( + [ + st.StructField("RecordIndex", st.IntegerType()), + st.StructField("FailureType", st.StringType()), + st.StructField("Status", st.StringType()), + st.StructField("Entity", st.StringType()), + ] + ), + ) + .filter( + (sf.col("FailureType") == sf.lit("record")) + & (sf.col("Status") != sf.lit("informational")) + & (sf.col("Entity") == sf.lit(entity_name)) + ) + .distinct() + .orderBy(sf.asc(sf.col("RecordIndex"))) + # todo - ^^ possibly relook at join strat. Does this help? Over prescriptive? + ) + if df_is_empty(relevant_record_rejections_codes_df): + return entity + filtered_entity = entity.join( + relevant_record_rejections_codes_df, + on=entity.__record_index__ == relevant_record_rejections_codes_df.RecordIndex, + how="anti", + ) + return filtered_entity + + +def spark_filter_contract_errors(cls): + """Class decorator to filter out records that failed casting and have record rejection scope""" + cls.filter_data_contract_record_rejections = _spark_filter_contract_errors + return cls + + @staticmethod # type: ignore def _spark_get_entity_count(entity: DataFrame) -> int: """Method to obtain entity count from a persisted parquet entity""" diff --git a/src/dve/core_engine/backends/metadata/contract.py b/src/dve/core_engine/backends/metadata/contract.py index 234a1e9..b233e92 100644 --- a/src/dve/core_engine/backends/metadata/contract.py +++ b/src/dve/core_engine/backends/metadata/contract.py @@ -44,7 +44,7 @@ def schemas(self) -> dict[EntityName, type[BaseModel]]: """The per-entity schemas, as pydantic models.""" if not self._schemas: for entity_name, validator in self.validators.items(): - self._schemas[entity_name] = validator.model # type: ignore # pylint: disable=E1137 + self._schemas[entity_name] = validator.model # type: ignore # pylint: disable=E1137 return self._schemas.copy() # pylint: disable=E1101 @root_validator(allow_reuse=True) diff --git a/src/dve/core_engine/backends/readers/csv.py b/src/dve/core_engine/backends/readers/csv.py index 9d37b06..dbace5b 100644 --- a/src/dve/core_engine/backends/readers/csv.py +++ b/src/dve/core_engine/backends/readers/csv.py @@ -16,11 +16,11 @@ MissingHeaderError, ) from dve.core_engine.backends.readers.utilities import ( - raise_message_bearing_error_on_header_differences + get_all_model_fields, + raise_message_bearing_error_on_header_differences, ) from dve.core_engine.backends.utilities import get_polars_type_from_annotation, stringify_model from dve.core_engine.constants import RECORD_INDEX_COLUMN_NAME -from dve.core_engine.backends.readers.utilities import get_all_model_fields from dve.core_engine.type_hints import EntityName from dve.parser.file_handling import get_content_length, open_stream from dve.parser.file_handling.implementations.file import file_uri_to_local_path diff --git a/src/dve/core_engine/backends/readers/utilities.py b/src/dve/core_engine/backends/readers/utilities.py index c46cac7..00086bd 100644 --- a/src/dve/core_engine/backends/readers/utilities.py +++ b/src/dve/core_engine/backends/readers/utilities.py @@ -44,16 +44,16 @@ def raise_message_bearing_error_on_header_differences( header or vice versa. """ missing, additional = check_csv_header_expected( - resource, - expected_schema, - all_model_fields, - delimiter, - quote_char + resource, expected_schema, all_model_fields, delimiter, quote_char ) if missing or additional: - record_details_missing = f"missing fields: {', '.join(sorted(missing))};" if missing else "" # pylint: disable=C0301 - record_details_additional = f"additional fields: {', '.join(sorted(additional))};" if additional else "" # pylint: disable=C0301 + record_details_missing = ( + f"missing fields: {', '.join(sorted(missing))};" if missing else "" + ) # pylint: disable=C0301 + record_details_additional = ( + f"additional fields: {', '.join(sorted(additional))};" if additional else "" + ) # pylint: disable=C0301 raise MessageBearingError( "The CSV header doesn't match what is expected", messages=[ diff --git a/src/dve/core_engine/configuration/v1/__init__.py b/src/dve/core_engine/configuration/v1/__init__.py index 04d5e07..6221957 100644 --- a/src/dve/core_engine/configuration/v1/__init__.py +++ b/src/dve/core_engine/configuration/v1/__init__.py @@ -281,7 +281,9 @@ def _load_rules_and_vars(self) -> tuple[list[Rule], list[TemplateVariables]]: rules, local_variable_list = [], [] added_rules: set[RuleName] = set() - for index, complex_rule_config in enumerate(self.transformations.complex_rules): # pylint: disable=E1101 + for index, complex_rule_config in enumerate( + self.transformations.complex_rules + ): # pylint: disable=E1101 rule, local_params, deps = self._resolve_business_rule(complex_rule_config) missing_rules = deps - added_rules if missing_rules: diff --git a/src/dve/core_engine/engine.py b/src/dve/core_engine/engine.py index b2ec9a7..838bfb5 100644 --- a/src/dve/core_engine/engine.py +++ b/src/dve/core_engine/engine.py @@ -170,7 +170,9 @@ def __exit__( exc_value: Optional[Exception], traceback: Optional[TracebackType], ) -> None: - self.main_log.info(f"Exiting pipeline context, clearing {self.cache_prefix!r}") # pylint: disable=E1101 + self.main_log.info( + f"Exiting pipeline context, clearing {self.cache_prefix!r}" + ) # pylint: disable=E1101 cache_dir = self._cache_dir self._cache_dir = None @@ -198,7 +200,9 @@ def _write_entity_outputs(self, entities: SparkEntities) -> SparkEntities: """ output_entities = {} - self.main_log.info(f"Writing entities to the output location: {self.output_prefix_uri}") # pylint: disable=E1101 + self.main_log.info( + f"Writing entities to the output location: {self.output_prefix_uri}" + ) # pylint: disable=E1101 for entity_name, entity in entities.items(): entity = entity.drop(RECORD_INDEX_COLUMN_NAME) @@ -206,9 +210,13 @@ def _write_entity_outputs(self, entities: SparkEntities) -> SparkEntities: output_uri = joinuri(self.output_prefix_uri, entity_name) if get_resource_exists(output_uri): - self.main_log.info(f"{output_uri} already exists - will be overwritten") # pylint: disable=E1101 + self.main_log.info( + f"{output_uri} already exists - will be overwritten" + ) # pylint: disable=E1101 - self.main_log.info(f"+ Writing parquet output to {output_uri!r}") # pylint: disable=E1101 + self.main_log.info( + f"+ Writing parquet output to {output_uri!r}" + ) # pylint: disable=E1101 entity.write.mode("overwrite").parquet(output_uri) spark_session = SparkSession.builder.getOrCreate() output_entities[entity_name] = spark_session.read.format("parquet").load( diff --git a/src/dve/core_engine/message.py b/src/dve/core_engine/message.py index 627ae3a..626a710 100644 --- a/src/dve/core_engine/message.py +++ b/src/dve/core_engine/message.py @@ -36,6 +36,8 @@ class DataContractErrorDetail(BaseModel): """Define custom error codes for validation issues raised during the data contract phase""" error_code: str + error_level: Optional[FailureType] = "record" + is_informational: Optional[bool] = False error_message: Optional[str] = None reporting_entity: Optional[str] = None @@ -247,26 +249,14 @@ def from_pydantic_error( messages: Messages = [] for error_dict in error.errors(): error_type = error_dict["type"] + # TODO - review in pydantic v2 - how handles null vs not provided values if "none.not_allowed" in error_type or "value_error.missing" in error_type: category = "Blank" else: category = "Bad value" - error_code = error_type - if "." in error_code: - error_code = error_code.split(".", 1)[-1] - - if error_code in INTEGRITY_ERROR_CODES: - failure_type: FailureType = "integrity" - elif error_code in SUBMISSION_ERROR_CODES: - failure_type = "submission" - else: - failure_type = "record" error_field = ".".join([idx for idx in error_dict["loc"] if not isinstance(idx, int)]) - is_informational = False - if error_code.endswith("warning"): - is_informational = True error_detail: DataContractErrorDetail = error_details.get( # type: ignore error_field, DEFAULT_ERROR_DETAIL ).get(category) @@ -276,8 +266,8 @@ def from_pydantic_error( entity=error_detail.reporting_entity or entity, original_entity=entity, record=record, - failure_type=failure_type, - is_informational=is_informational, + failure_type=error_detail.error_level, # type: ignore + is_informational=error_detail.is_informational, # type: ignore error_type=error_type, error_location=error_dict["loc"], # type: ignore error_message=error_detail.template_message(record, error_dict["loc"]), diff --git a/src/dve/core_engine/models.py b/src/dve/core_engine/models.py index 09fcbb3..f29889a 100644 --- a/src/dve/core_engine/models.py +++ b/src/dve/core_engine/models.py @@ -105,6 +105,8 @@ class SubmissionStatisticsRecord(AuditRecord): record_count: Optional[int] """Count of records in the submitted file""" + number_submission_rejections: Optional[int] + """Number of submission rejections raised following validation""" number_record_rejections: Optional[int] """Number of record rejections raised following validation""" number_warnings: Optional[int] diff --git a/src/dve/pipeline/pipeline.py b/src/dve/pipeline/pipeline.py index 00a0c51..7a43391 100644 --- a/src/dve/pipeline/pipeline.py +++ b/src/dve/pipeline/pipeline.py @@ -42,6 +42,7 @@ from dve.parser.file_handling.implementations.file import LocalFilesystemImplementation from dve.parser.file_handling.service import _get_implementation from dve.pipeline.utils import SubmissionStatus, deadletter_file, load_config, load_reader +from dve.reporting.constants import ErrorReportStatus from dve.reporting.error_report import ERROR_SCHEMA, calculate_aggregates PERMISSIBLE_EXCEPTIONS: tuple[type[Exception]] = ( @@ -223,7 +224,7 @@ def write_file_to_parquet( submission_file_uri, model_name, stringify_model(model), # type: ignore - get_all_model_fields(models.values()) # type: ignore + get_all_model_fields(models.values()), # type: ignore ), f"{out}{model_name}", ) @@ -379,9 +380,6 @@ def file_transformation_step( failed.append((submission_info, submission_status)) else: success.append((submission_info, submission_status)) - except AttributeError as exc: - self._logger.error(f"File transformation raised exception: {exc}") - raise exc except PERMISSIBLE_EXCEPTIONS as exc: self._logger.warning( f"File transformation raised exception: {exc}. Will be retried later." @@ -509,9 +507,6 @@ def data_contract_step( submission_info: SubmissionInfo submission_status: SubmissionStatus submission_info, submission_status = future.result() - except AttributeError as exc: - self._logger.error(f"Data Contract raised exception: {exc}") - raise exc except PERMISSIBLE_EXCEPTIONS as exc: self._logger.warning( f"Data Contract raised exception: {exc}. Will be retried later." @@ -616,8 +611,19 @@ def apply_business_rules( # pylint: disable=R0914 submission_status.processing_failed = True for entity_name, entity in entity_manager.entities.items(): + # Note BI filtering done within the apply_rules + self._logger.info(f"applying data contract filter to {entity_name}.") + if not entity_name.startswith("Original"): + filtered_entity = self._step_implementations.filter_data_contract_record_rejections( + working_directory, + entity, + entity_name, + ) + else: + self._logger.info(f"Skipping {entity_name}. Marked original.") + filtered_entity = entity projected = self._step_implementations.write_parquet( # type: ignore - entity, + filtered_entity, fh.joinuri( self.processed_files_path, submission_info.submission_id, @@ -629,6 +635,7 @@ def apply_business_rules( # pylint: disable=R0914 projected ) + # todo - add to submission_status around records that have passed record validations/rejected submission_status.number_of_records = self.get_entity_count( entity=entity_manager.entities[ f"""Original{rules.global_variables.get( @@ -682,9 +689,6 @@ def business_rule_step( unsucessful_files.append((submission_info, submission_status)) # type: ignore else: successful_files.append((submission_info, submission_status)) # type: ignore - except AttributeError as exc: - self._logger.error(f"Business Rules raised exception: {exc}") - raise exc except PERMISSIBLE_EXCEPTIONS as exc: self._logger.warning( f"Business Rules raised exception: {exc}. Will be retried later." @@ -758,10 +762,12 @@ def _get_error_dataframes(self, submission_id: str): df = pl.DataFrame(errors, schema={key: pl.Utf8() for key in errors[0]}) # type: ignore df = df.with_columns( - pl.when(pl.col("Status") == pl.lit("error")) # type: ignore - .then(pl.lit("Submission Failure")) # type: ignore - .otherwise(pl.lit("Warning")) # type: ignore - .alias("error_type") + pl.when(pl.col("Status") == pl.lit("informational")) + .then(pl.lit("Warning")) + .when(pl.col("FailureType") == pl.lit("submission")) # type: ignore + .then(pl.lit(ErrorReportStatus.FILE_REJECTION.reporting_name)) # type: ignore + .otherwise(pl.lit(ErrorReportStatus.RECORD_REJECTION.reporting_name)) # type: ignore + .alias("error_type") # type: ignore ) df = df.select( pl.col("Entity").alias("Table"), # type: ignore @@ -823,8 +829,13 @@ def error_report( sub_stats = SubmissionStatisticsRecord( submission_id=submission_info.submission_id, record_count=submission_status.number_of_records, - number_record_rejections=err_types.get("Submission Failure", 0), - number_warnings=err_types.get("Warning", 0), + number_submission_rejections=err_types.get( + ErrorReportStatus.FILE_REJECTION.reporting_name, 0 + ), + number_record_rejections=err_types.get( + ErrorReportStatus.RECORD_REJECTION.reporting_name, 0 + ), + number_warnings=err_types.get(ErrorReportStatus.WARNING.reporting_name, 0), ) summary_dict = { @@ -835,7 +846,7 @@ def error_report( summary_items = er.SummaryItems( submission_status=submission_status, summary_dict=summary_dict, - row_headings=["Submission Failure", "Warning"], + row_headings=[e.reporting_name for e in ErrorReportStatus], ) workbook = er.ExcelFormat( @@ -894,9 +905,6 @@ def error_report_step( try: submission_info, submission_status, submission_stats, feedback_uri = future.result() reports.append((submission_info, submission_status, submission_stats, feedback_uri)) - except AttributeError as exc: - self._logger.error(f"Error reports raised exception: {exc}") - raise exc except PERMISSIBLE_EXCEPTIONS as exc: self._logger.warning( f"Error reports raised exception: {exc}. Will be retried later." diff --git a/src/dve/reporting/constants.py b/src/dve/reporting/constants.py new file mode 100644 index 0000000..657375e --- /dev/null +++ b/src/dve/reporting/constants.py @@ -0,0 +1,22 @@ +""" +Constants used within the error reports +""" + +from enum import Enum + + +class ErrorReportStatus(Enum): + """ + Constant to centrally hold error report status. + """ + + FILE_REJECTION = 1, "File Rejection" + RECORD_REJECTION = 2, "Record Rejection" + WARNING = 3, "Warning" + + @property + def reporting_name(self): + """ + The error report 'friendly' name. + """ + return self.value[1] diff --git a/src/dve/reporting/error_report.py b/src/dve/reporting/error_report.py index 9e947bf..8169aba 100644 --- a/src/dve/reporting/error_report.py +++ b/src/dve/reporting/error_report.py @@ -11,6 +11,7 @@ from dve.common.error_utils import conditional_cast from dve.core_engine.message import FeedbackMessage from dve.parser.file_handling.service import open_stream +from dve.reporting.constants import ErrorReportStatus ERROR_SCHEMA = { "Table": Utf8(), @@ -85,7 +86,7 @@ def create_error_dataframe(errors: deque[FeedbackMessage], key_fields): df = df.with_columns( # type: ignore pl.when(pl.col("Status") == pl.lit("error")) # type: ignore - .then(pl.lit("Submission Failure")) # type: ignore + .then(pl.lit(ErrorReportStatus.FILE_REJECTION.reporting_name)) # type: ignore .otherwise(pl.lit("Warning")) # type: ignore .alias("error_type") ) diff --git a/src/dve/reporting/excel_report.py b/src/dve/reporting/excel_report.py index 82aa510..de264d5 100644 --- a/src/dve/reporting/excel_report.py +++ b/src/dve/reporting/excel_report.py @@ -17,6 +17,7 @@ from polars.exceptions import ColumnNotFoundError from dve.pipeline.utils import SubmissionStatus +from dve.reporting.constants import ErrorReportStatus @dataclass @@ -97,9 +98,9 @@ def get_submission_status(self, aggregates: DataFrame) -> str: if aggregates.is_empty(): return "File has been accepted, no issues to report" failures = aggregates["Type"].unique() - if "Submission Failure" in failures: + if ErrorReportStatus.FILE_REJECTION.reporting_name in failures: status = "File has been rejected" - elif "Warning" in failures: + elif ErrorReportStatus.WARNING.reporting_name in failures: status = "File has been accepted, all records accepted with warnings" else: status = "File has been accepted, no issues to report" @@ -141,6 +142,17 @@ def _add_submission_info(self, status: str, summary: Worksheet): for key, value in self.summary_dict.items(): summary.append(["", _key_renames.get(key, key), str(value)]) + summary.append( + [ + "", + "Total Number of Records Processed", + ( + self.submission_status.number_of_records + if self.submission_status.number_of_records + else 0 + ), # pylint: disable=C0301 + ] + ) summary.append(["", ""]) diff --git a/tests/features/animals.feature b/tests/features/animals.feature new file mode 100644 index 0000000..d68ddbf --- /dev/null +++ b/tests/features/animals.feature @@ -0,0 +1,59 @@ +Feature: Pipeline tests using the animal dataset + Test record rejection and ensuring that records are correctly removed from the entity and that + the correct validation feedback is raised in the error report. + + Scenario: Validate XML data with just record level rejections (duckdb) + Given I submit the animals file animals.xml for processing + And A duckdb pipeline is configured with schema file 'animals.dischema.json' + And I add initial audit entries for the submission + Then the latest audit record for the submission is marked with processing status file_transformation + When I run the file transformation phase + Then the animals entity is stored as a parquet after the file_transformation phase + And the latest audit record for the submission is marked with processing status data_contract + When I run the data contract phase + Then there are no record rejections from the data_contract phase + And the animals entity is stored as a parquet after the data_contract phase + And the latest audit record for the submission is marked with processing status business_rules + When I run the business rules phase + Then there are errors with the following details and associated error_count from the business_rules phase + | ErrorType | ErrorCode | error_count | + | record | ANE01 | 2 | + And The rules restrict "animals" to 3 qualifying records + When I run the error report phase + Then An error report is produced + And The statistics entry for the submission shows the following information + | parameter | value | + | record_count | 5 | + | number_record_rejections | 2 | + | number_warnings | 0 | + + Scenario: Validate XML data with a mixture of error types in (duckdb) + Given I submit the animals file animals_mixture.xml for processing + And A duckdb pipeline is configured with schema file 'animals.dischema.json' + And I add initial audit entries for the submission + Then the latest audit record for the submission is marked with processing status file_transformation + When I run the file transformation phase + Then the animals entity is stored as a parquet after the file_transformation phase + And the latest audit record for the submission is marked with processing status data_contract + When I run the data contract phase + Then there are no record rejections from the data_contract phase + # Then there are errors with the following details and associated error_count from the data_contract phase + # | FailureType | Status | ErrorCode | error_count | + # | record | error | FieldBlank | 1 | + And the animals entity is stored as a parquet after the data_contract phase + And the latest audit record for the submission is marked with processing status business_rules + When I run the business rules phase + Then there are errors with the following details and associated error_count from the business_rules phase + | FailureType | Status | ErrorCode | error_count | + | record | error | ANE01 | 2 | + | submission | error | ANE02 | 1 | + | record | informational | ANE03 | 1 | + And The rules restrict "animals" to 5 qualifying records + When I run the error report phase + Then An error report is produced + And The statistics entry for the submission shows the following information + | parameter | value | + | record_count | 7 | + | number_submission_rejections | 1 | + | number_record_rejections | 2 | + | number_warnings | 1 | diff --git a/tests/features/demographics.feature b/tests/features/demographics.feature index aa59bfc..af4b62a 100644 --- a/tests/features/demographics.feature +++ b/tests/features/demographics.feature @@ -17,7 +17,7 @@ Feature: Pipeline tests using the ambsys dataset And the demographics entity is stored as a parquet after the data_contract phase And the latest audit record for the submission is marked with processing status business_rules When I run the business rules phase - Then The rules restrict "demographics" to 6 qualifying records + Then The rules restrict "demographics" to 2 qualifying records And At least one row from "demographics" has generated error code "BAD_NHS" And the demographics entity is stored as a parquet after the business_rules phase And The entity "demographics" does not contain an entry for "FALSE" in column "NHS_Number_Valid" @@ -43,7 +43,7 @@ Feature: Pipeline tests using the ambsys dataset And the demographics entity is stored as a parquet after the data_contract phase And the latest audit record for the submission is marked with processing status business_rules When I run the business rules phase - Then The rules restrict "demographics" to 6 qualifying records + Then The rules restrict "demographics" to 2 qualifying records And At least one row from "demographics" has generated error code "BAD_NHS" And the demographics entity is stored as a parquet after the business_rules phase And The entity "demographics" does not contain an entry for "FALSE" in column "NHS_Number_Valid" diff --git a/tests/features/movies.feature b/tests/features/movies.feature index fa041ea..750975e 100644 --- a/tests/features/movies.feature +++ b/tests/features/movies.feature @@ -19,16 +19,18 @@ Feature: Pipeline tests using the movies dataset Then the movies entity is stored as a parquet after the file_transformation phase And the latest audit record for the submission is marked with processing status data_contract When I run the data contract phase - Then there are 3 record rejections from the data_contract phase + Then there is 1 submission rejection from the data_contract phase + And there are 3 record rejections from the data_contract phase And there are errors with the following details and associated error_count from the data_contract phase - | Entity | ErrorCode | ErrorMessage | RecordIndex | error_count | - | movies | BLANKYEAR | year not provided | 2 | 1 | - | movies_rename_test | DODGYYEAR | year value (NOT_A_NUMBER) is invalid | 1 | 1 | - | movies | DODGYDATE | date_joined value is not valid: daft_date | 1 | 1 | + | Entity | ErrorCode | ErrorMessage | RecordIndex | error_count | + | movies | BLANKYEAR | year not provided | 2 | 1 | + | movies_rename_test | DODGYYEAR | year value (NOT_A_NUMBER) is invalid | 1 | 1 | + | movies | DODGYDATE | date_joined value is not valid: daft_date | 1 | 1 | + | movies | BLANKTITLE | title should not be blank | 4 | 1 | And the movies entity is stored as a parquet after the data_contract phase And the latest audit record for the submission is marked with processing status business_rules When I run the business rules phase - Then The rules restrict "movies" to 4 qualifying records + Then The rules restrict "movies" to 3 qualifying records And there are errors with the following details and associated error_count from the business_rules phase | ErrorCode | ErrorMessage | RecordIndex | error_count | | LIMITED_RATINGS | Movie has too few ratings ([6.5]) | 4 | 1 | @@ -37,10 +39,11 @@ Feature: Pipeline tests using the movies dataset When I run the error report phase Then An error report is produced And The statistics entry for the submission shows the following information - | parameter | value | - | record_count | 5 | - | number_record_rejections | 4 | - | number_warnings | 1 | + | parameter | value | + | record_count | 5 | + | number_submission_rejections | 1 | + | number_record_rejections | 3 | + | number_warnings | 2 | And the error aggregates are persisted Scenario: Validate and filter movies (duckdb) @@ -55,16 +58,18 @@ Feature: Pipeline tests using the movies dataset Then the movies entity is stored as a parquet after the file_transformation phase And the latest audit record for the submission is marked with processing status data_contract When I run the data contract phase - Then there are 3 record rejections from the data_contract phase + Then there is 1 submission rejection from the data_contract phase + And there are 3 record rejections from the data_contract phase And there are errors with the following details and associated error_count from the data_contract phase | Entity | ErrorCode | ErrorMessage | RecordIndex | error_count | | movies | BLANKYEAR | year not provided | 2 | 1 | | movies_rename_test | DODGYYEAR | year value (NOT_A_NUMBER) is invalid | 1 | 1 | | movies | DODGYDATE | date_joined value is not valid: daft_date | 1 | 1 | + | movies | BLANKTITLE | title should not be blank | 4 | 1 | And the movies entity is stored as a parquet after the data_contract phase And the latest audit record for the submission is marked with processing status business_rules When I run the business rules phase - Then The rules restrict "movies" to 4 qualifying records + Then The rules restrict "movies" to 3 qualifying records And there are errors with the following details and associated error_count from the business_rules phase | ErrorCode | ErrorMessage | RecordIndex | error_count | | LIMITED_RATINGS | Movie has too few ratings ([6.5]) | 4 | 1 | @@ -73,9 +78,10 @@ Feature: Pipeline tests using the movies dataset When I run the error report phase Then An error report is produced And The statistics entry for the submission shows the following information - | parameter | value | - | record_count | 5 | - | number_record_rejections | 4 | - | number_warnings | 1 | + | parameter | value | + | record_count | 5 | + | number_submission_rejections | 1 | + | number_record_rejections | 3 | + | number_warnings | 2 | And the error aggregates are persisted diff --git a/tests/features/planets.feature b/tests/features/planets.feature index c469a92..b37b60b 100644 --- a/tests/features/planets.feature +++ b/tests/features/planets.feature @@ -18,6 +18,7 @@ Feature: Pipeline tests using the planets dataset And the latest audit record for the submission is marked with processing status data_contract When I run the data contract phase Then there is 1 record rejection from the data_contract phase + And there are no submission rejections from the data_contract phase And the planets entity is stored as a parquet after the data_contract phase And the latest audit record for the submission is marked with processing status business_rules When I run the business rules phase diff --git a/tests/features/steps/steps_pipeline.py b/tests/features/steps/steps_pipeline.py index 55acadd..c72c873 100644 --- a/tests/features/steps/steps_pipeline.py +++ b/tests/features/steps/steps_pipeline.py @@ -50,14 +50,14 @@ def setup_spark_pipeline( rules_path = get_test_file_path(f"{dataset_id}/{schema_file_name}").resolve().as_uri() return SparkDVEPipeline( - processed_files_path=processing_path.as_uri(), + processed_files_path=processing_path.as_posix(), audit_tables=SparkAuditingManager( database="dve", spark=spark, ), job_run_id=12345, rules_path=rules_path, - submitted_files_path=processing_path.as_uri(), + submitted_files_path=processing_path.as_posix(), spark=spark, ) @@ -155,13 +155,13 @@ def create_error_report(context: Context): -@then("there are {expected_num_errors:d} record rejections from the {service} phase") -@then("there is {expected_num_errors:d} record rejection from the {service} phase") -@then("there are no record rejections from the {service} phase") -def get_record_rejects_from_service(context: Context, service: str, expected_num_errors: int = 0): +@then("there are {expected_num_errors:d} {error_type} rejections from the {service} phase") +@then("there is {expected_num_errors:d} {error_type} rejection from the {service} phase") +@then("there are no {error_type} rejections from the {service} phase") +def get_record_rejects_from_service(context: Context, service: str, error_type: str, expected_num_errors: int = 0): processing_path = ctxt.get_processing_location(context) message_df = load_errors_from_service(processing_path, service) - num_rejections = message_df.filter(pl.col("FailureType").eq("record")).shape[0] + num_rejections = message_df.filter(pl.col("FailureType").eq(error_type)).shape[0] assert num_rejections == expected_num_errors, f"Got {num_rejections} actual rejections" diff --git a/tests/test_core_engine/test_backends/test_implementations/test_duckdb/test_duckdb_helpers.py b/tests/test_core_engine/test_backends/test_implementations/test_duckdb/test_duckdb_helpers.py index 19e96e2..4a24960 100644 --- a/tests/test_core_engine/test_backends/test_implementations/test_duckdb/test_duckdb_helpers.py +++ b/tests/test_core_engine/test_backends/test_implementations/test_duckdb/test_duckdb_helpers.py @@ -1,10 +1,15 @@ """Test Duck DB helpers""" +# pylint: disable=C0301,C0116 + import datetime +import json +import os import tempfile from pathlib import Path from typing import Any, List +import polars as pl import pytest import pyspark.sql.types as pst from duckdb import DuckDBPyRelation, DuckDBPyConnection @@ -12,10 +17,13 @@ from pyspark.sql import Row, SparkSession from dve.core_engine.backends.implementations.duckdb.duckdb_helpers import ( + _ddb_filter_contract_errors, _ddb_read_parquet, duckdb_rel_to_dictionaries, get_duckdb_cast_statement_from_annotation, - get_duckdb_type_from_annotation) + get_duckdb_type_from_annotation, + relation_is_empty, +) @pytest.fixture def casting_test_table(temp_ddb_conn): @@ -51,8 +59,60 @@ def casting_test_table(temp_ddb_conn): yield temp_ddb_conn conn.sql("DROP TABLE IF EXISTS test_casting") - - + + +@pytest.fixture +def example_data_contract_error_codes(temp_ddb_conn): + _, con = temp_ddb_conn + + test_df = pl.DataFrame([ # pylint: disable=W0612 + {"id": "field1", "attr": 1, "__record_index__": 1,}, + {"id": "field2", "attr": None, "__record_index__": 2,}, + {"id": "field3", "attr": 2, "__record_index__": 3,}, + {"id": "field4", "attr": None, "__record_index__": 4,}, + ]) + test_entity = con.sql("SELECT * FROM test_df") + error_contract_messages = [ + { + "Entity": "test_entity", + "Key": "", + "FailureType": "record", + "Status": "error", + "ErrorType": "", + "ErrorLocation": "attr", + "ErrorMessage": "", + "ErrorCode": "", + "ReportingField": "attr", + "RecordIndex": 2, + "Value": "hello", + "Category": "Bad value" + }, + { + "Entity": "test_entity", + "Key": "", + "FailureType": "record", + "Status": "error", + "ErrorType": "", + "ErrorLocation": "attr", + "ErrorMessage": "", + "ErrorCode": "", + "ReportingField": "attr", + "RecordIndex": 4, + "Value": "world", + "Category": "Bad value" + } + ] + with tempfile.TemporaryDirectory() as temp_dir_path: + os.mkdir(Path(temp_dir_path, "errors")) + temp_error_file = Path(temp_dir_path, "errors", "data_contract_errors.jsonl") + with open(temp_error_file, encoding="utf-8", mode="w") as tpf: + for error in error_contract_messages: + json.dump(error, tpf) + tpf.write("\n") + + yield con, test_entity, temp_dir_path + + class BasicModel(BaseModel): str_field: str @@ -176,4 +236,23 @@ def test_use_cast_statements(casting_test_table): not dodgy_date_rec.get("basic_model",{}).get("date_field") and all(not val.get("date_field") for val in dodgy_date_rec.get("another_model",{}).get("basic_models",[])) ) - + + +def test_ddb_filter_contract_errors(example_data_contract_error_codes): # pylint: disable=W0621 + ddb_cnn, entity_rel, temp_dir = example_data_contract_error_codes + expected_df = pl.DataFrame([ # pylint: disable=W0612 + {"id": "field1", "attr": 1, "__record_index__": 1,}, + {"id": "field3", "attr": 2, "__record_index__": 3,}, + ]) + expected_rel = ddb_cnn.sql("SELECT * FROM expected_df") + result_rel = _ddb_filter_contract_errors( + TempConnection(ddb_cnn), temp_dir, entity_rel, "test_entity" + ) + assert result_rel.pl().shape[0] == 2 + assert expected_rel.join(result_rel, "__record_index__", "anti").pl().shape[0] == 0 + + +def test_relation_is_empty(temp_ddb_conn: DuckDBPyConnection): + _, con = temp_ddb_conn + rel = con.sql("SELECT 'abc' AS test").filter("test IS NULL") + assert relation_is_empty(rel) diff --git a/tests/test_core_engine/test_backends/test_implementations/test_spark/test_spark_helpers.py b/tests/test_core_engine/test_backends/test_implementations/test_spark/test_spark_helpers.py index 7502673..8a0e45e 100644 --- a/tests/test_core_engine/test_backends/test_implementations/test_spark/test_spark_helpers.py +++ b/tests/test_core_engine/test_backends/test_implementations/test_spark/test_spark_helpers.py @@ -1,9 +1,15 @@ """Tests for UDF helpers.""" # pylint: disable=redefined-outer-name +# pylint: disable=C0301,C0115,C0116 + import datetime as dt +import json +import os +import tempfile from dataclasses import dataclass from decimal import Decimal +from pathlib import Path from typing import Any, List, Optional, Union from uuid import UUID @@ -19,6 +25,7 @@ from dve.core_engine.backends.implementations.spark.spark_helpers import ( DecimalConfig, create_udf, + _spark_filter_contract_errors, get_spark_cast_statement_from_annotation, get_type_from_annotation, object_to_spark_literal, @@ -42,9 +49,56 @@ def casting_dataframe(spark): StructField("basic_model", bm_schema), StructField("another_model", StructType([StructField("unique_id", StringType()), StructField("basic_models", ArrayType(bm_schema))]))]) yield spark.createDataFrame(data, schema=schema) - - - + + +@pytest.fixture +def example_data_contract_error_codes(spark: SparkSession): + test_df = spark.createDataFrame([ # pylint: disable=W0612 + {"id": "field1", "attr": 1, "__record_index__": 1,}, + {"id": "field2", "attr": None, "__record_index__": 2,}, + {"id": "field3", "attr": 2, "__record_index__": 3,}, + {"id": "field4", "attr": None, "__record_index__": 4,}, + ]) + error_contract_messages = [ + { + "Entity": "test_entity", + "Key": "", + "FailureType": "record", + "Status": "error", + "ErrorType": "", + "ErrorLocation": "attr", + "ErrorMessage": "", + "ErrorCode": "", + "ReportingField": "attr", + "RecordIndex": 2, + "Value": "hello", + "Category": "Bad value" + }, + { + "Entity": "test_entity", + "Key": "", + "FailureType": "record", + "Status": "error", + "ErrorType": "", + "ErrorLocation": "attr", + "ErrorMessage": "", + "ErrorCode": "", + "ReportingField": "attr", + "RecordIndex": 4, + "Value": "world", + "Category": "Bad value" + } + ] + with tempfile.TemporaryDirectory() as temp_dir_path: + os.mkdir(Path(temp_dir_path, "errors")) + temp_error_file = Path(temp_dir_path, "errors", "data_contract_errors.jsonl") + with open(temp_error_file, encoding="utf-8", mode="w") as tpf: + for error in error_contract_messages: + json.dump(error, tpf) + tpf.write("\n") + + yield test_df, temp_dir_path + class BasicModel(BaseModel): str_field: str @@ -264,4 +318,25 @@ def test_use_cast_statements(spark, casting_dataframe): not dodgy_date_rec.get("basic_model",{}).get("date_field") and all(not val.get("date_field") for val in dodgy_date_rec.get("another_model",{}).get("basic_models",[])) ) - assert cast_df \ No newline at end of file + assert cast_df + + +class TempSparkSession: + def __init__(self, spark: SparkSession): + self.spark_session = spark + + +def test_spark_filter_contract_errors(spark: SparkSession, example_data_contract_error_codes): # pylint: disable=W0621 + entity_df, temp_dir = example_data_contract_error_codes + expected_df = spark.createDataFrame([ # pylint: disable=W0612 + {"id": "field1", "attr": 1, "__record_index__": 1,}, + {"id": "field3", "attr": 2, "__record_index__": 3,}, + ]) + result_df = _spark_filter_contract_errors( + TempSparkSession(spark), + temp_dir, + entity_df, + "test_entity" + ) + assert result_df.count() == 2 + assert expected_df.join(result_df, "__record_index__", "anti").count() == 0 diff --git a/tests/test_core_engine/test_message.py b/tests/test_core_engine/test_message.py index ccb6736..4c092f1 100644 --- a/tests/test_core_engine/test_message.py +++ b/tests/test_core_engine/test_message.py @@ -183,9 +183,11 @@ class TestModel(BaseModel): custom_error_details: str = """ {"idx": {"Blank": {"error_code": "IDBLANKERRCODE", - "error_message": "idx is a mandatory field"}, + "error_message": "idx is a mandatory field", + "is_informational": true}, "Bad value": {"error_code": "IDDODGYVALCODE", - "error_message": "idx value is dodgy: {{idx}}"}}, + "error_message": "idx value is dodgy: {{idx}}", + "error_level": "submission"}}, "date_field": {"Bad value": {"error_code": "DATEDODGYVALCODE", "error_message": "date_field value is dodgy: idx: {{idx}}, date_field: {{date_field}}"}}} """ @@ -216,10 +218,16 @@ class TestModel(BaseModel): assert len(msgs_bad) == 3 assert msgs_bad[0].error_code == error_details.get("date_field").get("Bad value").error_code assert msgs_bad[0].error_message == error_details.get("date_field").get("Bad value").template_message(_bad_value_data) + assert msgs_bad[0].failure_type == "record" + assert not msgs_bad[0].is_informational assert msgs_bad[1].error_code == error_details.get("idx").get("Bad value").error_code assert msgs_bad[1].error_message == error_details.get("idx").get("Bad value").template_message(_bad_value_data) + assert msgs_bad[1].failure_type == "submission" + assert not msgs_bad[1].is_informational assert msgs_bad[2].error_code == bad_val_default.error_code assert msgs_bad[2].error_message == bad_val_default.error_message + assert msgs_bad[2].failure_type == "record" + assert not msgs_bad[2].is_informational msgs_blank = FeedbackMessage.from_pydantic_error(entity="test_entity", record = _blank_value_data, @@ -232,6 +240,7 @@ class TestModel(BaseModel): assert len(msgs_blank) == 2 assert msgs_blank[0].error_code == error_details.get("idx").get("Blank").error_code assert msgs_blank[0].error_message == error_details.get("idx").get("Blank").template_message(_blank_value_data) + assert msgs_blank[0].is_informational assert msgs_blank[1].error_code == blank_default.error_code assert msgs_blank[1].error_message == blank_default.error_message @@ -281,4 +290,5 @@ class TestModel(BaseModel): msg = msg[0] assert msg.error_code == "DATEDODGYVALCODE" assert msg.error_message == "date_field value is dodgy: a_field: test, date_field: Barry" + diff --git a/tests/test_pipeline/test_spark_pipeline.py b/tests/test_pipeline/test_spark_pipeline.py index b3048a1..0cf7fe6 100644 --- a/tests/test_pipeline/test_spark_pipeline.py +++ b/tests/test_pipeline/test_spark_pipeline.py @@ -439,7 +439,9 @@ def test_error_report_where_report_is_expected( # pylint: disable=redefined-out ("Dataset Id", "planets"), ("File Name", "doesnotmatter"), ("File Extension", "json"), - ("Submission Failure", "2"), + ("Total Number of Records Processed", "9"), + ("File Rejection", "0"), + ("Record Rejection", "2"), ("Warning", "0"), ] @@ -455,7 +457,7 @@ def test_error_report_where_report_is_expected( # pylint: disable=redefined-out [ OrderedDict( **{ - "Type": "Submission Failure", + "Type": "Record Rejection", "Group": "planets", "Data Item Submission Name": "orbitalPeriod", "Category": "Bad value", @@ -465,7 +467,7 @@ def test_error_report_where_report_is_expected( # pylint: disable=redefined-out ), OrderedDict( **{ - "Type": "Submission Failure", + "Type": "Record Rejection", "Group": "planets", "Data Item Submission Name": "gravity", "Category": "Bad value", @@ -485,7 +487,7 @@ def test_error_report_where_report_is_expected( # pylint: disable=redefined-out OrderedDict( **{ "Group": "planets", - "Type": "Submission Failure", + "Type": "Record Rejection", "Error Code": "LONG_ORBIT", "Data Item Submission Name": "orbitalPeriod", "Errors and Warnings": "Planet has long orbital period", @@ -498,7 +500,7 @@ def test_error_report_where_report_is_expected( # pylint: disable=redefined-out OrderedDict( **{ "Group": "planets", - "Type": "Submission Failure", + "Type": "Record Rejection", "Error Code": "STRONG_GRAVITY", "Data Item Submission Name": "gravity", "Errors and Warnings": "Planet has too strong gravity", diff --git a/tests/testdata/animals/animals.dischema.json b/tests/testdata/animals/animals.dischema.json new file mode 100644 index 0000000..0e1eda1 --- /dev/null +++ b/tests/testdata/animals/animals.dischema.json @@ -0,0 +1,54 @@ +{ + "contract": { + "schemas": {}, + "datasets": { + "animals": { + "fields": { + "name": "str", + "height": "float", + "weight": "float", + "region": "str" + }, + "reader_config": { + ".xml": { + "reader": "DuckDBXMLStreamReader", + "kwargs": { + "record_tag": "animal", + "root_tag": "animals" + } + } + }, + "mandatory_fields": [ + "name" + ] + } + } + }, + "transformations": { + "filters": [ + { + "entity": "animals", + "name": "check_valid_region", + "expression": "lower(region) in ('africa', 'asia')", + "error_code": "ANE01", + "failure_message": "Record rejected - `{{ region }}` is not in a valid region." + }, + { + "entity": "animals", + "name": "check_for_pets", + "expression": "lower(name) != 'human'", + "error_code": "ANE02", + "failure_message": "Submission Rejected - 'Human' is not a valid animal.", + "failure_type": "submission" + }, + { + "entity": "animals", + "name": "check_valid_weight", + "expression": "weight > 0", + "error_code": "ANE03", + "failure_message": "Warning - `{{ weight }}` is below zero.", + "is_informational": true + } + ] + } +} \ No newline at end of file diff --git a/tests/testdata/animals/animals.xml b/tests/testdata/animals/animals.xml new file mode 100644 index 0000000..60bdcef --- /dev/null +++ b/tests/testdata/animals/animals.xml @@ -0,0 +1,33 @@ + + + + African Elephant + 3.5 + 6000.0 + Africa + + + Bengal Tiger + 1.1 + 260.0 + Asia + + + Giraffe + 5.5 + 1200.0 + Africa + + + Polar Bear + 2.6 + 900.0 + Arctic + + + Blue Whale + 24.0 + 180000.0 + Oceans + + diff --git a/tests/testdata/animals/animals_mixture.xml b/tests/testdata/animals/animals_mixture.xml new file mode 100644 index 0000000..230f790 --- /dev/null +++ b/tests/testdata/animals/animals_mixture.xml @@ -0,0 +1,45 @@ + + + + African Elephant + 3.5 + 6000.0 + Africa + + + Bengal Tiger + 1.1 + 260.0 + Asia + + + Giraffe + 5.5 + 1200.0 + Africa + + + Polar Bear + 2.6 + 900.0 + Arctic + + + Blue Whale + 24.0 + 180000.0 + Oceans + + + Human + 1.7 + 70.0 + Africa + + + African Elephant + 3.5 + -6000.0 + Africa + + diff --git a/tests/testdata/movies/movies.json b/tests/testdata/movies/movies.json index afa606d..db6029f 100644 --- a/tests/testdata/movies/movies.json +++ b/tests/testdata/movies/movies.json @@ -32,7 +32,6 @@ ] }, { - "title": "One with a cat and a dog", "year": 2020, "genre": ["Fantasy", "Family"], "duration_minutes": 110, diff --git a/tests/testdata/movies/movies_contract_error_details.json b/tests/testdata/movies/movies_contract_error_details.json index f8cd934..260ee96 100644 --- a/tests/testdata/movies/movies_contract_error_details.json +++ b/tests/testdata/movies/movies_contract_error_details.json @@ -2,13 +2,15 @@ "title": { "Blank": { "error_code": "BLANKTITLE", - "error_message": "title should not be blank" + "error_message": "title should not be blank", + "error_level": "submission" } }, "year": { "Blank": { "error_code": "BLANKYEAR", - "error_message": "year not provided" + "error_message": "year not provided", + "is_informational": true }, "Bad value": { "error_code": "DODGYYEAR",