diff --git a/.tool-versions b/.tool-versions
index 97da906..e111eee 100644
--- a/.tool-versions
+++ b/.tool-versions
@@ -1,3 +1,3 @@
-python 3.11.14
-poetry 2.3.3
+python 3.12.12
+poetry 2.4.1
java liberica-1.8.0
diff --git a/poetry.lock b/poetry.lock
index 9f84732..4c417f2 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 2.3.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 2.4.1 and should not be changed by hand.
[[package]]
name = "argcomplete"
@@ -950,65 +950,65 @@ toml = ["tomli ; python_full_version <= \"3.11.0a6\""]
[[package]]
name = "cryptography"
-version = "47.0.0"
+version = "48.0.1"
description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
optional = false
-python-versions = "!=3.9.0,!=3.9.1,>=3.8"
+python-versions = "!=3.9.0,!=3.9.1,>=3.9"
groups = ["dev", "test"]
files = [
- {file = "cryptography-47.0.0-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:160ad728f128972d362e714054f6ba0067cab7fb350c5202a9ae8ae4ce3ef1a0"},
- {file = "cryptography-47.0.0-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b9a8943e359b7615db1a3ba587994618e094ff3d6fa5a390c73d079ce18b3973"},
- {file = "cryptography-47.0.0-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f5c15764f261394b22aef6b00252f5195f46f2ca300bec57149474e2538b31f8"},
- {file = "cryptography-47.0.0-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:9c59ab0e0fa3a180a5a9c59f3a5abe3ef90d474bc56d7fadfbe80359491b615b"},
- {file = "cryptography-47.0.0-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:34b4358b925a5ea3e14384ca781a2c0ef7ac219b57bb9eacc4457078e2b19f92"},
- {file = "cryptography-47.0.0-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:0024b87d47ae2399165a6bfb20d24888881eeab83ae2566d62467c5ff0030ce7"},
- {file = "cryptography-47.0.0-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:1e47422b5557bb82d3fff997e8d92cff4e28b9789576984f08c248d2b3535d93"},
- {file = "cryptography-47.0.0-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:6f29f36582e6151d9686235e586dd35bb67491f024767d10b842e520dc6a07ac"},
- {file = "cryptography-47.0.0-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:a9b761f012a943b7de0e828843c5688d0de94a0578d44d6c85a1bae32f87791f"},
- {file = "cryptography-47.0.0-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:4e1de79e047e25d6e9f8cea71c86b4a53aced64134f0f003bbcbf3655fd172c8"},
- {file = "cryptography-47.0.0-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ef6b3634087f18d2155b1e8ce264e5345a753da2c5fa9815e7d41315c90f8318"},
- {file = "cryptography-47.0.0-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:11dbb9f50a0f1bb9757b3d8c27c1101780efb8f0bdecfb12439c22a74d64c001"},
- {file = "cryptography-47.0.0-cp311-abi3-win32.whl", hash = "sha256:7fda2f02c9015db3f42bb8a22324a454516ed10a8c29ca6ece6cdbb5efe2a203"},
- {file = "cryptography-47.0.0-cp311-abi3-win_amd64.whl", hash = "sha256:f5c3296dab66202f1b18a91fa266be93d6aa0c2806ea3d67762c69f60adc71aa"},
- {file = "cryptography-47.0.0-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:be12cb6a204f77ed968bcefe68086eb061695b540a3dd05edac507a3111b25f0"},
- {file = "cryptography-47.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2ebd84adf0728c039a3be2700289378e1c164afc6748df1a5ed456767bef9ba7"},
- {file = "cryptography-47.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7f68d6fbc7fbbcfb0939fea72c3b96a9f9a6edfc0e1b1d29778a2066030418b1"},
- {file = "cryptography-47.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:6651d32eff255423503aa276739da98c30f26c40cbeffcc6048e0d54ef704c0c"},
- {file = "cryptography-47.0.0-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:3fb8fa48075fad7193f2e5496135c6a76ac4b2aa5a38433df0a539296b377829"},
- {file = "cryptography-47.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:11438c7518132d95f354fa01a4aa2f806d172a061a7bed18cf18cbdacdb204d7"},
- {file = "cryptography-47.0.0-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:8c1a736bbb3288005796c3f7ccb9453360d7fed483b13b9f468aea5171432923"},
- {file = "cryptography-47.0.0-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:f1557695e5c2b86e204f6ce9470497848634100787935ab7adc5397c54abd7ab"},
- {file = "cryptography-47.0.0-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:f9a034b642b960767fb343766ae5ba6ad653f2e890ddd82955aef288ffea8736"},
- {file = "cryptography-47.0.0-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:b1c76fca783aa7698eb21eb14f9c4aa09452248ee54a627d125025a43f83e7a7"},
- {file = "cryptography-47.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:4f7722c97826770bab8ae92959a2e7b20a5e9e9bf4deae68fd86c3ca457bab52"},
- {file = "cryptography-47.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:09f6d7bf6724f8db8b32f11eccf23efc8e759924bc5603800335cf8859a3ddbd"},
- {file = "cryptography-47.0.0-cp314-cp314t-win32.whl", hash = "sha256:6eebcaf0df1d21ce1f90605c9b432dd2c4f4ab665ac29a40d5e3fc68f51b5e63"},
- {file = "cryptography-47.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:51c9313e90bd1690ec5a75ed047c27c0b8e6c570029712943d6116ef9a90620b"},
- {file = "cryptography-47.0.0-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:14432c8a9bcb37009784f9594a62fae211a2ae9543e96c92b2a8e4c3cd5cd0c4"},
- {file = "cryptography-47.0.0-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:07efe86201817e7d3c18781ca9770bc0db04e1e48c994be384e4602bc38f8f27"},
- {file = "cryptography-47.0.0-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2b45761c6ec22b7c726d6a829558777e32d0f1c8be7c3f3480f9c912d5ee8a10"},
- {file = "cryptography-47.0.0-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:edd4da498015da5b9f26d38d3bfc2e90257bfa9cbed1f6767c282a0025ae649b"},
- {file = "cryptography-47.0.0-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:9af828c0d5a65c70ec729cd7495a4bf1a67ecb66417b8f02ff125ab8a6326a74"},
- {file = "cryptography-47.0.0-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:256d07c78a04d6b276f5df935a9923275f53bd1522f214447fdf365494e2d515"},
- {file = "cryptography-47.0.0-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:5d0e362ff51041b0c0d219cc7d6924d7b8996f57ce5712bdcef71eb3c65a59cc"},
- {file = "cryptography-47.0.0-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:1581aef4219f7ca2849d0250edaa3866212fb74bf5667284f46aa92f9e65c1ca"},
- {file = "cryptography-47.0.0-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:a49a3eb5341b9503fa3000a9a0db033161db90d47285291f53c2a9d2cd1b7f76"},
- {file = "cryptography-47.0.0-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:2207a498b03275d0051589e326b79d4cf59985c99031b05bb292ac52631c37fe"},
- {file = "cryptography-47.0.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:7a02675e2fabd0c0fc04c868b8781863cbf1967691543c22f5470500ff840b31"},
- {file = "cryptography-47.0.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:80887c5cbd1774683cb126f0ab4184567f080071d5acf62205acb354b4b753b7"},
- {file = "cryptography-47.0.0-cp38-abi3-win32.whl", hash = "sha256:ed67ea4e0cfb5faa5bc7ecb6e2b8838f3807a03758eec239d6c21c8769355310"},
- {file = "cryptography-47.0.0-cp38-abi3-win_amd64.whl", hash = "sha256:835d2d7f47cdc53b3224e90810fb1d36ca94ea29cc1801fb4c1bc43876735769"},
- {file = "cryptography-47.0.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:7f1207974a904e005f762869996cf620e9bf79ecb4622f148550bb48e0eb35a7"},
- {file = "cryptography-47.0.0-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:1a405c08857258c11016777e11c02bacbe7ef596faf259305d282272a3a05cbe"},
- {file = "cryptography-47.0.0-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:20fdbe3e38fb67c385d233c89371fa27f9909f6ebca1cecc20c13518dae65475"},
- {file = "cryptography-47.0.0-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:f7db373287273d8af1414cf95dc4118b13ffdc62be521997b0f2b270771fef50"},
- {file = "cryptography-47.0.0-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:9fe6b7c64926c765f9dff301f9c1b867febcda5768868ca084e18589113732ab"},
- {file = "cryptography-47.0.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:cffbba3392df0fa8629bb7f43454ee2925059ee158e23c54620b9063912b86c8"},
- {file = "cryptography-47.0.0.tar.gz", hash = "sha256:9f8e55fe4e63613a5e1cc5819030f27b97742d720203a087802ce4ce9ceb52bb"},
+ {file = "cryptography-48.0.1-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:3e4a1a3232eef2e6c732827d5722db29a0cc8b27af2a4d865b094cf954be9ca1"},
+ {file = "cryptography-48.0.1-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:32143b24adb918f078134e1e230f1eb8cc04886b92c28b5f0041aaf3e5699225"},
+ {file = "cryptography-48.0.1-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f0d27a5696721ef7a672b8c810f6aded391058e0b9486e63e6d93baf765da691"},
+ {file = "cryptography-48.0.1-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:eb86ce1af36fe65041b6db9a8bb064ee621a7e5fded0f80d475ec243477cd242"},
+ {file = "cryptography-48.0.1-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:b024e784ad6c077ee0147b35ea9cbfc1e34e1fd4c1dcca214c2794d73a12df08"},
+ {file = "cryptography-48.0.1-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3752f2dbc8f07a30aad2932c986cea495b03bb554887828225da104f732852b6"},
+ {file = "cryptography-48.0.1-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:bd81490cd5801d755cf97bb68ac191f14b708470b1c7cf4580f669b9c9264cd8"},
+ {file = "cryptography-48.0.1-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:66fd0771e7b9c6dcd44cf1120690d2338d16d72795cf40cae2786a39eba65429"},
+ {file = "cryptography-48.0.1-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:3fd2ca57062b241c856670b073487d2e86c4637937ca5601e48f97bf8e11fc8f"},
+ {file = "cryptography-48.0.1-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:0ee6ea481db1ab889cba043ec1eda17bb9c1ea79db6722f779c3667f9f70322f"},
+ {file = "cryptography-48.0.1-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:f2ceef93cb096aa3c4cc4b5c94ca6131f9196d28c64d6111533402a9b2054d41"},
+ {file = "cryptography-48.0.1-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:9bd3f92d76217892b15df84ca256c2c113d386fdda7a7d8691aeeced976507c6"},
+ {file = "cryptography-48.0.1-cp311-abi3-win32.whl", hash = "sha256:b9a32b876490d66c8bcc9963ef220199569748434ab01a9d6aaeabf88e7f5158"},
+ {file = "cryptography-48.0.1-cp311-abi3-win_amd64.whl", hash = "sha256:39489bfca54c7a1f6b297efcd8bc608ab92d16c4ca631b0cad4da46724588b24"},
+ {file = "cryptography-48.0.1-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:f817adc181390bd54f2f700107a7419040fb7c1bdf2fc26f36551a06a68c3345"},
+ {file = "cryptography-48.0.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d5d30989c6917b478b5817902e85fddaea2261efa8648383d965381ccb9e1ac4"},
+ {file = "cryptography-48.0.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:df637c05205ea7c1d7fbcbe54bbfea648a52951155f997af13d895d0ecc96991"},
+ {file = "cryptography-48.0.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:869c3b8a53bfe27147832df48b32adadf558249d50e76cb3769d40e986b13265"},
+ {file = "cryptography-48.0.1-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:e361afba8918070d376df76f408a4f67fec0ee9cff81a99e48fe9a233ef59e17"},
+ {file = "cryptography-48.0.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:d069066deead00ac7f090be101be875a06855908f7ec004c27b8fefb4acfb411"},
+ {file = "cryptography-48.0.1-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:09f73a725d582cef64b91281a322cd798d14a33b2b6f2b7ad9531dc336d84c02"},
+ {file = "cryptography-48.0.1-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:15254441469dd6bf027039453288e2072124f8b6603563f5d759e1c9b69273fa"},
+ {file = "cryptography-48.0.1-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:8ace4507d1e6533c125f4fac754f8bb8b6a74c08e92179dabd7e16571a3efbf3"},
+ {file = "cryptography-48.0.1-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:b4e391975f038e66432328639620a4aff2d307513b004f1ca06d6225bced815c"},
+ {file = "cryptography-48.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:42fcd8e26fe555d9b3577a135f5091fefa0aa4e99129c23fb56787a1bd4ada72"},
+ {file = "cryptography-48.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c1400da5e32a43253392277eac7490a60e497d810a63dd5608d71bbd7af507c9"},
+ {file = "cryptography-48.0.1-cp314-cp314t-win32.whl", hash = "sha256:0df56b056bc17c1b7d6821dfa65216e62bd232d8ab05eb3db44e71d235651471"},
+ {file = "cryptography-48.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:9de21387aa95e2a895823d0745b430bed4f33503ba9ab5e0b5311f33e37d66d2"},
+ {file = "cryptography-48.0.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:4fdc69f8e4316bcf0c8c8ec1f26f285d12e8142d88d96c876a59a03be3f6ae67"},
+ {file = "cryptography-48.0.1-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:48fe40804d4caa2288f24e70ca8c64c42dd826da0ad7e4f1b41b2128d679e6c8"},
+ {file = "cryptography-48.0.1-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:86be3b1b0b6bf09482fb50a979c508d2950ed95f5621ec77f4e385962006b83a"},
+ {file = "cryptography-48.0.1-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:4ab0a343c807bbcd90c971cd1ecf072937cd01847a9e002bef88fb47ac6be577"},
+ {file = "cryptography-48.0.1-cp39-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:9621de99d2da096006b629979efd8ae7eb2d8b822488d0c89ee4000c306c59b1"},
+ {file = "cryptography-48.0.1-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:88c852a0ae366e262e5a1744b685e6a433dc8788dd2a277e418bf4904203609d"},
+ {file = "cryptography-48.0.1-cp39-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:43c5835e2cb98c8733d86f57d6fc879b613f5c3478607281c3e36daffc6dd8a6"},
+ {file = "cryptography-48.0.1-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:fe0180af5bf9236518a087e35bf2d9a347d5f5f51e63c579d683ddff424e3d46"},
+ {file = "cryptography-48.0.1-cp39-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:b7a2d1a937a738a881737cec135a38bb61470589b17515b9f73f571d0ae10401"},
+ {file = "cryptography-48.0.1-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:b74ca3b8e5ecdd833bf6a002ca41b4793bb27fb8f1c06ffaf2643c9e9140e31b"},
+ {file = "cryptography-48.0.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:2c37f2461406063b417837f5f3daab668652acd82423efcd7f0a9f04be972de1"},
+ {file = "cryptography-48.0.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:86fe77abb1bd87afb251d4d02ada7ecf53a32cee9b67d976abb2e45a13297475"},
+ {file = "cryptography-48.0.1-cp39-abi3-win32.whl", hash = "sha256:6b2c0c3e6ccf3ade7750f836ef3ee36eea250cc467d45c256895573ac08cc6f1"},
+ {file = "cryptography-48.0.1-cp39-abi3-win_amd64.whl", hash = "sha256:9a49ca6c81417f6a5edb50375a60cccdd70fa0a91a5211829dbea74eba94d2ac"},
+ {file = "cryptography-48.0.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:08a597acce1ff37f347400087776599e2348a3a8bc53b44120e463cd274efe4a"},
+ {file = "cryptography-48.0.1-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:735824ec41b7f74a7c45fb1591349333e4c696cb6c044e5f46356e560143e4cd"},
+ {file = "cryptography-48.0.1-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:92a46e1d638daa264ba2971c0b0489c9409787943efae4d60ffda3d091ef832c"},
+ {file = "cryptography-48.0.1-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:7e234ac052af99f2700826a5c29ea99d9c1b1f80341cde62d11c8154dc8e0bd9"},
+ {file = "cryptography-48.0.1-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:33842cf0888951cef5bc7ac724ab844a42044c1727b967b7f8997289a0464f92"},
+ {file = "cryptography-48.0.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:6184ca7b174f28d7c703f1290d4b297217c45355f77a98f67e9b7f14549ac54a"},
+ {file = "cryptography-48.0.1.tar.gz", hash = "sha256:266f4ee051abb2f725b74ef8072b521ce1feacf685a3364fa6a6b45548db791a"},
]
[package.dependencies]
-cffi = {version = ">=2.0.0", markers = "python_full_version >= \"3.9.0\" and platform_python_implementation != \"PyPy\""}
+cffi = {version = ">=2.0.0", markers = "platform_python_implementation != \"PyPy\""}
typing-extensions = {version = ">=4.13.2", markers = "python_full_version < \"3.11.0\""}
[package.extras]
@@ -2118,8 +2118,8 @@ files = [
[package.dependencies]
numpy = [
{version = ">=1.22.4", markers = "python_version < \"3.11\""},
- {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
{version = ">=1.23.2", markers = "python_version == \"3.11\""},
+ {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
]
python-dateutil = ">=2.8.2"
pytz = ">=2020.1"
@@ -2506,8 +2506,8 @@ astroid = ">=3.3.8,<=3.4.0.dev0"
colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""}
dill = [
{version = ">=0.2", markers = "python_version < \"3.11\""},
- {version = ">=0.3.7", markers = "python_version >= \"3.12\""},
{version = ">=0.3.6", markers = "python_version == \"3.11\""},
+ {version = ">=0.3.7", markers = "python_version >= \"3.12\""},
]
isort = ">=4.2.5,<5.13 || >5.13,<7"
mccabe = ">=0.6,<0.8"
@@ -3347,4 +3347,4 @@ type = ["pytest-mypy"]
[metadata]
lock-version = "2.1"
python-versions = ">=3.10,<3.13"
-content-hash = "0983208c21c55de6aaa08cd631a72b69fa9456c039bebff07092e5413bc6edec"
+content-hash = "1ffa0b3df5f9c75bed670f80d681ee8f4dbc42f0b86cf89e48189ef0eacf6be5"
diff --git a/pyproject.toml b/pyproject.toml
index ea33699..8818863 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -68,6 +68,7 @@ optional = true
behave = "1.3.3"
coverage = "7.11.0"
moto = {extras = ["s3"], version = "4.2.14"}
+cryptography = "48.0.1" # dependency of `moto`
requests = "2.33.0" # dependency of `moto`
Werkzeug = "3.1.6"
pytest = "9.0.3"
diff --git a/src/dve/core_engine/backends/base/reader.py b/src/dve/core_engine/backends/base/reader.py
index e9ed9e8..ae0e99f 100644
--- a/src/dve/core_engine/backends/base/reader.py
+++ b/src/dve/core_engine/backends/base/reader.py
@@ -119,10 +119,7 @@ def read_to_entity_type(
"""
if entity_name == Iterator[dict[str, Any]]:
return self.read_to_py_iterator(
- resource,
- entity_name,
- schema, # type: ignore
- all_model_fields
+ resource, entity_name, schema, all_model_fields # type: ignore
)
self.raise_if_not_sensible_file(resource, entity_name)
@@ -133,11 +130,7 @@ def read_to_entity_type(
raise ReaderLacksEntityTypeSupport(entity_type=entity_type) from err
return reader_func(
- self,
- resource,
- entity_name,
- schema,
- all_model_fields=all_model_fields # type: ignore
+ self, resource, entity_name, schema, all_model_fields=all_model_fields # type: ignore
)
def add_record_index(self, entity: EntityType, **kwargs) -> EntityType:
diff --git a/src/dve/core_engine/backends/base/rules.py b/src/dve/core_engine/backends/base/rules.py
index b66b3ae..9b6b4fe 100644
--- a/src/dve/core_engine/backends/base/rules.py
+++ b/src/dve/core_engine/backends/base/rules.py
@@ -681,3 +681,13 @@ def read_parquet(self, path: URI, **kwargs) -> EntityType:
def write_parquet(self, entity: EntityType, target_location: URI, **kwargs) -> URI:
"""Method to write parquet files"""
raise NotImplementedError()
+
+ def filter_data_contract_record_rejections(
+ self,
+ working_directory: URI,
+ entity: EntityType,
+ entity_name: EntityName,
+ **kwargs,
+ ):
+ """Method to filter out record rejection errors from the data contract for a given entity"""
+ raise NotImplementedError()
diff --git a/src/dve/core_engine/backends/exceptions.py b/src/dve/core_engine/backends/exceptions.py
index c72f252..bb58516 100644
--- a/src/dve/core_engine/backends/exceptions.py
+++ b/src/dve/core_engine/backends/exceptions.py
@@ -37,10 +37,7 @@ class UnableToParseCSVError(MessageBearingError):
"""An error raised when unable to parse a CSV file"""
def __init__(
- self,
- entity_name: str,
- field_check_error_message: str,
- field_check_error_code: str
+ self, entity_name: str, field_check_error_message: str, field_check_error_code: str
):
super().__init__(
messages=[
diff --git a/src/dve/core_engine/backends/implementations/duckdb/duckdb_helpers.py b/src/dve/core_engine/backends/implementations/duckdb/duckdb_helpers.py
index 627822b..65e20af 100644
--- a/src/dve/core_engine/backends/implementations/duckdb/duckdb_helpers.py
+++ b/src/dve/core_engine/backends/implementations/duckdb/duckdb_helpers.py
@@ -18,9 +18,10 @@
from pydantic import BaseModel
from typing_extensions import Annotated, get_args, get_origin, get_type_hints
+from dve.common.error_utils import get_feedback_errors_uri
from dve.core_engine.backends.base.utilities import _get_non_heterogenous_type
from dve.core_engine.constants import RECORD_INDEX_COLUMN_NAME
-from dve.core_engine.type_hints import URI
+from dve.core_engine.type_hints import URI, EntityName
from dve.parser.file_handling.service import LocalFilesystemImplementation, _get_implementation
@@ -100,7 +101,7 @@ def table_exists(connection: DuckDBPyConnection, table_name: str) -> bool:
def relation_is_empty(relation: DuckDBPyRelation) -> bool:
"""Check if a duckdb relation is empty"""
- if relation.limit(1).count("*"):
+ if relation.limit(1).shape[0] > 0:
return False
return True
@@ -256,6 +257,48 @@ def duckdb_write_parquet(cls):
return cls
+def _ddb_filter_contract_errors(
+ self,
+ working_directory: URI,
+ entity: DuckDBPyRelation,
+ entity_name: EntityName,
+) -> DuckDBPyRelation:
+ contract_error_location = get_feedback_errors_uri(working_directory, "data_contract")
+ if not Path(contract_error_location).exists():
+ return entity
+ relevant_record_rejection_codes_rel = (
+ self._connection.read_json(
+ contract_error_location,
+ columns={
+ "RecordIndex": "INTEGER",
+ "FailureType": "STRING",
+ "Status": "STRING",
+ "Entity": "STRING",
+ },
+ )
+ .filter(
+ f"FailureType == 'record' AND Status != 'informational' AND Entity = '{entity_name}'"
+ ) # pylint: disable=C0301
+ .select("RecordIndex")
+ .distinct()
+ .order("RecordIndex asc")
+ )
+
+ if relation_is_empty(relevant_record_rejection_codes_rel):
+ return entity
+
+ filtered_entity = entity.join(
+ relevant_record_rejection_codes_rel, condition="__record_index__ == RecordIndex", how="anti"
+ )
+ return filtered_entity
+
+
+def ddb_filter_contract_errors(cls):
+ """Class decorator to filter out records that failed casting and have record rejection scope"""
+ cls.filter_data_contract_record_rejections = _ddb_filter_contract_errors
+ return cls
+
+
@staticmethod # type: ignore
def _duckdb_get_entity_count(entity: DuckDBPyRelation) -> int:
"""Method to obtain entity count from a persisted parquet entity"""
diff --git a/src/dve/core_engine/backends/implementations/duckdb/readers/csv.py b/src/dve/core_engine/backends/implementations/duckdb/readers/csv.py
index 717ced2..3cd931d 100644
--- a/src/dve/core_engine/backends/implementations/duckdb/readers/csv.py
+++ b/src/dve/core_engine/backends/implementations/duckdb/readers/csv.py
@@ -71,7 +71,7 @@ def __init__(
quote_char=quotechar,
field_check=field_check,
field_check_error_code=field_check_error_code,
- field_check_error_message=field_check_error_message
+ field_check_error_message=field_check_error_message,
)
def read_to_py_iterator(
@@ -254,7 +254,7 @@ def read_to_relation( # pylint: disable=unused-argument
resource=resource,
entity_name=entity_name,
schema=schema,
- all_model_fields=all_model_fields
+ all_model_fields=all_model_fields,
)
entity = entity.select(StarExpression(exclude=[RECORD_INDEX_COLUMN_NAME])).distinct()
no_records = entity.shape[0]
diff --git a/src/dve/core_engine/backends/implementations/duckdb/rules.py b/src/dve/core_engine/backends/implementations/duckdb/rules.py
index debb8fe..dc73dad 100644
--- a/src/dve/core_engine/backends/implementations/duckdb/rules.py
+++ b/src/dve/core_engine/backends/implementations/duckdb/rules.py
@@ -22,6 +22,7 @@
from dve.core_engine.backends.exceptions import ConstraintError
from dve.core_engine.backends.implementations.duckdb.duckdb_helpers import (
DDBStruct,
+ ddb_filter_contract_errors,
duckdb_read_parquet,
duckdb_record_index,
duckdb_rel_to_dictionaries,
@@ -61,6 +62,7 @@
@duckdb_record_index
@duckdb_write_parquet
@duckdb_read_parquet
+@ddb_filter_contract_errors
class DuckDBStepImplementations(BaseStepImplementations[DuckDBPyRelation]):
"""An implementation of transformation steps in duckdb."""
diff --git a/src/dve/core_engine/backends/implementations/spark/readers/csv.py b/src/dve/core_engine/backends/implementations/spark/readers/csv.py
index c983cdf..2df30c5 100644
--- a/src/dve/core_engine/backends/implementations/spark/readers/csv.py
+++ b/src/dve/core_engine/backends/implementations/spark/readers/csv.py
@@ -9,13 +9,13 @@
from pyspark.sql.types import StructType
from dve.core_engine.backends.base.reader import read_function
-from dve.core_engine.backends.readers.csv import CSVFileReader
from dve.core_engine.backends.exceptions import EmptyFileError
from dve.core_engine.backends.implementations.spark.spark_helpers import (
get_type_from_annotation,
spark_record_index,
spark_write_parquet,
)
+from dve.core_engine.backends.readers.csv import CSVFileReader
from dve.core_engine.type_hints import URI, EntityName
from dve.parser.file_handling import get_content_length
diff --git a/src/dve/core_engine/backends/implementations/spark/rules.py b/src/dve/core_engine/backends/implementations/spark/rules.py
index 307e71a..825ee15 100644
--- a/src/dve/core_engine/backends/implementations/spark/rules.py
+++ b/src/dve/core_engine/backends/implementations/spark/rules.py
@@ -14,6 +14,7 @@
create_udf,
get_all_registered_udfs,
object_to_spark_literal,
+ spark_filter_contract_errors,
spark_read_parquet,
spark_record_index,
spark_write_parquet,
@@ -53,6 +54,7 @@
@spark_record_index
@spark_write_parquet
@spark_read_parquet
+@spark_filter_contract_errors
class SparkStepImplementations(BaseStepImplementations[DataFrame]):
"""An implementation of transformation steps in Apache Spark."""
diff --git a/src/dve/core_engine/backends/implementations/spark/spark_helpers.py b/src/dve/core_engine/backends/implementations/spark/spark_helpers.py
index ced985a..c8e949d 100644
--- a/src/dve/core_engine/backends/implementations/spark/spark_helpers.py
+++ b/src/dve/core_engine/backends/implementations/spark/spark_helpers.py
@@ -12,6 +12,7 @@
from dataclasses import dataclass, is_dataclass
from decimal import Decimal
from functools import wraps
+from pathlib import Path
from typing import Any, ClassVar, Optional, TypeVar, Union, overload
from delta.exceptions import ConcurrentAppendException, DeltaConcurrentModificationException
@@ -25,9 +26,10 @@
from pyspark.sql.types import LongType, StructField, StructType
from typing_extensions import Annotated, Protocol, TypedDict, get_args, get_origin, get_type_hints
+from dve.common.error_utils import get_feedback_errors_uri
from dve.core_engine.backends.base.utilities import _get_non_heterogenous_type
from dve.core_engine.constants import RECORD_INDEX_COLUMN_NAME
-from dve.core_engine.type_hints import URI
+from dve.core_engine.type_hints import URI, EntityName
# It would be really nice if there was a more parameterisable
# way of doing this.
@@ -365,6 +367,53 @@ def spark_write_parquet(cls):
return cls
+def _spark_filter_contract_errors(
+ self,
+ working_directory: URI,
+ entity: DataFrame,
+ entity_name: EntityName,
+) -> DataFrame:
+ contract_error_location = get_feedback_errors_uri(working_directory, "data_contract")
+ if not Path(contract_error_location).exists():
+ return entity
+
+ relevant_record_rejections_codes_df = (
+ self.spark_session.read.json(
+ path=contract_error_location,
+ schema=st.StructType(
+ [
+ st.StructField("RecordIndex", st.IntegerType()),
+ st.StructField("FailureType", st.StringType()),
+ st.StructField("Status", st.StringType()),
+ st.StructField("Entity", st.StringType()),
+ ]
+ ),
+ )
+ .filter(
+ (sf.col("FailureType") == sf.lit("record"))
+ & (sf.col("Status") != sf.lit("informational"))
+ & (sf.col("Entity") == sf.lit(entity_name))
+ )
+ .distinct()
+ .orderBy(sf.asc(sf.col("RecordIndex")))
+ # todo - ^^ possibly relook at join strat. Does this help? Over prescriptive?
+ )
+ if df_is_empty(relevant_record_rejections_codes_df):
+ return entity
+ filtered_entity = entity.join(
+ relevant_record_rejections_codes_df,
+ on=entity.__record_index__ == relevant_record_rejections_codes_df.RecordIndex,
+ how="anti",
+ )
+ return filtered_entity
+
+
+def spark_filter_contract_errors(cls):
+ """Class decorator to filter out records that failed casting and have record rejection scope"""
+ cls.filter_data_contract_record_rejections = _spark_filter_contract_errors
+ return cls
+
+
@staticmethod # type: ignore
def _spark_get_entity_count(entity: DataFrame) -> int:
"""Method to obtain entity count from a persisted parquet entity"""
diff --git a/src/dve/core_engine/backends/metadata/contract.py b/src/dve/core_engine/backends/metadata/contract.py
index 234a1e9..b233e92 100644
--- a/src/dve/core_engine/backends/metadata/contract.py
+++ b/src/dve/core_engine/backends/metadata/contract.py
@@ -44,7 +44,7 @@ def schemas(self) -> dict[EntityName, type[BaseModel]]:
"""The per-entity schemas, as pydantic models."""
if not self._schemas:
for entity_name, validator in self.validators.items():
- self._schemas[entity_name] = validator.model # type: ignore # pylint: disable=E1137
+ self._schemas[entity_name] = validator.model # type: ignore # pylint: disable=E1137
return self._schemas.copy() # pylint: disable=E1101
@root_validator(allow_reuse=True)
diff --git a/src/dve/core_engine/backends/readers/csv.py b/src/dve/core_engine/backends/readers/csv.py
index 9d37b06..dbace5b 100644
--- a/src/dve/core_engine/backends/readers/csv.py
+++ b/src/dve/core_engine/backends/readers/csv.py
@@ -16,11 +16,11 @@
MissingHeaderError,
)
from dve.core_engine.backends.readers.utilities import (
- raise_message_bearing_error_on_header_differences
+ get_all_model_fields,
+ raise_message_bearing_error_on_header_differences,
)
from dve.core_engine.backends.utilities import get_polars_type_from_annotation, stringify_model
from dve.core_engine.constants import RECORD_INDEX_COLUMN_NAME
-from dve.core_engine.backends.readers.utilities import get_all_model_fields
from dve.core_engine.type_hints import EntityName
from dve.parser.file_handling import get_content_length, open_stream
from dve.parser.file_handling.implementations.file import file_uri_to_local_path
diff --git a/src/dve/core_engine/backends/readers/utilities.py b/src/dve/core_engine/backends/readers/utilities.py
index c46cac7..00086bd 100644
--- a/src/dve/core_engine/backends/readers/utilities.py
+++ b/src/dve/core_engine/backends/readers/utilities.py
@@ -44,16 +44,16 @@ def raise_message_bearing_error_on_header_differences(
header or vice versa.
"""
missing, additional = check_csv_header_expected(
- resource,
- expected_schema,
- all_model_fields,
- delimiter,
- quote_char
+ resource, expected_schema, all_model_fields, delimiter, quote_char
)
if missing or additional:
- record_details_missing = f"missing fields: {', '.join(sorted(missing))};" if missing else "" # pylint: disable=C0301
- record_details_additional = f"additional fields: {', '.join(sorted(additional))};" if additional else "" # pylint: disable=C0301
+ record_details_missing = (
+ f"missing fields: {', '.join(sorted(missing))};" if missing else ""
+ ) # pylint: disable=C0301
+ record_details_additional = (
+ f"additional fields: {', '.join(sorted(additional))};" if additional else ""
+ ) # pylint: disable=C0301
raise MessageBearingError(
"The CSV header doesn't match what is expected",
messages=[
diff --git a/src/dve/core_engine/configuration/v1/__init__.py b/src/dve/core_engine/configuration/v1/__init__.py
index 04d5e07..6221957 100644
--- a/src/dve/core_engine/configuration/v1/__init__.py
+++ b/src/dve/core_engine/configuration/v1/__init__.py
@@ -281,7 +281,9 @@ def _load_rules_and_vars(self) -> tuple[list[Rule], list[TemplateVariables]]:
rules, local_variable_list = [], []
added_rules: set[RuleName] = set()
- for index, complex_rule_config in enumerate(self.transformations.complex_rules): # pylint: disable=E1101
+ for index, complex_rule_config in enumerate(
+ self.transformations.complex_rules
+ ): # pylint: disable=E1101
rule, local_params, deps = self._resolve_business_rule(complex_rule_config)
missing_rules = deps - added_rules
if missing_rules:
diff --git a/src/dve/core_engine/engine.py b/src/dve/core_engine/engine.py
index b2ec9a7..838bfb5 100644
--- a/src/dve/core_engine/engine.py
+++ b/src/dve/core_engine/engine.py
@@ -170,7 +170,9 @@ def __exit__(
exc_value: Optional[Exception],
traceback: Optional[TracebackType],
) -> None:
- self.main_log.info(f"Exiting pipeline context, clearing {self.cache_prefix!r}") # pylint: disable=E1101
+ self.main_log.info(
+ f"Exiting pipeline context, clearing {self.cache_prefix!r}"
+ ) # pylint: disable=E1101
cache_dir = self._cache_dir
self._cache_dir = None
@@ -198,7 +200,9 @@ def _write_entity_outputs(self, entities: SparkEntities) -> SparkEntities:
"""
output_entities = {}
- self.main_log.info(f"Writing entities to the output location: {self.output_prefix_uri}") # pylint: disable=E1101
+ self.main_log.info(
+ f"Writing entities to the output location: {self.output_prefix_uri}"
+ ) # pylint: disable=E1101
for entity_name, entity in entities.items():
entity = entity.drop(RECORD_INDEX_COLUMN_NAME)
@@ -206,9 +210,13 @@ def _write_entity_outputs(self, entities: SparkEntities) -> SparkEntities:
output_uri = joinuri(self.output_prefix_uri, entity_name)
if get_resource_exists(output_uri):
- self.main_log.info(f"{output_uri} already exists - will be overwritten") # pylint: disable=E1101
+ self.main_log.info(
+ f"{output_uri} already exists - will be overwritten"
+ ) # pylint: disable=E1101
- self.main_log.info(f"+ Writing parquet output to {output_uri!r}") # pylint: disable=E1101
+ self.main_log.info(
+ f"+ Writing parquet output to {output_uri!r}"
+ ) # pylint: disable=E1101
entity.write.mode("overwrite").parquet(output_uri)
spark_session = SparkSession.builder.getOrCreate()
output_entities[entity_name] = spark_session.read.format("parquet").load(
diff --git a/src/dve/core_engine/message.py b/src/dve/core_engine/message.py
index 627ae3a..626a710 100644
--- a/src/dve/core_engine/message.py
+++ b/src/dve/core_engine/message.py
@@ -36,6 +36,8 @@ class DataContractErrorDetail(BaseModel):
"""Define custom error codes for validation issues raised during the data contract phase"""
error_code: str
+ error_level: Optional[FailureType] = "record"
+ is_informational: Optional[bool] = False
error_message: Optional[str] = None
reporting_entity: Optional[str] = None
@@ -247,26 +249,14 @@ def from_pydantic_error(
messages: Messages = []
for error_dict in error.errors():
error_type = error_dict["type"]
+ # TODO - review in pydantic v2 - how handles null vs not provided values
if "none.not_allowed" in error_type or "value_error.missing" in error_type:
category = "Blank"
else:
category = "Bad value"
- error_code = error_type
- if "." in error_code:
- error_code = error_code.split(".", 1)[-1]
-
- if error_code in INTEGRITY_ERROR_CODES:
- failure_type: FailureType = "integrity"
- elif error_code in SUBMISSION_ERROR_CODES:
- failure_type = "submission"
- else:
- failure_type = "record"
error_field = ".".join([idx for idx in error_dict["loc"] if not isinstance(idx, int)])
- is_informational = False
- if error_code.endswith("warning"):
- is_informational = True
error_detail: DataContractErrorDetail = error_details.get( # type: ignore
error_field, DEFAULT_ERROR_DETAIL
).get(category)
@@ -276,8 +266,8 @@ def from_pydantic_error(
entity=error_detail.reporting_entity or entity,
original_entity=entity,
record=record,
- failure_type=failure_type,
- is_informational=is_informational,
+ failure_type=error_detail.error_level, # type: ignore
+ is_informational=error_detail.is_informational, # type: ignore
error_type=error_type,
error_location=error_dict["loc"], # type: ignore
error_message=error_detail.template_message(record, error_dict["loc"]),
diff --git a/src/dve/core_engine/models.py b/src/dve/core_engine/models.py
index 09fcbb3..f29889a 100644
--- a/src/dve/core_engine/models.py
+++ b/src/dve/core_engine/models.py
@@ -105,6 +105,8 @@ class SubmissionStatisticsRecord(AuditRecord):
record_count: Optional[int]
"""Count of records in the submitted file"""
+ number_submission_rejections: Optional[int]
+ """Number of submission rejections raised following validation"""
number_record_rejections: Optional[int]
"""Number of record rejections raised following validation"""
number_warnings: Optional[int]
diff --git a/src/dve/pipeline/pipeline.py b/src/dve/pipeline/pipeline.py
index 00a0c51..7a43391 100644
--- a/src/dve/pipeline/pipeline.py
+++ b/src/dve/pipeline/pipeline.py
@@ -42,6 +42,7 @@
from dve.parser.file_handling.implementations.file import LocalFilesystemImplementation
from dve.parser.file_handling.service import _get_implementation
from dve.pipeline.utils import SubmissionStatus, deadletter_file, load_config, load_reader
+from dve.reporting.constants import ErrorReportStatus
from dve.reporting.error_report import ERROR_SCHEMA, calculate_aggregates
PERMISSIBLE_EXCEPTIONS: tuple[type[Exception]] = (
@@ -223,7 +224,7 @@ def write_file_to_parquet(
submission_file_uri,
model_name,
stringify_model(model), # type: ignore
- get_all_model_fields(models.values()) # type: ignore
+ get_all_model_fields(models.values()), # type: ignore
),
f"{out}{model_name}",
)
@@ -379,9 +380,6 @@ def file_transformation_step(
failed.append((submission_info, submission_status))
else:
success.append((submission_info, submission_status))
- except AttributeError as exc:
- self._logger.error(f"File transformation raised exception: {exc}")
- raise exc
except PERMISSIBLE_EXCEPTIONS as exc:
self._logger.warning(
f"File transformation raised exception: {exc}. Will be retried later."
@@ -509,9 +507,6 @@ def data_contract_step(
submission_info: SubmissionInfo
submission_status: SubmissionStatus
submission_info, submission_status = future.result()
- except AttributeError as exc:
- self._logger.error(f"Data Contract raised exception: {exc}")
- raise exc
except PERMISSIBLE_EXCEPTIONS as exc:
self._logger.warning(
f"Data Contract raised exception: {exc}. Will be retried later."
@@ -616,8 +611,19 @@ def apply_business_rules( # pylint: disable=R0914
submission_status.processing_failed = True
for entity_name, entity in entity_manager.entities.items():
+ # Note BI filtering done within the apply_rules
+ self._logger.info(f"applying data contract filter to {entity_name}.")
+ if not entity_name.startswith("Original"):
+ filtered_entity = self._step_implementations.filter_data_contract_record_rejections(
+ working_directory,
+ entity,
+ entity_name,
+ )
+ else:
+ self._logger.info(f"Skipping {entity_name}. Marked original.")
+ filtered_entity = entity
projected = self._step_implementations.write_parquet( # type: ignore
- entity,
+ filtered_entity,
fh.joinuri(
self.processed_files_path,
submission_info.submission_id,
@@ -629,6 +635,7 @@ def apply_business_rules( # pylint: disable=R0914
projected
)
+ # todo - add to submission_status around records that have passed record validations/rejected
submission_status.number_of_records = self.get_entity_count(
entity=entity_manager.entities[
f"""Original{rules.global_variables.get(
@@ -682,9 +689,6 @@ def business_rule_step(
unsucessful_files.append((submission_info, submission_status)) # type: ignore
else:
successful_files.append((submission_info, submission_status)) # type: ignore
- except AttributeError as exc:
- self._logger.error(f"Business Rules raised exception: {exc}")
- raise exc
except PERMISSIBLE_EXCEPTIONS as exc:
self._logger.warning(
f"Business Rules raised exception: {exc}. Will be retried later."
@@ -758,10 +762,12 @@ def _get_error_dataframes(self, submission_id: str):
df = pl.DataFrame(errors, schema={key: pl.Utf8() for key in errors[0]}) # type: ignore
df = df.with_columns(
- pl.when(pl.col("Status") == pl.lit("error")) # type: ignore
- .then(pl.lit("Submission Failure")) # type: ignore
- .otherwise(pl.lit("Warning")) # type: ignore
- .alias("error_type")
+ pl.when(pl.col("Status") == pl.lit("informational"))
+ .then(pl.lit("Warning"))
+ .when(pl.col("FailureType") == pl.lit("submission")) # type: ignore
+ .then(pl.lit(ErrorReportStatus.FILE_REJECTION.reporting_name)) # type: ignore
+ .otherwise(pl.lit(ErrorReportStatus.RECORD_REJECTION.reporting_name)) # type: ignore
+ .alias("error_type") # type: ignore
)
df = df.select(
pl.col("Entity").alias("Table"), # type: ignore
@@ -823,8 +829,13 @@ def error_report(
sub_stats = SubmissionStatisticsRecord(
submission_id=submission_info.submission_id,
record_count=submission_status.number_of_records,
- number_record_rejections=err_types.get("Submission Failure", 0),
- number_warnings=err_types.get("Warning", 0),
+ number_submission_rejections=err_types.get(
+ ErrorReportStatus.FILE_REJECTION.reporting_name, 0
+ ),
+ number_record_rejections=err_types.get(
+ ErrorReportStatus.RECORD_REJECTION.reporting_name, 0
+ ),
+ number_warnings=err_types.get(ErrorReportStatus.WARNING.reporting_name, 0),
)
summary_dict = {
@@ -835,7 +846,7 @@ def error_report(
summary_items = er.SummaryItems(
submission_status=submission_status,
summary_dict=summary_dict,
- row_headings=["Submission Failure", "Warning"],
+ row_headings=[e.reporting_name for e in ErrorReportStatus],
)
workbook = er.ExcelFormat(
@@ -894,9 +905,6 @@ def error_report_step(
try:
submission_info, submission_status, submission_stats, feedback_uri = future.result()
reports.append((submission_info, submission_status, submission_stats, feedback_uri))
- except AttributeError as exc:
- self._logger.error(f"Error reports raised exception: {exc}")
- raise exc
except PERMISSIBLE_EXCEPTIONS as exc:
self._logger.warning(
f"Error reports raised exception: {exc}. Will be retried later."
diff --git a/src/dve/reporting/constants.py b/src/dve/reporting/constants.py
new file mode 100644
index 0000000..657375e
--- /dev/null
+++ b/src/dve/reporting/constants.py
@@ -0,0 +1,22 @@
+"""
+Constants used within the error reports
+"""
+
+from enum import Enum
+
+
+class ErrorReportStatus(Enum):
+ """
+ Constant to centrally hold error report status.
+ """
+
+ FILE_REJECTION = 1, "File Rejection"
+ RECORD_REJECTION = 2, "Record Rejection"
+ WARNING = 3, "Warning"
+
+ @property
+ def reporting_name(self):
+ """
+ The error report 'friendly' name.
+ """
+ return self.value[1]
diff --git a/src/dve/reporting/error_report.py b/src/dve/reporting/error_report.py
index 9e947bf..8169aba 100644
--- a/src/dve/reporting/error_report.py
+++ b/src/dve/reporting/error_report.py
@@ -11,6 +11,7 @@
from dve.common.error_utils import conditional_cast
from dve.core_engine.message import FeedbackMessage
from dve.parser.file_handling.service import open_stream
+from dve.reporting.constants import ErrorReportStatus
ERROR_SCHEMA = {
"Table": Utf8(),
@@ -85,7 +86,7 @@ def create_error_dataframe(errors: deque[FeedbackMessage], key_fields):
df = df.with_columns( # type: ignore
pl.when(pl.col("Status") == pl.lit("error")) # type: ignore
- .then(pl.lit("Submission Failure")) # type: ignore
+ .then(pl.lit(ErrorReportStatus.FILE_REJECTION.reporting_name)) # type: ignore
.otherwise(pl.lit("Warning")) # type: ignore
.alias("error_type")
)
diff --git a/src/dve/reporting/excel_report.py b/src/dve/reporting/excel_report.py
index 82aa510..de264d5 100644
--- a/src/dve/reporting/excel_report.py
+++ b/src/dve/reporting/excel_report.py
@@ -17,6 +17,7 @@
from polars.exceptions import ColumnNotFoundError
from dve.pipeline.utils import SubmissionStatus
+from dve.reporting.constants import ErrorReportStatus
@dataclass
@@ -97,9 +98,9 @@ def get_submission_status(self, aggregates: DataFrame) -> str:
if aggregates.is_empty():
return "File has been accepted, no issues to report"
failures = aggregates["Type"].unique()
- if "Submission Failure" in failures:
+ if ErrorReportStatus.FILE_REJECTION.reporting_name in failures:
status = "File has been rejected"
- elif "Warning" in failures:
+ elif ErrorReportStatus.WARNING.reporting_name in failures:
status = "File has been accepted, all records accepted with warnings"
else:
status = "File has been accepted, no issues to report"
@@ -141,6 +142,17 @@ def _add_submission_info(self, status: str, summary: Worksheet):
for key, value in self.summary_dict.items():
summary.append(["", _key_renames.get(key, key), str(value)])
+ summary.append(
+ [
+ "",
+ "Total Number of Records Processed",
+ (
+ self.submission_status.number_of_records
+ if self.submission_status.number_of_records
+ else 0
+ ), # pylint: disable=C0301
+ ]
+ )
summary.append(["", ""])
diff --git a/tests/features/animals.feature b/tests/features/animals.feature
new file mode 100644
index 0000000..d68ddbf
--- /dev/null
+++ b/tests/features/animals.feature
@@ -0,0 +1,59 @@
+Feature: Pipeline tests using the animal dataset
+ Test record rejection and ensuring that records are correctly removed from the entity and that
+ the correct validation feedback is raised in the error report.
+
+ Scenario: Validate XML data with just record level rejections (duckdb)
+ Given I submit the animals file animals.xml for processing
+ And A duckdb pipeline is configured with schema file 'animals.dischema.json'
+ And I add initial audit entries for the submission
+ Then the latest audit record for the submission is marked with processing status file_transformation
+ When I run the file transformation phase
+ Then the animals entity is stored as a parquet after the file_transformation phase
+ And the latest audit record for the submission is marked with processing status data_contract
+ When I run the data contract phase
+ Then there are no record rejections from the data_contract phase
+ And the animals entity is stored as a parquet after the data_contract phase
+ And the latest audit record for the submission is marked with processing status business_rules
+ When I run the business rules phase
+ Then there are errors with the following details and associated error_count from the business_rules phase
+ | ErrorType | ErrorCode | error_count |
+ | record | ANE01 | 2 |
+ And The rules restrict "animals" to 3 qualifying records
+ When I run the error report phase
+ Then An error report is produced
+ And The statistics entry for the submission shows the following information
+ | parameter | value |
+ | record_count | 5 |
+ | number_record_rejections | 2 |
+ | number_warnings | 0 |
+
+ Scenario: Validate XML data with a mixture of error types in (duckdb)
+ Given I submit the animals file animals_mixture.xml for processing
+ And A duckdb pipeline is configured with schema file 'animals.dischema.json'
+ And I add initial audit entries for the submission
+ Then the latest audit record for the submission is marked with processing status file_transformation
+ When I run the file transformation phase
+ Then the animals entity is stored as a parquet after the file_transformation phase
+ And the latest audit record for the submission is marked with processing status data_contract
+ When I run the data contract phase
+ Then there are no record rejections from the data_contract phase
+ # Then there are errors with the following details and associated error_count from the data_contract phase
+ # | FailureType | Status | ErrorCode | error_count |
+ # | record | error | FieldBlank | 1 |
+ And the animals entity is stored as a parquet after the data_contract phase
+ And the latest audit record for the submission is marked with processing status business_rules
+ When I run the business rules phase
+ Then there are errors with the following details and associated error_count from the business_rules phase
+ | FailureType | Status | ErrorCode | error_count |
+ | record | error | ANE01 | 2 |
+ | submission | error | ANE02 | 1 |
+ | record | informational | ANE03 | 1 |
+ And The rules restrict "animals" to 5 qualifying records
+ When I run the error report phase
+ Then An error report is produced
+ And The statistics entry for the submission shows the following information
+ | parameter | value |
+ | record_count | 7 |
+ | number_submission_rejections | 1 |
+ | number_record_rejections | 2 |
+ | number_warnings | 1 |
diff --git a/tests/features/demographics.feature b/tests/features/demographics.feature
index aa59bfc..af4b62a 100644
--- a/tests/features/demographics.feature
+++ b/tests/features/demographics.feature
@@ -17,7 +17,7 @@ Feature: Pipeline tests using the ambsys dataset
And the demographics entity is stored as a parquet after the data_contract phase
And the latest audit record for the submission is marked with processing status business_rules
When I run the business rules phase
- Then The rules restrict "demographics" to 6 qualifying records
+ Then The rules restrict "demographics" to 2 qualifying records
And At least one row from "demographics" has generated error code "BAD_NHS"
And the demographics entity is stored as a parquet after the business_rules phase
And The entity "demographics" does not contain an entry for "FALSE" in column "NHS_Number_Valid"
@@ -43,7 +43,7 @@ Feature: Pipeline tests using the ambsys dataset
And the demographics entity is stored as a parquet after the data_contract phase
And the latest audit record for the submission is marked with processing status business_rules
When I run the business rules phase
- Then The rules restrict "demographics" to 6 qualifying records
+ Then The rules restrict "demographics" to 2 qualifying records
And At least one row from "demographics" has generated error code "BAD_NHS"
And the demographics entity is stored as a parquet after the business_rules phase
And The entity "demographics" does not contain an entry for "FALSE" in column "NHS_Number_Valid"
diff --git a/tests/features/movies.feature b/tests/features/movies.feature
index fa041ea..750975e 100644
--- a/tests/features/movies.feature
+++ b/tests/features/movies.feature
@@ -19,16 +19,18 @@ Feature: Pipeline tests using the movies dataset
Then the movies entity is stored as a parquet after the file_transformation phase
And the latest audit record for the submission is marked with processing status data_contract
When I run the data contract phase
- Then there are 3 record rejections from the data_contract phase
+ Then there is 1 submission rejection from the data_contract phase
+ And there are 3 record rejections from the data_contract phase
And there are errors with the following details and associated error_count from the data_contract phase
- | Entity | ErrorCode | ErrorMessage | RecordIndex | error_count |
- | movies | BLANKYEAR | year not provided | 2 | 1 |
- | movies_rename_test | DODGYYEAR | year value (NOT_A_NUMBER) is invalid | 1 | 1 |
- | movies | DODGYDATE | date_joined value is not valid: daft_date | 1 | 1 |
+ | Entity | ErrorCode | ErrorMessage | RecordIndex | error_count |
+ | movies | BLANKYEAR | year not provided | 2 | 1 |
+ | movies_rename_test | DODGYYEAR | year value (NOT_A_NUMBER) is invalid | 1 | 1 |
+ | movies | DODGYDATE | date_joined value is not valid: daft_date | 1 | 1 |
+ | movies | BLANKTITLE | title should not be blank | 4 | 1 |
And the movies entity is stored as a parquet after the data_contract phase
And the latest audit record for the submission is marked with processing status business_rules
When I run the business rules phase
- Then The rules restrict "movies" to 4 qualifying records
+ Then The rules restrict "movies" to 3 qualifying records
And there are errors with the following details and associated error_count from the business_rules phase
| ErrorCode | ErrorMessage | RecordIndex | error_count |
| LIMITED_RATINGS | Movie has too few ratings ([6.5]) | 4 | 1 |
@@ -37,10 +39,11 @@ Feature: Pipeline tests using the movies dataset
When I run the error report phase
Then An error report is produced
And The statistics entry for the submission shows the following information
- | parameter | value |
- | record_count | 5 |
- | number_record_rejections | 4 |
- | number_warnings | 1 |
+ | parameter | value |
+ | record_count | 5 |
+ | number_submission_rejections | 1 |
+ | number_record_rejections | 3 |
+ | number_warnings | 2 |
And the error aggregates are persisted
Scenario: Validate and filter movies (duckdb)
@@ -55,16 +58,18 @@ Feature: Pipeline tests using the movies dataset
Then the movies entity is stored as a parquet after the file_transformation phase
And the latest audit record for the submission is marked with processing status data_contract
When I run the data contract phase
- Then there are 3 record rejections from the data_contract phase
+ Then there is 1 submission rejection from the data_contract phase
+ And there are 3 record rejections from the data_contract phase
And there are errors with the following details and associated error_count from the data_contract phase
| Entity | ErrorCode | ErrorMessage | RecordIndex | error_count |
| movies | BLANKYEAR | year not provided | 2 | 1 |
| movies_rename_test | DODGYYEAR | year value (NOT_A_NUMBER) is invalid | 1 | 1 |
| movies | DODGYDATE | date_joined value is not valid: daft_date | 1 | 1 |
+ | movies | BLANKTITLE | title should not be blank | 4 | 1 |
And the movies entity is stored as a parquet after the data_contract phase
And the latest audit record for the submission is marked with processing status business_rules
When I run the business rules phase
- Then The rules restrict "movies" to 4 qualifying records
+ Then The rules restrict "movies" to 3 qualifying records
And there are errors with the following details and associated error_count from the business_rules phase
| ErrorCode | ErrorMessage | RecordIndex | error_count |
| LIMITED_RATINGS | Movie has too few ratings ([6.5]) | 4 | 1 |
@@ -73,9 +78,10 @@ Feature: Pipeline tests using the movies dataset
When I run the error report phase
Then An error report is produced
And The statistics entry for the submission shows the following information
- | parameter | value |
- | record_count | 5 |
- | number_record_rejections | 4 |
- | number_warnings | 1 |
+ | parameter | value |
+ | record_count | 5 |
+ | number_submission_rejections | 1 |
+ | number_record_rejections | 3 |
+ | number_warnings | 2 |
And the error aggregates are persisted
diff --git a/tests/features/planets.feature b/tests/features/planets.feature
index c469a92..b37b60b 100644
--- a/tests/features/planets.feature
+++ b/tests/features/planets.feature
@@ -18,6 +18,7 @@ Feature: Pipeline tests using the planets dataset
And the latest audit record for the submission is marked with processing status data_contract
When I run the data contract phase
Then there is 1 record rejection from the data_contract phase
+ And there are no submission rejections from the data_contract phase
And the planets entity is stored as a parquet after the data_contract phase
And the latest audit record for the submission is marked with processing status business_rules
When I run the business rules phase
diff --git a/tests/features/steps/steps_pipeline.py b/tests/features/steps/steps_pipeline.py
index 55acadd..c72c873 100644
--- a/tests/features/steps/steps_pipeline.py
+++ b/tests/features/steps/steps_pipeline.py
@@ -50,14 +50,14 @@ def setup_spark_pipeline(
rules_path = get_test_file_path(f"{dataset_id}/{schema_file_name}").resolve().as_uri()
return SparkDVEPipeline(
- processed_files_path=processing_path.as_uri(),
+ processed_files_path=processing_path.as_posix(),
audit_tables=SparkAuditingManager(
database="dve",
spark=spark,
),
job_run_id=12345,
rules_path=rules_path,
- submitted_files_path=processing_path.as_uri(),
+ submitted_files_path=processing_path.as_posix(),
spark=spark,
)
@@ -155,13 +155,13 @@ def create_error_report(context: Context):
-@then("there are {expected_num_errors:d} record rejections from the {service} phase")
-@then("there is {expected_num_errors:d} record rejection from the {service} phase")
-@then("there are no record rejections from the {service} phase")
-def get_record_rejects_from_service(context: Context, service: str, expected_num_errors: int = 0):
+@then("there are {expected_num_errors:d} {error_type} rejections from the {service} phase")
+@then("there is {expected_num_errors:d} {error_type} rejection from the {service} phase")
+@then("there are no {error_type} rejections from the {service} phase")
+def get_record_rejects_from_service(context: Context, service: str, error_type: str, expected_num_errors: int = 0):
processing_path = ctxt.get_processing_location(context)
message_df = load_errors_from_service(processing_path, service)
- num_rejections = message_df.filter(pl.col("FailureType").eq("record")).shape[0]
+ num_rejections = message_df.filter(pl.col("FailureType").eq(error_type)).shape[0]
assert num_rejections == expected_num_errors, f"Got {num_rejections} actual rejections"
diff --git a/tests/test_core_engine/test_backends/test_implementations/test_duckdb/test_duckdb_helpers.py b/tests/test_core_engine/test_backends/test_implementations/test_duckdb/test_duckdb_helpers.py
index 19e96e2..4a24960 100644
--- a/tests/test_core_engine/test_backends/test_implementations/test_duckdb/test_duckdb_helpers.py
+++ b/tests/test_core_engine/test_backends/test_implementations/test_duckdb/test_duckdb_helpers.py
@@ -1,10 +1,15 @@
"""Test Duck DB helpers"""
+# pylint: disable=C0301,C0116
+
import datetime
+import json
+import os
import tempfile
from pathlib import Path
from typing import Any, List
+import polars as pl
import pytest
import pyspark.sql.types as pst
from duckdb import DuckDBPyRelation, DuckDBPyConnection
@@ -12,10 +17,13 @@
from pyspark.sql import Row, SparkSession
from dve.core_engine.backends.implementations.duckdb.duckdb_helpers import (
+ _ddb_filter_contract_errors,
_ddb_read_parquet,
duckdb_rel_to_dictionaries,
get_duckdb_cast_statement_from_annotation,
- get_duckdb_type_from_annotation)
+ get_duckdb_type_from_annotation,
+ relation_is_empty,
+)
@pytest.fixture
def casting_test_table(temp_ddb_conn):
@@ -51,8 +59,60 @@ def casting_test_table(temp_ddb_conn):
yield temp_ddb_conn
conn.sql("DROP TABLE IF EXISTS test_casting")
-
-
+
+
+@pytest.fixture
+def example_data_contract_error_codes(temp_ddb_conn):
+ _, con = temp_ddb_conn
+
+ test_df = pl.DataFrame([ # pylint: disable=W0612
+ {"id": "field1", "attr": 1, "__record_index__": 1,},
+ {"id": "field2", "attr": None, "__record_index__": 2,},
+ {"id": "field3", "attr": 2, "__record_index__": 3,},
+ {"id": "field4", "attr": None, "__record_index__": 4,},
+ ])
+ test_entity = con.sql("SELECT * FROM test_df")
+ error_contract_messages = [
+ {
+ "Entity": "test_entity",
+ "Key": "",
+ "FailureType": "record",
+ "Status": "error",
+ "ErrorType": "",
+ "ErrorLocation": "attr",
+ "ErrorMessage": "",
+ "ErrorCode": "",
+ "ReportingField": "attr",
+ "RecordIndex": 2,
+ "Value": "hello",
+ "Category": "Bad value"
+ },
+ {
+ "Entity": "test_entity",
+ "Key": "",
+ "FailureType": "record",
+ "Status": "error",
+ "ErrorType": "",
+ "ErrorLocation": "attr",
+ "ErrorMessage": "",
+ "ErrorCode": "",
+ "ReportingField": "attr",
+ "RecordIndex": 4,
+ "Value": "world",
+ "Category": "Bad value"
+ }
+ ]
+ with tempfile.TemporaryDirectory() as temp_dir_path:
+ os.mkdir(Path(temp_dir_path, "errors"))
+ temp_error_file = Path(temp_dir_path, "errors", "data_contract_errors.jsonl")
+ with open(temp_error_file, encoding="utf-8", mode="w") as tpf:
+ for error in error_contract_messages:
+ json.dump(error, tpf)
+ tpf.write("\n")
+
+ yield con, test_entity, temp_dir_path
+
+
class BasicModel(BaseModel):
str_field: str
@@ -176,4 +236,23 @@ def test_use_cast_statements(casting_test_table):
not dodgy_date_rec.get("basic_model",{}).get("date_field")
and all(not val.get("date_field") for val in dodgy_date_rec.get("another_model",{}).get("basic_models",[]))
)
-
+
+
+def test_ddb_filter_contract_errors(example_data_contract_error_codes): # pylint: disable=W0621
+ ddb_cnn, entity_rel, temp_dir = example_data_contract_error_codes
+ expected_df = pl.DataFrame([ # pylint: disable=W0612
+ {"id": "field1", "attr": 1, "__record_index__": 1,},
+ {"id": "field3", "attr": 2, "__record_index__": 3,},
+ ])
+ expected_rel = ddb_cnn.sql("SELECT * FROM expected_df")
+ result_rel = _ddb_filter_contract_errors(
+ TempConnection(ddb_cnn), temp_dir, entity_rel, "test_entity"
+ )
+ assert result_rel.pl().shape[0] == 2
+ assert expected_rel.join(result_rel, "__record_index__", "anti").pl().shape[0] == 0
+
+
+def test_relation_is_empty(temp_ddb_conn: DuckDBPyConnection):
+ _, con = temp_ddb_conn
+ rel = con.sql("SELECT 'abc' AS test").filter("test IS NULL")
+ assert relation_is_empty(rel)
diff --git a/tests/test_core_engine/test_backends/test_implementations/test_spark/test_spark_helpers.py b/tests/test_core_engine/test_backends/test_implementations/test_spark/test_spark_helpers.py
index 7502673..8a0e45e 100644
--- a/tests/test_core_engine/test_backends/test_implementations/test_spark/test_spark_helpers.py
+++ b/tests/test_core_engine/test_backends/test_implementations/test_spark/test_spark_helpers.py
@@ -1,9 +1,15 @@
"""Tests for UDF helpers."""
# pylint: disable=redefined-outer-name
+# pylint: disable=C0301,C0115,C0116
+
import datetime as dt
+import json
+import os
+import tempfile
from dataclasses import dataclass
from decimal import Decimal
+from pathlib import Path
from typing import Any, List, Optional, Union
from uuid import UUID
@@ -19,6 +25,7 @@
from dve.core_engine.backends.implementations.spark.spark_helpers import (
DecimalConfig,
create_udf,
+ _spark_filter_contract_errors,
get_spark_cast_statement_from_annotation,
get_type_from_annotation,
object_to_spark_literal,
@@ -42,9 +49,56 @@ def casting_dataframe(spark):
StructField("basic_model", bm_schema),
StructField("another_model", StructType([StructField("unique_id", StringType()), StructField("basic_models", ArrayType(bm_schema))]))])
yield spark.createDataFrame(data, schema=schema)
-
-
-
+
+
+@pytest.fixture
+def example_data_contract_error_codes(spark: SparkSession):
+ test_df = spark.createDataFrame([ # pylint: disable=W0612
+ {"id": "field1", "attr": 1, "__record_index__": 1,},
+ {"id": "field2", "attr": None, "__record_index__": 2,},
+ {"id": "field3", "attr": 2, "__record_index__": 3,},
+ {"id": "field4", "attr": None, "__record_index__": 4,},
+ ])
+ error_contract_messages = [
+ {
+ "Entity": "test_entity",
+ "Key": "",
+ "FailureType": "record",
+ "Status": "error",
+ "ErrorType": "",
+ "ErrorLocation": "attr",
+ "ErrorMessage": "",
+ "ErrorCode": "",
+ "ReportingField": "attr",
+ "RecordIndex": 2,
+ "Value": "hello",
+ "Category": "Bad value"
+ },
+ {
+ "Entity": "test_entity",
+ "Key": "",
+ "FailureType": "record",
+ "Status": "error",
+ "ErrorType": "",
+ "ErrorLocation": "attr",
+ "ErrorMessage": "",
+ "ErrorCode": "",
+ "ReportingField": "attr",
+ "RecordIndex": 4,
+ "Value": "world",
+ "Category": "Bad value"
+ }
+ ]
+ with tempfile.TemporaryDirectory() as temp_dir_path:
+ os.mkdir(Path(temp_dir_path, "errors"))
+ temp_error_file = Path(temp_dir_path, "errors", "data_contract_errors.jsonl")
+ with open(temp_error_file, encoding="utf-8", mode="w") as tpf:
+ for error in error_contract_messages:
+ json.dump(error, tpf)
+ tpf.write("\n")
+
+ yield test_df, temp_dir_path
+
class BasicModel(BaseModel):
str_field: str
@@ -264,4 +318,25 @@ def test_use_cast_statements(spark, casting_dataframe):
not dodgy_date_rec.get("basic_model",{}).get("date_field")
and all(not val.get("date_field") for val in dodgy_date_rec.get("another_model",{}).get("basic_models",[]))
)
- assert cast_df
\ No newline at end of file
+ assert cast_df
+
+
+class TempSparkSession:
+ def __init__(self, spark: SparkSession):
+ self.spark_session = spark
+
+
+def test_spark_filter_contract_errors(spark: SparkSession, example_data_contract_error_codes): # pylint: disable=W0621
+ entity_df, temp_dir = example_data_contract_error_codes
+ expected_df = spark.createDataFrame([ # pylint: disable=W0612
+ {"id": "field1", "attr": 1, "__record_index__": 1,},
+ {"id": "field3", "attr": 2, "__record_index__": 3,},
+ ])
+ result_df = _spark_filter_contract_errors(
+ TempSparkSession(spark),
+ temp_dir,
+ entity_df,
+ "test_entity"
+ )
+ assert result_df.count() == 2
+ assert expected_df.join(result_df, "__record_index__", "anti").count() == 0
diff --git a/tests/test_core_engine/test_message.py b/tests/test_core_engine/test_message.py
index ccb6736..4c092f1 100644
--- a/tests/test_core_engine/test_message.py
+++ b/tests/test_core_engine/test_message.py
@@ -183,9 +183,11 @@ class TestModel(BaseModel):
custom_error_details: str = """
{"idx": {"Blank": {"error_code": "IDBLANKERRCODE",
- "error_message": "idx is a mandatory field"},
+ "error_message": "idx is a mandatory field",
+ "is_informational": true},
"Bad value": {"error_code": "IDDODGYVALCODE",
- "error_message": "idx value is dodgy: {{idx}}"}},
+ "error_message": "idx value is dodgy: {{idx}}",
+ "error_level": "submission"}},
"date_field": {"Bad value": {"error_code": "DATEDODGYVALCODE",
"error_message": "date_field value is dodgy: idx: {{idx}}, date_field: {{date_field}}"}}}
"""
@@ -216,10 +218,16 @@ class TestModel(BaseModel):
assert len(msgs_bad) == 3
assert msgs_bad[0].error_code == error_details.get("date_field").get("Bad value").error_code
assert msgs_bad[0].error_message == error_details.get("date_field").get("Bad value").template_message(_bad_value_data)
+ assert msgs_bad[0].failure_type == "record"
+ assert not msgs_bad[0].is_informational
assert msgs_bad[1].error_code == error_details.get("idx").get("Bad value").error_code
assert msgs_bad[1].error_message == error_details.get("idx").get("Bad value").template_message(_bad_value_data)
+ assert msgs_bad[1].failure_type == "submission"
+ assert not msgs_bad[1].is_informational
assert msgs_bad[2].error_code == bad_val_default.error_code
assert msgs_bad[2].error_message == bad_val_default.error_message
+ assert msgs_bad[2].failure_type == "record"
+ assert not msgs_bad[2].is_informational
msgs_blank = FeedbackMessage.from_pydantic_error(entity="test_entity",
record = _blank_value_data,
@@ -232,6 +240,7 @@ class TestModel(BaseModel):
assert len(msgs_blank) == 2
assert msgs_blank[0].error_code == error_details.get("idx").get("Blank").error_code
assert msgs_blank[0].error_message == error_details.get("idx").get("Blank").template_message(_blank_value_data)
+ assert msgs_blank[0].is_informational
assert msgs_blank[1].error_code == blank_default.error_code
assert msgs_blank[1].error_message == blank_default.error_message
@@ -281,4 +290,5 @@ class TestModel(BaseModel):
msg = msg[0]
assert msg.error_code == "DATEDODGYVALCODE"
assert msg.error_message == "date_field value is dodgy: a_field: test, date_field: Barry"
+
diff --git a/tests/test_pipeline/test_spark_pipeline.py b/tests/test_pipeline/test_spark_pipeline.py
index b3048a1..0cf7fe6 100644
--- a/tests/test_pipeline/test_spark_pipeline.py
+++ b/tests/test_pipeline/test_spark_pipeline.py
@@ -439,7 +439,9 @@ def test_error_report_where_report_is_expected( # pylint: disable=redefined-out
("Dataset Id", "planets"),
("File Name", "doesnotmatter"),
("File Extension", "json"),
- ("Submission Failure", "2"),
+ ("Total Number of Records Processed", "9"),
+ ("File Rejection", "0"),
+ ("Record Rejection", "2"),
("Warning", "0"),
]
@@ -455,7 +457,7 @@ def test_error_report_where_report_is_expected( # pylint: disable=redefined-out
[
OrderedDict(
**{
- "Type": "Submission Failure",
+ "Type": "Record Rejection",
"Group": "planets",
"Data Item Submission Name": "orbitalPeriod",
"Category": "Bad value",
@@ -465,7 +467,7 @@ def test_error_report_where_report_is_expected( # pylint: disable=redefined-out
),
OrderedDict(
**{
- "Type": "Submission Failure",
+ "Type": "Record Rejection",
"Group": "planets",
"Data Item Submission Name": "gravity",
"Category": "Bad value",
@@ -485,7 +487,7 @@ def test_error_report_where_report_is_expected( # pylint: disable=redefined-out
OrderedDict(
**{
"Group": "planets",
- "Type": "Submission Failure",
+ "Type": "Record Rejection",
"Error Code": "LONG_ORBIT",
"Data Item Submission Name": "orbitalPeriod",
"Errors and Warnings": "Planet has long orbital period",
@@ -498,7 +500,7 @@ def test_error_report_where_report_is_expected( # pylint: disable=redefined-out
OrderedDict(
**{
"Group": "planets",
- "Type": "Submission Failure",
+ "Type": "Record Rejection",
"Error Code": "STRONG_GRAVITY",
"Data Item Submission Name": "gravity",
"Errors and Warnings": "Planet has too strong gravity",
diff --git a/tests/testdata/animals/animals.dischema.json b/tests/testdata/animals/animals.dischema.json
new file mode 100644
index 0000000..0e1eda1
--- /dev/null
+++ b/tests/testdata/animals/animals.dischema.json
@@ -0,0 +1,54 @@
+{
+ "contract": {
+ "schemas": {},
+ "datasets": {
+ "animals": {
+ "fields": {
+ "name": "str",
+ "height": "float",
+ "weight": "float",
+ "region": "str"
+ },
+ "reader_config": {
+ ".xml": {
+ "reader": "DuckDBXMLStreamReader",
+ "kwargs": {
+ "record_tag": "animal",
+ "root_tag": "animals"
+ }
+ }
+ },
+ "mandatory_fields": [
+ "name"
+ ]
+ }
+ }
+ },
+ "transformations": {
+ "filters": [
+ {
+ "entity": "animals",
+ "name": "check_valid_region",
+ "expression": "lower(region) in ('africa', 'asia')",
+ "error_code": "ANE01",
+ "failure_message": "Record rejected - `{{ region }}` is not in a valid region."
+ },
+ {
+ "entity": "animals",
+ "name": "check_for_pets",
+ "expression": "lower(name) != 'human'",
+ "error_code": "ANE02",
+ "failure_message": "Submission Rejected - 'Human' is not a valid animal.",
+ "failure_type": "submission"
+ },
+ {
+ "entity": "animals",
+ "name": "check_valid_weight",
+ "expression": "weight > 0",
+ "error_code": "ANE03",
+ "failure_message": "Warning - `{{ weight }}` is below zero.",
+ "is_informational": true
+ }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/tests/testdata/animals/animals.xml b/tests/testdata/animals/animals.xml
new file mode 100644
index 0000000..60bdcef
--- /dev/null
+++ b/tests/testdata/animals/animals.xml
@@ -0,0 +1,33 @@
+
+
+
+ African Elephant
+ 3.5
+ 6000.0
+ Africa
+
+
+ Bengal Tiger
+ 1.1
+ 260.0
+ Asia
+
+
+ Giraffe
+ 5.5
+ 1200.0
+ Africa
+
+
+ Polar Bear
+ 2.6
+ 900.0
+ Arctic
+
+
+ Blue Whale
+ 24.0
+ 180000.0
+ Oceans
+
+
diff --git a/tests/testdata/animals/animals_mixture.xml b/tests/testdata/animals/animals_mixture.xml
new file mode 100644
index 0000000..230f790
--- /dev/null
+++ b/tests/testdata/animals/animals_mixture.xml
@@ -0,0 +1,45 @@
+
+
+
+ African Elephant
+ 3.5
+ 6000.0
+ Africa
+
+
+ Bengal Tiger
+ 1.1
+ 260.0
+ Asia
+
+
+ Giraffe
+ 5.5
+ 1200.0
+ Africa
+
+
+ Polar Bear
+ 2.6
+ 900.0
+ Arctic
+
+
+ Blue Whale
+ 24.0
+ 180000.0
+ Oceans
+
+
+ Human
+ 1.7
+ 70.0
+ Africa
+
+
+ African Elephant
+ 3.5
+ -6000.0
+ Africa
+
+
diff --git a/tests/testdata/movies/movies.json b/tests/testdata/movies/movies.json
index afa606d..db6029f 100644
--- a/tests/testdata/movies/movies.json
+++ b/tests/testdata/movies/movies.json
@@ -32,7 +32,6 @@
]
},
{
- "title": "One with a cat and a dog",
"year": 2020,
"genre": ["Fantasy", "Family"],
"duration_minutes": 110,
diff --git a/tests/testdata/movies/movies_contract_error_details.json b/tests/testdata/movies/movies_contract_error_details.json
index f8cd934..260ee96 100644
--- a/tests/testdata/movies/movies_contract_error_details.json
+++ b/tests/testdata/movies/movies_contract_error_details.json
@@ -2,13 +2,15 @@
"title": {
"Blank": {
"error_code": "BLANKTITLE",
- "error_message": "title should not be blank"
+ "error_message": "title should not be blank",
+ "error_level": "submission"
}
},
"year": {
"Blank": {
"error_code": "BLANKYEAR",
- "error_message": "year not provided"
+ "error_message": "year not provided",
+ "is_informational": true
},
"Bad value": {
"error_code": "DODGYYEAR",