From cbedcffa9b3b995b63e3187bdfd77d6f52e38fc8 Mon Sep 17 00:00:00 2001 From: Rob Mitchell Date: Thu, 7 May 2026 15:05:47 -0400 Subject: [PATCH 1/4] Validation unit tests --- packages/validation/tests/test_validation.py | 157 +++++++++++++++++++ 1 file changed, 157 insertions(+) diff --git a/packages/validation/tests/test_validation.py b/packages/validation/tests/test_validation.py index 24682e6c..eaf77da2 100644 --- a/packages/validation/tests/test_validation.py +++ b/packages/validation/tests/test_validation.py @@ -1,9 +1,92 @@ from pathlib import Path +import pytest + +from validation import main as validation_main from validation import validate_eicr +class FakeAssert: + local_name = "failed-assert" + + def get_attribute_value(self, attribute: str) -> str: + values = { + "id": "ttc-labTestNameOrdered-noCode", + "location": "/ClinicalDocument/component/structuredBody/component/section/entry/observation", + } + + return values[attribute] + + +class FakeRoot: + def __init__(self) -> None: + """Represents the root of the SVRL output, which contains failed-assert children.""" + self.children = [FakeAssert()] + + +class FakeExecutable: + def apply_templates_returning_value(self, xdm_value: str) -> list[list[FakeRoot]]: + """Simulates applying the XSLT stylesheet to the XML document and returning SVRL output.""" + return [[FakeRoot()]] + + +class FakeXsltProcessor: + def __init__(self) -> None: + """Simulates the XSLT processor, recording transformations applied.""" + self.transforms: list[tuple[str, str, str]] = [] + + def transform_to_file(self, source_file: str, stylesheet_file: str, output_file: str) -> None: + """Simulates transforming an XML file with an XSLT stylesheet and writing to an output file.""" + self.transforms.append((source_file, stylesheet_file, output_file)) + Path(output_file).write_text("") + + def compile_stylesheet(self, stylesheet_file: str) -> FakeExecutable: + """Simulates compiling an XSLT stylesheet into an executable.""" + return FakeExecutable() + + +class FakeSaxonProcessor: + version = "Fake Saxon/C" + """Simulates the Saxon/C processor.""" + + def __init__(self, license: bool) -> None: + """Initializes the Saxon/C processor with a license flag and an XSLT processor.""" + self.license = license + self.xslt_processor = FakeXsltProcessor() + + def __enter__(self) -> "FakeSaxonProcessor": + """Enters the context manager, returning itself to be used for transformations.""" + return self + + def __exit__(self, exc_type: object, exc_value: object, traceback: object) -> None: + """Exits the context manager, performing any necessary cleanup (none in this fake implementation).""" + return + + def new_xslt30_processor(self) -> FakeXsltProcessor: + """Returns the XSLT processor to be used for transformations.""" + return self.xslt_processor + + def parse_xml(self, xml_text: str | None) -> str: + """Simulates parsing an XML string into an XDM node, which in this fake implementation is just the string itself.""" + return xml_text or "" + + +class BrokenSaxonProcessor: + def __init__(self, license: bool) -> None: + """Simulates a Saxon/C processor that raises an error when used, to test error handling in the validation function.""" + self.license = license + + def __enter__(self) -> "BrokenSaxonProcessor": + """Raises a RuntimeError to simulate a failure when entering the context manager.""" + raise RuntimeError("validator failed") + + def __exit__(self, exc_type: object, exc_value: object, traceback: object) -> None: + """Exits the context manager, which in this case is not reached due to the error raised in __enter__.""" + return + + def test_validation(): + """Tests that the validate_eicr function correctly processes an eICR and returns expected validation results.""" with Path.open("packages/validation/tests/assets/test_eicr.xml") as f: eicr = f.read() results = validate_eicr(eicr) @@ -17,8 +100,82 @@ def test_validation(): def test_validation_no_errors(): + """Tests that the validate_eicr function returns an empty list when there are no validation errors.""" with Path.open("e2e/snapshots/test_e2e/test_upload_and_process/augmented_eicr.xml") as f: eicr = f.read() results = validate_eicr(eicr) assert results == [] + + +def test_validation_redoes_all_steps(monkeypatch: pytest.MonkeyPatch, tmp_path: Path): + """Tests that the validate_eicr function redoes all steps of the validation process when redo_all_steps is True, and that it returns expected validation results.""" + stage1_output = tmp_path / "stage1.sch.tmp" + stage2_output = tmp_path / "stage2.sch.tmp" + validator_output = tmp_path / "validator.xsl.tmp" + + stage1_output.write_text("old stage 1") + stage2_output.write_text("old stage 2") + validator_output.write_text("old validator") + + monkeypatch.setattr(validation_main, "STAGE1_OUTPUT", stage1_output) + monkeypatch.setattr(validation_main, "STAGE2_OUTPUT", stage2_output) + monkeypatch.setattr(validation_main, "VALIDATOR_OUTPUT", validator_output) + monkeypatch.setattr(validation_main, "APHL_SCHEMATRON", tmp_path / "schema.sch") + monkeypatch.setattr(validation_main, "XSLT_INCLUDE", tmp_path / "include.xsl") + monkeypatch.setattr(validation_main, "XSLT_EXPAND", tmp_path / "expand.xsl") + monkeypatch.setattr(validation_main, "XSLT_COMPILE", tmp_path / "compile.xsl") + monkeypatch.setattr(validation_main, "PySaxonProcessor", FakeSaxonProcessor) + + results = validate_eicr("", redo_all_steps=True) + + assert results == [ + { + "error_id": "ttc-labTestNameOrdered-noCode", + "location": "/ClinicalDocument/component/structuredBody/component/section/entry/observation", + } + ] + assert stage1_output.read_text() == "" + assert stage2_output.read_text() == "" + assert validator_output.read_text() == "" + + +def test_validation_uses_existing_generated_files(monkeypatch: pytest.MonkeyPatch, tmp_path: Path): + """Tests that the validate_eicr function uses existing generated files for steps 1-3 of the validation process when redo_all_steps is False, and that it returns expected validation results.""" + stage1_output = tmp_path / "stage1.sch.tmp" + stage2_output = tmp_path / "stage2.sch.tmp" + validator_output = tmp_path / "validator.xsl.tmp" + + stage1_output.write_text("existing stage 1") + stage2_output.write_text("existing stage 2") + validator_output.write_text("existing validator") + + monkeypatch.setattr(validation_main, "STAGE1_OUTPUT", stage1_output) + monkeypatch.setattr(validation_main, "STAGE2_OUTPUT", stage2_output) + monkeypatch.setattr(validation_main, "VALIDATOR_OUTPUT", validator_output) + monkeypatch.setattr(validation_main, "PySaxonProcessor", FakeSaxonProcessor) + + results = validate_eicr("") + + assert results == [ + { + "error_id": "ttc-labTestNameOrdered-noCode", + "location": "/ClinicalDocument/component/structuredBody/component/section/entry/observation", + } + ] + assert stage1_output.read_text() == "existing stage 1" + assert stage2_output.read_text() == "existing stage 2" + assert validator_output.read_text() == "existing validator" + + +def test_validation_returns_empty_list_when_validator_errors( + monkeypatch: pytest.MonkeyPatch, + caplog: pytest.LogCaptureFixture, +): + """Tests that the validate_eicr function returns an empty list and logs an error when the validator fails.""" + monkeypatch.setattr(validation_main, "PySaxonProcessor", BrokenSaxonProcessor) + + results = validate_eicr("") + + assert results == [] + assert "An error occurred during validation: validator failed" in caplog.text From fbeafa6c02f710976c3ae3554571239dd7ede44a Mon Sep 17 00:00:00 2001 From: Rob Mitchell Date: Thu, 7 May 2026 15:22:32 -0400 Subject: [PATCH 2/4] getting versionNumber from incoming eicr --- .../augmentation/src/augmentation/services/eicr_augmenter.py | 5 ++--- .../basic_eicr_related_doc_augmented.xml | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/packages/augmentation/src/augmentation/services/eicr_augmenter.py b/packages/augmentation/src/augmentation/services/eicr_augmenter.py index cd7a28fc..b1be01f2 100644 --- a/packages/augmentation/src/augmentation/services/eicr_augmenter.py +++ b/packages/augmentation/src/augmentation/services/eicr_augmenter.py @@ -197,10 +197,9 @@ def _get_new_effective_time(self) -> Element: def _get_new_version_number(self) -> Element: """Generate a versionNumber element for the augmented eICR document.""" + original_version_number = self._get_original_by_xpath("/ClinicalDocument/versionNumber") version_number_tag = etree.Element("versionNumber") - # hard code to 1 for now - # TODO: we may need to have some way to increment this later - version_number_tag.set("value", "1") + version_number_tag.set("value", original_version_number.get("value")) return version_number_tag def _get_augmented_template_id(self) -> Element: diff --git a/packages/augmentation/tests/unit/snapshots/test_eicr_augmenter/test_eicr_related_doc/basic_eicr_related_doc_augmented.xml b/packages/augmentation/tests/unit/snapshots/test_eicr_augmenter/test_eicr_related_doc/basic_eicr_related_doc_augmented.xml index b71930d3..e50f04d3 100644 --- a/packages/augmentation/tests/unit/snapshots/test_eicr_augmenter/test_eicr_related_doc/basic_eicr_related_doc_augmented.xml +++ b/packages/augmentation/tests/unit/snapshots/test_eicr_augmenter/test_eicr_related_doc/basic_eicr_related_doc_augmented.xml @@ -9,7 +9,7 @@ - + From a49090be379c9eed17a92fd923ad0cf1c0c12980 Mon Sep 17 00:00:00 2001 From: Rob Mitchell Date: Thu, 7 May 2026 15:34:37 -0400 Subject: [PATCH 3/4] Rollback --- .../augmentation/services/eicr_augmenter.py | 23 +++++++++++++++---- .../basic_eicr_related_doc_augmented.xml | 6 ++--- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/packages/augmentation/src/augmentation/services/eicr_augmenter.py b/packages/augmentation/src/augmentation/services/eicr_augmenter.py index b1be01f2..5a671a18 100644 --- a/packages/augmentation/src/augmentation/services/eicr_augmenter.py +++ b/packages/augmentation/src/augmentation/services/eicr_augmenter.py @@ -1,5 +1,6 @@ from datetime import datetime -from uuid import uuid4 +from uuid import NAMESPACE_URL +from uuid import uuid5 from lxml import etree from lxml.etree import Element @@ -28,6 +29,7 @@ def __init__( nonstandard_codes: list[NonstandardCodeInstance], config: TTCAugmenterConfig | None = None, augmentation_date: datetime | None = None, + deterministic_id_seed: str | None = None, ): """Initialize EICRAugmenter. @@ -40,8 +42,9 @@ def __init__( super().__init__(document, config, ApplicationCode.TEXT_TO_CODE, augmentation_date) self.original_eicr_id = self._get_augmented_tag_by_xpath("/ClinicalDocument/id/@root") - self.new_doc_id: str = str(uuid4()) - self.new_set_id: str = str(uuid4()) + self.deterministic_id_seed = deterministic_id_seed or self.original_eicr_id + self.new_doc_id: str = self._generate_deterministic_id("document") + self.new_set_id: str = self._generate_deterministic_id("set") self.nonstandard_codes = nonstandard_codes def augment(self) -> Metadata: @@ -176,6 +179,15 @@ def _get_old_version_number(self) -> Element: version = self._get_original_by_xpath("/ClinicalDocument/versionNumber") return version + def _generate_deterministic_id(self, identifier_type: str) -> str: + """Generate a stable UUID for augmented eICR identifiers.""" + return str( + uuid5( + NAMESPACE_URL, + f"{self._get_application_code_value()}:{self.deterministic_id_seed}:{identifier_type}", + ) + ) + def _get_new_document_id(self) -> Element: """Generate a new document ID element for the augmented eICR document.""" doc_id_tag = etree.Element("id") @@ -197,9 +209,10 @@ def _get_new_effective_time(self) -> Element: def _get_new_version_number(self) -> Element: """Generate a versionNumber element for the augmented eICR document.""" - original_version_number = self._get_original_by_xpath("/ClinicalDocument/versionNumber") version_number_tag = etree.Element("versionNumber") - version_number_tag.set("value", original_version_number.get("value")) + # hard code to 1 for now + # TODO: we may need to have some way to increment this later + version_number_tag.set("value", "1") return version_number_tag def _get_augmented_template_id(self) -> Element: diff --git a/packages/augmentation/tests/unit/snapshots/test_eicr_augmenter/test_eicr_related_doc/basic_eicr_related_doc_augmented.xml b/packages/augmentation/tests/unit/snapshots/test_eicr_augmenter/test_eicr_related_doc/basic_eicr_related_doc_augmented.xml index e50f04d3..28b38919 100644 --- a/packages/augmentation/tests/unit/snapshots/test_eicr_augmenter/test_eicr_related_doc/basic_eicr_related_doc_augmented.xml +++ b/packages/augmentation/tests/unit/snapshots/test_eicr_augmenter/test_eicr_related_doc/basic_eicr_related_doc_augmented.xml @@ -3,13 +3,13 @@ - + - + - + From c959a14a4a40249f31a05b39748dc397e464a9a6 Mon Sep 17 00:00:00 2001 From: Rob Mitchell Date: Thu, 7 May 2026 15:48:59 -0400 Subject: [PATCH 4/4] Rollback --- .../augmentation/services/eicr_augmenter.py | 18 +++--------------- .../basic_eicr_related_doc_augmented.xml | 4 ++-- 2 files changed, 5 insertions(+), 17 deletions(-) diff --git a/packages/augmentation/src/augmentation/services/eicr_augmenter.py b/packages/augmentation/src/augmentation/services/eicr_augmenter.py index 5a671a18..cd7a28fc 100644 --- a/packages/augmentation/src/augmentation/services/eicr_augmenter.py +++ b/packages/augmentation/src/augmentation/services/eicr_augmenter.py @@ -1,6 +1,5 @@ from datetime import datetime -from uuid import NAMESPACE_URL -from uuid import uuid5 +from uuid import uuid4 from lxml import etree from lxml.etree import Element @@ -29,7 +28,6 @@ def __init__( nonstandard_codes: list[NonstandardCodeInstance], config: TTCAugmenterConfig | None = None, augmentation_date: datetime | None = None, - deterministic_id_seed: str | None = None, ): """Initialize EICRAugmenter. @@ -42,9 +40,8 @@ def __init__( super().__init__(document, config, ApplicationCode.TEXT_TO_CODE, augmentation_date) self.original_eicr_id = self._get_augmented_tag_by_xpath("/ClinicalDocument/id/@root") - self.deterministic_id_seed = deterministic_id_seed or self.original_eicr_id - self.new_doc_id: str = self._generate_deterministic_id("document") - self.new_set_id: str = self._generate_deterministic_id("set") + self.new_doc_id: str = str(uuid4()) + self.new_set_id: str = str(uuid4()) self.nonstandard_codes = nonstandard_codes def augment(self) -> Metadata: @@ -179,15 +176,6 @@ def _get_old_version_number(self) -> Element: version = self._get_original_by_xpath("/ClinicalDocument/versionNumber") return version - def _generate_deterministic_id(self, identifier_type: str) -> str: - """Generate a stable UUID for augmented eICR identifiers.""" - return str( - uuid5( - NAMESPACE_URL, - f"{self._get_application_code_value()}:{self.deterministic_id_seed}:{identifier_type}", - ) - ) - def _get_new_document_id(self) -> Element: """Generate a new document ID element for the augmented eICR document.""" doc_id_tag = etree.Element("id") diff --git a/packages/augmentation/tests/unit/snapshots/test_eicr_augmenter/test_eicr_related_doc/basic_eicr_related_doc_augmented.xml b/packages/augmentation/tests/unit/snapshots/test_eicr_augmenter/test_eicr_related_doc/basic_eicr_related_doc_augmented.xml index 28b38919..b71930d3 100644 --- a/packages/augmentation/tests/unit/snapshots/test_eicr_augmenter/test_eicr_related_doc/basic_eicr_related_doc_augmented.xml +++ b/packages/augmentation/tests/unit/snapshots/test_eicr_augmenter/test_eicr_related_doc/basic_eicr_related_doc_augmented.xml @@ -3,9 +3,9 @@ - + - +