From ff59d5efdc25d8373bcf19868983a9d6e096f802 Mon Sep 17 00:00:00 2001 From: Rakesh Date: Fri, 2 Jan 2026 15:22:33 -0500 Subject: [PATCH 1/5] Fix output file extension handling and normalize extensions to lowercase --- .../services/reporting/base_report.py | 19 ++++++++++++++++++- .../services/reporting/excel_report.py | 3 +++ .../services/reporting/json_report.py | 4 ++++ scripts/run_validation.py | 7 ++++++- 4 files changed, 31 insertions(+), 2 deletions(-) diff --git a/cdisc_rules_engine/services/reporting/base_report.py b/cdisc_rules_engine/services/reporting/base_report.py index 3fc37311a..7f98fbd09 100644 --- a/cdisc_rules_engine/services/reporting/base_report.py +++ b/cdisc_rules_engine/services/reporting/base_report.py @@ -21,7 +21,24 @@ def __init__( self._report_standard = report_standard self._args = args self._template = template - self._output_name: str = f"{self._args.output}.{self._file_ext}" + self._output_name: str = self._get_output_filename() + + def _get_output_filename(self) -> str: + expected_ext = f".{self._file_ext}" + output_path = self._args.output + + if output_path.lower().endswith(expected_ext.lower()): + base_path = output_path[: -len(expected_ext)] + return f"{base_path}{expected_ext}" + + path_lower = output_path.lower() + known_extensions = [".json", ".xlsx", ".xls"] + for ext in known_extensions: + if path_lower.endswith(ext): + base_path = output_path[: -len(ext)] + return f"{base_path}{expected_ext}" + + return f"{output_path}{expected_ext}" @property @abstractmethod diff --git a/cdisc_rules_engine/services/reporting/excel_report.py b/cdisc_rules_engine/services/reporting/excel_report.py index 6011c58fb..5767ccadf 100644 --- a/cdisc_rules_engine/services/reporting/excel_report.py +++ b/cdisc_rules_engine/services/reporting/excel_report.py @@ -85,6 +85,9 @@ def write_report(self): logger = logging.getLogger("validator") try: report_data = self.get_export() + output_dir = os.path.dirname(self._output_name) + if output_dir: + os.makedirs(output_dir, exist_ok=True) with open(self._output_name, "wb") as f: f.write(excel_workbook_to_stream(report_data)) logger.debug(f"Report written to: {self._output_name}") diff --git a/cdisc_rules_engine/services/reporting/json_report.py b/cdisc_rules_engine/services/reporting/json_report.py index 4aadf7a56..c394059f0 100644 --- a/cdisc_rules_engine/services/reporting/json_report.py +++ b/cdisc_rules_engine/services/reporting/json_report.py @@ -1,4 +1,5 @@ import json +import os from typing import BinaryIO, override from cdisc_rules_engine.enums.report_types import ReportTypes @@ -66,5 +67,8 @@ def write_report(self): report_data = self.get_export( raw_report=self._args.raw_report, ) + output_dir = os.path.dirname(self._output_name) + if output_dir: + os.makedirs(output_dir, exist_ok=True) with open(self._output_name, "w") as f: json.dump(report_data, f) diff --git a/scripts/run_validation.py b/scripts/run_validation.py index ba5cddf26..09b9b2815 100644 --- a/scripts/run_validation.py +++ b/scripts/run_validation.py @@ -199,9 +199,14 @@ def run_validation(args: Validation_args): datasets, results, elapsed_time, args, data_service, dictionary_versions ) reporting_services: List[BaseReport] = reporting_factory.get_report_services() + output_files = [] for reporting_service in reporting_services: reporting_service.write_report() - print(f"Output: {args.output}") + output_files.append(reporting_service._output_name) + if len(output_files) == 1: + print(f"Output: {output_files[0]}") + else: + print(f"Output: {', '.join(output_files)}") finally: if created_files: engine_logger.info(" Report generated, Cleaning up intermediate files") From c76daa6ca0a9537ccf597d9cc2f9637b6f4bd5e6 Mon Sep 17 00:00:00 2001 From: Rakesh Date: Fri, 2 Jan 2026 18:14:51 -0500 Subject: [PATCH 2/5] Fix domain_is_custom operation for TIG standards and refactor domain custom check logic --- .../models/library_metadata_container.py | 7 +++ .../operations/domain_is_custom.py | 3 +- .../services/cdisc_library_service.py | 11 ++-- .../utilities/rule_processor.py | 53 ++++++++++++++----- 4 files changed, 55 insertions(+), 19 deletions(-) diff --git a/cdisc_rules_engine/models/library_metadata_container.py b/cdisc_rules_engine/models/library_metadata_container.py index e031e9b30..57b3198db 100644 --- a/cdisc_rules_engine/models/library_metadata_container.py +++ b/cdisc_rules_engine/models/library_metadata_container.py @@ -139,3 +139,10 @@ def build_ct_terms(self, ct_package_type: str, versions: str | Iterable[str]): ct_terms["term_value"].append(term["submissionValue"]) ct_terms["term_pref_term"].append(term.get("preferredTerm")) return ct_terms + + def is_domain_custom(self, domain: str) -> bool: + standard_data = self._standard_metadata or {} + domains = standard_data.get("domains", set()) + if not isinstance(domains, (set, list, tuple)): + domains = set() + return domain not in domains diff --git a/cdisc_rules_engine/operations/domain_is_custom.py b/cdisc_rules_engine/operations/domain_is_custom.py index 3b9f86986..722f15919 100644 --- a/cdisc_rules_engine/operations/domain_is_custom.py +++ b/cdisc_rules_engine/operations/domain_is_custom.py @@ -8,5 +8,4 @@ def _execute_operation(self): given domain is in standard domains. If no -> the domain is custom. """ - standard_data: dict = self.library_metadata.standard_metadata - return self.params.domain not in standard_data.get("domains", {}) + return self.library_metadata.is_domain_custom(self.params.domain) diff --git a/cdisc_rules_engine/services/cdisc_library_service.py b/cdisc_rules_engine/services/cdisc_library_service.py index a9dfb5acd..270eb315c 100644 --- a/cdisc_rules_engine/services/cdisc_library_service.py +++ b/cdisc_rules_engine/services/cdisc_library_service.py @@ -290,8 +290,7 @@ def get_standard_details( domains: Set[str] = self._extract_domain_names_from_tabulation_standard( standard_data ) - if domains: - standard_data["domains"] = domains + standard_data["domains"] = domains return standard_data def get_model_details(self, standard_details: dict) -> Optional[dict]: @@ -679,5 +678,11 @@ def _extract_domain_names_from_tabulation_standard( domain_names: Set[str] = set() for cls in standard_data.get("classes", []): for dataset in cls.get("datasets", []): - domain_names.add(dataset.get("name")) + domain_name = dataset.get("name") + if domain_name: + domain_names.add(domain_name) + for dataset in standard_data.get("datasets", []): + domain_name = dataset.get("name") + if domain_name: + domain_names.add(domain_name) return domain_names diff --git a/cdisc_rules_engine/utilities/rule_processor.py b/cdisc_rules_engine/utilities/rule_processor.py index c90ac7310..7a2009252 100644 --- a/cdisc_rules_engine/utilities/rule_processor.py +++ b/cdisc_rules_engine/utilities/rule_processor.py @@ -268,6 +268,42 @@ def rule_applies_to_class( is_excluded = True return is_included and not is_excluded + def _is_custom_domain(self, domain: str) -> bool: + return self.library_metadata.is_domain_custom(domain) + + def _get_allowed_domains_for_use_cases( + self, use_cases: List[str], substandard: str + ) -> set: + allowed_domains = set() + for use_case in use_cases: + if use_case in USE_CASE_DOMAINS[substandard]: + allowed_domains.update(USE_CASE_DOMAINS[substandard][use_case]) + return allowed_domains + + def _get_domain_to_check(self, dataset_metadata: SDTMDatasetMetadata) -> str: + if dataset_metadata.is_supp and dataset_metadata.rdomain: + return dataset_metadata.rdomain + return dataset_metadata.domain + + def _check_adam_domain( + self, domain: str, substandard: str, use_cases: List[str] + ) -> bool: + if substandard == "ADAM" and domain.startswith("AD"): + return "ANALYSIS" in use_cases + return False + + def _check_domain_in_use_case( + self, domain: str, use_cases: List[str], substandard: str + ) -> bool: + allowed_domains = self._get_allowed_domains_for_use_cases( + use_cases, substandard + ) + if domain in allowed_domains: + return True + if self._is_custom_domain(domain): + return True + return False + def rule_applies_to_use_case( self, dataset_metadata: SDTMDatasetMetadata, @@ -285,21 +321,10 @@ def rule_applies_to_use_case( if substandard not in USE_CASE_DOMAINS: return False - domain_to_check = dataset_metadata.domain - if dataset_metadata.is_supp and dataset_metadata.rdomain: - domain_to_check = dataset_metadata.rdomain - - # Handle ADaM datasets with AD prefix - if substandard == "ADAM" and domain_to_check.startswith("AD"): - return "ANALYSIS" in use_cases - - allowed_domains = set() - for use_case in use_cases: - if use_case in USE_CASE_DOMAINS[substandard]: - allowed_domains.update(USE_CASE_DOMAINS[substandard][use_case]) - if domain_to_check in allowed_domains: + domain_to_check = self._get_domain_to_check(dataset_metadata) + if self._check_adam_domain(domain_to_check, substandard, use_cases): return True - return False + return self._check_domain_in_use_case(domain_to_check, use_cases, substandard) @classmethod def rule_applies_to_entity( From 89ea4ffb8878f52362c515bbbd44e12e9214898a Mon Sep 17 00:00:00 2001 From: Rakesh Date: Fri, 2 Jan 2026 18:22:21 -0500 Subject: [PATCH 3/5] Revert "Fix domain_is_custom operation for TIG standards and refactor domain custom check logic" This reverts commit c76daa6ca0a9537ccf597d9cc2f9637b6f4bd5e6. --- .../models/library_metadata_container.py | 7 --- .../operations/domain_is_custom.py | 3 +- .../services/cdisc_library_service.py | 11 ++-- .../utilities/rule_processor.py | 53 +++++-------------- 4 files changed, 19 insertions(+), 55 deletions(-) diff --git a/cdisc_rules_engine/models/library_metadata_container.py b/cdisc_rules_engine/models/library_metadata_container.py index 57b3198db..e031e9b30 100644 --- a/cdisc_rules_engine/models/library_metadata_container.py +++ b/cdisc_rules_engine/models/library_metadata_container.py @@ -139,10 +139,3 @@ def build_ct_terms(self, ct_package_type: str, versions: str | Iterable[str]): ct_terms["term_value"].append(term["submissionValue"]) ct_terms["term_pref_term"].append(term.get("preferredTerm")) return ct_terms - - def is_domain_custom(self, domain: str) -> bool: - standard_data = self._standard_metadata or {} - domains = standard_data.get("domains", set()) - if not isinstance(domains, (set, list, tuple)): - domains = set() - return domain not in domains diff --git a/cdisc_rules_engine/operations/domain_is_custom.py b/cdisc_rules_engine/operations/domain_is_custom.py index 722f15919..3b9f86986 100644 --- a/cdisc_rules_engine/operations/domain_is_custom.py +++ b/cdisc_rules_engine/operations/domain_is_custom.py @@ -8,4 +8,5 @@ def _execute_operation(self): given domain is in standard domains. If no -> the domain is custom. """ - return self.library_metadata.is_domain_custom(self.params.domain) + standard_data: dict = self.library_metadata.standard_metadata + return self.params.domain not in standard_data.get("domains", {}) diff --git a/cdisc_rules_engine/services/cdisc_library_service.py b/cdisc_rules_engine/services/cdisc_library_service.py index 270eb315c..a9dfb5acd 100644 --- a/cdisc_rules_engine/services/cdisc_library_service.py +++ b/cdisc_rules_engine/services/cdisc_library_service.py @@ -290,7 +290,8 @@ def get_standard_details( domains: Set[str] = self._extract_domain_names_from_tabulation_standard( standard_data ) - standard_data["domains"] = domains + if domains: + standard_data["domains"] = domains return standard_data def get_model_details(self, standard_details: dict) -> Optional[dict]: @@ -678,11 +679,5 @@ def _extract_domain_names_from_tabulation_standard( domain_names: Set[str] = set() for cls in standard_data.get("classes", []): for dataset in cls.get("datasets", []): - domain_name = dataset.get("name") - if domain_name: - domain_names.add(domain_name) - for dataset in standard_data.get("datasets", []): - domain_name = dataset.get("name") - if domain_name: - domain_names.add(domain_name) + domain_names.add(dataset.get("name")) return domain_names diff --git a/cdisc_rules_engine/utilities/rule_processor.py b/cdisc_rules_engine/utilities/rule_processor.py index 7a2009252..c90ac7310 100644 --- a/cdisc_rules_engine/utilities/rule_processor.py +++ b/cdisc_rules_engine/utilities/rule_processor.py @@ -268,42 +268,6 @@ def rule_applies_to_class( is_excluded = True return is_included and not is_excluded - def _is_custom_domain(self, domain: str) -> bool: - return self.library_metadata.is_domain_custom(domain) - - def _get_allowed_domains_for_use_cases( - self, use_cases: List[str], substandard: str - ) -> set: - allowed_domains = set() - for use_case in use_cases: - if use_case in USE_CASE_DOMAINS[substandard]: - allowed_domains.update(USE_CASE_DOMAINS[substandard][use_case]) - return allowed_domains - - def _get_domain_to_check(self, dataset_metadata: SDTMDatasetMetadata) -> str: - if dataset_metadata.is_supp and dataset_metadata.rdomain: - return dataset_metadata.rdomain - return dataset_metadata.domain - - def _check_adam_domain( - self, domain: str, substandard: str, use_cases: List[str] - ) -> bool: - if substandard == "ADAM" and domain.startswith("AD"): - return "ANALYSIS" in use_cases - return False - - def _check_domain_in_use_case( - self, domain: str, use_cases: List[str], substandard: str - ) -> bool: - allowed_domains = self._get_allowed_domains_for_use_cases( - use_cases, substandard - ) - if domain in allowed_domains: - return True - if self._is_custom_domain(domain): - return True - return False - def rule_applies_to_use_case( self, dataset_metadata: SDTMDatasetMetadata, @@ -321,10 +285,21 @@ def rule_applies_to_use_case( if substandard not in USE_CASE_DOMAINS: return False - domain_to_check = self._get_domain_to_check(dataset_metadata) - if self._check_adam_domain(domain_to_check, substandard, use_cases): + domain_to_check = dataset_metadata.domain + if dataset_metadata.is_supp and dataset_metadata.rdomain: + domain_to_check = dataset_metadata.rdomain + + # Handle ADaM datasets with AD prefix + if substandard == "ADAM" and domain_to_check.startswith("AD"): + return "ANALYSIS" in use_cases + + allowed_domains = set() + for use_case in use_cases: + if use_case in USE_CASE_DOMAINS[substandard]: + allowed_domains.update(USE_CASE_DOMAINS[substandard][use_case]) + if domain_to_check in allowed_domains: return True - return self._check_domain_in_use_case(domain_to_check, use_cases, substandard) + return False @classmethod def rule_applies_to_entity( From f379e85a1e986957bbe01470e0bd1324c40d0beb Mon Sep 17 00:00:00 2001 From: Rakesh Date: Tue, 6 Jan 2026 11:47:25 -0500 Subject: [PATCH 4/5] Move known report extensions to constants module --- cdisc_rules_engine/constants/__init__.py | 2 ++ cdisc_rules_engine/services/reporting/base_report.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cdisc_rules_engine/constants/__init__.py b/cdisc_rules_engine/constants/__init__.py index 0453d6f1f..e61d21310 100644 --- a/cdisc_rules_engine/constants/__init__.py +++ b/cdisc_rules_engine/constants/__init__.py @@ -15,3 +15,5 @@ ) NULL_FLAVORS = ["", None, {None}, [], {}, np.nan] + +KNOWN_REPORT_EXTENSIONS = [".json", ".xlsx", ".xls"] diff --git a/cdisc_rules_engine/services/reporting/base_report.py b/cdisc_rules_engine/services/reporting/base_report.py index 7f98fbd09..16b5180eb 100644 --- a/cdisc_rules_engine/services/reporting/base_report.py +++ b/cdisc_rules_engine/services/reporting/base_report.py @@ -1,6 +1,7 @@ from abc import ABC, abstractmethod from io import IOBase +from cdisc_rules_engine.constants import KNOWN_REPORT_EXTENSIONS from cdisc_rules_engine.models.validation_args import Validation_args from cdisc_rules_engine.services.reporting.base_report_data import ( BaseReportData, @@ -32,8 +33,7 @@ def _get_output_filename(self) -> str: return f"{base_path}{expected_ext}" path_lower = output_path.lower() - known_extensions = [".json", ".xlsx", ".xls"] - for ext in known_extensions: + for ext in KNOWN_REPORT_EXTENSIONS: if path_lower.endswith(ext): base_path = output_path[: -len(ext)] return f"{base_path}{expected_ext}" From 710652ff18fe8f353555b16b73f4e35bb74417ae Mon Sep 17 00:00:00 2001 From: Rakesh Date: Wed, 7 Jan 2026 10:21:19 -0500 Subject: [PATCH 5/5] Better Error Handling for Directory Creation Failures --- README.md | 9 ++++++--- .../services/reporting/excel_report.py | 12 +++++++++++- cdisc_rules_engine/services/reporting/json_report.py | 8 +++++++- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 9331483e4..0166f59d6 100644 --- a/README.md +++ b/README.md @@ -82,9 +82,12 @@ Run `python core.py validate --help` to see the list of validation options. -o, --output TEXT Report output file destination and name. Path will be relative to the validation execution directory and should end in the desired output filename - without file extension - '/user/reports/result' will be 'user/report' directory - with the filename as 'result' + without file extension. The file extension will be + automatically added based on the output format. + Example: 'reports/result' will create 'reports' directory + with the filename 'result.json' (or 'result.xlsx' for Excel). + Note: Provide a valid, writable path. Absolute paths must + be valid for your operating system. -of, --output-format [JSON|XLSX] Output file format -rr, --raw-report Report in a raw format as it is generated by diff --git a/cdisc_rules_engine/services/reporting/excel_report.py b/cdisc_rules_engine/services/reporting/excel_report.py index 5767ccadf..a40d27a4a 100644 --- a/cdisc_rules_engine/services/reporting/excel_report.py +++ b/cdisc_rules_engine/services/reporting/excel_report.py @@ -87,10 +87,20 @@ def write_report(self): report_data = self.get_export() output_dir = os.path.dirname(self._output_name) if output_dir: - os.makedirs(output_dir, exist_ok=True) + try: + os.makedirs(output_dir, exist_ok=True) + except OSError as e: + error_msg = ( + f"Cannot create output directory '{output_dir}': {e.strerror}. " + f"Please provide a valid, writable path for the output file." + ) + logger.error(error_msg) + raise OSError(error_msg) from e with open(self._output_name, "wb") as f: f.write(excel_workbook_to_stream(report_data)) logger.debug(f"Report written to: {self._output_name}") + except OSError: + raise except Exception as e: logger.error(f"Error writing report: {e}") raise e diff --git a/cdisc_rules_engine/services/reporting/json_report.py b/cdisc_rules_engine/services/reporting/json_report.py index c394059f0..3c1a32b2b 100644 --- a/cdisc_rules_engine/services/reporting/json_report.py +++ b/cdisc_rules_engine/services/reporting/json_report.py @@ -69,6 +69,12 @@ def write_report(self): ) output_dir = os.path.dirname(self._output_name) if output_dir: - os.makedirs(output_dir, exist_ok=True) + try: + os.makedirs(output_dir, exist_ok=True) + except OSError as e: + raise OSError( + f"Cannot create output directory '{output_dir}': {e.strerror}. " + f"Please provide a valid, writable path for the output file." + ) from e with open(self._output_name, "w") as f: json.dump(report_data, f)