diff --git a/requirements.txt b/requirements.txt
index fb6c7ed..78f4e90 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,4 @@
 pandas
+pytest
+defusedxml
+lxml
diff --git a/src/xml-validator.py b/src/xml-validator.py
index 058669d..e0281ba 100644
--- a/src/xml-validator.py
+++ b/src/xml-validator.py
@@ -7,6 +7,7 @@
 import sys
 import xml.etree.ElementTree as ET
 from lxml import etree
+from defusedxml.lxml import parse as defused_parse
 import logging # Keep standard logging import for levels like logging.INFO
 import re
 
@@ -28,11 +29,11 @@ def validate_against_xsd(xml_file, xsd_file):
     """
     try:
         # Parse the XSD schema
-        xmlschema_doc = etree.parse(xsd_file)
+        xmlschema_doc = defused_parse(xsd_file)
         xmlschema = etree.XMLSchema(xmlschema_doc)
 
         # Parse the XML file
-        xml_doc = etree.parse(xml_file)
+        xml_doc = defused_parse(xml_file)
 
         # Validate
         is_valid = xmlschema.validate(xml_doc)
diff --git a/tests/test_base_converter.py b/tests/test_base_converter.py
new file mode 100644
index 0000000..5ffbe9e
--- /dev/null
+++ b/tests/test_base_converter.py
@@ -0,0 +1,50 @@
+import unittest
+import os
+import sys
+
+# Add the project root to the Python path
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from src.converters.base_converter import BaseConverter
+from src.logging_util import ConversionLogger
+from src.validation_report import ValidationTracker
+
+class TestBaseConverter(unittest.TestCase):
+
+    def setUp(self):
+        self.logger = ConversionLogger("test_base", log_level="DEBUG", log_to_file=False).logger
+        self.validator = ValidationTracker()
+
+    def test_cannot_instantiate_abc(self):
+        """
+        Tests that BaseConverter cannot be instantiated directly because it's an ABC.
+        """
+        with self.assertRaisesRegex(TypeError, "Can't instantiate abstract class BaseConverter"):
+            BaseConverter(self.logger, self.validator)
+
+    def test_subclass_must_implement_convert(self):
+        """
+        Tests that a subclass must implement the 'convert' method.
+        """
+        class IncompleteConverter(BaseConverter):
+            pass
+
+        with self.assertRaisesRegex(TypeError, "Can't instantiate abstract class IncompleteConverter"):
+            IncompleteConverter(self.logger, self.validator)
+
+    def test_subclass_with_convert_can_be_instantiated(self):
+        """
+        Tests that a subclass that implements 'convert' can be instantiated.
+        """
+        class CompleteConverter(BaseConverter):
+            def convert(self, input_path: str, output_path: str):
+                pass
+
+        converter = CompleteConverter(self.logger, self.validator)
+        self.assertIsInstance(converter, CompleteConverter)
+        self.assertIsInstance(converter, BaseConverter)
+        self.assertEqual(converter.logger, self.logger)
+        self.assertEqual(converter.validator, self.validator)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/test_data_cleaning.py b/tests/test_data_cleaning.py
index 7e5244f..9388dad 100644
--- a/tests/test_data_cleaning.py
+++ b/tests/test_data_cleaning.py
@@ -42,6 +42,27 @@ def test_format_date_output_format_and_default(self):
         self.assertEqual(format_date("2023-1-1"), "2023-01-01") # Check zero padding
         self.assertEqual(format_date("bad", default_return="---"), "---")
 
+    def test_format_date_value_error_path(self):
+        # Specifically malformed date string that causes ValueError inside the date parsing loop
+        # and tests that it continues to try the next format
+        self.assertEqual(format_date("10/26/2023", input_formats=["%Y-%m-%d", "%m/%d/%Y"]), "2023-10-26")
+
+        # Test a date that raises ValueError for logical reasons (e.g., Feb 29 on non-leap year)
+        self.assertEqual(format_date("2023-02-29", input_formats=["%Y-%m-%d"]), "")
+
+        # Test a date that raises ValueError for the first format but succeeds on the second
+        # (leap year case)
+        self.assertEqual(format_date("2024-02-29", input_formats=["%m/%d/%Y", "%Y-%m-%d"]), "2024-02-29")
+
+        # Test complete exhaustion of formats due to ValueError
+        self.assertEqual(format_date("2023-13-01", input_formats=["%Y-%m-%d", "%m/%d/%Y"]), "")
+
+    def test_format_date_regex_fallback(self):
+        # Test the regex fallback logic for missing zero-padding
+        self.assertEqual(format_date("2023-1-1", input_formats=["%Y/%m/%d"]), "2023-01-01")
+        # Test the regex fallback failing due to invalid date elements
+        self.assertEqual(format_date("2023-30-30", input_formats=["%Y/%m/%d"]), "")
+
 class TestStandardizeStateName(unittest.TestCase):
     # Using DEFAULT_VALID_STATES from data_cleaning for some tests
     # These are the states the function itself knows about if no list is passed
diff --git a/tests/test_data_validation.py b/tests/test_data_validation.py
new file mode 100644
index 0000000..f9b9f03
--- /dev/null
+++ b/tests/test_data_validation.py
@@ -0,0 +1,152 @@
+import unittest
+from unittest.mock import MagicMock
+import sys
+import os
+
+# Add the project root to the Python path
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from src.data_validation import (
+    validate_counseling_record,
+    validate_training_record,
+    analyze_counseling_csv,
+    analyze_training_csv
+)
+from src.config import ValidationCategory as VC, CounselingConfig, TrainingConfig
+
+class TestDataValidation(unittest.TestCase):
+
+    def setUp(self):
+        self.validator = MagicMock()
+
+    def test_validate_counseling_record_success(self):
+        row = {
+            CounselingConfig.REQUIRED_FIELDS[0]: "C-123",
+            'Last Name': 'Doe',
+            'First Name': 'John',
+            'Date': '2023-10-15'
+        }
+
+        result = validate_counseling_record(row, 1, self.validator)
+
+        self.assertTrue(result)
+        self.validator.set_current_record_id.assert_called_once_with("C-123")
+        self.validator.add_issue.assert_not_called()
+
+    def test_validate_counseling_record_missing_id(self):
+        row = {
+            'Last Name': 'Doe',
+            'First Name': 'John',
+            'Date': '2023-10-15'
+        }
+
+        result = validate_counseling_record(row, 2, self.validator)
+
+        self.assertFalse(result)
+        self.validator.set_current_record_id.assert_not_called()
+        self.validator.add_issue.assert_called_once_with(
+            "Row_2", "error", VC.MISSING_REQUIRED, CounselingConfig.REQUIRED_FIELDS[0], "Missing required Contact ID."
+        )
+
+    def test_validate_counseling_record_missing_last_name(self):
+        row = {
+            CounselingConfig.REQUIRED_FIELDS[0]: "C-124",
+            'First Name': 'John',
+            'Date': '2023-10-15'
+        }
+
+        result = validate_counseling_record(row, 3, self.validator)
+
+        self.assertTrue(result)
+        self.validator.set_current_record_id.assert_called_once_with("C-124")
+        self.validator.add_issue.assert_called_once_with(
+            "C-124", "warning", VC.MISSING_FIELD, "Last Name", "Missing Last Name."
+        )
+
+    def test_validate_counseling_record_invalid_date_format(self):
+        row = {
+            CounselingConfig.REQUIRED_FIELDS[0]: "C-125",
+            'Last Name': 'Doe',
+            'Date': 'invalid-date'
+        }
+
+        result = validate_counseling_record(row, 4, self.validator)
+
+        self.assertTrue(result)
+        self.validator.set_current_record_id.assert_called_once_with("C-125")
+        self.validator.add_issue.assert_called_once_with(
+            "C-125", "warning", VC.INVALID_FORMAT, "Date Counseled", "Invalid date format: invalid-date"
+        )
+
+    def test_validate_counseling_record_early_date(self):
+        row = {
+            CounselingConfig.REQUIRED_FIELDS[0]: "C-126",
+            'Last Name': 'Doe',
+            'Date': '2020-01-01'
+        }
+
+        result = validate_counseling_record(row, 5, self.validator)
+
+        self.assertTrue(result)
+        self.validator.set_current_record_id.assert_called_once_with("C-126")
+        self.validator.add_issue.assert_called_once_with(
+            "C-126", "warning", VC.INVALID_DATE, "Date Counseled", f"Date 2020-01-01 is before minimum of {CounselingConfig.MIN_COUNSELING_DATE}"
+        )
+
+    def test_validate_training_record_success(self):
+        event_id_col = TrainingConfig.COLUMN_MAPPING['event_id']
+        row = {
+            event_id_col: "T-999",
+            'Other': 'Data'
+        }
+
+        result = validate_training_record(row, 1, self.validator)
+
+        self.assertTrue(result)
+        self.validator.set_current_record_id.assert_called_once_with("T-999")
+        self.validator.add_issue.assert_not_called()
+
+    def test_validate_training_record_missing_id(self):
+        event_id_col = TrainingConfig.COLUMN_MAPPING['event_id']
+        row = {
+            'Other': 'Data'
+        }
+
+        result = validate_training_record(row, 2, self.validator)
+
+        self.assertFalse(result)
+        self.validator.set_current_record_id.assert_not_called()
+        self.validator.add_issue.assert_called_once_with(
+            "Row_2", "error", VC.MISSING_REQUIRED, event_id_col, "Missing required Class/Event ID."
+        )
+
+    def test_analyze_counseling_csv(self):
+        rows = [
+            {CounselingConfig.REQUIRED_FIELDS[0]: "C-1", 'Last Name': 'Doe', 'First Name': 'John', 'Date': '2023-10-15'},
+            {'Last Name': 'Smith', 'First Name': 'Alice'}, # missing id
+            {CounselingConfig.REQUIRED_FIELDS[0]: "C-3", 'First Name': 'Bob'}, # missing last name
+            {CounselingConfig.REQUIRED_FIELDS[0]: "C-4", 'Last Name': 'Brown', 'Date': 'invalid'}, # invalid date, missing first name
+        ]
+
+        analysis = analyze_counseling_csv(rows)
+
+        self.assertEqual(analysis['row_count'], 4)
+        self.assertEqual(analysis['missing_contact_id'], 1)
+        self.assertEqual(analysis['missing_names'], 2)
+        self.assertEqual(analysis['invalid_dates'], 1)
+
+    def test_analyze_training_csv(self):
+        event_id_col = TrainingConfig.COLUMN_MAPPING['event_id']
+        rows = [
+            {event_id_col: "T-1"},
+            {}, # missing event id
+            {event_id_col: "T-3"}
+        ]
+
+        analysis = analyze_training_csv(rows)
+
+        self.assertEqual(analysis['row_count'], 3)
+        self.assertEqual(analysis['missing_event_id'], 1)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/test_xml_validator.py b/tests/test_xml_validator.py
new file mode 100644
index 0000000..c59a1e4
--- /dev/null
+++ b/tests/test_xml_validator.py
@@ -0,0 +1,29 @@
+import unittest
+from unittest.mock import patch
+import sys
+import os
+import importlib
+
+# Add the src directory to the Python path so "xml-validator" is importable
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'src')))
+
+# Import using importlib because of the dash in the filename
+xml_validator = importlib.import_module("xml-validator")
+
+class TestValidateAgainstXsd(unittest.TestCase):
+
+    def test_validate_against_xsd_exception(self):
+        # Patch the name validate_against_xsd actually calls rather than lxml's global etree.parse
+        with patch.object(xml_validator, 'defused_parse') as mock_parse:
+            # Setup mock to raise an exception
+            mock_parse.side_effect = Exception("Test exception")
+
+            # Call the function
+            is_valid, errors = xml_validator.validate_against_xsd("dummy.xml", "dummy.xsd")
+
+            # Verify the exception was caught and returned correctly
+            self.assertFalse(is_valid)
+            self.assertEqual(errors, ["Validation error: Test exception"])
+
+if __name__ == '__main__':
+    unittest.main()