Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
pandas
pytest
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Development dependencies like pytest should be kept separate from production dependencies to avoid installing them in production environments. Please consider moving this to a separate file for development/test dependencies, such as requirements-dev.txt.

defusedxml
lxml
5 changes: 3 additions & 2 deletions src/xml-validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import sys
import xml.etree.ElementTree as ET
from lxml import etree
from defusedxml.lxml import parse as defused_parse
import logging # Keep standard logging import for levels like logging.INFO
import re

Expand All @@ -28,11 +29,11 @@ def validate_against_xsd(xml_file, xsd_file):
"""
try:
# Parse the XSD schema
xmlschema_doc = etree.parse(xsd_file)
xmlschema_doc = defused_parse(xsd_file)
xmlschema = etree.XMLSchema(xmlschema_doc)

# Parse the XML file
xml_doc = etree.parse(xml_file)
xml_doc = defused_parse(xml_file)

# Validate
is_valid = xmlschema.validate(xml_doc)
Expand Down
50 changes: 50 additions & 0 deletions tests/test_base_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import unittest
import os
import sys

# Add the project root to the Python path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from src.converters.base_converter import BaseConverter
from src.logging_util import ConversionLogger
from src.validation_report import ValidationTracker

class TestBaseConverter(unittest.TestCase):
    """Checks the abstract-base-class contract that these tests expect of ``BaseConverter``."""

    def setUp(self):
        # Build the two collaborators passed to every converter constructor below.
        # NOTE(review): log_to_file=False presumably keeps the tests from writing
        # log files - confirm against ConversionLogger.
        conversion_logger = ConversionLogger(
            "test_base", log_level="DEBUG", log_to_file=False
        )
        self.logger = conversion_logger.logger
        self.validator = ValidationTracker()

    def test_cannot_instantiate_abc(self):
        """Instantiating ``BaseConverter`` directly must raise ``TypeError``."""
        expected_message = "Can't instantiate abstract class BaseConverter"
        with self.assertRaisesRegex(TypeError, expected_message):
            BaseConverter(self.logger, self.validator)

    def test_subclass_must_implement_convert(self):
        """A subclass that does not define ``convert`` stays abstract."""

        class IncompleteConverter(BaseConverter):
            """Deliberately leaves ``convert`` unimplemented."""

        expected_message = "Can't instantiate abstract class IncompleteConverter"
        with self.assertRaisesRegex(TypeError, expected_message):
            IncompleteConverter(self.logger, self.validator)

    def test_subclass_with_convert_can_be_instantiated(self):
        """A subclass that defines ``convert`` can be constructed and keeps its collaborators."""

        class CompleteConverter(BaseConverter):
            def convert(self, input_path: str, output_path: str):
                pass

        converter = CompleteConverter(self.logger, self.validator)

        # The instance must be both the concrete type and a BaseConverter.
        for expected_type in (CompleteConverter, BaseConverter):
            self.assertIsInstance(converter, expected_type)

        # The constructor arguments are expected back as attributes.
        self.assertEqual(converter.logger, self.logger)
        self.assertEqual(converter.validator, self.validator)

if __name__ == '__main__':
unittest.main()
21 changes: 21 additions & 0 deletions tests/test_data_cleaning.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,27 @@ def test_format_date_output_format_and_default(self):
self.assertEqual(format_date("2023-1-1"), "2023-01-01") # Check zero padding
self.assertEqual(format_date("bad", default_return="---"), "---")

def test_format_date_value_error_path(self):
    """Check how ``format_date`` behaves when one or all input formats fail to parse.

    Each case is ``(raw value, input_formats, expected result)``. An empty
    expected string means no format produced a date.
    NOTE(review): the failures are presumably ValueErrors raised while
    parsing - format_date itself is not visible here, confirm.
    """
    cases = (
        # Valid date that does not match the first format; the second one is used.
        ("10/26/2023", ["%Y-%m-%d", "%m/%d/%Y"], "2023-10-26"),
        # Shape matches, but Feb 29 does not exist in 2023 (not a leap year).
        ("2023-02-29", ["%Y-%m-%d"], ""),
        # Leap-day date: first format fails, second succeeds.
        ("2024-02-29", ["%m/%d/%Y", "%Y-%m-%d"], "2024-02-29"),
        # Month 13: every format is exhausted without success.
        ("2023-13-01", ["%Y-%m-%d", "%m/%d/%Y"], ""),
    )
    for raw_value, formats, expected in cases:
        self.assertEqual(format_date(raw_value, input_formats=formats), expected)

def test_format_date_regex_fallback(self):
    """Check the fallback used when the value matches none of the given formats.

    Both inputs use dashes while the only supplied format uses slashes.
    NOTE(review): the fallback is presumably regex-based, as the test name
    says - format_date is not visible here, confirm.
    """
    # Missing zero-padding is expected to be repaired by the fallback.
    padded = format_date("2023-1-1", input_formats=["%Y/%m/%d"])
    self.assertEqual(padded, "2023-01-01")

    # Month 30 is not a valid date element, so the fallback must give up too.
    rejected = format_date("2023-30-30", input_formats=["%Y/%m/%d"])
    self.assertEqual(rejected, "")

class TestStandardizeStateName(unittest.TestCase):
# Using DEFAULT_VALID_STATES from data_cleaning for some tests
# These are the states the function itself knows about if no list is passed
Expand Down
152 changes: 152 additions & 0 deletions tests/test_data_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
import unittest
from unittest.mock import MagicMock
import sys
import os

# Add the project root to the Python path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from src.data_validation import (
validate_counseling_record,
validate_training_record,
analyze_counseling_csv,
analyze_training_csv
)
from src.config import ValidationCategory as VC, CounselingConfig, TrainingConfig

class TestDataValidation(unittest.TestCase):
    """Tests for the record validators and CSV analyzers imported from ``src.data_validation``.

    The validation tracker is replaced by a ``MagicMock`` so each test can
    check exactly which calls the code under test made on it.
    """

    def setUp(self):
        # A fresh mock per test keeps call counts independent between tests.
        self.validator = MagicMock()

    def test_validate_counseling_record_success(self):
        """A record with ID, both names and a date passes and reports no issue."""
        contact_id_col = CounselingConfig.REQUIRED_FIELDS[0]
        record = {
            contact_id_col: "C-123",
            'Last Name': 'Doe',
            'First Name': 'John',
            'Date': '2023-10-15',
        }

        outcome = validate_counseling_record(record, 1, self.validator)

        self.assertTrue(outcome)
        self.validator.set_current_record_id.assert_called_once_with("C-123")
        self.validator.add_issue.assert_not_called()

    def test_validate_counseling_record_missing_id(self):
        """Without the ID column the record is rejected and an error is logged under ``Row_<n>``."""
        contact_id_col = CounselingConfig.REQUIRED_FIELDS[0]
        record = {
            'Last Name': 'Doe',
            'First Name': 'John',
            'Date': '2023-10-15',
        }

        outcome = validate_counseling_record(record, 2, self.validator)

        self.assertFalse(outcome)
        self.validator.set_current_record_id.assert_not_called()
        self.validator.add_issue.assert_called_once_with(
            "Row_2",
            "error",
            VC.MISSING_REQUIRED,
            contact_id_col,
            "Missing required Contact ID.",
        )

    def test_validate_counseling_record_missing_last_name(self):
        """A missing last name yields one warning but the record still passes."""
        contact_id_col = CounselingConfig.REQUIRED_FIELDS[0]
        record = {
            contact_id_col: "C-124",
            'First Name': 'John',
            'Date': '2023-10-15',
        }

        outcome = validate_counseling_record(record, 3, self.validator)

        self.assertTrue(outcome)
        self.validator.set_current_record_id.assert_called_once_with("C-124")
        self.validator.add_issue.assert_called_once_with(
            "C-124",
            "warning",
            VC.MISSING_FIELD,
            "Last Name",
            "Missing Last Name.",
        )

    def test_validate_counseling_record_invalid_date_format(self):
        """An unparseable date yields one format warning but the record still passes."""
        contact_id_col = CounselingConfig.REQUIRED_FIELDS[0]
        record = {
            contact_id_col: "C-125",
            'Last Name': 'Doe',
            'Date': 'invalid-date',
        }

        outcome = validate_counseling_record(record, 4, self.validator)

        self.assertTrue(outcome)
        self.validator.set_current_record_id.assert_called_once_with("C-125")
        self.validator.add_issue.assert_called_once_with(
            "C-125",
            "warning",
            VC.INVALID_FORMAT,
            "Date Counseled",
            "Invalid date format: invalid-date",
        )

    def test_validate_counseling_record_early_date(self):
        """A date before the configured minimum yields one date warning."""
        contact_id_col = CounselingConfig.REQUIRED_FIELDS[0]
        record = {
            contact_id_col: "C-126",
            'Last Name': 'Doe',
            'Date': '2020-01-01',
        }

        outcome = validate_counseling_record(record, 5, self.validator)

        # The expected text embeds the configured minimum date.
        expected_message = f"Date 2020-01-01 is before minimum of {CounselingConfig.MIN_COUNSELING_DATE}"
        self.assertTrue(outcome)
        self.validator.set_current_record_id.assert_called_once_with("C-126")
        self.validator.add_issue.assert_called_once_with(
            "C-126",
            "warning",
            VC.INVALID_DATE,
            "Date Counseled",
            expected_message,
        )

    def test_validate_training_record_success(self):
        """A training record carrying the event-ID column passes and reports no issue."""
        event_id_col = TrainingConfig.COLUMN_MAPPING['event_id']
        record = {
            event_id_col: "T-999",
            'Other': 'Data',
        }

        outcome = validate_training_record(record, 1, self.validator)

        self.assertTrue(outcome)
        self.validator.set_current_record_id.assert_called_once_with("T-999")
        self.validator.add_issue.assert_not_called()

    def test_validate_training_record_missing_id(self):
        """Without the event-ID column the record is rejected and an error is logged under ``Row_<n>``."""
        event_id_col = TrainingConfig.COLUMN_MAPPING['event_id']
        record = {
            'Other': 'Data',
        }

        outcome = validate_training_record(record, 2, self.validator)

        self.assertFalse(outcome)
        self.validator.set_current_record_id.assert_not_called()
        self.validator.add_issue.assert_called_once_with(
            "Row_2",
            "error",
            VC.MISSING_REQUIRED,
            event_id_col,
            "Missing required Class/Event ID.",
        )

    def test_analyze_counseling_csv(self):
        """The counseling analysis returns the expected per-category counts."""
        contact_id_col = CounselingConfig.REQUIRED_FIELDS[0]
        records = [
            {contact_id_col: "C-1", 'Last Name': 'Doe', 'First Name': 'John', 'Date': '2023-10-15'},
            {'Last Name': 'Smith', 'First Name': 'Alice'},  # missing id
            {contact_id_col: "C-3", 'First Name': 'Bob'},  # missing last name
            {contact_id_col: "C-4", 'Last Name': 'Brown', 'Date': 'invalid'},  # invalid date, missing first name
        ]

        summary = analyze_counseling_csv(records)

        # Checked in this order so the first mismatch reported is unchanged.
        expected_counts = (
            ('row_count', 4),
            ('missing_contact_id', 1),
            ('missing_names', 2),
            ('invalid_dates', 1),
        )
        for key, count in expected_counts:
            self.assertEqual(summary[key], count)

    def test_analyze_training_csv(self):
        """The training analysis counts all rows and the rows lacking an event ID."""
        event_id_col = TrainingConfig.COLUMN_MAPPING['event_id']
        records = [
            {event_id_col: "T-1"},
            {},  # missing event id
            {event_id_col: "T-3"},
        ]

        summary = analyze_training_csv(records)

        for key, count in (('row_count', 3), ('missing_event_id', 1)):
            self.assertEqual(summary[key], count)

if __name__ == '__main__':
unittest.main()
29 changes: 29 additions & 0 deletions tests/test_xml_validator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import unittest
from unittest.mock import patch
import sys
import os
import importlib

# Add the src directory to the Python path so the dash-named module can be imported by name
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'src')))

# Import using importlib because of the dash in the filename
xml_validator = importlib.import_module("xml-validator")

class TestValidateAgainstXsd(unittest.TestCase):
    """Tests for ``validate_against_xsd`` in the dash-named ``xml-validator`` module."""

    def test_validate_against_xsd_exception(self):
        """A parser failure is returned as ``(False, [message])`` rather than raised."""
        # Patch the name validate_against_xsd actually calls: the module binds
        # defusedxml's parser as ``defused_parse``. Patching ``etree.parse``
        # would only take effect if defusedxml happens to delegate to it.
        with patch.object(xml_validator, 'defused_parse') as mock_parse:
            # Make the very first parse attempt fail.
            mock_parse.side_effect = Exception("Test exception")

            is_valid, errors = xml_validator.validate_against_xsd("dummy.xml", "dummy.xsd")

        # The schema is parsed before the document, so the only call made
        # before the failure is the one for the XSD path.
        mock_parse.assert_called_once_with("dummy.xsd")

        # The exception must be caught and reported, not propagated.
        self.assertFalse(is_valid)
        self.assertEqual(errors, ["Validation error: Test exception"])

if __name__ == '__main__':
unittest.main()