From 21cdf08078257e43a337357d7b138edebb9f1882 Mon Sep 17 00:00:00 2001 From: Hoss Date: Fri, 30 Jan 2026 16:16:04 +0800 Subject: [PATCH 1/8] feat: add parsed data structures for X12 segments and loops [why] Need structured representations for parsed X12 data before building parsers. These are the core data types the entire parsing system returns. [how] - ParsedElement/ParsedComponent for element-level data with validation - ParsedCompositeElement for composite elements containing components - ParsedSegment for fully parsed segments with to_dict/to_json/is_valid - ParsedLoop for hierarchical loop assembly by consumers - Strict X12 numeric validation (rejects inf/nan/scientific notation) --- pyx12lib/core/parsed.py | 165 ++++++++++++++++++++++++++++++++++++++ tests/test_parsed_loop.py | 113 ++++++++++++++++++++++++++ 2 files changed, 278 insertions(+) create mode 100644 pyx12lib/core/parsed.py create mode 100644 tests/test_parsed_loop.py diff --git a/pyx12lib/core/parsed.py b/pyx12lib/core/parsed.py new file mode 100644 index 0000000..c321133 --- /dev/null +++ b/pyx12lib/core/parsed.py @@ -0,0 +1,165 @@ +import json + +from pyx12lib.core.grammar.element import ( + NotUsedElement, + USAGE_MANDATORY, + ELEMENT_TYPE_DECIMAL, + ELEMENT_TYPE_NUMERIC, +) + + +class ParsedElement: + """Represents a parsed element with its value and grammar metadata.""" + + def __init__(self, grammar, value): + self._grammar = grammar + self._value = value + + @property + def grammar(self): + return self._grammar + + @property + def value(self): + return self._value + + def to_dict(self): + return { + 'reference_designator': self._grammar.reference_designator, + 'name': self._grammar.name, + 'value': self._value, + 'type': self._grammar.type, + 'usage': self._grammar.usage, + } + + def is_valid(self): + ele = self._grammar + value = self._value + + if not isinstance(value, str): + return False + + if ele.usage == USAGE_MANDATORY and value == '': + return False + + if value != '': + if not 
(ele.minimum <= len(value) <= ele.maximum): + return False + + if ele.type == ELEMENT_TYPE_DECIMAL: + stripped_sign = value.lstrip('-') + if not stripped_sign or not stripped_sign.replace('.', '', 1).isdigit(): + return False + elif ele.type.startswith(ELEMENT_TYPE_NUMERIC): + stripped_sign = value.lstrip('-') + if not stripped_sign or not stripped_sign.isdigit(): + return False + + return True + + def is_empty(self): + return not bool(self._value) + + +class ParsedComponent(ParsedElement): + """Represents a parsed component within a composite element.""" + pass + + +class ParsedCompositeElement: + """Represents a parsed composite element containing components.""" + + def __init__(self, grammar, components): + self._grammar = grammar + self._components = components + + @property + def grammar(self): + return self._grammar + + @property + def components(self): + return self._components + + def to_dict(self): + return { + 'reference_designator': self._grammar.reference_designator, + 'name': self._grammar.name, + 'components': [c.to_dict() for c in self._components], + } + + def is_valid(self): + if self.is_empty(): + if self._grammar.usage == USAGE_MANDATORY: + return False + return True + return all(c.is_valid() for c in self._components) + + def is_empty(self): + return all(c.is_empty() for c in self._components) + + +class ParsedSegment: + """Represents a fully parsed segment with all its elements.""" + + def __init__(self, grammar, elements): + self._grammar = grammar + self._elements = elements + + @property + def grammar(self): + return self._grammar + + @property + def elements(self): + return self._elements + + @property + def segment_id(self): + return self._grammar.segment_id + + def to_dict(self): + return { + 'segment_id': self._grammar.segment_id, + 'elements': [e.to_dict() for e in self._elements], + } + + def to_json(self, indent=2): + return json.dumps(self.to_dict(), indent=indent) + + def is_valid(self): + if self.is_empty(): + if 
self._grammar.usage == USAGE_MANDATORY: + return False + return True + return all(e.is_valid() for e in self._elements) + + def is_empty(self): + return all(e.is_empty() for e in self._elements) + + +class ParsedLoop: + """Represents a hierarchical loop of parsed segments.""" + + def __init__(self, loop_id, segments=None): + self.loop_id = loop_id + self.segments = segments or [] + self.child_loops = [] + + def add_segment(self, segment): + self.segments.append(segment) + + def add_child_loop(self, loop): + self.child_loops.append(loop) + + def to_dict(self): + result = { + 'loop_id': self.loop_id, + 'segments': [s.to_dict() for s in self.segments], + } + if self.child_loops: + result['child_loops'] = [c.to_dict() for c in self.child_loops] + return result + + def to_json(self, indent=2): + return json.dumps(self.to_dict(), indent=indent) diff --git a/tests/test_parsed_loop.py b/tests/test_parsed_loop.py new file mode 100644 index 0000000..4659c7b --- /dev/null +++ b/tests/test_parsed_loop.py @@ -0,0 +1,113 @@ +import json +from unittest import TestCase + +from pyx12lib.core.grammar import BaseSegment, Element, element, segment +from pyx12lib.core.parsed import ParsedLoop +from pyx12lib.core.parser import SegmentParser + + +class _StubSegment(BaseSegment): + segment_id = "STB" + usage = segment.USAGE_MANDATORY + max_use = 1 + elements = ( + Element( + reference_designator="STB01", + name="Stub Element", + usage=element.USAGE_MANDATORY, + element_type=element.ELEMENT_TYPE_STRING, + minimum=1, + maximum=10, + ), + ) + + +class TestParsedLoop(TestCase): + def _make_segment(self, value): + parser = SegmentParser("STB*{}~".format(value), grammar=_StubSegment) + return parser.parse() + + def test_loop_basic(self): + # arrange + seg1 = self._make_segment("A") + seg2 = self._make_segment("B") + + # action + loop = ParsedLoop(loop_id="TEST_LOOP", segments=[seg1, seg2]) + result = loop.to_dict() + + # assert + self.assertEqual(result['loop_id'], 'TEST_LOOP') + 
self.assertEqual(len(result['segments']), 2) + self.assertNotIn('child_loops', result) + + def test_loop_with_children(self): + # arrange + parent_seg = self._make_segment("PARENT") + child_seg = self._make_segment("CHILD") + + parent_loop = ParsedLoop(loop_id="PARENT", segments=[parent_seg]) + child_loop = ParsedLoop(loop_id="CHILD", segments=[child_seg]) + parent_loop.add_child_loop(child_loop) + + # action + result = parent_loop.to_dict() + + # assert + self.assertIn('child_loops', result) + self.assertEqual(len(result['child_loops']), 1) + self.assertEqual(result['child_loops'][0]['loop_id'], 'CHILD') + + def test_add_segment(self): + # arrange + loop = ParsedLoop(loop_id="TEST") + seg = self._make_segment("ADDED") + + # action + loop.add_segment(seg) + result = loop.to_dict() + + # assert + self.assertEqual(len(result['segments']), 1) + + def test_to_json(self): + # arrange + seg = self._make_segment("JSON") + loop = ParsedLoop(loop_id="JSON_LOOP", segments=[seg]) + + # action + json_output = loop.to_json() + data = json.loads(json_output) + + # assert + self.assertEqual(data['loop_id'], 'JSON_LOOP') + + def test_nested_loops_to_dict(self): + # arrange + root = ParsedLoop(loop_id="ROOT") + root.add_segment(self._make_segment("R1")) + + mid = ParsedLoop(loop_id="MID") + mid.add_segment(self._make_segment("M1")) + + leaf = ParsedLoop(loop_id="LEAF") + leaf.add_segment(self._make_segment("L1")) + + mid.add_child_loop(leaf) + root.add_child_loop(mid) + + # action + result = root.to_dict() + + # assert + self.assertEqual(result['loop_id'], 'ROOT') + self.assertEqual(result['child_loops'][0]['loop_id'], 'MID') + self.assertEqual(result['child_loops'][0]['child_loops'][0]['loop_id'], 'LEAF') + + def test_empty_loop(self): + loop = ParsedLoop(loop_id="EMPTY") + result = loop.to_dict() + + self.assertEqual(result['loop_id'], 'EMPTY') + self.assertEqual(len(result['segments']), 0) + self.assertNotIn('child_loops', result) From 07c46b498dc2265bbc20d703b5c688d24d3fd485 
Mon Sep 17 00:00:00 2001 From: Hoss Date: Fri, 30 Jan 2026 16:16:10 +0800 Subject: [PATCH 2/8] feat: add delimiter detection and grammar registry [why] X12 documents can use non-standard delimiters defined in the ISA header. Parsers also need a way to resolve segment IDs to grammar definitions. [how] - detect_delimiters() extracts element/component/segment delimiters from ISA - Delimiters value object for passing delimiter configuration - GrammarRegistry maps segment IDs to grammar classes - create_default_registry() pre-registers ISA/IEA/GS/GE/ST/SE --- pyx12lib/core/delimiters.py | 94 ++++++++++++++++++++++ pyx12lib/core/registry.py | 38 +++++++++ tests/test_delimiters.py | 155 ++++++++++++++++++++++++++++++++++++ tests/test_registry.py | 67 ++++++++++++++++ 4 files changed, 354 insertions(+) create mode 100644 pyx12lib/core/delimiters.py create mode 100644 pyx12lib/core/registry.py create mode 100644 tests/test_delimiters.py create mode 100644 tests/test_registry.py diff --git a/pyx12lib/core/delimiters.py b/pyx12lib/core/delimiters.py new file mode 100644 index 0000000..5be3dd0 --- /dev/null +++ b/pyx12lib/core/delimiters.py @@ -0,0 +1,94 @@ +"""Delimiter detection from ISA segment header. 
+ +In X12, the ISA segment defines the delimiters used throughout the interchange: +- Character at position 3 is the element delimiter (typically '*') +- ISA16 value is the component separator (typically '^') +- Character immediately after the ISA segment is the segment terminator (typically '~') +""" + +from pyx12lib.core.grammar.segment import ELEMENT_DELIMITER, SEGMENT_TERMINATOR +from pyx12lib.core.grammar.element import COMPONENT_DELIMITER + +# ISA segment is always exactly 106 characters (including the segment terminator) +ISA_SEGMENT_LENGTH = 106 + + +class Delimiters(object): + """Container for X12 delimiter characters.""" + + def __init__(self, element_delimiter, component_delimiter, segment_terminator): + self.element_delimiter = element_delimiter + self.component_delimiter = component_delimiter + self.segment_terminator = segment_terminator + + def __eq__(self, other): + if not isinstance(other, Delimiters): + return NotImplemented + return ( + self.element_delimiter == other.element_delimiter + and self.component_delimiter == other.component_delimiter + and self.segment_terminator == other.segment_terminator + ) + + def __repr__(self): + return "Delimiters(element='{}', component='{}', terminator='{}')".format( + self.element_delimiter, self.component_delimiter, self.segment_terminator + ) + + +DEFAULT_DELIMITERS = Delimiters( + element_delimiter=ELEMENT_DELIMITER, + component_delimiter=COMPONENT_DELIMITER, + segment_terminator=SEGMENT_TERMINATOR, +) + + +def detect_delimiters(raw_x12): + """Detect delimiters from the ISA segment header. + + Args: + raw_x12: Raw X12 string starting with ISA. + + Returns: + Delimiters object with the detected characters. + + Raises: + ValueError: If the string doesn't start with ISA or is too short. 
+ """ + stripped = raw_x12.lstrip() + if not stripped.startswith('ISA'): + raise ValueError("X12 data must start with ISA segment for delimiter detection") + + if len(stripped) < ISA_SEGMENT_LENGTH: + raise ValueError( + "ISA segment requires at least {} characters, got {}".format( + ISA_SEGMENT_LENGTH, len(stripped) + ) + ) + + element_delimiter = stripped[3] + segment_terminator = stripped[ISA_SEGMENT_LENGTH - 1] + + # ISA16 is the component separator. Count 16 element delimiters to find it. + # ISA has exactly 16 elements, so there are 16 element delimiters. + # The component separator is the value of the 16th element (ISA16), + # which is between the 16th delimiter and the segment terminator. + delimiter_count = 0 + for i, ch in enumerate(stripped): + if ch == element_delimiter: + delimiter_count += 1 + if delimiter_count == 16: + if i + 1 >= len(stripped): + raise ValueError( + "ISA segment truncated after 16th element delimiter" + ) + component_delimiter = stripped[i + 1] + break + else: + raise ValueError("Could not find 16 element delimiters in ISA segment") + + return Delimiters( + element_delimiter=element_delimiter, + component_delimiter=component_delimiter, + segment_terminator=segment_terminator, + ) diff --git a/pyx12lib/core/registry.py b/pyx12lib/core/registry.py new file mode 100644 index 0000000..0badcde --- /dev/null +++ b/pyx12lib/core/registry.py @@ -0,0 +1,38 @@ +from pyx12lib.common.envelope.grammar import ( + IsaSegment, IeaSegment, + GsSegment, GeSegment, + StSegment, SeSegment, +) + + +class GrammarRegistry(object): + """Registry mapping segment IDs to their grammar definitions.""" + + def __init__(self): + self._registry = {} + + def register(self, segment_grammar): + segment_id = segment_grammar.segment_id + if segment_id in self._registry: + raise ValueError( + "Segment '{}' already registered".format(segment_id) + ) + self._registry[segment_id] = segment_grammar + + def get(self, segment_id): + return self._registry.get(segment_id) + + 
def has(self, segment_id): + return segment_id in self._registry + + +def create_default_registry(): + """Create a registry with standard envelope segments pre-registered.""" + registry = GrammarRegistry() + registry.register(IsaSegment) + registry.register(IeaSegment) + registry.register(GsSegment) + registry.register(GeSegment) + registry.register(StSegment) + registry.register(SeSegment) + return registry diff --git a/tests/test_delimiters.py b/tests/test_delimiters.py new file mode 100644 index 0000000..99b0f50 --- /dev/null +++ b/tests/test_delimiters.py @@ -0,0 +1,155 @@ +from unittest import TestCase + +from pyx12lib.core.delimiters import detect_delimiters, Delimiters, DEFAULT_DELIMITERS + + +class TestDetectDelimiters(TestCase): + def test_standard_delimiters(self): + # arrange: standard ISA with * ^ ~ + x12_string = ( + "ISA*00* *00* *ZZ*SENDER " + "*ZZ*RECEIVER *210101*1200*^*00501*000000001*0*P*>~" + ) + + # action + result = detect_delimiters(x12_string) + + # assert + self.assertEqual(result.element_delimiter, '*') + self.assertEqual(result.component_delimiter, '>') + self.assertEqual(result.segment_terminator, '~') + + def test_non_standard_element_delimiter(self): + # arrange: using | as element delimiter + x12_string = ( + "ISA|00| |00| |ZZ|SENDER " + "|ZZ|RECEIVER |210101|1200|^|00501|000000001|0|P|>~" + ) + + # action + result = detect_delimiters(x12_string) + + # assert + self.assertEqual(result.element_delimiter, '|') + self.assertEqual(result.component_delimiter, '>') + self.assertEqual(result.segment_terminator, '~') + + def test_non_standard_segment_terminator(self): + # arrange: using \n as segment terminator + x12_string = ( + "ISA*00* *00* *ZZ*SENDER " + "*ZZ*RECEIVER *210101*1200*^*00501*000000001*0*P*>\n" + ) + + # action + result = detect_delimiters(x12_string) + + # assert + self.assertEqual(result.element_delimiter, '*') + self.assertEqual(result.segment_terminator, '\n') + + def test_non_standard_component_delimiter(self): + # 
arrange: using : as component separator (ISA16) + x12_string = ( + "ISA*00* *00* *ZZ*SENDER " + "*ZZ*RECEIVER *210101*1200*^*00501*000000001*0*P*:~" + ) + + # action + result = detect_delimiters(x12_string) + + # assert + self.assertEqual(result.component_delimiter, ':') + + def test_leading_whitespace_stripped(self): + # arrange + x12_string = ( + " \n ISA*00* *00* *ZZ*SENDER " + "*ZZ*RECEIVER *210101*1200*^*00501*000000001*0*P*>~" + ) + + # action + result = detect_delimiters(x12_string) + + # assert + self.assertEqual(result.element_delimiter, '*') + + def test_no_isa_raises_error(self): + # arrange + x12_string = "GS*FA*SENDER*RECEIVER~" + + # action & assert + with self.assertRaises(ValueError) as ctx: + detect_delimiters(x12_string) + self.assertIn("ISA", str(ctx.exception)) + + def test_too_short_raises_error(self): + # arrange + x12_string = "ISA*00*short" + + # action & assert + with self.assertRaises(ValueError) as ctx: + detect_delimiters(x12_string) + self.assertIn("106", str(ctx.exception)) + + def test_default_delimiters_constant(self): + self.assertEqual(DEFAULT_DELIMITERS.element_delimiter, '*') + self.assertEqual(DEFAULT_DELIMITERS.component_delimiter, '^') + self.assertEqual(DEFAULT_DELIMITERS.segment_terminator, '~') + + def test_delimiters_equality(self): + d1 = Delimiters('*', '^', '~') + d2 = Delimiters('*', '^', '~') + d3 = Delimiters('|', '^', '~') + + self.assertEqual(d1, d2) + self.assertNotEqual(d1, d3) + + def test_delimiters_repr(self): + d = Delimiters('*', '^', '~') + self.assertIn('*', repr(d)) + self.assertIn('^', repr(d)) + self.assertIn('~', repr(d)) + + +class TestX12ParserWithAutoDetection(TestCase): + """Test that X12Parser uses auto-detected delimiters.""" + + def test_auto_detect_with_standard_isa(self): + from pyx12lib.core.parser import X12Parser + + x12_string = ( + "ISA*00* *00* *ZZ*SENDER " + "*ZZ*RECEIVER *210101*1200*^*00501*000000001*0*P*>~" + "GS*FA*SENDER*RECEIVER*20210101*1200*1*X*005010~" + "ST*997*0001~" + 
"SE*1*0001~" + "GE*1*1~" + "IEA*1*000000001~" + ) + + parser = X12Parser(x12_string) + result = parser.to_dict() + + self.assertEqual(len(result['segments']), 6) + + def test_auto_detect_disabled_uses_defaults(self): + from pyx12lib.core.parser import X12Parser + + x12_string = "ST*997*0001~SE*1*0001~" + + parser = X12Parser(x12_string, auto_detect_delimiters=False) + result = parser.to_dict() + + self.assertEqual(len(result['segments']), 2) + + def test_no_isa_falls_through_to_defaults(self): + from pyx12lib.core.parser import X12Parser + + # No ISA present, auto_detect=True but should use defaults gracefully + x12_string = "ST*997*0001~SE*1*0001~" + + parser = X12Parser(x12_string, auto_detect_delimiters=True) + result = parser.to_dict() + + self.assertEqual(len(result['segments']), 2) diff --git a/tests/test_registry.py b/tests/test_registry.py new file mode 100644 index 0000000..4b7b78d --- /dev/null +++ b/tests/test_registry.py @@ -0,0 +1,67 @@ +from unittest import TestCase + +from pyx12lib.core.grammar import BaseSegment, Element, element, segment +from pyx12lib.core.registry import GrammarRegistry, create_default_registry + + +class _CustomSegment(BaseSegment): + segment_id = "CUS" + usage = segment.USAGE_MANDATORY + max_use = 1 + elements = ( + Element( + reference_designator="CUS01", + name="Custom Element", + usage=element.USAGE_MANDATORY, + element_type=element.ELEMENT_TYPE_STRING, + minimum=1, + maximum=10, + ), + ) + + +class TestGrammarRegistry(TestCase): + def test_register_and_get(self): + registry = GrammarRegistry() + registry.register(_CustomSegment) + + result = registry.get("CUS") + self.assertIs(result, _CustomSegment) + + def test_get_unknown_returns_none(self): + registry = GrammarRegistry() + + result = registry.get("UNKNOWN") + self.assertIsNone(result) + + def test_has(self): + registry = GrammarRegistry() + registry.register(_CustomSegment) + + self.assertTrue(registry.has("CUS")) + self.assertFalse(registry.has("NOPE")) + + def 
test_duplicate_register_raises_error(self): + registry = GrammarRegistry() + registry.register(_CustomSegment) + + with self.assertRaises(ValueError) as ctx: + registry.register(_CustomSegment) + self.assertIn("CUS", str(ctx.exception)) + + def test_default_registry_has_envelope_segments(self): + registry = create_default_registry() + + self.assertTrue(registry.has("ISA")) + self.assertTrue(registry.has("IEA")) + self.assertTrue(registry.has("GS")) + self.assertTrue(registry.has("GE")) + self.assertTrue(registry.has("ST")) + self.assertTrue(registry.has("SE")) + + def test_default_registry_returns_correct_grammar(self): + from pyx12lib.common.envelope.grammar import IsaSegment, StSegment + registry = create_default_registry() + + self.assertIs(registry.get("ISA"), IsaSegment) + self.assertIs(registry.get("ST"), StSegment) From c3ecd44d4443fd178404006861f1ffaa22ab7361 Mon Sep 17 00:00:00 2001 From: Hoss Date: Fri, 30 Jan 2026 16:16:18 +0800 Subject: [PATCH 3/8] feat: add SegmentParser and X12Parser [why] Enable parsing X12 EDI strings into structured Python dicts and JSON, complementing the existing rendering (Python -> X12) capability. 
[how] - SegmentParser parses a single segment against a grammar definition - X12Parser parses complete multi-segment documents via GrammarRegistry - Auto-detects delimiters from ISA header when present - Skips unknown segments not in the registry (lenient by design) - parse_x12() and parse_x12_to_json() convenience functions in __init__ - Handles composites, NotUsed elements, and trailing empty elements --- pyx12lib/__init__.py | 34 ++++ pyx12lib/core/parser.py | 163 ++++++++++++++++ tests/test_composite_parser.py | 329 +++++++++++++++++++++++++++++++++ tests/test_integration.py | 257 +++++++++++++++++++++++++ tests/test_segment_parser.py | 180 ++++++++++++++++++ tests/test_x12_parser.py | 160 ++++++++++++++++ 6 files changed, 1123 insertions(+) create mode 100644 pyx12lib/core/parser.py create mode 100644 tests/test_composite_parser.py create mode 100644 tests/test_integration.py create mode 100644 tests/test_segment_parser.py create mode 100644 tests/test_x12_parser.py diff --git a/pyx12lib/__init__.py b/pyx12lib/__init__.py index e69de29..c868e55 100644 --- a/pyx12lib/__init__.py +++ b/pyx12lib/__init__.py @@ -0,0 +1,34 @@ +from pyx12lib.core.parser import SegmentParser, X12Parser +from pyx12lib.core.registry import GrammarRegistry, create_default_registry +from pyx12lib.core.delimiters import detect_delimiters, Delimiters + + +def parse_x12(x12_string, registry=None): + """Parse an X12 string into a Python dict. + + Args: + x12_string: Raw X12 EDI string. + registry: Optional GrammarRegistry. Uses default envelope + segments (ISA/IEA/GS/GE/ST/SE) if not provided. + + Returns: + Dict with 'segments' key containing list of parsed segment dicts. + """ + parser = X12Parser(x12_string, registry=registry) + return parser.to_dict() + + +def parse_x12_to_json(x12_string, indent=2, registry=None): + """Parse an X12 string into a JSON string. + + Args: + x12_string: Raw X12 EDI string. + indent: JSON indentation level. None for compact output. 
+ registry: Optional GrammarRegistry. Uses default envelope + segments (ISA/IEA/GS/GE/ST/SE) if not provided. + + Returns: + JSON string representation of the parsed X12 data. + """ + parser = X12Parser(x12_string, registry=registry) + return parser.to_json(indent=indent) diff --git a/pyx12lib/core/parser.py b/pyx12lib/core/parser.py new file mode 100644 index 0000000..0eb417c --- /dev/null +++ b/pyx12lib/core/parser.py @@ -0,0 +1,163 @@ +import json + +from pyx12lib.core.parsed import ( + ParsedElement, + ParsedComponent, + ParsedCompositeElement, + ParsedSegment, +) +from pyx12lib.core.grammar.element import ( + CompositeElement, + NotUsedElement, + COMPONENT_DELIMITER, +) +from pyx12lib.core.grammar.segment import ( + ELEMENT_DELIMITER, + SEGMENT_TERMINATOR, +) +from pyx12lib.core.delimiters import detect_delimiters +from pyx12lib.core.registry import create_default_registry + + +class BaseSegmentParser(object): + def __init__( + self, + segment_terminator=SEGMENT_TERMINATOR, + element_delimiter=ELEMENT_DELIMITER, + component_delimiter=COMPONENT_DELIMITER, + ): + self._segment_terminator = segment_terminator + self._element_delimiter = element_delimiter + self._component_delimiter = component_delimiter + + def parse(self): + raise NotImplementedError + + +class SegmentParser(BaseSegmentParser): + """Parse a single X12 segment string using a grammar definition.""" + + def __init__(self, segment_string, grammar, **kwargs): + super(SegmentParser, self).__init__(**kwargs) + self._segment_string = segment_string.rstrip(self._segment_terminator) + self._grammar = grammar + self._parsed_segment = None + + def parse(self): + if self._parsed_segment is not None: + return self._parsed_segment + + parts = self._segment_string.split(self._element_delimiter) + segment_id = parts[0] + + if segment_id != self._grammar.segment_id: + raise ValueError( + "Segment ID mismatch: expected '{}', got '{}'".format( + self._grammar.segment_id, segment_id + ) + ) + + element_values = 
parts[1:] + elements = [] + + for i, ele_grammar in enumerate(self._grammar.elements): + if isinstance(ele_grammar, NotUsedElement): + continue + + value = element_values[i] if i < len(element_values) else '' + + if isinstance(ele_grammar, CompositeElement): + parsed = self._parse_composite(ele_grammar, value) + else: + parsed = ParsedElement(grammar=ele_grammar, value=value) + + elements.append(parsed) + + self._parsed_segment = ParsedSegment( + grammar=self._grammar, + elements=elements, + ) + + return self._parsed_segment + + def _parse_composite(self, grammar, value): + component_values = value.split(self._component_delimiter) if value else [] + components = [] + + for i, comp_grammar in enumerate(grammar.components): + if isinstance(comp_grammar, NotUsedElement): + continue + comp_value = component_values[i] if i < len(component_values) else '' + components.append(ParsedComponent(grammar=comp_grammar, value=comp_value)) + + return ParsedCompositeElement(grammar=grammar, components=components) + + def to_dict(self): + return self.parse().to_dict() + + def to_json(self, indent=2): + return self.parse().to_json(indent=indent) + + +class X12Parser(BaseSegmentParser): + """Parse a complete X12 string containing multiple segments. + + Uses a GrammarRegistry to auto-detect segment types by their ID. + Unknown segments (not in the registry) are skipped. + + If auto_detect_delimiters is True and the string starts with ISA, + delimiters are automatically detected from the ISA header. 
+ """ + + def __init__(self, x12_string, registry=None, auto_detect_delimiters=True, **kwargs): + self._x12_string = x12_string + + if auto_detect_delimiters and x12_string.lstrip().startswith('ISA'): + detected = detect_delimiters(x12_string) + kwargs.setdefault('segment_terminator', detected.segment_terminator) + kwargs.setdefault('element_delimiter', detected.element_delimiter) + kwargs.setdefault('component_delimiter', detected.component_delimiter) + + super(X12Parser, self).__init__(**kwargs) + + if registry is None: + registry = create_default_registry() + self._registry = registry + self._parsed_segments = None + + def parse(self): + if self._parsed_segments is not None: + return self._parsed_segments + + segments = [] + raw_segments = self._x12_string.split(self._segment_terminator) + + for raw in raw_segments: + raw = raw.strip() + if not raw: + continue + + segment_id = raw.split(self._element_delimiter)[0] + grammar = self._registry.get(segment_id) + if grammar is None: + continue + + parser = SegmentParser( + raw + self._segment_terminator, + grammar=grammar, + segment_terminator=self._segment_terminator, + element_delimiter=self._element_delimiter, + component_delimiter=self._component_delimiter, + ) + segments.append(parser.parse()) + + self._parsed_segments = segments + return segments + + def to_dict(self): + return { + 'segments': [s.to_dict() for s in self.parse()], + } + + def to_json(self, indent=2): + return json.dumps(self.to_dict(), indent=indent) diff --git a/tests/test_composite_parser.py b/tests/test_composite_parser.py new file mode 100644 index 0000000..abbf353 --- /dev/null +++ b/tests/test_composite_parser.py @@ -0,0 +1,329 @@ +from unittest import TestCase + +from pyx12lib.core.grammar import ( + BaseSegment, Element, CompositeElement, Component, NotUsedElement, + element, segment, +) +from pyx12lib.core.parser import SegmentParser + + +class _TestCompositeSegment(BaseSegment): + segment_id = "COMP" + usage = segment.USAGE_MANDATORY + 
max_use = 1 + elements = ( + Element( + reference_designator="COMP01", + name="Simple Element", + usage=element.USAGE_MANDATORY, + element_type=element.ELEMENT_TYPE_STRING, + minimum=2, + maximum=3, + ), + CompositeElement( + reference_designator='COMP02', + name='Composite Element', + usage=element.USAGE_MANDATORY, + element_type=element.ELEMENT_TYPE_COMPOSITE, + minimum=1, + maximum=35, + components=( + Component( + reference_designator='C001', + name='Component 1', + usage=element.USAGE_MANDATORY, + element_type=element.ELEMENT_TYPE_ID, + minimum=1, + maximum=2, + ), + Component( + reference_designator='C002', + name='Component 2', + usage=element.USAGE_OPTIONAL, + element_type=element.ELEMENT_TYPE_STRING, + minimum=1, + maximum=5, + ), + ), + ), + Element( + reference_designator="COMP03", + name="Trailing Element", + usage=element.USAGE_OPTIONAL, + element_type=element.ELEMENT_TYPE_STRING, + minimum=1, + maximum=30, + ), + ) + + +class _SegmentWithNotUsedAndComposite(BaseSegment): + segment_id = "MIX" + usage = segment.USAGE_MANDATORY + max_use = 1 + elements = ( + NotUsedElement(reference_designator="MIX01"), + Element( + reference_designator="MIX02", + name="Simple", + usage=element.USAGE_OPTIONAL, + element_type=element.ELEMENT_TYPE_STRING, + minimum=2, + maximum=3, + ), + CompositeElement( + reference_designator='MIX03', + name='Composite', + usage=element.USAGE_MANDATORY, + element_type=element.ELEMENT_TYPE_COMPOSITE, + minimum=1, + maximum=35, + components=( + Component( + reference_designator='M001', + name='Comp 1', + usage=element.USAGE_MANDATORY, + element_type=element.ELEMENT_TYPE_ID, + minimum=1, + maximum=1, + ), + Component( + reference_designator='M002', + name='Comp 2', + usage=element.USAGE_OPTIONAL, + element_type=element.ELEMENT_TYPE_STRING, + minimum=1, + maximum=5, + ), + ), + ), + Element( + reference_designator="MIX04", + name="After Composite", + usage=element.USAGE_OPTIONAL, + element_type=element.ELEMENT_TYPE_STRING, + minimum=1, + 
maximum=30, + ), + ) + + +class TestCompositeParser(TestCase): + def test_parse_composite_element(self): + # arrange + x12_string = "COMP*AB*X^TEST~" + + # action + parser = SegmentParser(x12_string, grammar=_TestCompositeSegment) + result = parser.to_dict() + + # assert + self.assertEqual(result['segment_id'], 'COMP') + self.assertEqual(len(result['elements']), 3) + + # First element is simple + self.assertEqual(result['elements'][0]['value'], 'AB') + + # Second element is composite + composite = result['elements'][1] + self.assertIn('components', composite) + self.assertEqual(len(composite['components']), 2) + self.assertEqual(composite['components'][0]['value'], 'X') + self.assertEqual(composite['components'][0]['reference_designator'], 'C001') + self.assertEqual(composite['components'][1]['value'], 'TEST') + self.assertEqual(composite['components'][1]['reference_designator'], 'C002') + + def test_parse_composite_with_trailing_element(self): + # arrange + x12_string = "COMP*AB*A^BCD*TRAILING ELE~" + + # action + parser = SegmentParser(x12_string, grammar=_TestCompositeSegment) + result = parser.to_dict() + + # assert + self.assertEqual(result['elements'][2]['value'], 'TRAILING ELE') + + def test_parse_composite_with_empty_components(self): + # arrange: composite has only first component + x12_string = "COMP*AB*X~" + + # action + parser = SegmentParser(x12_string, grammar=_TestCompositeSegment) + result = parser.to_dict() + + # assert + composite = result['elements'][1] + self.assertEqual(composite['components'][0]['value'], 'X') + self.assertEqual(composite['components'][1]['value'], '') + + def test_parse_empty_composite(self): + # arrange: composite is empty + x12_string = "COMP*AB*~" + + # action + parser = SegmentParser(x12_string, grammar=_TestCompositeSegment) + result = parser.to_dict() + + # assert + composite = result['elements'][1] + self.assertEqual(len(composite['components']), 2) + # Empty string split gives [''], first component gets '' + 
self.assertEqual(composite['components'][0]['value'], '') + self.assertEqual(composite['components'][1]['value'], '') + + def test_composite_validation_valid(self): + # arrange + x12_string = "COMP*AB*X^TEST~" + + # action + parser = SegmentParser(x12_string, grammar=_TestCompositeSegment) + parsed = parser.parse() + + # assert + self.assertTrue(parsed.is_valid()) + + def test_composite_validation_missing_mandatory_component(self): + # arrange: C001 is mandatory but empty + x12_string = "COMP*AB*^TEST~" + + # action + parser = SegmentParser(x12_string, grammar=_TestCompositeSegment) + parsed = parser.parse() + + # assert - composite has empty mandatory component + composite = parsed.elements[1] + self.assertFalse(composite.is_valid()) + + def test_segment_with_not_used_and_composite(self): + # arrange: MIX01 is NotUsed, MIX02 is simple, MIX03 is composite + x12_string = "MIX**AB*A^BCD*TRAILING~" + + # action + parser = SegmentParser(x12_string, grammar=_SegmentWithNotUsedAndComposite) + result = parser.to_dict() + + # assert: NotUsed skipped, 3 elements in output + self.assertEqual(len(result['elements']), 3) + self.assertEqual(result['elements'][0]['reference_designator'], 'MIX02') + self.assertEqual(result['elements'][0]['value'], 'AB') + self.assertIn('components', result['elements'][1]) + self.assertEqual(result['elements'][1]['components'][0]['value'], 'A') + self.assertEqual(result['elements'][2]['value'], 'TRAILING') + + def test_parse_composite_json_output(self): + # arrange + x12_string = "COMP*AB*X^TEST~" + + # action + parser = SegmentParser(x12_string, grammar=_TestCompositeSegment) + json_output = parser.to_json() + + # assert + self.assertIn('"components"', json_output) + self.assertIn('"C001"', json_output) + + +class TestEnvelopeSegmentParsing(TestCase): + """Test parsing with real envelope grammar definitions.""" + + def test_parse_st_segment(self): + from pyx12lib.common.envelope.grammar import StSegment + + # arrange + x12_string = "ST*997*0001~" 
+ + # action + parser = SegmentParser(x12_string, grammar=StSegment) + result = parser.to_dict() + + # assert + self.assertEqual(result['segment_id'], 'ST') + self.assertEqual(result['elements'][0]['value'], '997') + self.assertEqual(result['elements'][0]['reference_designator'], 'ST01') + self.assertEqual(result['elements'][1]['value'], '0001') + + def test_parse_se_segment(self): + from pyx12lib.common.envelope.grammar import SeSegment + + # arrange + x12_string = "SE*5*0001~" + + # action + parser = SegmentParser(x12_string, grammar=SeSegment) + result = parser.to_dict() + + # assert + self.assertEqual(result['segment_id'], 'SE') + self.assertEqual(result['elements'][0]['value'], '5') + self.assertEqual(result['elements'][1]['value'], '0001') + + def test_parse_gs_segment(self): + from pyx12lib.common.envelope.grammar import GsSegment + + # arrange + x12_string = "GS*FA*SENDER*RECEIVER*20210101*1200*1*X*005010~" + + # action + parser = SegmentParser(x12_string, grammar=GsSegment) + result = parser.to_dict() + + # assert + self.assertEqual(result['segment_id'], 'GS') + self.assertEqual(len(result['elements']), 8) + self.assertEqual(result['elements'][0]['value'], 'FA') + self.assertEqual(result['elements'][1]['value'], 'SENDER') + self.assertEqual(result['elements'][7]['value'], '005010') + + def test_parse_ge_segment(self): + from pyx12lib.common.envelope.grammar import GeSegment + + # arrange + x12_string = "GE*1*1~" + + # action + parser = SegmentParser(x12_string, grammar=GeSegment) + result = parser.to_dict() + + # assert + self.assertEqual(result['segment_id'], 'GE') + self.assertEqual(result['elements'][0]['value'], '1') + self.assertEqual(result['elements'][1]['value'], '1') + + def test_parse_isa_segment(self): + from pyx12lib.common.envelope.grammar import IsaSegment + + # arrange + x12_string = ( + "ISA*00* *00* *ZZ*SENDER " + "*ZZ*RECEIVER *210101*1200*^*00501*000000001*0*P*>~" + ) + + # action + parser = SegmentParser(x12_string, grammar=IsaSegment) 
+ result = parser.to_dict() + + # assert + self.assertEqual(result['segment_id'], 'ISA') + self.assertEqual(len(result['elements']), 16) + self.assertEqual(result['elements'][0]['value'], '00') + self.assertEqual(result['elements'][0]['reference_designator'], 'ISA01') + # ISA06 = Sender ID (padded) + self.assertEqual(result['elements'][5]['value'], 'SENDER ') + # ISA16 = component separator + self.assertEqual(result['elements'][15]['value'], '>') + + def test_parse_iea_segment(self): + from pyx12lib.common.envelope.grammar import IeaSegment + + # arrange + x12_string = "IEA*1*000000001~" + + # action + parser = SegmentParser(x12_string, grammar=IeaSegment) + result = parser.to_dict() + + # assert + self.assertEqual(result['segment_id'], 'IEA') + self.assertEqual(result['elements'][0]['value'], '1') + self.assertEqual(result['elements'][1]['value'], '000000001') diff --git a/tests/test_integration.py b/tests/test_integration.py new file mode 100644 index 0000000..ef2c287 --- /dev/null +++ b/tests/test_integration.py @@ -0,0 +1,257 @@ +import json +import time +from unittest import TestCase + +from pyx12lib import parse_x12, parse_x12_to_json +from pyx12lib.core.parser import SegmentParser, X12Parser +from pyx12lib.core.registry import create_default_registry + + +class TestIntegrationFullEnvelope(TestCase): + """Integration tests with complete X12 envelope structures.""" + + FULL_X12 = ( + "ISA*00* *00* *ZZ*SENDER " + "*ZZ*RECEIVER *210101*1200*^*00501*000000001*0*P*>~" + "GS*FA*SENDER*RECEIVER*20210101*1200*1*X*005010~" + "ST*997*0001~" + "SE*1*0001~" + "GE*1*1~" + "IEA*1*000000001~" + ) + + def test_parse_complete_transaction(self): + result = parse_x12(self.FULL_X12) + + self.assertEqual(len(result['segments']), 6) + segment_ids = [s['segment_id'] for s in result['segments']] + self.assertEqual(segment_ids, ['ISA', 'GS', 'ST', 'SE', 'GE', 'IEA']) + + def test_parse_to_json_is_valid_json(self): + json_output = parse_x12_to_json(self.FULL_X12) + + data = 
json.loads(json_output) + self.assertIn('segments', data) + self.assertEqual(len(data['segments']), 6) + + def test_isa_element_values(self): + result = parse_x12(self.FULL_X12) + + isa = result['segments'][0] + self.assertEqual(isa['segment_id'], 'ISA') + # ISA01 = Authorization Info Qualifier + self.assertEqual(isa['elements'][0]['value'], '00') + # ISA05 = Interchange ID Qualifier + self.assertEqual(isa['elements'][4]['value'], 'ZZ') + # ISA13 = Interchange Control Number + self.assertEqual(isa['elements'][12]['value'], '000000001') + # ISA15 = Usage Indicator + self.assertEqual(isa['elements'][14]['value'], 'P') + # ISA16 = Component Separator + self.assertEqual(isa['elements'][15]['value'], '>') + + def test_gs_element_values(self): + result = parse_x12(self.FULL_X12) + + gs = result['segments'][1] + self.assertEqual(gs['segment_id'], 'GS') + self.assertEqual(gs['elements'][0]['value'], 'FA') + self.assertEqual(gs['elements'][1]['value'], 'SENDER') + self.assertEqual(gs['elements'][7]['value'], '005010') + + def test_st_se_element_values(self): + result = parse_x12(self.FULL_X12) + + st = result['segments'][2] + se = result['segments'][3] + self.assertEqual(st['elements'][0]['value'], '997') + self.assertEqual(st['elements'][1]['value'], '0001') + self.assertEqual(se['elements'][0]['value'], '1') + self.assertEqual(se['elements'][1]['value'], '0001') + + def test_compact_json_no_indent(self): + json_output = parse_x12_to_json(self.FULL_X12, indent=None) + + self.assertNotIn('\n', json_output) + data = json.loads(json_output) + self.assertEqual(len(data['segments']), 6) + + +class TestEdgeCases(TestCase): + """Edge case tests for parser robustness.""" + + def test_empty_string(self): + result = parse_x12("") + self.assertEqual(result['segments'], []) + + def test_only_terminators(self): + result = parse_x12("~~~") + self.assertEqual(result['segments'], []) + + def test_only_whitespace(self): + result = parse_x12(" \n\t ") + self.assertEqual(result['segments'], 
[]) + + def test_single_segment(self): + result = parse_x12("ST*997*0001~") + self.assertEqual(len(result['segments']), 1) + + def test_trailing_newlines(self): + result = parse_x12("ST*997*0001~\n\n\n") + self.assertEqual(len(result['segments']), 1) + + def test_segments_separated_by_crlf(self): + result = parse_x12("ST*997*0001~\r\nSE*1*0001~\r\n") + self.assertEqual(len(result['segments']), 2) + + def test_segment_with_many_trailing_empty_elements(self): + from pyx12lib.core.grammar import BaseSegment, Element, element, segment + from pyx12lib.core.registry import GrammarRegistry + + class _BigSegment(BaseSegment): + segment_id = "BIG" + usage = segment.USAGE_OPTIONAL + max_use = 1 + elements = tuple( + Element( + reference_designator="BIG{:02d}".format(i), + name="Elem {}".format(i), + usage=element.USAGE_OPTIONAL, + element_type=element.ELEMENT_TYPE_STRING, + minimum=1, + maximum=10, + ) + for i in range(1, 11) + ) + + registry = GrammarRegistry() + registry.register(_BigSegment) + + # Only first element has a value + result = parse_x12("BIG*hello~", registry=registry) + self.assertEqual(len(result['segments']), 1) + elements = result['segments'][0]['elements'] + self.assertEqual(elements[0]['value'], 'hello') + # Remaining elements should be empty strings + for e in elements[1:]: + self.assertEqual(e['value'], '') + + def test_unknown_segments_between_known(self): + x12 = "ST*997*0001~FOO*BAR~BAZ*QUX*123~SE*1*0001~" + result = parse_x12(x12) + + # Only ST and SE should be parsed (FOO and BAZ unknown) + self.assertEqual(len(result['segments']), 2) + self.assertEqual(result['segments'][0]['segment_id'], 'ST') + self.assertEqual(result['segments'][1]['segment_id'], 'SE') + + def test_segment_parser_with_extra_elements(self): + """Parser should not crash if X12 has more elements than grammar defines.""" + from pyx12lib.common.envelope.grammar import StSegment + + # ST grammar has 2 elements, but string has 3 + parser = SegmentParser("ST*997*0001*EXTRA~", 
grammar=StSegment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'ST') + self.assertEqual(len(result['elements']), 2) + self.assertEqual(result['elements'][0]['value'], '997') + self.assertEqual(result['elements'][1]['value'], '0001') + + def test_segment_parser_with_fewer_elements(self): + """Parser should handle fewer elements than grammar defines.""" + from pyx12lib.common.envelope.grammar import GsSegment + + # GS grammar has 8 elements, but string has only 2 + parser = SegmentParser("GS*FA*SENDER~", grammar=GsSegment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'GS') + self.assertEqual(result['elements'][0]['value'], 'FA') + self.assertEqual(result['elements'][1]['value'], 'SENDER') + # Missing elements should be empty + for e in result['elements'][2:]: + self.assertEqual(e['value'], '') + + + class TestValidation(TestCase): + """Tests for parsed segment validation.""" + + def test_valid_st_segment(self): + from pyx12lib.common.envelope.grammar import StSegment + + parser = SegmentParser("ST*997*0001~", grammar=StSegment) + self.assertTrue(parser.parse().is_valid()) + + def test_invalid_st_missing_mandatory(self): + from pyx12lib.common.envelope.grammar import StSegment + + parser = SegmentParser("ST**0001~", grammar=StSegment) + self.assertFalse(parser.parse().is_valid()) + + def test_invalid_element_too_long(self): + from pyx12lib.common.envelope.grammar import StSegment + + # ST01 max=3, giving 4 chars + parser = SegmentParser("ST*9977*0001~", grammar=StSegment) + self.assertFalse(parser.parse().is_valid()) + + def test_invalid_element_too_short(self): + from pyx12lib.common.envelope.grammar import StSegment + + # ST01 min=3, giving 2 chars + parser = SegmentParser("ST*99*0001~", grammar=StSegment) + self.assertFalse(parser.parse().is_valid()) + + def test_invalid_numeric_element(self): + from pyx12lib.common.envelope.grammar import SeSegment + + # SE01 expects numeric, giving non-numeric + parser =
SegmentParser("SE*abc*0001~", grammar=SeSegment) + self.assertFalse(parser.parse().is_valid()) + + def test_valid_numeric_element(self): + from pyx12lib.common.envelope.grammar import SeSegment + + parser = SegmentParser("SE*5*0001~", grammar=SeSegment) + self.assertTrue(parser.parse().is_valid()) + + def test_empty_optional_segment_is_valid(self): + from pyx12lib.core.grammar import BaseSegment, Element, element, segment + + class _OptSegment(BaseSegment): + segment_id = "OPT" + usage = segment.USAGE_OPTIONAL + max_use = 1 + elements = ( + Element( + reference_designator="OPT01", + name="Optional El", + usage=element.USAGE_OPTIONAL, + element_type=element.ELEMENT_TYPE_STRING, + minimum=1, + maximum=5, + ), + ) + + parser = SegmentParser("OPT*~", grammar=_OptSegment) + self.assertTrue(parser.parse().is_valid()) + + +class TestPerformance(TestCase): + """Basic performance sanity check.""" + + def test_parse_many_segments(self): + # Build a string with 2000 segments (1000 ST + 1000 SE) + parts = [] + for i in range(1000): + parts.append("ST*997*{:04d}~SE*1*{:04d}~".format(i, i)) + x12_string = "".join(parts) + + start = time.time() + result = parse_x12(x12_string) + elapsed = time.time() - start + + self.assertEqual(len(result['segments']), 2000) + # Should parse 2000 segments in well under 1 second + self.assertLess(elapsed, 1.0) diff --git a/tests/test_segment_parser.py b/tests/test_segment_parser.py new file mode 100644 index 0000000..872b752 --- /dev/null +++ b/tests/test_segment_parser.py @@ -0,0 +1,180 @@ +from unittest import TestCase + +from pyx12lib.core.grammar import BaseSegment, Element, NotUsedElement, element, segment +from pyx12lib.core.parser import SegmentParser + + +class _TestSegment(BaseSegment): + segment_id = "TEST" + usage = segment.USAGE_MANDATORY + max_use = 1 + elements = ( + NotUsedElement(reference_designator="TEST01"), + Element( + reference_designator="TEST02", + name="Test Element 1", + usage=element.USAGE_MANDATORY, + 
element_type=element.ELEMENT_TYPE_STRING, + minimum=2, + maximum=3, + ), + Element( + reference_designator="TEST03", + name="Test Element 2", + usage=element.USAGE_OPTIONAL, + element_type=element.ELEMENT_TYPE_STRING, + minimum=1, + maximum=5, + ), + ) + + +class _SimpleSegment(BaseSegment): + segment_id = "SIM" + usage = segment.USAGE_MANDATORY + max_use = 1 + elements = ( + Element( + reference_designator="SIM01", + name="Element 1", + usage=element.USAGE_MANDATORY, + element_type=element.ELEMENT_TYPE_ID, + minimum=3, + maximum=3, + ), + Element( + reference_designator="SIM02", + name="Element 2", + usage=element.USAGE_MANDATORY, + element_type=element.ELEMENT_TYPE_STRING, + minimum=4, + maximum=9, + ), + ) + + +class TestSegmentParser(TestCase): + def test_parse_simple_segment(self): + # arrange + x12_string = "SIM*997*0001~" + + # action + parser = SegmentParser(x12_string, grammar=_SimpleSegment) + result = parser.to_dict() + + # assert + self.assertEqual(result['segment_id'], 'SIM') + self.assertEqual(len(result['elements']), 2) + self.assertEqual(result['elements'][0]['value'], '997') + self.assertEqual(result['elements'][0]['reference_designator'], 'SIM01') + self.assertEqual(result['elements'][1]['value'], '0001') + + def test_parse_segment_with_not_used_element(self): + # arrange: TEST01 is NotUsedElement, should be skipped in output + x12_string = "TEST**AB*hello~" + + # action + parser = SegmentParser(x12_string, grammar=_TestSegment) + result = parser.to_dict() + + # assert + self.assertEqual(result['segment_id'], 'TEST') + self.assertEqual(len(result['elements']), 2) # NotUsedElement skipped + self.assertEqual(result['elements'][0]['value'], 'AB') + self.assertEqual(result['elements'][0]['reference_designator'], 'TEST02') + self.assertEqual(result['elements'][1]['value'], 'hello') + + def test_parse_segment_with_empty_trailing_elements(self): + # arrange + x12_string = "TEST**AB~" + + # action + parser = SegmentParser(x12_string, grammar=_TestSegment) 
+ result = parser.to_dict() + + # assert + self.assertEqual(result['elements'][0]['value'], 'AB') + self.assertEqual(result['elements'][1]['value'], '') + + def test_parse_segment_to_json(self): + # arrange + x12_string = "SIM*997*0001~" + + # action + parser = SegmentParser(x12_string, grammar=_SimpleSegment) + json_output = parser.to_json() + + # assert + self.assertIn('"segment_id": "SIM"', json_output) + self.assertIn('"value": "997"', json_output) + + def test_invalid_segment_id_raises_error(self): + # arrange + x12_string = "WRONG*AB*12345~" + + # action & assert + parser = SegmentParser(x12_string, grammar=_SimpleSegment) + with self.assertRaises(ValueError) as ctx: + parser.parse() + self.assertIn('SIM', str(ctx.exception)) + self.assertIn('WRONG', str(ctx.exception)) + + def test_parse_caches_result(self): + # arrange + x12_string = "SIM*997*0001~" + + # action + parser = SegmentParser(x12_string, grammar=_SimpleSegment) + result1 = parser.parse() + result2 = parser.parse() + + # assert - same object returned + self.assertIs(result1, result2) + + def test_parse_segment_without_terminator(self): + # arrange: no trailing ~ + x12_string = "SIM*997*0001" + + # action + parser = SegmentParser(x12_string, grammar=_SimpleSegment) + result = parser.to_dict() + + # assert + self.assertEqual(result['segment_id'], 'SIM') + self.assertEqual(result['elements'][0]['value'], '997') + + def test_parsed_element_metadata(self): + # arrange + x12_string = "SIM*997*0001~" + + # action + parser = SegmentParser(x12_string, grammar=_SimpleSegment) + result = parser.to_dict() + + # assert + ele = result['elements'][0] + self.assertEqual(ele['name'], 'Element 1') + self.assertEqual(ele['type'], element.ELEMENT_TYPE_ID) + self.assertEqual(ele['usage'], element.USAGE_MANDATORY) + + def test_is_valid_with_valid_segment(self): + # arrange + x12_string = "SIM*997*0001~" + + # action + parser = SegmentParser(x12_string, grammar=_SimpleSegment) + parsed = parser.parse() + + # assert + 
self.assertTrue(parsed.is_valid()) + + def test_is_valid_with_missing_mandatory_element(self): + # arrange: SIM01 is mandatory but empty + x12_string = "SIM**0001~" + + # action + parser = SegmentParser(x12_string, grammar=_SimpleSegment) + parsed = parser.parse() + + # assert + self.assertFalse(parsed.is_valid()) diff --git a/tests/test_x12_parser.py b/tests/test_x12_parser.py new file mode 100644 index 0000000..24b6ee6 --- /dev/null +++ b/tests/test_x12_parser.py @@ -0,0 +1,160 @@ +import json +from unittest import TestCase + +from pyx12lib.core.grammar import BaseSegment, Element, element, segment +from pyx12lib.core.parser import X12Parser +from pyx12lib.core.registry import GrammarRegistry, create_default_registry + + +class _CustomSegment(BaseSegment): + segment_id = "CUS" + usage = segment.USAGE_OPTIONAL + max_use = 99 + elements = ( + Element( + reference_designator="CUS01", + name="Custom Element", + usage=element.USAGE_MANDATORY, + element_type=element.ELEMENT_TYPE_STRING, + minimum=1, + maximum=10, + ), + ) + + +class TestX12Parser(TestCase): + def test_parse_multiple_envelope_segments(self): + # arrange + x12_string = "ST*997*0001~SE*1*0001~" + + # action + parser = X12Parser(x12_string) + result = parser.to_dict() + + # assert + self.assertEqual(len(result['segments']), 2) + self.assertEqual(result['segments'][0]['segment_id'], 'ST') + self.assertEqual(result['segments'][1]['segment_id'], 'SE') + + def test_parse_full_envelope(self): + # arrange + x12_string = ( + "ISA*00* *00* *ZZ*SENDER " + "*ZZ*RECEIVER *210101*1200*^*00501*000000001*0*P*>~" + "GS*FA*SENDER*RECEIVER*20210101*1200*1*X*005010~" + "ST*997*0001~" + "SE*1*0001~" + "GE*1*1~" + "IEA*1*000000001~" + ) + + # action + parser = X12Parser(x12_string) + result = parser.to_dict() + + # assert + self.assertEqual(len(result['segments']), 6) + segment_ids = [s['segment_id'] for s in result['segments']] + self.assertEqual(segment_ids, ['ISA', 'GS', 'ST', 'SE', 'GE', 'IEA']) + + def 
test_unknown_segments_are_skipped(self): + # arrange: BOGUS is not in default registry + x12_string = "ST*997*0001~BOGUS*DATA~SE*1*0001~" + + # action + parser = X12Parser(x12_string) + result = parser.to_dict() + + # assert + self.assertEqual(len(result['segments']), 2) + self.assertEqual(result['segments'][0]['segment_id'], 'ST') + self.assertEqual(result['segments'][1]['segment_id'], 'SE') + + def test_custom_registry(self): + # arrange + registry = GrammarRegistry() + registry.register(_CustomSegment) + x12_string = "CUS*hello~CUS*world~" + + # action + parser = X12Parser(x12_string, registry=registry) + result = parser.to_dict() + + # assert + self.assertEqual(len(result['segments']), 2) + self.assertEqual(result['segments'][0]['elements'][0]['value'], 'hello') + self.assertEqual(result['segments'][1]['elements'][0]['value'], 'world') + + def test_to_json_produces_valid_json(self): + # arrange + x12_string = "ST*997*0001~SE*1*0001~" + + # action + parser = X12Parser(x12_string) + json_output = parser.to_json() + + # assert + data = json.loads(json_output) + self.assertIn('segments', data) + self.assertEqual(len(data['segments']), 2) + + def test_empty_string_returns_no_segments(self): + # arrange + x12_string = "" + + # action + parser = X12Parser(x12_string) + result = parser.to_dict() + + # assert + self.assertEqual(len(result['segments']), 0) + + def test_whitespace_only_returns_no_segments(self): + # arrange + x12_string = " \n \n " + + # action + parser = X12Parser(x12_string) + result = parser.to_dict() + + # assert + self.assertEqual(len(result['segments']), 0) + + def test_parse_caches_result(self): + # arrange + x12_string = "ST*997*0001~SE*1*0001~" + + # action + parser = X12Parser(x12_string) + result1 = parser.parse() + result2 = parser.parse() + + # assert + self.assertIs(result1, result2) + + def test_newline_separated_segments(self): + # arrange: segments separated by newlines + x12_string = "ST*997*0001~\nSE*1*0001~\n" + + # action + parser = 
X12Parser(x12_string) + result = parser.to_dict() + + # assert + self.assertEqual(len(result['segments']), 2) + + def test_mixed_registry_with_defaults(self): + # arrange: add custom to default registry + registry = create_default_registry() + registry.register(_CustomSegment) + x12_string = "ST*997*0001~CUS*test~SE*1*0001~" + + # action + parser = X12Parser(x12_string, registry=registry) + result = parser.to_dict() + + # assert + self.assertEqual(len(result['segments']), 3) + self.assertEqual(result['segments'][0]['segment_id'], 'ST') + self.assertEqual(result['segments'][1]['segment_id'], 'CUS') + self.assertEqual(result['segments'][2]['segment_id'], 'SE') From 5bfe07a1c984db61f53eaf836d7e4c8a6bc4aa83 Mon Sep 17 00:00:00 2001 From: Hoss Date: Fri, 30 Jan 2026 16:16:25 +0800 Subject: [PATCH 4/8] test: add EDI 304 and 315 validation tests with grammar fixtures [why] Verify the parser works with production-grade segment definitions from real EDI 304 (Shipping Instruction) and 315 (Status Detail) implementations. 
[how] - Grammar fixtures copied from fms and gf-notif-svc implementations - EDI 304: B2, B2A, K1, L0, L3, LX, M0, N1, N7, N9, R2, R4, V1 and more - EDI 315: B4, R4, DTM, N9, Q2 segments - Full-document parsing tests with complete EDI 304 and 315 messages - Multi-transaction document test (two ST/SE pairs in one interchange) --- tests/fixtures/__init__.py | 0 tests/fixtures/edi_304_grammar/__init__.py | 13 + tests/fixtures/edi_304_grammar/b2.py | 59 ++ tests/fixtures/edi_304_grammar/b2a.py | 18 + tests/fixtures/edi_304_grammar/k1.py | 26 + tests/fixtures/edi_304_grammar/l0.py | 90 ++ tests/fixtures/edi_304_grammar/l3.py | 148 ++++ tests/fixtures/edi_304_grammar/lx.py | 192 ++++ tests/fixtures/edi_304_grammar/m0.py | 34 + tests/fixtures/edi_304_grammar/n1.py | 186 ++++ tests/fixtures/edi_304_grammar/n7.py | 233 +++++ tests/fixtures/edi_304_grammar/n9.py | 26 + tests/fixtures/edi_304_grammar/r2.py | 43 + tests/fixtures/edi_304_grammar/r4.py | 60 ++ tests/fixtures/edi_304_grammar/v1.py | 53 ++ tests/fixtures/edi_315_grammar/__init__.py | 4 + tests/fixtures/edi_315_grammar/b4.py | 103 +++ tests/fixtures/edi_315_grammar/n9.py | 25 + tests/fixtures/edi_315_grammar/q2.py | 130 +++ tests/fixtures/edi_315_grammar/r4.py | 106 +++ tests/test_edi304_validation.py | 983 +++++++++++++++++++++ tests/test_edi315_validation.py | 462 ++++++++++ 22 files changed, 2994 insertions(+) create mode 100644 tests/fixtures/__init__.py create mode 100644 tests/fixtures/edi_304_grammar/__init__.py create mode 100644 tests/fixtures/edi_304_grammar/b2.py create mode 100644 tests/fixtures/edi_304_grammar/b2a.py create mode 100644 tests/fixtures/edi_304_grammar/k1.py create mode 100644 tests/fixtures/edi_304_grammar/l0.py create mode 100644 tests/fixtures/edi_304_grammar/l3.py create mode 100644 tests/fixtures/edi_304_grammar/lx.py create mode 100644 tests/fixtures/edi_304_grammar/m0.py create mode 100644 tests/fixtures/edi_304_grammar/n1.py create mode 100644 tests/fixtures/edi_304_grammar/n7.py 
create mode 100644 tests/fixtures/edi_304_grammar/n9.py create mode 100644 tests/fixtures/edi_304_grammar/r2.py create mode 100644 tests/fixtures/edi_304_grammar/r4.py create mode 100644 tests/fixtures/edi_304_grammar/v1.py create mode 100644 tests/fixtures/edi_315_grammar/__init__.py create mode 100644 tests/fixtures/edi_315_grammar/b4.py create mode 100644 tests/fixtures/edi_315_grammar/n9.py create mode 100644 tests/fixtures/edi_315_grammar/q2.py create mode 100644 tests/fixtures/edi_315_grammar/r4.py create mode 100644 tests/test_edi304_validation.py create mode 100644 tests/test_edi315_validation.py diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/fixtures/edi_304_grammar/__init__.py b/tests/fixtures/edi_304_grammar/__init__.py new file mode 100644 index 0000000..72e2147 --- /dev/null +++ b/tests/fixtures/edi_304_grammar/__init__.py @@ -0,0 +1,13 @@ +from .b2 import B2Segment +from .b2a import B2aSegment +from .k1 import K1Segment +from .l0 import L0Segment as l0_L0Segment +from .l3 import L3Segment, PWKSegment, SACSegment +from .lx import L0Segment, L4Segment, L5Segment, LxSegment +from .m0 import M0Segment +from .n1 import G61Segment, N1Segment, N2Segment, N3Segment, N4Segment +from .n7 import M7Segment, N7Segment, QtySegment, W09Segment +from .n9 import N9Segment +from .r2 import R2Segment +from .r4 import R4Segment +from .v1 import V1Segment diff --git a/tests/fixtures/edi_304_grammar/b2.py b/tests/fixtures/edi_304_grammar/b2.py new file mode 100644 index 0000000..184d009 --- /dev/null +++ b/tests/fixtures/edi_304_grammar/b2.py @@ -0,0 +1,59 @@ +from pyx12lib.core.grammar import Element, NotUsedElement +from pyx12lib.core.grammar.segment import USAGE_MANDATORY, USAGE_OPTIONAL, BaseSegment + + +class B2Segment(BaseSegment): + segment_id = 'B2' + usage = 'M' + max_use = 1 + elements = ( + Element( + reference_designator='B201', + name='Tariff Service Code', + 
usage=USAGE_OPTIONAL, + element_type='ID', + minimum=2, + maximum=2, + ), + Element( + reference_designator='B202', + name='Standard Carrier Alpha Code', + usage=USAGE_MANDATORY, + element_type='ID', + minimum=2, + maximum=4, + ), + NotUsedElement( + reference_designator='B203', + ), + Element( + reference_designator='B204', + name='Shipment Identification Number', + usage=USAGE_MANDATORY, + element_type='AN', + minimum=1, + maximum=30, + ), + NotUsedElement( + reference_designator='B205', + ), + Element( + reference_designator='B206', + name='Shipment Method of Payment', + usage=USAGE_MANDATORY, + element_type='ID', + minimum=2, + maximum=2, + ), + NotUsedElement( + reference_designator='B207', + ), + Element( + reference_designator='B208', + name='Total Equipment', + usage=USAGE_OPTIONAL, + element_type='N0', + minimum=1, + maximum=3, + ), + ) diff --git a/tests/fixtures/edi_304_grammar/b2a.py b/tests/fixtures/edi_304_grammar/b2a.py new file mode 100644 index 0000000..8a5c96b --- /dev/null +++ b/tests/fixtures/edi_304_grammar/b2a.py @@ -0,0 +1,18 @@ +from pyx12lib.core.grammar import Element +from pyx12lib.core.grammar.segment import USAGE_MANDATORY, BaseSegment + + +class B2aSegment(BaseSegment): + segment_id = 'B2A' + usage = 'M' + max_use = 1 + elements = ( + Element( + reference_designator='B2A01', + name='Transaction Set Purpose Code', + usage=USAGE_MANDATORY, + element_type='ID', + minimum=2, + maximum=2, + ), + ) diff --git a/tests/fixtures/edi_304_grammar/k1.py b/tests/fixtures/edi_304_grammar/k1.py new file mode 100644 index 0000000..d904326 --- /dev/null +++ b/tests/fixtures/edi_304_grammar/k1.py @@ -0,0 +1,26 @@ +from pyx12lib.core.grammar import Element +from pyx12lib.core.grammar.segment import USAGE_MANDATORY, USAGE_OPTIONAL, BaseSegment + + +class K1Segment(BaseSegment): + segment_id = 'K1' + usage = 'O' + max_use = 999 + elements = ( + Element( + reference_designator='K101', + name='Free-Form Message', + usage=USAGE_MANDATORY, + element_type='AN', 
+ minimum=1, + maximum=30, + ), + Element( + reference_designator='K102', + name='Free-Form Message', + usage=USAGE_OPTIONAL, + element_type='AN', + minimum=1, + maximum=30, + ), + ) diff --git a/tests/fixtures/edi_304_grammar/l0.py b/tests/fixtures/edi_304_grammar/l0.py new file mode 100644 index 0000000..9bf2fc2 --- /dev/null +++ b/tests/fixtures/edi_304_grammar/l0.py @@ -0,0 +1,90 @@ +from pyx12lib.core.grammar import Element +from pyx12lib.core.grammar.segment import USAGE_CONDITIONAL, USAGE_OPTIONAL, BaseSegment + + +class L0Segment(BaseSegment): + segment_id = 'L0' + usage = 'M' + max_use = 1 + elements = ( + Element( + reference_designator='L001', + name='Lading Line Item Number', + usage=USAGE_OPTIONAL, + element_type='N0', + minimum=1, + maximum=3, + ), + Element( + reference_designator='L004', + name='Weight', + usage=USAGE_CONDITIONAL, + element_type='R', + minimum=1, + maximum=10, + ), + Element( + reference_designator='L005', + name='Weight Qualifier', + usage=USAGE_CONDITIONAL, + element_type='ID', + minimum=1, + maximum=2, + ), + Element( + reference_designator='L006', + name='Volume', + usage=USAGE_CONDITIONAL, + element_type='R', + minimum=1, + maximum=8, + ), + Element( + reference_designator='L007', + name='Volume Unit Qualifier', + usage=USAGE_CONDITIONAL, + element_type='ID', + minimum=1, + maximum=1, + ), + Element( + reference_designator='L008', + name='Lading Quantity', + usage=USAGE_CONDITIONAL, + element_type='N0', + minimum=1, + maximum=7, + ), + Element( + reference_designator='L009', + name='Packaging Form Code', + usage=USAGE_CONDITIONAL, + element_type='ID', + minimum=3, + maximum=3, + ), + Element( + reference_designator='L010', + name='Dunnage Description', + usage=USAGE_OPTIONAL, + element_type='AN', + minimum=2, + maximum=25, + ), + Element( + reference_designator='L011', + name='Weight Unit Code', + usage=USAGE_OPTIONAL, + element_type='ID', + minimum=1, + maximum=1, + ), + Element( + reference_designator='L014', + 
name='Packaging Form Code', + usage=USAGE_OPTIONAL, + element_type='ID', + minimum=3, + maximum=3, + ), + ) diff --git a/tests/fixtures/edi_304_grammar/l3.py b/tests/fixtures/edi_304_grammar/l3.py new file mode 100644 index 0000000..b57c54d --- /dev/null +++ b/tests/fixtures/edi_304_grammar/l3.py @@ -0,0 +1,148 @@ +from pyx12lib.core.grammar import Element, NotUsedElement +from pyx12lib.core.grammar.segment import USAGE_CONDITIONAL, USAGE_MANDATORY, USAGE_OPTIONAL, BaseSegment + + +class L3Segment(BaseSegment): + segment_id = 'L3' + usage = 'O' + max_use = 1 + elements = ( + Element( + reference_designator='L301', + name='Weight', + usage=USAGE_CONDITIONAL, + element_type='R', + minimum=1, + maximum=10, + ), + Element( + reference_designator='L302', + name='Weight Qualifier', + usage=USAGE_CONDITIONAL, + element_type='ID', + minimum=1, + maximum=2, + ), + NotUsedElement(reference_designator='L303'), + NotUsedElement(reference_designator='L304'), + NotUsedElement(reference_designator='L305'), + NotUsedElement(reference_designator='L306'), + NotUsedElement(reference_designator='L307'), + NotUsedElement(reference_designator='L308'), + Element( + reference_designator='L309', + name='Volume', + usage=USAGE_CONDITIONAL, + element_type='R', + minimum=1, + maximum=8, + ), + Element( + reference_designator='L310', + name='Volume Unit Qualifier', + usage=USAGE_CONDITIONAL, + element_type='ID', + minimum=1, + maximum=1, + ), + Element( + reference_designator='L311', + name='Lading Quantity', + usage=USAGE_OPTIONAL, + element_type='N0', + minimum=1, + maximum=7, + ), + Element( + reference_designator='L312', + name='Weight Unit Code', + usage=USAGE_OPTIONAL, + element_type='ID', + minimum=1, + maximum=1, + ), + ) + + +class PWKSegment(BaseSegment): + segment_id = 'PWK' + usage = 'O' + max_use = 50 + elements = ( + Element( + reference_designator='PWK01', + name='Report Type Code', + usage=USAGE_CONDITIONAL, + element_type='ID', + minimum=2, + maximum=2, + ), + Element( + 
reference_designator='PWK02', + name='Report Transmission Code', + usage=USAGE_OPTIONAL, + element_type='ID', + minimum=1, + maximum=2, + ), + Element( + reference_designator='PWK03', + name='Report Copies Needed', + usage=USAGE_OPTIONAL, + element_type='N0', + minimum=1, + maximum=2, + ), + NotUsedElement(reference_designator='PWK04'), + NotUsedElement(reference_designator='PWK05'), + NotUsedElement(reference_designator='PWK06'), + Element( + reference_designator='PWK07', + name='Description', + usage=USAGE_OPTIONAL, + element_type='AN', + minimum=1, + maximum=80, + ), + ) + + +class SACSegment(BaseSegment): + segment_id = 'SAC' + usage = 'M' + max_use = 1 + elements = ( + Element( + reference_designator='SAC01', + name='Allowance or Charge Indicator', + usage=USAGE_MANDATORY, + element_type='ID', + minimum=1, + maximum=1, + ), + Element( + reference_designator='SAC02', + name='Service, Promotion, Allowance, or Charge Code', + usage=USAGE_CONDITIONAL, + element_type='ID', + minimum=4, + maximum=4, + ), + NotUsedElement(reference_designator='SAC03'), + NotUsedElement(reference_designator='SAC04'), + NotUsedElement(reference_designator='SAC05'), + NotUsedElement(reference_designator='SAC06'), + NotUsedElement(reference_designator='SAC07'), + NotUsedElement(reference_designator='SAC08'), + NotUsedElement(reference_designator='SAC09'), + NotUsedElement(reference_designator='SAC10'), + NotUsedElement(reference_designator='SAC11'), + Element( + reference_designator='SAC12', + name='Allowance or Charge Method of Handling Code', + usage=USAGE_OPTIONAL, + element_type='ID', + minimum=2, + maximum=2, + ), + ) diff --git a/tests/fixtures/edi_304_grammar/lx.py b/tests/fixtures/edi_304_grammar/lx.py new file mode 100644 index 0000000..8172770 --- /dev/null +++ b/tests/fixtures/edi_304_grammar/lx.py @@ -0,0 +1,192 @@ +from pyx12lib.core.grammar import Element, NotUsedElement +from pyx12lib.core.grammar.segment import USAGE_CONDITIONAL, USAGE_MANDATORY, USAGE_OPTIONAL, 
BaseSegment + + +class LxSegment(BaseSegment): + segment_id = 'LX' + usage = 'O' + max_use = 1 + elements = ( + Element( + reference_designator='LX01', + name='Assigned Number', + usage=USAGE_MANDATORY, + element_type='N0', + minimum=1, + maximum=6, + ), + ) + + +class L0Segment(BaseSegment): + segment_id = 'L0' + usage = 'O' + max_use = 1 + elements = ( + Element( + reference_designator='L001', + name='Lading Line Item Number', + usage=USAGE_OPTIONAL, + element_type='N0', + minimum=1, + maximum=3, + ), + NotUsedElement(reference_designator='L002'), + NotUsedElement(reference_designator='L003'), + Element( + reference_designator='L004', + name='Weight', + usage=USAGE_CONDITIONAL, + element_type='R', + minimum=1, + maximum=10, + ), + Element( + reference_designator='L005', + name='Weight Qualifier', + usage=USAGE_CONDITIONAL, + element_type='ID', + minimum=1, + maximum=2, + ), + Element( + reference_designator='L006', + name='Volume', + usage=USAGE_CONDITIONAL, + element_type='R', + minimum=1, + maximum=8, + ), + Element( + reference_designator='L007', + name='Volume Unit Qualifier', + usage=USAGE_CONDITIONAL, + element_type='ID', + minimum=1, + maximum=1, + ), + Element( + reference_designator='L008', + name='Lading Quantity', + usage=USAGE_CONDITIONAL, + element_type='N0', + minimum=1, + maximum=7, + ), + Element( + reference_designator='L009', + name='Packaging Form Code', + usage=USAGE_CONDITIONAL, + element_type='ID', + minimum=3, + maximum=3, + ), + NotUsedElement(reference_designator='L010'), + Element( + reference_designator='L011', + name='Weight Unit Code', + usage=USAGE_OPTIONAL, + element_type='ID', + minimum=1, + maximum=1, + ), + NotUsedElement(reference_designator='L012'), + NotUsedElement(reference_designator='L013'), + Element( + reference_designator='L014', + name='Packaging Form Code', + usage=USAGE_OPTIONAL, + element_type='ID', + minimum=3, + maximum=3, + ), + ) + + +class L4Segment(BaseSegment): + segment_id = 'L4' + usage = 'O' + max_use = 1 + 
elements = ( + Element( + reference_designator='L401', + name='Length', + usage=USAGE_MANDATORY, + element_type='R', + minimum=1, + maximum=8, + ), + Element( + reference_designator='L402', + name='Width', + usage=USAGE_MANDATORY, + element_type='R', + minimum=1, + maximum=8, + ), + Element( + reference_designator='L403', + name='Height', + usage=USAGE_MANDATORY, + element_type='R', + minimum=1, + maximum=8, + ), + Element( + reference_designator='L404', + name='Measurement Unit Qualifier', + usage=USAGE_MANDATORY, + element_type='ID', + minimum=1, + maximum=1, + ), + ) + + +class L5Segment(BaseSegment): + segment_id = 'L5' + usage = 'M' + max_use = 990 + elements = ( + Element( + reference_designator='L501', + name='Lading Line Item Number', + usage=USAGE_OPTIONAL, + element_type='N0', + minimum=1, + maximum=3, + ), + Element( + reference_designator='L502', + name='Lading Description', + usage=USAGE_OPTIONAL, + element_type='AN', + minimum=1, + maximum=50, + ), + Element( + reference_designator='L503', + name='Commodity Code', + usage=USAGE_CONDITIONAL, + element_type='AN', + minimum=1, + maximum=30, + ), + Element( + reference_designator='L504', + name='Commodity Code Qualifier', + usage=USAGE_CONDITIONAL, + element_type='ID', + minimum=1, + maximum=1, + ), + NotUsedElement(reference_designator='L505'), + Element( + reference_designator='L506', + name='Marks and Numbers', + usage=USAGE_CONDITIONAL, + element_type='AN', + minimum=1, + maximum=48, + ), + ) diff --git a/tests/fixtures/edi_304_grammar/m0.py b/tests/fixtures/edi_304_grammar/m0.py new file mode 100644 index 0000000..b4cde3b --- /dev/null +++ b/tests/fixtures/edi_304_grammar/m0.py @@ -0,0 +1,34 @@ +from pyx12lib.core.grammar import Element +from pyx12lib.core.grammar.segment import USAGE_MANDATORY, USAGE_OPTIONAL, BaseSegment + + +class M0Segment(BaseSegment): + segment_id = 'M0' + usage = 'O' + max_use = 1 + elements = ( + Element( + reference_designator='M001', + name='Letter of Credit Number', + 
usage=USAGE_MANDATORY, + element_type='AN', + minimum=2, + maximum=40, + ), + Element( + reference_designator='M002', + name='Date', + usage=USAGE_OPTIONAL, + element_type='DT', + minimum=8, + maximum=8, + ), + Element( + reference_designator='M003', + name='Date', + usage=USAGE_OPTIONAL, + element_type='DT', + minimum=8, + maximum=8, + ), + ) diff --git a/tests/fixtures/edi_304_grammar/n1.py b/tests/fixtures/edi_304_grammar/n1.py new file mode 100644 index 0000000..344e512 --- /dev/null +++ b/tests/fixtures/edi_304_grammar/n1.py @@ -0,0 +1,186 @@ +from pyx12lib.core.grammar import Element +from pyx12lib.core.grammar.segment import USAGE_CONDITIONAL, USAGE_MANDATORY, USAGE_OPTIONAL, BaseSegment + + +class N1Segment(BaseSegment): + segment_id = 'N1' + usage = 'O' + max_use = 1 + elements = ( + Element( + reference_designator='N101', + name='Entity Identifier Code', + usage=USAGE_MANDATORY, + element_type='ID', + minimum=2, + maximum=3, + ), + Element( + reference_designator='N102', + name='Name', + usage=USAGE_CONDITIONAL, + element_type='AN', + minimum=1, + maximum=35, + ), + Element( + reference_designator='N103', + name='Identification Code Qualifier', + usage=USAGE_CONDITIONAL, + element_type='ID', + minimum=1, + maximum=2, + ), + Element( + reference_designator='N104', + name='Identification Code', + usage=USAGE_CONDITIONAL, + element_type='AN', + minimum=2, + maximum=35, + ), + ) + + +class N2Segment(BaseSegment): + segment_id = 'N2' + usage = 'O' + max_use = 2 + elements = ( + Element( + reference_designator='N201', + name='Name', + usage=USAGE_MANDATORY, + element_type='AN', + minimum=1, + maximum=35, + ), + Element( + reference_designator='N202', + name='Name', + usage=USAGE_OPTIONAL, + element_type='AN', + minimum=1, + maximum=35, + ), + ) + + +class N3Segment(BaseSegment): + segment_id = 'N3' + usage = 'O' + max_use = 2 + elements = ( + Element( + reference_designator='N301', + name='Address Information', + usage=USAGE_MANDATORY, + element_type='AN', + 
minimum=1, + maximum=35, + ), + Element( + reference_designator='N302', + name='Address Information', + usage=USAGE_OPTIONAL, + element_type='AN', + minimum=1, + maximum=35, + ), + ) + + +class N4Segment(BaseSegment): + segment_id = 'N4' + usage = 'O' + max_use = 1 + elements = ( + Element( + reference_designator='N401', + name='City Name', + usage=USAGE_OPTIONAL, + element_type='AN', + minimum=2, + maximum=30, + ), + Element( + reference_designator='N402', + name='State or Province Code', + usage=USAGE_OPTIONAL, + element_type='ID', + minimum=2, + maximum=2, + ), + Element( + reference_designator='N403', + name='Postal Code', + usage=USAGE_OPTIONAL, + element_type='ID', + minimum=3, + maximum=15, + ), + Element( + reference_designator='N404', + name='Country Code', + usage=USAGE_OPTIONAL, + element_type='ID', + minimum=2, + maximum=3, + ), + Element( + reference_designator='N405', + name='Location Qualifier', + usage=USAGE_CONDITIONAL, + element_type='ID', + minimum=1, + maximum=2, + ), + Element( + reference_designator='N406', + name='Location Identifier', + usage=USAGE_OPTIONAL, + element_type='AN', + minimum=1, + maximum=30, + ), + ) + + +class G61Segment(BaseSegment): + segment_id = 'G61' + usage = 'O' + max_use = 3 + elements = ( + Element( + reference_designator='G6101', + name='Contact Function Code', + usage=USAGE_MANDATORY, + element_type='ID', + minimum=2, + maximum=2, + ), + Element( + reference_designator='G6102', + name='Name', + usage=USAGE_MANDATORY, + element_type='AN', + minimum=1, + maximum=35, + ), + Element( + reference_designator='G6103', + name='Communication Number Qualifier', + usage=USAGE_CONDITIONAL, + element_type='ID', + minimum=2, + maximum=2, + ), + Element( + reference_designator='G6104', + name='Communication Number', + usage=USAGE_CONDITIONAL, + element_type='AN', + minimum=1, + maximum=80, + ), + ) diff --git a/tests/fixtures/edi_304_grammar/n7.py b/tests/fixtures/edi_304_grammar/n7.py new file mode 100644 index 0000000..56d0ca5 
--- /dev/null +++ b/tests/fixtures/edi_304_grammar/n7.py @@ -0,0 +1,233 @@ +from pyx12lib.core.grammar import Element, NotUsedElement +from pyx12lib.core.grammar.segment import USAGE_CONDITIONAL, USAGE_MANDATORY, USAGE_OPTIONAL, BaseSegment + + +class N7Segment(BaseSegment): + segment_id = 'N7' + usage = 'O' + max_use = 1 + elements = ( + Element( + reference_designator='N701', + name='Equipment Initial', + usage=USAGE_OPTIONAL, + element_type='AN', + minimum=1, + maximum=4, + ), + Element( + reference_designator='N702', + name='Equipment Number', + usage=USAGE_MANDATORY, + element_type='AN', + minimum=1, + maximum=10, + ), + Element( + reference_designator='N703', + name='Weight', + usage=USAGE_CONDITIONAL, + element_type='R', + minimum=1, + maximum=10, + ), + Element( + reference_designator='N704', + name='Weight Qualifier', + usage=USAGE_CONDITIONAL, + element_type='ID', + minimum=1, + maximum=2, + ), + Element( + reference_designator='N705', + name='Tare Weight', + usage=USAGE_OPTIONAL, + element_type='N0', + minimum=3, + maximum=8, + ), + NotUsedElement(reference_designator='N706'), + NotUsedElement(reference_designator='N707'), + Element( + reference_designator='N708', + name='Volume', + usage=USAGE_CONDITIONAL, + element_type='R', + minimum=1, + maximum=8, + ), + Element( + reference_designator='N709', + name='Volume Unit Qualifier', + usage=USAGE_CONDITIONAL, + element_type='ID', + minimum=1, + maximum=1, + ), + Element( + reference_designator='N710', + name='Ownership Code', + usage=USAGE_OPTIONAL, + element_type='ID', + minimum=1, + maximum=1, + ), + NotUsedElement(reference_designator='N711'), + NotUsedElement(reference_designator='N712'), + NotUsedElement(reference_designator='N713'), + NotUsedElement(reference_designator='N714'), + NotUsedElement(reference_designator='N715'), + NotUsedElement(reference_designator='N716'), + Element( + reference_designator='N717', + name='Weight Unit Code', + usage=USAGE_OPTIONAL, + element_type='ID', + minimum=1, + 
maximum=1, + ), + Element( + reference_designator='N718', + name='Equipment Number Check Digit', + usage=USAGE_OPTIONAL, + element_type='N0', + minimum=1, + maximum=1, + ), + NotUsedElement(reference_designator='N719'), + NotUsedElement(reference_designator='N720'), + NotUsedElement(reference_designator='N721'), + Element( + reference_designator='N722', + name='Equipment Type', + usage=USAGE_OPTIONAL, + element_type='ID', + minimum=4, + maximum=4, + ), + ) + + +class QtySegment(BaseSegment): + segment_id = 'QTY' + usage = 'O' + max_use = 1 + elements = ( + Element( + reference_designator='QTY01', + name='Quantity Qualifier', + usage=USAGE_MANDATORY, + element_type='ID', + minimum=2, + maximum=2, + ), + Element( + reference_designator='QTY02', + name='Quantity', + usage=USAGE_MANDATORY, + element_type='R', + minimum=1, + maximum=15, + ), + ) + + +class M7Segment(BaseSegment): + segment_id = 'M7' + usage = 'O' + max_use = 5 + elements = ( + Element( + reference_designator='M701', + name='Seal Number', + usage=USAGE_MANDATORY, + element_type='AN', + minimum=2, + maximum=15, + ), + Element( + reference_designator='M702', + name='Seal Number', + usage=USAGE_OPTIONAL, + element_type='AN', + minimum=2, + maximum=15, + ), + Element( + reference_designator='M703', + name='Seal Number', + usage=USAGE_OPTIONAL, + element_type='AN', + minimum=2, + maximum=15, + ), + NotUsedElement(reference_designator='M704'), + Element( + reference_designator='M705', + name='Entity Identifier Code', + usage=USAGE_OPTIONAL, + element_type='ID', + minimum=2, + maximum=3, + ), + ) + + +class W09Segment(BaseSegment): + segment_id = 'W09' + usage = 'O' + max_use = 1 + elements = ( + Element( + reference_designator='W0901', + name='Equipment Description Code', + usage=USAGE_MANDATORY, + element_type='ID', + minimum=2, + maximum=2, + ), + Element( + reference_designator='W0902', + name='Temperature', + usage=USAGE_CONDITIONAL, + element_type='R', + minimum=1, + maximum=4, + ), + Element( + 
reference_designator='W0903', + name='Unit or Basis for Measurement Code', + usage=USAGE_CONDITIONAL, + element_type='ID', + minimum=2, + maximum=2, + ), + NotUsedElement(reference_designator='W0904'), + Element( + reference_designator='W0905', + name='Unit or Basis for Measurement Code', + usage=USAGE_CONDITIONAL, + element_type='ID', + minimum=2, + maximum=2, + ), + Element( + reference_designator='W0906', + name='Free Form Message', + usage=USAGE_OPTIONAL, + element_type='AN', + minimum=1, + maximum=60, + ), + NotUsedElement(reference_designator='W0907'), + NotUsedElement(reference_designator='W0908'), + Element( + reference_designator='W0909', + name='Quantity', + usage=USAGE_OPTIONAL, + element_type='R', + minimum=1, + maximum=15, + ), + ) diff --git a/tests/fixtures/edi_304_grammar/n9.py b/tests/fixtures/edi_304_grammar/n9.py new file mode 100644 index 0000000..ff69705 --- /dev/null +++ b/tests/fixtures/edi_304_grammar/n9.py @@ -0,0 +1,26 @@ +from pyx12lib.core.grammar import Element +from pyx12lib.core.grammar.segment import USAGE_CONDITIONAL, USAGE_MANDATORY, BaseSegment + + +class N9Segment(BaseSegment): + segment_id = 'N9' + usage = 'M' + max_use = 100 + elements = ( + Element( + reference_designator='N901', + name='Reference Identification Qualifier', + usage=USAGE_MANDATORY, + element_type='ID', + minimum=2, + maximum=3, + ), + Element( + reference_designator='N902', + name='Reference Identification', + usage=USAGE_CONDITIONAL, + element_type='AN', + minimum=1, + maximum=35, + ), + ) diff --git a/tests/fixtures/edi_304_grammar/r2.py b/tests/fixtures/edi_304_grammar/r2.py new file mode 100644 index 0000000..d5d38ad --- /dev/null +++ b/tests/fixtures/edi_304_grammar/r2.py @@ -0,0 +1,43 @@ +from pyx12lib.core.grammar import Element, NotUsedElement +from pyx12lib.core.grammar.segment import USAGE_MANDATORY, USAGE_OPTIONAL, BaseSegment + + +class R2Segment(BaseSegment): + segment_id = 'R2' + usage = 'O' + max_use = 1 + elements = ( + Element( + 
reference_designator='R201', + name='Standard Carrier Alpha Code', + usage=USAGE_MANDATORY, + element_type='ID', + minimum=2, + maximum=4, + ), + Element( + reference_designator='R202', + name='Routing Sequence Code', + usage=USAGE_MANDATORY, + element_type='ID', + minimum=1, + maximum=2, + ), + NotUsedElement(reference_designator='R203'), + NotUsedElement(reference_designator='R204'), + NotUsedElement(reference_designator='R205'), + NotUsedElement(reference_designator='R206'), + NotUsedElement(reference_designator='R207'), + NotUsedElement(reference_designator='R208'), + NotUsedElement(reference_designator='R209'), + NotUsedElement(reference_designator='R210'), + NotUsedElement(reference_designator='R211'), + Element( + reference_designator='R212', + name='Type of Service Code', + usage=USAGE_OPTIONAL, + element_type='ID', + minimum=2, + maximum=2, + ), + ) diff --git a/tests/fixtures/edi_304_grammar/r4.py b/tests/fixtures/edi_304_grammar/r4.py new file mode 100644 index 0000000..b8b3439 --- /dev/null +++ b/tests/fixtures/edi_304_grammar/r4.py @@ -0,0 +1,60 @@ +from pyx12lib.core.grammar import Element, NotUsedElement +from pyx12lib.core.grammar.segment import USAGE_CONDITIONAL, USAGE_MANDATORY, USAGE_OPTIONAL, BaseSegment + + +class R4Segment(BaseSegment): + segment_id = 'R4' + usage = 'O' + max_use = 1 + elements = ( + Element( + reference_designator='R401', + name='Port or Terminal Function Code', + usage=USAGE_MANDATORY, + element_type='ID', + minimum=1, + maximum=1, + ), + Element( + reference_designator='R402', + name='Location Qualifier', + usage=USAGE_CONDITIONAL, + element_type='ID', + minimum=1, + maximum=2, + ), + Element( + reference_designator='R403', + name='Location Identifier', + usage=USAGE_CONDITIONAL, + element_type='AN', + minimum=1, + maximum=30, + ), + Element( + reference_designator='R404', + name='Port Name', + usage=USAGE_OPTIONAL, + element_type='AN', + minimum=2, + maximum=60, + ), + Element( + reference_designator='R405', + 
name='Country Code', + usage=USAGE_OPTIONAL, + element_type='ID', + minimum=2, + maximum=3, + ), + NotUsedElement(reference_designator='R406'), + NotUsedElement(reference_designator='R407'), + Element( + reference_designator='R408', + name='State or Province Code', + usage=USAGE_OPTIONAL, + element_type='ID', + minimum=2, + maximum=2, + ), + ) diff --git a/tests/fixtures/edi_304_grammar/v1.py b/tests/fixtures/edi_304_grammar/v1.py new file mode 100644 index 0000000..c160b84 --- /dev/null +++ b/tests/fixtures/edi_304_grammar/v1.py @@ -0,0 +1,53 @@ +from pyx12lib.core.grammar import Element, NotUsedElement +from pyx12lib.core.grammar.segment import USAGE_CONDITIONAL, USAGE_OPTIONAL, BaseSegment + + +class V1Segment(BaseSegment): + segment_id = 'V1' + usage = 'M' + max_use = 1 + elements = ( + Element( + reference_designator='V101', + name='Vessel Code', + usage=USAGE_CONDITIONAL, + element_type='ID', + minimum=1, + maximum=20, + ), + Element( + reference_designator='V102', + name='Vessel Name', + usage=USAGE_CONDITIONAL, + element_type='AN', + minimum=2, + maximum=28, + ), + NotUsedElement(reference_designator='V103'), + Element( + reference_designator='V104', + name='Flight/Voyage Number', + usage=USAGE_OPTIONAL, + element_type='AN', + minimum=2, + maximum=10, + ), + Element( + reference_designator='V105', + name='Standard Carrier Alpha Code', + usage=USAGE_OPTIONAL, + element_type='ID', + minimum=2, + maximum=4, + ), + NotUsedElement(reference_designator='V106'), + NotUsedElement(reference_designator='V107'), + Element( + reference_designator='V108', + name='Vessel Code Qualifier', + usage=USAGE_OPTIONAL, + element_type='ID', + minimum=1, + maximum=1, + ), + ) diff --git a/tests/fixtures/edi_315_grammar/__init__.py b/tests/fixtures/edi_315_grammar/__init__.py new file mode 100644 index 0000000..83d403f --- /dev/null +++ b/tests/fixtures/edi_315_grammar/__init__.py @@ -0,0 +1,4 @@ +from .b4 import B4Segment +from .n9 import N9Segment +from .q2 import Q2Segment +from 
.r4 import DTMSegment, R4Segment diff --git a/tests/fixtures/edi_315_grammar/b4.py b/tests/fixtures/edi_315_grammar/b4.py new file mode 100644 index 0000000..5193217 --- /dev/null +++ b/tests/fixtures/edi_315_grammar/b4.py @@ -0,0 +1,103 @@ +from pyx12lib.core.grammar import Element, NotUsedElement, element, segment + + +class B4Segment(segment.BaseSegment): + segment_id = 'B4' + usage = segment.USAGE_MANDATORY + max_use = 1 + elements = ( + NotUsedElement( + reference_designator='B401', + ), + NotUsedElement( + reference_designator='B402', + ), + Element( + reference_designator='B403', + name='Status Code', + usage=element.USAGE_OPTIONAL, + element_type=element.ELEMENT_TYPE_ID, + minimum=1, + maximum=2, + ), + Element( + reference_designator='B404', + name='Status Date', + usage=element.USAGE_OPTIONAL, + element_type=element.ELEMENT_TYPE_DATE, + minimum=8, + maximum=8, + ), + Element( + reference_designator='B405', + name='Status Time', + usage=element.USAGE_OPTIONAL, + element_type=element.ELEMENT_TYPE_TIME, + minimum=4, + maximum=4, + ), + Element( + reference_designator='B406', + name='Status Location', + usage=element.USAGE_OPTIONAL, + element_type=element.ELEMENT_TYPE_STRING, + minimum=3, + maximum=5, + ), + Element( + reference_designator='B407', + name='Transaction Equipment Initial', + usage=element.USAGE_MANDATORY, + element_type=element.ELEMENT_TYPE_STRING, + minimum=2, + maximum=4, + ), + Element( + reference_designator='B408', + name='Equipment Number', + usage=element.USAGE_CONDITIONAL, + element_type=element.ELEMENT_TYPE_STRING, + minimum=1, + maximum=10, + ), + Element( + reference_designator='B409', + name='Equipment Status Code', + usage=element.USAGE_OPTIONAL, + element_type=element.ELEMENT_TYPE_ID, + minimum=1, + maximum=2, + ), + Element( + reference_designator='B410', + name='Equipment Type', + usage=element.USAGE_OPTIONAL, + element_type=element.ELEMENT_TYPE_ID, + minimum=4, + maximum=4, + ), + Element( + reference_designator='B411', + 
name='Location Identifier', + usage=element.USAGE_CONDITIONAL, + element_type=element.ELEMENT_TYPE_STRING, + minimum=1, + maximum=30, + ), + Element( + reference_designator='B412', + name='Location Qualifier', + usage=element.USAGE_CONDITIONAL, + element_type=element.ELEMENT_TYPE_STRING, + minimum=1, + maximum=2, + ), + Element( + reference_designator='B413', + name='Equipment Number Check Digit', + usage=element.USAGE_OPTIONAL, + element_type=element.get_numeric_type(max_digits=0), + minimum=1, + maximum=1, + ), + ) diff --git a/tests/fixtures/edi_315_grammar/n9.py b/tests/fixtures/edi_315_grammar/n9.py new file mode 100644 index 0000000..cb2fe84 --- /dev/null +++ b/tests/fixtures/edi_315_grammar/n9.py @@ -0,0 +1,25 @@ +from pyx12lib.core.grammar import Element, element, segment + + +class N9Segment(segment.BaseSegment): + segment_id = 'N9' + usage = segment.USAGE_OPTIONAL + max_use = 30 + elements = ( + Element( + reference_designator='N901', + name='Reference Identification Qualifier', + usage=element.USAGE_MANDATORY, + element_type=element.ELEMENT_TYPE_ID, + minimum=2, + maximum=3, + ), + Element( + reference_designator='N902', + name='Reference Identification', + usage=element.USAGE_CONDITIONAL, + element_type=element.ELEMENT_TYPE_STRING, + minimum=1, + maximum=30, + ), + ) diff --git a/tests/fixtures/edi_315_grammar/q2.py b/tests/fixtures/edi_315_grammar/q2.py new file mode 100644 index 0000000..7c0eaf6 --- /dev/null +++ b/tests/fixtures/edi_315_grammar/q2.py @@ -0,0 +1,130 @@ +from pyx12lib.core.grammar import Element, NotUsedElement, element, segment + + +class Q2Segment(segment.BaseSegment): + segment_id = 'Q2' + usage = segment.USAGE_OPTIONAL + max_use = 1 + elements = ( + Element( + reference_designator='Q201', + name='Vessel Code', + usage=element.USAGE_OPTIONAL, + element_type=element.ELEMENT_TYPE_ID, + minimum=1, + maximum=7, + ), + Element( + reference_designator='Q202', + name='Country Code', + usage=element.USAGE_OPTIONAL, + 
element_type=element.ELEMENT_TYPE_ID, + minimum=2, + maximum=3, + ), + NotUsedElement(reference_designator='Q203'), + Element( + reference_designator='Q204', + name='Date', + usage=element.USAGE_OPTIONAL, + element_type=element.ELEMENT_TYPE_DATE, + minimum=8, + maximum=8, + ), + Element( + reference_designator='Q205', + name='Date', + usage=element.USAGE_OPTIONAL, + element_type=element.ELEMENT_TYPE_DATE, + minimum=8, + maximum=8, + ), + Element( + reference_designator='Q206', + name='Lading Quantity', + usage=element.USAGE_OPTIONAL, + element_type=element.get_numeric_type(max_digits=0), + minimum=1, + maximum=7, + ), + Element( + reference_designator='Q207', + name='Weight', + usage=element.USAGE_CONDITIONAL, + element_type=element.ELEMENT_TYPE_DECIMAL, + minimum=1, + maximum=10, + ), + Element( + reference_designator='Q208', + name='Weight Qualifier', + usage=element.USAGE_CONDITIONAL, + element_type=element.ELEMENT_TYPE_ID, + minimum=1, + maximum=2, + ), + Element( + reference_designator='Q209', + name='Flight/Voyage Number', + usage=element.USAGE_OPTIONAL, + element_type=element.ELEMENT_TYPE_STRING, + minimum=2, + maximum=10, + ), + Element( + reference_designator='Q210', + name='Reference Number Qualifier', + usage=element.USAGE_OPTIONAL, + element_type=element.ELEMENT_TYPE_ID, + minimum=2, + maximum=2, + ), + Element( + reference_designator='Q211', + name='Reference Number', + usage=element.USAGE_CONDITIONAL, + element_type=element.ELEMENT_TYPE_STRING, + minimum=1, + maximum=30, + ), + Element( + reference_designator='Q212', + name='Vessel Code Qualifier', + usage=element.USAGE_OPTIONAL, + element_type=element.ELEMENT_TYPE_ID, + minimum=1, + maximum=1, + ), + Element( + reference_designator='Q213', + name='Vessel name', + usage=element.USAGE_OPTIONAL, + element_type=element.ELEMENT_TYPE_STRING, + minimum=2, + maximum=28, + ), + Element( + reference_designator='Q214', + name='Volume', + usage=element.USAGE_CONDITIONAL, + 
element_type=element.ELEMENT_TYPE_DECIMAL, + minimum=1, + maximum=8, + ), + Element( + reference_designator='Q215', + name='Volume Unit Qualifier', + usage=element.USAGE_CONDITIONAL, + element_type=element.ELEMENT_TYPE_ID, + minimum=1, + maximum=1, + ), + Element( + reference_designator='Q216', + name='Weight Unit Code', + usage=element.USAGE_CONDITIONAL, + element_type=element.ELEMENT_TYPE_ID, + minimum=1, + maximum=1, + ), + ) diff --git a/tests/fixtures/edi_315_grammar/r4.py b/tests/fixtures/edi_315_grammar/r4.py new file mode 100644 index 0000000..7bfdfcc --- /dev/null +++ b/tests/fixtures/edi_315_grammar/r4.py @@ -0,0 +1,106 @@ +from pyx12lib.core.grammar import Element, NotUsedElement, element, segment + + +class R4Segment(segment.BaseSegment): + segment_id = 'R4' + usage = segment.USAGE_OPTIONAL + max_use = 12 + elements = ( + Element( + reference_designator='R401', + name='Port Function Code', + usage=element.USAGE_MANDATORY, + element_type=element.ELEMENT_TYPE_ID, + minimum=1, + maximum=1, + ), + Element( + reference_designator='R402', + name='Location Qualifier', + usage=element.USAGE_MANDATORY, + element_type=element.ELEMENT_TYPE_ID, + minimum=1, + maximum=2, + ), + Element( + reference_designator='R403', + name='Location Identifier', + usage=element.USAGE_CONDITIONAL, + element_type=element.ELEMENT_TYPE_STRING, + minimum=1, + maximum=30, + ), + Element( + reference_designator='R404', + name='Port name', + usage=element.USAGE_OPTIONAL, + element_type=element.ELEMENT_TYPE_STRING, + minimum=2, + maximum=24, + ), + Element( + reference_designator='R405', + name='Country Code', + usage=element.USAGE_OPTIONAL, + element_type=element.ELEMENT_TYPE_ID, + minimum=2, + maximum=3, + ), + Element( + reference_designator='R406', + name='Terminal name', + usage=element.USAGE_OPTIONAL, + element_type=element.ELEMENT_TYPE_STRING, + minimum=2, + maximum=30, + ), + NotUsedElement(reference_designator='R407'), + Element( + reference_designator='R408', + name='State or 
Province Code', + usage=element.USAGE_OPTIONAL, + element_type=element.ELEMENT_TYPE_ID, + minimum=2, + maximum=2, + ), + ) + + +class DTMSegment(segment.BaseSegment): + segment_id = 'DTM' + usage = segment.USAGE_OPTIONAL + max_use = 15 + elements = ( + Element( + reference_designator='DTM01', + name='Date/Time Qualifier', + usage=element.USAGE_MANDATORY, + element_type=element.ELEMENT_TYPE_ID, + minimum=3, + maximum=3, + ), + Element( + reference_designator='DTM02', + name='Date', + usage=element.USAGE_CONDITIONAL, + element_type=element.ELEMENT_TYPE_DATE, + minimum=8, + maximum=8, + ), + Element( + reference_designator='DTM03', + name='Time', + usage=element.USAGE_CONDITIONAL, + element_type=element.ELEMENT_TYPE_TIME, + minimum=4, + maximum=8, + ), + Element( + reference_designator='DTM04', + name='Time Code', + usage=element.USAGE_OPTIONAL, + element_type=element.ELEMENT_TYPE_ID, + minimum=2, + maximum=2, + ), + ) diff --git a/tests/test_edi304_validation.py b/tests/test_edi304_validation.py new file mode 100644 index 0000000..b0d5c73 --- /dev/null +++ b/tests/test_edi304_validation.py @@ -0,0 +1,983 @@ +""" +Validation tests using EDI 304 grammar definitions. + +These tests parse actual rendered X12 EDI 304 strings using grammar +definitions copied from a real EDI 304 (Shipping Instruction) implementation +to verify the parser works with production-grade segment definitions. 
+ +Grammar source: fms/app/edi/x12/implementations/cargo_smart +Segment types: B2, B2A, K1, L0, L3, PWK, SAC, LX, L4, L5, M0, + N1, N2, N3, N4, G61, N7, QTY, M7, W09, N9, R2, R4, V1 +""" +import json +from unittest import TestCase + +from pyx12lib.core.parser import SegmentParser, X12Parser +from pyx12lib.core.registry import GrammarRegistry, create_default_registry + +from tests.fixtures.edi_304_grammar import ( + B2Segment, B2aSegment, K1Segment, L3Segment, PWKSegment, SACSegment, + LxSegment, L0Segment, L4Segment, L5Segment, + l0_L0Segment, + M0Segment, + N1Segment, N2Segment, N3Segment, N4Segment, G61Segment, + N7Segment, QtySegment, M7Segment, W09Segment, + N9Segment, R2Segment, R4Segment, V1Segment, +) + + +def _make_edi304_registry(): + """Create a registry with all EDI 304 + envelope segments.""" + registry = create_default_registry() + registry.register(B2Segment) + registry.register(B2aSegment) + registry.register(K1Segment) + registry.register(L3Segment) + registry.register(PWKSegment) + registry.register(SACSegment) + registry.register(LxSegment) + registry.register(L0Segment) + registry.register(L4Segment) + registry.register(L5Segment) + registry.register(M0Segment) + registry.register(N1Segment) + registry.register(N2Segment) + registry.register(N3Segment) + registry.register(N4Segment) + registry.register(G61Segment) + registry.register(N7Segment) + registry.register(QtySegment) + registry.register(M7Segment) + registry.register(W09Segment) + registry.register(N9Segment) + registry.register(R2Segment) + registry.register(R4Segment) + registry.register(V1Segment) + return registry + + +class TestB2SegmentParsing(TestCase): + """Parse B2 segments (Beginning of Shipment).""" + + def test_parse_b2_full_data(self): + x12 = 'B2*YY*HLCU**FILE123456**PP**3~' + + parser = SegmentParser(x12, grammar=B2Segment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'B2') + ref_map = {e['reference_designator']: e['value'] for e in 
result['elements']} + self.assertEqual(ref_map['B201'], 'YY') + self.assertEqual(ref_map['B202'], 'HLCU') + # B203 is NotUsed -> skipped + self.assertNotIn('B203', ref_map) + self.assertEqual(ref_map['B204'], 'FILE123456') + # B205 is NotUsed -> skipped + self.assertNotIn('B205', ref_map) + self.assertEqual(ref_map['B206'], 'PP') + # B207 is NotUsed -> skipped + self.assertNotIn('B207', ref_map) + self.assertEqual(ref_map['B208'], '3') + + def test_parse_b2_no_tariff(self): + x12 = 'B2**HDMU**SI-2024-001**CC**1~' + + parser = SegmentParser(x12, grammar=B2Segment) + result = parser.to_dict() + + ref_map = {e['reference_designator']: e['value'] for e in result['elements']} + self.assertEqual(ref_map['B201'], '') + self.assertEqual(ref_map['B202'], 'HDMU') + self.assertEqual(ref_map['B204'], 'SI-2024-001') + self.assertEqual(ref_map['B206'], 'CC') + self.assertEqual(ref_map['B208'], '1') + + def test_parse_b2_no_equipment_count(self): + x12 = 'B2**MAEU**MAEU12345**PP~' + + parser = SegmentParser(x12, grammar=B2Segment) + result = parser.to_dict() + + ref_map = {e['reference_designator']: e['value'] for e in result['elements']} + self.assertEqual(ref_map['B202'], 'MAEU') + self.assertEqual(ref_map['B208'], '') + + def test_b2_validation(self): + x12 = 'B2*YY*HLCU**FILE123456**PP**3~' + parser = SegmentParser(x12, grammar=B2Segment) + self.assertTrue(parser.parse().is_valid()) + + +class TestB2aSegmentParsing(TestCase): + """Parse B2A segments (Transaction Set Purpose Code).""" + + def test_parse_b2a_original(self): + x12 = 'B2A*00~' + + parser = SegmentParser(x12, grammar=B2aSegment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'B2A') + self.assertEqual(result['elements'][0]['value'], '00') + self.assertEqual(result['elements'][0]['reference_designator'], 'B2A01') + + def test_parse_b2a_replacement(self): + x12 = 'B2A*05~' + + parser = SegmentParser(x12, grammar=B2aSegment) + result = parser.to_dict() + + 
self.assertEqual(result['elements'][0]['value'], '05') + + def test_b2a_validation(self): + x12 = 'B2A*00~' + parser = SegmentParser(x12, grammar=B2aSegment) + self.assertTrue(parser.parse().is_valid()) + + +class TestN9SegmentParsing304(TestCase): + """Parse N9 segments (Reference Identification) for 304.""" + + def test_parse_n9_bl_number(self): + x12 = 'N9*BM*HLCUSHA2209QSEA1~' + + parser = SegmentParser(x12, grammar=N9Segment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'N9') + self.assertEqual(result['elements'][0]['value'], 'BM') + self.assertEqual(result['elements'][1]['value'], 'HLCUSHA2209QSEA1') + + def test_parse_n9_booking(self): + x12 = 'N9*BN*BKG2024001~' + + parser = SegmentParser(x12, grammar=N9Segment) + result = parser.to_dict() + + self.assertEqual(result['elements'][0]['value'], 'BN') + self.assertEqual(result['elements'][1]['value'], 'BKG2024001') + + def test_parse_n9_contract(self): + x12 = 'N9*CT*CONTRACT-001~' + + parser = SegmentParser(x12, grammar=N9Segment) + result = parser.to_dict() + + self.assertEqual(result['elements'][0]['value'], 'CT') + self.assertEqual(result['elements'][1]['value'], 'CONTRACT-001') + + def test_parse_n9_forwarder_ref(self): + x12 = 'N9*FN*FWD-REF-123~' + + parser = SegmentParser(x12, grammar=N9Segment) + result = parser.to_dict() + + self.assertEqual(result['elements'][0]['value'], 'FN') + + def test_parse_n9_si_file_number(self): + x12 = 'N9*SI*FILENO-456~' + + parser = SegmentParser(x12, grammar=N9Segment) + result = parser.to_dict() + + self.assertEqual(result['elements'][0]['value'], 'SI') + self.assertEqual(result['elements'][1]['value'], 'FILENO-456') + + def test_parse_n9_itn_number(self): + x12 = 'N9*TN*X20240101234567~' + + parser = SegmentParser(x12, grammar=N9Segment) + result = parser.to_dict() + + self.assertEqual(result['elements'][0]['value'], 'TN') + + def test_n9_validation(self): + x12 = 'N9*BM*HLCUSHA2209QSEA1~' + parser = SegmentParser(x12, grammar=N9Segment) + 
self.assertTrue(parser.parse().is_valid()) + + +class TestV1SegmentParsing(TestCase): + """Parse V1 segments (Vessel Information).""" + + def test_parse_v1_full_data(self): + x12 = 'V1*9834276*HYUNDAI FORWARD**0024W*HDMU***L~' + + parser = SegmentParser(x12, grammar=V1Segment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'V1') + ref_map = {e['reference_designator']: e['value'] for e in result['elements']} + self.assertEqual(ref_map['V101'], '9834276') + self.assertEqual(ref_map['V102'], 'HYUNDAI FORWARD') + self.assertNotIn('V103', ref_map) + self.assertEqual(ref_map['V104'], '0024W') + self.assertEqual(ref_map['V105'], 'HDMU') + self.assertNotIn('V106', ref_map) + self.assertNotIn('V107', ref_map) + self.assertEqual(ref_map['V108'], 'L') + + def test_parse_v1_no_voyage(self): + x12 = 'V1*9834276*EVER GLORY***EGLV***L~' + + parser = SegmentParser(x12, grammar=V1Segment) + result = parser.to_dict() + + ref_map = {e['reference_designator']: e['value'] for e in result['elements']} + self.assertEqual(ref_map['V102'], 'EVER GLORY') + self.assertEqual(ref_map['V104'], '') + self.assertEqual(ref_map['V105'], 'EGLV') + + def test_parse_v1_minimal(self): + x12 = 'V1**MSC AURORA~' + + parser = SegmentParser(x12, grammar=V1Segment) + result = parser.to_dict() + + ref_map = {e['reference_designator']: e['value'] for e in result['elements']} + self.assertEqual(ref_map['V101'], '') + self.assertEqual(ref_map['V102'], 'MSC AURORA') + + def test_v1_validation(self): + x12 = 'V1*9834276*HYUNDAI FORWARD**0024W*HDMU***L~' + parser = SegmentParser(x12, grammar=V1Segment) + self.assertTrue(parser.parse().is_valid()) + + +class TestM0SegmentParsing(TestCase): + """Parse M0 segments (Letter of Credit).""" + + def test_parse_m0_full(self): + x12 = 'M0*LC123456789*20231215*20240115~' + + parser = SegmentParser(x12, grammar=M0Segment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'M0') + self.assertEqual(result['elements'][0]['value'], 
'LC123456789') + self.assertEqual(result['elements'][1]['value'], '20231215') + self.assertEqual(result['elements'][2]['value'], '20240115') + + def test_parse_m0_no_dates(self): + x12 = 'M0*LC999~' + + parser = SegmentParser(x12, grammar=M0Segment) + result = parser.to_dict() + + self.assertEqual(result['elements'][0]['value'], 'LC999') + self.assertEqual(result['elements'][1]['value'], '') + self.assertEqual(result['elements'][2]['value'], '') + + def test_m0_validation(self): + x12 = 'M0*LC123456789*20231215*20240115~' + parser = SegmentParser(x12, grammar=M0Segment) + self.assertTrue(parser.parse().is_valid()) + + +class TestN1LoopSegmentsParsing(TestCase): + """Parse N1, N2, N3, N4, G61 segments (Trade Partner Loop).""" + + def test_parse_n1_shipper(self): + x12 = 'N1*SH*ACME TRADING CO*25*123456789~' + + parser = SegmentParser(x12, grammar=N1Segment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'N1') + self.assertEqual(result['elements'][0]['value'], 'SH') + self.assertEqual(result['elements'][1]['value'], 'ACME TRADING CO') + self.assertEqual(result['elements'][2]['value'], '25') + self.assertEqual(result['elements'][3]['value'], '123456789') + + def test_parse_n1_consignee(self): + x12 = 'N1*CN*BUYER CORP~' + + parser = SegmentParser(x12, grammar=N1Segment) + result = parser.to_dict() + + self.assertEqual(result['elements'][0]['value'], 'CN') + self.assertEqual(result['elements'][1]['value'], 'BUYER CORP') + self.assertEqual(result['elements'][2]['value'], '') + self.assertEqual(result['elements'][3]['value'], '') + + def test_parse_n1_carrier(self): + x12 = 'N1*CA*HAPAG LLOYD~' + + parser = SegmentParser(x12, grammar=N1Segment) + result = parser.to_dict() + + self.assertEqual(result['elements'][0]['value'], 'CA') + + def test_parse_n2_additional_name(self): + x12 = 'N2*ADDITIONAL NAME LINE 1*LINE 2~' + + parser = SegmentParser(x12, grammar=N2Segment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'N2') + 
self.assertEqual(result['elements'][0]['value'], 'ADDITIONAL NAME LINE 1') + self.assertEqual(result['elements'][1]['value'], 'LINE 2') + + def test_parse_n3_address(self): + x12 = 'N3*123 MAIN STREET*SUITE 100~' + + parser = SegmentParser(x12, grammar=N3Segment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'N3') + self.assertEqual(result['elements'][0]['value'], '123 MAIN STREET') + self.assertEqual(result['elements'][1]['value'], 'SUITE 100') + + def test_parse_n4_full_location(self): + x12 = 'N4*SHANGHAI*SH*200000*CN~' + + parser = SegmentParser(x12, grammar=N4Segment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'N4') + self.assertEqual(result['elements'][0]['value'], 'SHANGHAI') + self.assertEqual(result['elements'][1]['value'], 'SH') + self.assertEqual(result['elements'][2]['value'], '200000') + self.assertEqual(result['elements'][3]['value'], 'CN') + + def test_parse_n4_with_location_qualifier(self): + x12 = 'N4*LOS ANGELES*CA*90001*US*UN*USLAX~' + + parser = SegmentParser(x12, grammar=N4Segment) + result = parser.to_dict() + + ref_map = {e['reference_designator']: e['value'] for e in result['elements']} + self.assertEqual(ref_map['N405'], 'UN') + self.assertEqual(ref_map['N406'], 'USLAX') + + def test_parse_g61_contact(self): + x12 = 'G61*IC*JOHN DOE*TE*+86-21-12345678~' + + parser = SegmentParser(x12, grammar=G61Segment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'G61') + self.assertEqual(result['elements'][0]['value'], 'IC') + self.assertEqual(result['elements'][1]['value'], 'JOHN DOE') + self.assertEqual(result['elements'][2]['value'], 'TE') + self.assertEqual(result['elements'][3]['value'], '+86-21-12345678') + + def test_n1_validation(self): + x12 = 'N1*SH*ACME TRADING CO*25*123456789~' + parser = SegmentParser(x12, grammar=N1Segment) + self.assertTrue(parser.parse().is_valid()) + + def test_g61_validation(self): + x12 = 'G61*IC*JOHN DOE*TE*+86-21-12345678~' + parser = 
SegmentParser(x12, grammar=G61Segment) + self.assertTrue(parser.parse().is_valid()) + + +class TestR4SegmentParsing304(TestCase): + """Parse R4 segments (Port or Terminal) for 304.""" + + def test_parse_r4_port_of_receipt(self): + x12 = 'R4*R*UN*CNSHA*SHANGHAI*CN~' + + parser = SegmentParser(x12, grammar=R4Segment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'R4') + ref_map = {e['reference_designator']: e['value'] for e in result['elements']} + self.assertEqual(ref_map['R401'], 'R') + self.assertEqual(ref_map['R402'], 'UN') + self.assertEqual(ref_map['R403'], 'CNSHA') + self.assertEqual(ref_map['R404'], 'SHANGHAI') + self.assertEqual(ref_map['R405'], 'CN') + self.assertNotIn('R406', ref_map) + self.assertNotIn('R407', ref_map) + + def test_parse_r4_port_of_loading(self): + x12 = 'R4*L*UN*CNSHA*SHANGHAI*CN~' + + parser = SegmentParser(x12, grammar=R4Segment) + result = parser.to_dict() + + ref_map = {e['reference_designator']: e['value'] for e in result['elements']} + self.assertEqual(ref_map['R401'], 'L') + + def test_parse_r4_port_of_discharge(self): + x12 = 'R4*D*UN*USLAX*LOS ANGELES*US***CA~' + + parser = SegmentParser(x12, grammar=R4Segment) + result = parser.to_dict() + + ref_map = {e['reference_designator']: e['value'] for e in result['elements']} + self.assertEqual(ref_map['R401'], 'D') + self.assertEqual(ref_map['R403'], 'USLAX') + self.assertEqual(ref_map['R404'], 'LOS ANGELES') + self.assertEqual(ref_map['R408'], 'CA') + + def test_parse_r4_delivery(self): + x12 = 'R4*E*UN*USLAX*LOS ANGELES*US~' + + parser = SegmentParser(x12, grammar=R4Segment) + result = parser.to_dict() + + ref_map = {e['reference_designator']: e['value'] for e in result['elements']} + self.assertEqual(ref_map['R401'], 'E') + + def test_parse_r4_bl_release_office(self): + x12 = 'R4*K*UN*CNSHA*SHANGHAI*CN~' + + parser = SegmentParser(x12, grammar=R4Segment) + result = parser.to_dict() + + ref_map = {e['reference_designator']: e['value'] for e in 
result['elements']} + self.assertEqual(ref_map['R401'], 'K') + + def test_r4_validation(self): + x12 = 'R4*R*UN*CNSHA*SHANGHAI*CN~' + parser = SegmentParser(x12, grammar=R4Segment) + self.assertTrue(parser.parse().is_valid()) + + +class TestR2SegmentParsing(TestCase): + """Parse R2 segments (Route Information) with many NotUsed.""" + + def test_parse_r2_full(self): + x12 = 'R2*HDMU*B**********CY~' + + parser = SegmentParser(x12, grammar=R2Segment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'R2') + ref_map = {e['reference_designator']: e['value'] for e in result['elements']} + self.assertEqual(ref_map['R201'], 'HDMU') + self.assertEqual(ref_map['R202'], 'B') + # R203-R211 are NotUsed -> skipped + for i in range(3, 12): + self.assertNotIn('R2%02d' % i, ref_map) + self.assertEqual(ref_map['R212'], 'CY') + + def test_parse_r2_no_service_type(self): + x12 = 'R2*MAEU*B~' + + parser = SegmentParser(x12, grammar=R2Segment) + result = parser.to_dict() + + ref_map = {e['reference_designator']: e['value'] for e in result['elements']} + self.assertEqual(ref_map['R201'], 'MAEU') + self.assertEqual(ref_map['R202'], 'B') + self.assertEqual(ref_map['R212'], '') + + def test_r2_validation(self): + x12 = 'R2*HDMU*B**********CY~' + parser = SegmentParser(x12, grammar=R2Segment) + self.assertTrue(parser.parse().is_valid()) + + +class TestN7SegmentParsing(TestCase): + """Parse N7 segments (Equipment Details) with many NotUsed.""" + + def test_parse_n7_full_container(self): + x12 = 'N7*HLCU*3456789*15000*G*4200***28*E*S*******K*7****22GP~' + + parser = SegmentParser(x12, grammar=N7Segment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'N7') + ref_map = {e['reference_designator']: e['value'] for e in result['elements']} + self.assertEqual(ref_map['N701'], 'HLCU') + self.assertEqual(ref_map['N702'], '3456789') + self.assertEqual(ref_map['N703'], '15000') + self.assertEqual(ref_map['N704'], 'G') + self.assertEqual(ref_map['N705'], 
'4200') + self.assertNotIn('N706', ref_map) + self.assertNotIn('N707', ref_map) + self.assertEqual(ref_map['N708'], '28') + self.assertEqual(ref_map['N709'], 'E') + self.assertEqual(ref_map['N710'], 'S') + for i in range(11, 17): + self.assertNotIn('N7%d' % i, ref_map) + self.assertEqual(ref_map['N717'], 'K') + self.assertEqual(ref_map['N718'], '7') + self.assertNotIn('N719', ref_map) + self.assertNotIn('N720', ref_map) + self.assertNotIn('N721', ref_map) + self.assertEqual(ref_map['N722'], '22GP') + + def test_parse_n7_minimal(self): + x12 = 'N7**1234567~' + + parser = SegmentParser(x12, grammar=N7Segment) + result = parser.to_dict() + + ref_map = {e['reference_designator']: e['value'] for e in result['elements']} + self.assertEqual(ref_map['N701'], '') + self.assertEqual(ref_map['N702'], '1234567') + + def test_n7_validation(self): + x12 = 'N7*HLCU*3456789*15000*G*4200***28*E*S*******K*7****22GP~' + parser = SegmentParser(x12, grammar=N7Segment) + self.assertTrue(parser.parse().is_valid()) + + +class TestQtySegmentParsing(TestCase): + """Parse QTY segments (Quantity).""" + + def test_parse_qty(self): + x12 = 'QTY*38*100~' + + parser = SegmentParser(x12, grammar=QtySegment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'QTY') + self.assertEqual(result['elements'][0]['value'], '38') + self.assertEqual(result['elements'][1]['value'], '100') + + def test_qty_validation(self): + x12 = 'QTY*38*100~' + parser = SegmentParser(x12, grammar=QtySegment) + self.assertTrue(parser.parse().is_valid()) + + +class TestM7SegmentParsing(TestCase): + """Parse M7 segments (Seal Information).""" + + def test_parse_m7_full(self): + x12 = 'M7*SEAL001*SEAL002*SEAL003**CA~' + + parser = SegmentParser(x12, grammar=M7Segment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'M7') + ref_map = {e['reference_designator']: e['value'] for e in result['elements']} + self.assertEqual(ref_map['M701'], 'SEAL001') + self.assertEqual(ref_map['M702'], 
'SEAL002') + self.assertEqual(ref_map['M703'], 'SEAL003') + self.assertNotIn('M704', ref_map) + self.assertEqual(ref_map['M705'], 'CA') + + def test_parse_m7_single_seal(self): + x12 = 'M7*SL12345~' + + parser = SegmentParser(x12, grammar=M7Segment) + result = parser.to_dict() + + self.assertEqual(result['elements'][0]['value'], 'SL12345') + + def test_m7_validation(self): + x12 = 'M7*SEAL001*SEAL002*SEAL003**CA~' + parser = SegmentParser(x12, grammar=M7Segment) + self.assertTrue(parser.parse().is_valid()) + + +class TestW09SegmentParsing(TestCase): + """Parse W09 segments (Equipment Characteristics).""" + + def test_parse_w09_reefer(self): + x12 = 'W09*RC*-18*FA**CE*FROZEN GOODS***500~' + + parser = SegmentParser(x12, grammar=W09Segment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'W09') + ref_map = {e['reference_designator']: e['value'] for e in result['elements']} + self.assertEqual(ref_map['W0901'], 'RC') + self.assertEqual(ref_map['W0902'], '-18') + self.assertEqual(ref_map['W0903'], 'FA') + self.assertNotIn('W0904', ref_map) + self.assertEqual(ref_map['W0905'], 'CE') + self.assertEqual(ref_map['W0906'], 'FROZEN GOODS') + self.assertNotIn('W0907', ref_map) + self.assertNotIn('W0908', ref_map) + self.assertEqual(ref_map['W0909'], '500') + + def test_parse_w09_dry(self): + x12 = 'W09*CN~' + + parser = SegmentParser(x12, grammar=W09Segment) + result = parser.to_dict() + + ref_map = {e['reference_designator']: e['value'] for e in result['elements']} + self.assertEqual(ref_map['W0901'], 'CN') + + def test_w09_validation(self): + x12 = 'W09*RC*-18*FA**CE*FROZEN GOODS***500~' + parser = SegmentParser(x12, grammar=W09Segment) + self.assertTrue(parser.parse().is_valid()) + + +class TestLxLoopSegmentsParsing(TestCase): + """Parse LX, L0 (lx variant), L4, L5 segments.""" + + def test_parse_lx(self): + x12 = 'LX*1~' + + parser = SegmentParser(x12, grammar=LxSegment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'LX') + 
self.assertEqual(result['elements'][0]['value'], '1') + + def test_parse_l0_lx_variant(self): + """L0 from lx.py context (with NotUsed positions).""" + x12 = 'L0*1***5000*G*28*E*100*CTN**K***PLT~' + + parser = SegmentParser(x12, grammar=L0Segment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'L0') + ref_map = {e['reference_designator']: e['value'] for e in result['elements']} + self.assertEqual(ref_map['L001'], '1') + self.assertNotIn('L002', ref_map) + self.assertNotIn('L003', ref_map) + self.assertEqual(ref_map['L004'], '5000') + self.assertEqual(ref_map['L005'], 'G') + self.assertEqual(ref_map['L006'], '28') + self.assertEqual(ref_map['L007'], 'E') + self.assertEqual(ref_map['L008'], '100') + self.assertEqual(ref_map['L009'], 'CTN') + self.assertNotIn('L010', ref_map) + self.assertEqual(ref_map['L011'], 'K') + self.assertNotIn('L012', ref_map) + self.assertNotIn('L013', ref_map) + self.assertEqual(ref_map['L014'], 'PLT') + + def test_parse_l0_standalone_variant(self): + """L0 from l0.py (no NotUsed, sequential elements).""" + x12 = 'L0*1*5000*G*28*E*100*CTN**K*PLT~' + + parser = SegmentParser(x12, grammar=l0_L0Segment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'L0') + ref_map = {e['reference_designator']: e['value'] for e in result['elements']} + self.assertEqual(ref_map['L001'], '1') + self.assertEqual(ref_map['L004'], '5000') + self.assertEqual(ref_map['L005'], 'G') + self.assertEqual(ref_map['L006'], '28') + self.assertEqual(ref_map['L007'], 'E') + self.assertEqual(ref_map['L008'], '100') + self.assertEqual(ref_map['L009'], 'CTN') + self.assertEqual(ref_map['L010'], '') + self.assertEqual(ref_map['L011'], 'K') + self.assertEqual(ref_map['L014'], 'PLT') + + def test_parse_l4_measurement(self): + x12 = 'L4*120*240*260*E~' + + parser = SegmentParser(x12, grammar=L4Segment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'L4') + self.assertEqual(result['elements'][0]['value'], '120') + 
self.assertEqual(result['elements'][1]['value'], '240') + self.assertEqual(result['elements'][2]['value'], '260') + self.assertEqual(result['elements'][3]['value'], 'E') + + def test_parse_l5_full(self): + x12 = 'L5*1*ELECTRONIC GOODS*8471300000*T**MARK123~' + + parser = SegmentParser(x12, grammar=L5Segment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'L5') + ref_map = {e['reference_designator']: e['value'] for e in result['elements']} + self.assertEqual(ref_map['L501'], '1') + self.assertEqual(ref_map['L502'], 'ELECTRONIC GOODS') + self.assertEqual(ref_map['L503'], '8471300000') + self.assertEqual(ref_map['L504'], 'T') + self.assertNotIn('L505', ref_map) + self.assertEqual(ref_map['L506'], 'MARK123') + + def test_parse_l5_no_hts(self): + x12 = 'L5*1*GARMENTS~' + + parser = SegmentParser(x12, grammar=L5Segment) + result = parser.to_dict() + + ref_map = {e['reference_designator']: e['value'] for e in result['elements']} + self.assertEqual(ref_map['L502'], 'GARMENTS') + self.assertEqual(ref_map['L503'], '') + + def test_l0_validation(self): + x12 = 'L0*1***5000*G*28*E*100*CTN**K***PLT~' + parser = SegmentParser(x12, grammar=L0Segment) + self.assertTrue(parser.parse().is_valid()) + + def test_l5_validation(self): + x12 = 'L5*1*ELECTRONIC GOODS*8471300000*T**MARK123~' + parser = SegmentParser(x12, grammar=L5Segment) + self.assertTrue(parser.parse().is_valid()) + + +class TestL3LoopSegmentsParsing(TestCase): + """Parse L3, PWK, SAC segments (Total / Paperwork / Charges).""" + + def test_parse_l3_totals(self): + x12 = 'L3*25000*G*******150*E*100*K~' + + parser = SegmentParser(x12, grammar=L3Segment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'L3') + ref_map = {e['reference_designator']: e['value'] for e in result['elements']} + self.assertEqual(ref_map['L301'], '25000') + self.assertEqual(ref_map['L302'], 'G') + # L303-L308 are NotUsed -> skipped + for i in range(3, 9): + self.assertNotIn('L3%02d' % i, ref_map) + 
self.assertEqual(ref_map['L309'], '150') + self.assertEqual(ref_map['L310'], 'E') + self.assertEqual(ref_map['L311'], '100') + self.assertEqual(ref_map['L312'], 'K') + + def test_parse_pwk_paperwork(self): + x12 = 'PWK*BL*FX*3****ORIGINAL BILL OF LADING~' + + parser = SegmentParser(x12, grammar=PWKSegment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'PWK') + ref_map = {e['reference_designator']: e['value'] for e in result['elements']} + self.assertEqual(ref_map['PWK01'], 'BL') + self.assertEqual(ref_map['PWK02'], 'FX') + self.assertEqual(ref_map['PWK03'], '3') + self.assertNotIn('PWK04', ref_map) + self.assertNotIn('PWK05', ref_map) + self.assertNotIn('PWK06', ref_map) + self.assertEqual(ref_map['PWK07'], 'ORIGINAL BILL OF LADING') + + def test_parse_sac_charges(self): + x12 = 'SAC*C*D240**********06~' + + parser = SegmentParser(x12, grammar=SACSegment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'SAC') + ref_map = {e['reference_designator']: e['value'] for e in result['elements']} + self.assertEqual(ref_map['SAC01'], 'C') + self.assertEqual(ref_map['SAC02'], 'D240') + # SAC03-SAC11 are NotUsed -> skipped + for i in range(3, 12): + self.assertNotIn('SAC%02d' % i, ref_map) + self.assertEqual(ref_map['SAC12'], '06') + + def test_parse_k1_remarks(self): + x12 = 'K1*HANDLE WITH CARE*FRAGILE~' + + parser = SegmentParser(x12, grammar=K1Segment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'K1') + self.assertEqual(result['elements'][0]['value'], 'HANDLE WITH CARE') + self.assertEqual(result['elements'][1]['value'], 'FRAGILE') + + def test_l3_validation(self): + x12 = 'L3*25000*G*******150*E*100*K~' + parser = SegmentParser(x12, grammar=L3Segment) + self.assertTrue(parser.parse().is_valid()) + + def test_sac_validation(self): + x12 = 'SAC*C*D240**********06~' + parser = SegmentParser(x12, grammar=SACSegment) + self.assertTrue(parser.parse().is_valid()) + + +class 
TestFullEdi304DocumentParsing(TestCase): + """Parse a complete EDI 304 document using X12Parser with all grammars.""" + + FULL_EDI_304 = ( + 'ISA*00* *00* *ZZ*GOFREIGHT *ZZ*CARGOSMART *240115*1030*U*00401*000000001*0*P*^~' + 'GS*SO*GOFREIGHT*CARGOSMART*20240115*1030*1*X*004010~' + 'ST*304*0001~' + 'B2**HDMU**SI-2024-001**PP**2~' + 'B2A*00~' + 'N9*BM*HDMUSHA2401QSEA1~' + 'N9*BN*BKG2024001~' + 'N9*CT*CONTRACT-001~' + 'N9*SI*FILENO-456~' + 'V1*9834276*HYUNDAI FORWARD**0024W*HDMU***L~' + 'N1*SH*ACME TRADING CO*25*123456789~' + 'N3*123 MAIN STREET*SUITE 100~' + 'N4*SHANGHAI*SH*200000*CN~' + 'G61*IC*JOHN DOE*TE*+86-21-12345678~' + 'N1*CN*BUYER CORP~' + 'N3*456 HARBOR DRIVE~' + 'N4*LOS ANGELES*CA*90001*US~' + 'N1*CA*HAPAG LLOYD~' + 'N1*FW*GOFREIGHT INC~' + 'R4*R*UN*CNSHA*SHANGHAI*CN~' + 'R4*L*UN*CNSHA*SHANGHAI*CN~' + 'R4*D*UN*USLAX*LOS ANGELES*US~' + 'R4*E*UN*USLAX*LOS ANGELES*US~' + 'R2*HDMU*B**********CY~' + 'LX*1~' + 'N7*HLCU*3456789*15000*G*4200***28*E*S*******K*7****22GP~' + 'QTY*38*100~' + 'M7*SEAL001**~' + 'L5*1*ELECTRONIC GOODS*8471300000*T**MARK123~' + 'LX*2~' + 'N7*HDMU*7654321*18000*G*4500***33*E*S*******K*1****40HC~' + 'QTY*38*200~' + 'M7*SEAL002~' + 'L5*2*GARMENTS~' + 'L3*33000*G*******61*E*300*K~' + 'K1*HANDLE WITH CARE~' + 'SE*36*0001~' + 'GE*1*1~' + 'IEA*1*000000001~' + ) + + def test_parse_full_document(self): + registry = _make_edi304_registry() + parser = X12Parser(self.FULL_EDI_304, registry=registry) + result = parser.to_dict() + + segment_ids = [s['segment_id'] for s in result['segments']] + self.assertEqual(segment_ids, [ + 'ISA', 'GS', 'ST', + 'B2', 'B2A', + 'N9', 'N9', 'N9', 'N9', + 'V1', + 'N1', 'N3', 'N4', 'G61', + 'N1', 'N3', 'N4', + 'N1', 'N1', + 'R4', 'R4', 'R4', 'R4', + 'R2', + 'LX', 'N7', 'QTY', 'M7', 'L5', + 'LX', 'N7', 'QTY', 'M7', 'L5', + 'L3', 'K1', + 'SE', 'GE', 'IEA', + ]) + + def test_parse_full_document_segment_count(self): + registry = _make_edi304_registry() + parser = X12Parser(self.FULL_EDI_304, registry=registry) + segments = 
parser.parse() + + self.assertEqual(len(segments), 39) + + def test_parse_full_document_to_json(self): + registry = _make_edi304_registry() + parser = X12Parser(self.FULL_EDI_304, registry=registry) + json_output = parser.to_json() + + data = json.loads(json_output) + self.assertEqual(len(data['segments']), 39) + + def test_parse_b2_from_document(self): + registry = _make_edi304_registry() + parser = X12Parser(self.FULL_EDI_304, registry=registry) + segments = parser.parse() + + b2 = segments[3] + self.assertEqual(b2.segment_id, 'B2') + ref_map = {e['reference_designator']: e['value'] for e in b2.to_dict()['elements']} + self.assertEqual(ref_map['B202'], 'HDMU') + self.assertEqual(ref_map['B204'], 'SI-2024-001') + self.assertEqual(ref_map['B206'], 'PP') + self.assertEqual(ref_map['B208'], '2') + + def test_parse_v1_from_document(self): + registry = _make_edi304_registry() + parser = X12Parser(self.FULL_EDI_304, registry=registry) + segments = parser.parse() + + v1 = [s for s in segments if s.segment_id == 'V1'][0] + ref_map = {e['reference_designator']: e['value'] for e in v1.to_dict()['elements']} + self.assertEqual(ref_map['V101'], '9834276') + self.assertEqual(ref_map['V102'], 'HYUNDAI FORWARD') + self.assertEqual(ref_map['V104'], '0024W') + self.assertEqual(ref_map['V105'], 'HDMU') + self.assertEqual(ref_map['V108'], 'L') + + def test_parse_n9_segments_from_document(self): + registry = _make_edi304_registry() + parser = X12Parser(self.FULL_EDI_304, registry=registry) + segments = parser.parse() + + n9_segments = [s for s in segments if s.segment_id == 'N9'] + self.assertEqual(len(n9_segments), 4) + + qualifiers = [ + s.to_dict()['elements'][0]['value'] for s in n9_segments + ] + self.assertEqual(qualifiers, ['BM', 'BN', 'CT', 'SI']) + + def test_parse_n1_segments_from_document(self): + registry = _make_edi304_registry() + parser = X12Parser(self.FULL_EDI_304, registry=registry) + segments = parser.parse() + + n1_segments = [s for s in segments if s.segment_id 
== 'N1'] + self.assertEqual(len(n1_segments), 4) + + entity_codes = [ + s.to_dict()['elements'][0]['value'] for s in n1_segments + ] + self.assertEqual(entity_codes, ['SH', 'CN', 'CA', 'FW']) + + def test_parse_r4_ports_from_document(self): + registry = _make_edi304_registry() + parser = X12Parser(self.FULL_EDI_304, registry=registry) + segments = parser.parse() + + r4_segments = [s for s in segments if s.segment_id == 'R4'] + self.assertEqual(len(r4_segments), 4) + + port_functions = [ + s.to_dict()['elements'][0]['value'] for s in r4_segments + ] + self.assertEqual(port_functions, ['R', 'L', 'D', 'E']) + + def test_parse_lx_containers_from_document(self): + registry = _make_edi304_registry() + parser = X12Parser(self.FULL_EDI_304, registry=registry) + segments = parser.parse() + + lx_segments = [s for s in segments if s.segment_id == 'LX'] + self.assertEqual(len(lx_segments), 2) + self.assertEqual(lx_segments[0].to_dict()['elements'][0]['value'], '1') + self.assertEqual(lx_segments[1].to_dict()['elements'][0]['value'], '2') + + def test_parse_n7_equipment_from_document(self): + registry = _make_edi304_registry() + parser = X12Parser(self.FULL_EDI_304, registry=registry) + segments = parser.parse() + + n7_segments = [s for s in segments if s.segment_id == 'N7'] + self.assertEqual(len(n7_segments), 2) + + first_n7 = n7_segments[0].to_dict() + ref_map = {e['reference_designator']: e['value'] for e in first_n7['elements']} + self.assertEqual(ref_map['N701'], 'HLCU') + self.assertEqual(ref_map['N702'], '3456789') + self.assertEqual(ref_map['N722'], '22GP') + + second_n7 = n7_segments[1].to_dict() + ref_map2 = {e['reference_designator']: e['value'] for e in second_n7['elements']} + self.assertEqual(ref_map2['N701'], 'HDMU') + self.assertEqual(ref_map2['N722'], '40HC') + + def test_parse_l3_totals_from_document(self): + registry = _make_edi304_registry() + parser = X12Parser(self.FULL_EDI_304, registry=registry) + segments = parser.parse() + + l3 = [s for s in segments 
if s.segment_id == 'L3'][0] + ref_map = {e['reference_designator']: e['value'] for e in l3.to_dict()['elements']} + self.assertEqual(ref_map['L301'], '33000') + self.assertEqual(ref_map['L302'], 'G') + self.assertEqual(ref_map['L309'], '61') + self.assertEqual(ref_map['L310'], 'E') + self.assertEqual(ref_map['L311'], '300') + self.assertEqual(ref_map['L312'], 'K') + + def test_document_all_segments_valid(self): + registry = _make_edi304_registry() + parser = X12Parser(self.FULL_EDI_304, registry=registry) + segments = parser.parse() + + for seg in segments: + self.assertTrue( + seg.is_valid(), + msg='Segment {} is not valid'.format(seg.segment_id), + ) diff --git a/tests/test_edi315_validation.py b/tests/test_edi315_validation.py new file mode 100644 index 0000000..ae9208d --- /dev/null +++ b/tests/test_edi315_validation.py @@ -0,0 +1,462 @@ +""" +Validation tests using EDI 315 grammar definitions. + +These tests parse actual rendered X12 EDI 315 strings using grammar +definitions copied from a real EDI 315 implementation to verify the +parser works with production-grade segment definitions. 
+ +Grammar source: gf-notif-svc/edi_315 (B4, R4, DTM, N9, Q2 segments) +""" +import json +from unittest import TestCase + +from pyx12lib.core.parser import SegmentParser, X12Parser +from pyx12lib.core.registry import GrammarRegistry, create_default_registry + +from tests.fixtures.edi_315_grammar import ( + B4Segment, R4Segment, DTMSegment, N9Segment, Q2Segment, +) + + +def _make_edi315_registry(): + """Create a registry with all EDI 315 + envelope segments.""" + registry = create_default_registry() + registry.register(B4Segment) + registry.register(R4Segment) + registry.register(DTMSegment) + registry.register(N9Segment) + registry.register(Q2Segment) + return registry + + +class TestB4SegmentParsing(TestCase): + """Parse B4 segments rendered by the edi_315 B4Renderer.""" + + def test_parse_b4_vessel_depart_full_data(self): + x12 = 'B4***VD*20221101*1400*USDAL*HASU*431617*L*40HC*USDAL*UN*0~' + + parser = SegmentParser(x12, grammar=B4Segment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'B4') + # B401, B402 are NotUsed -> skipped + self.assertEqual(result['elements'][0]['reference_designator'], 'B403') + self.assertEqual(result['elements'][0]['value'], 'VD') + self.assertEqual(result['elements'][1]['value'], '20221101') + self.assertEqual(result['elements'][2]['value'], '1400') + self.assertEqual(result['elements'][3]['value'], 'USDAL') + self.assertEqual(result['elements'][4]['value'], 'HASU') + self.assertEqual(result['elements'][5]['value'], '431617') + self.assertEqual(result['elements'][6]['value'], 'L') + self.assertEqual(result['elements'][7]['value'], '40HC') + self.assertEqual(result['elements'][8]['value'], 'USDAL') + self.assertEqual(result['elements'][9]['value'], 'UN') + self.assertEqual(result['elements'][10]['value'], '0') + + def test_parse_b4_no_timestamp(self): + x12 = 'B4***I***USDAL*HASU*431617*L*40HC*USDAL*UN*0~' + + parser = SegmentParser(x12, grammar=B4Segment) + result = parser.to_dict() + + 
self.assertEqual(result['elements'][0]['value'], 'I') + self.assertEqual(result['elements'][1]['value'], '') + self.assertEqual(result['elements'][2]['value'], '') + self.assertEqual(result['elements'][3]['value'], 'USDAL') + + def test_parse_b4_empty_container_type(self): + x12 = 'B4***I*20221101*1400*USDAL*HASU*431617*L* *USDAL*UN*0~' + + parser = SegmentParser(x12, grammar=B4Segment) + result = parser.to_dict() + + self.assertEqual(result['elements'][7]['value'], ' ') + + def test_b4_validation(self): + x12 = 'B4***VD*20221101*1400*USDAL*HASU*431617*L*40HC*USDAL*UN*0~' + parser = SegmentParser(x12, grammar=B4Segment) + self.assertTrue(parser.parse().is_valid()) + + +class TestN9SegmentParsing(TestCase): + """Parse N9 segments rendered by the edi_315 N9RendererLoop.""" + + def test_parse_n9_bl_number(self): + x12 = 'N9*BM*BL_NO~' + + parser = SegmentParser(x12, grammar=N9Segment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'N9') + self.assertEqual(result['elements'][0]['value'], 'BM') + self.assertEqual(result['elements'][0]['reference_designator'], 'N901') + self.assertEqual(result['elements'][1]['value'], 'BL_NO') + self.assertEqual(result['elements'][1]['reference_designator'], 'N902') + + def test_parse_n9_booking_number(self): + x12 = 'N9*BN*BOOKING_NO~' + + parser = SegmentParser(x12, grammar=N9Segment) + result = parser.to_dict() + + self.assertEqual(result['elements'][0]['value'], 'BN') + self.assertEqual(result['elements'][1]['value'], 'BOOKING_NO') + + def test_parse_n9_equipment(self): + x12 = 'N9*EQ*HASU4316170~' + + parser = SegmentParser(x12, grammar=N9Segment) + result = parser.to_dict() + + self.assertEqual(result['elements'][0]['value'], 'EQ') + self.assertEqual(result['elements'][1]['value'], 'HASU4316170') + + def test_parse_n9_scac(self): + x12 = 'N9*SCA*SCAC_CODE~' + + parser = SegmentParser(x12, grammar=N9Segment) + result = parser.to_dict() + + self.assertEqual(result['elements'][0]['value'], 'SCA') + 
self.assertEqual(result['elements'][1]['value'], 'SCAC_CODE') + + def test_parse_n9_po_number(self): + x12 = 'N9*PO*PO123456789~' + + parser = SegmentParser(x12, grammar=N9Segment) + result = parser.to_dict() + + self.assertEqual(result['elements'][0]['value'], 'PO') + self.assertEqual(result['elements'][1]['value'], 'PO123456789') + + def test_n9_validation(self): + x12 = 'N9*BM*BL_NO~' + parser = SegmentParser(x12, grammar=N9Segment) + self.assertTrue(parser.parse().is_valid()) + + +class TestR4SegmentParsing(TestCase): + """Parse R4 segments rendered by the edi_315 R4RendererLoop.""" + + def test_parse_r4_port_with_unlocode(self): + x12 = 'R4*L*UN*VNHPH*HAIPHONG*VN~' + + parser = SegmentParser(x12, grammar=R4Segment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'R4') + self.assertEqual(result['elements'][0]['value'], 'L') + self.assertEqual(result['elements'][1]['value'], 'UN') + self.assertEqual(result['elements'][2]['value'], 'VNHPH') + self.assertEqual(result['elements'][3]['value'], 'HAIPHONG') + self.assertEqual(result['elements'][4]['value'], 'VN') + + def test_parse_r4_port_with_city_name(self): + x12 = 'R4*R*CI*HAIPHONG*HAIPHONG*VN~' + + parser = SegmentParser(x12, grammar=R4Segment) + result = parser.to_dict() + + self.assertEqual(result['elements'][0]['value'], 'R') + self.assertEqual(result['elements'][1]['value'], 'CI') + + def test_parse_r4_discharge_port(self): + x12 = 'R4*D*UN*SGSIN*SINGAPORE*SG~' + + parser = SegmentParser(x12, grammar=R4Segment) + result = parser.to_dict() + + self.assertEqual(result['elements'][0]['value'], 'D') + self.assertEqual(result['elements'][2]['value'], 'SGSIN') + self.assertEqual(result['elements'][3]['value'], 'SINGAPORE') + + def test_parse_r4_delivery_port(self): + x12 = 'R4*E*UN*SGSIN*SINGAPORE*SG~' + + parser = SegmentParser(x12, grammar=R4Segment) + result = parser.to_dict() + + self.assertEqual(result['elements'][0]['value'], 'E') + + def test_parse_r4_minimal(self): + x12 = 'R4*R*CI~' 
+ + parser = SegmentParser(x12, grammar=R4Segment) + result = parser.to_dict() + + self.assertEqual(result['elements'][0]['value'], 'R') + self.assertEqual(result['elements'][1]['value'], 'CI') + for e in result['elements'][2:]: + self.assertEqual(e['value'], '') + + def test_r4_validation(self): + x12 = 'R4*L*UN*VNHPH*HAIPHONG*VN~' + parser = SegmentParser(x12, grammar=R4Segment) + self.assertTrue(parser.parse().is_valid()) + + +class TestDTMSegmentParsing(TestCase): + """Parse DTM segments rendered by the edi_315 DTMRenderer.""" + + def test_parse_dtm_estimated(self): + x12 = 'DTM*139*20221217*0000*LT~' + + parser = SegmentParser(x12, grammar=DTMSegment) + result = parser.to_dict() + + self.assertEqual(result['segment_id'], 'DTM') + self.assertEqual(result['elements'][0]['value'], '139') + self.assertEqual(result['elements'][1]['value'], '20221217') + self.assertEqual(result['elements'][2]['value'], '0000') + self.assertEqual(result['elements'][3]['value'], 'LT') + + def test_parse_dtm_actual(self): + x12 = 'DTM*140*20220801*0000*LT~' + + parser = SegmentParser(x12, grammar=DTMSegment) + result = parser.to_dict() + + self.assertEqual(result['elements'][0]['value'], '140') + + def test_parse_dtm_empty_date(self): + x12 = 'DTM*139***LT~' + + parser = SegmentParser(x12, grammar=DTMSegment) + result = parser.to_dict() + + self.assertEqual(result['elements'][0]['value'], '139') + self.assertEqual(result['elements'][1]['value'], '') + self.assertEqual(result['elements'][2]['value'], '') + self.assertEqual(result['elements'][3]['value'], 'LT') + + def test_dtm_validation(self): + x12 = 'DTM*139*20221217*0000*LT~' + parser = SegmentParser(x12, grammar=DTMSegment) + self.assertTrue(parser.parse().is_valid()) + + +class TestQ2SegmentParsing(TestCase): + """Parse Q2 segments rendered by the edi_315 Q2Renderer.""" + + def test_parse_q2_full_data(self): + x12 = 'Q2*******0*G*371S***L*CAP SAN MARCO***K~' + + parser = SegmentParser(x12, grammar=Q2Segment) + result = 
parser.to_dict() + + self.assertEqual(result['segment_id'], 'Q2') + ref_map = {e['reference_designator']: e['value'] for e in result['elements']} + + self.assertEqual(ref_map['Q201'], '') + self.assertEqual(ref_map['Q202'], '') + self.assertNotIn('Q203', ref_map) + self.assertEqual(ref_map['Q204'], '') + self.assertEqual(ref_map['Q205'], '') + self.assertEqual(ref_map['Q206'], '') + self.assertEqual(ref_map['Q207'], '0') + self.assertEqual(ref_map['Q208'], 'G') + self.assertEqual(ref_map['Q209'], '371S') + self.assertEqual(ref_map['Q210'], '') + self.assertEqual(ref_map['Q211'], '') + self.assertEqual(ref_map['Q212'], 'L') + self.assertEqual(ref_map['Q213'], 'CAP SAN MARCO') + self.assertEqual(ref_map['Q214'], '') + self.assertEqual(ref_map['Q215'], '') + self.assertEqual(ref_map['Q216'], 'K') + + def test_parse_q2_only_vessel_name(self): + x12 = 'Q2*******0*G****L*CAP SAN MARCO***K~' + + parser = SegmentParser(x12, grammar=Q2Segment) + result = parser.to_dict() + + ref_map = {e['reference_designator']: e['value'] for e in result['elements']} + self.assertEqual(ref_map['Q209'], '') + self.assertEqual(ref_map['Q213'], 'CAP SAN MARCO') + + def test_parse_q2_empty(self): + x12 = 'Q2*******0*G********K~' + + parser = SegmentParser(x12, grammar=Q2Segment) + result = parser.to_dict() + + ref_map = {e['reference_designator']: e['value'] for e in result['elements']} + self.assertEqual(ref_map['Q207'], '0') + self.assertEqual(ref_map['Q208'], 'G') + self.assertEqual(ref_map['Q216'], 'K') + + def test_q2_validation(self): + x12 = 'Q2*******0*G*371S***L*CAP SAN MARCO***K~' + parser = SegmentParser(x12, grammar=Q2Segment) + self.assertTrue(parser.parse().is_valid()) + + +class TestFullEdi315DocumentParsing(TestCase): + """Parse complete EDI 315 documents using X12Parser with edi_315 grammars.""" + + FULL_EDI_315 = ( + 'ISA*00* *00* *ZZ*GOFREIGHT *ZZ*PARTNERID *230207*2022*U*00401*000000001*0*P*^~' + 'GS*QO*GOFREIGHT*PARTNERID*20230207*2022*1*X*004010~' + 'ST*315*0001~' + 
'B4***VD*20221217*0000*VNHPH*HASU*431617*L*40HC*VNHPH*UN*0~' + 'N9*BM*BL_NO~' + 'N9*BN*BOOKING_NO~' + 'N9*EQ*HASU4316170~' + 'N9*SCA*SCAC_CODE~' + 'Q2*******0*G*371S***L*CAP SAN MARCO***K~' + 'R4*R*CI*HAIPHONG*HAIPHONG*VN~' + 'DTM*139*20221217*0000*LT~' + 'R4*L*UN*VNHPH*HAIPHONG*VN~' + 'DTM*139*20221217*0000*LT~' + 'R4*D*UN*SGSIN*SINGAPORE*SG~' + 'DTM*139*20221226*0000*LT~' + 'R4*E*UN*SGSIN*SINGAPORE*SG~' + 'DTM*139*20221226*0000*LT~' + 'SE*16*0001~' + 'GE*1*1~' + 'IEA*1*000000001~' + ) + + def test_parse_full_document(self): + registry = _make_edi315_registry() + parser = X12Parser(self.FULL_EDI_315, registry=registry) + result = parser.to_dict() + + segment_ids = [s['segment_id'] for s in result['segments']] + self.assertEqual(segment_ids, [ + 'ISA', 'GS', 'ST', + 'B4', + 'N9', 'N9', 'N9', 'N9', + 'Q2', + 'R4', 'DTM', 'R4', 'DTM', 'R4', 'DTM', 'R4', 'DTM', + 'SE', 'GE', 'IEA', + ]) + + def test_parse_full_document_segment_count(self): + registry = _make_edi315_registry() + parser = X12Parser(self.FULL_EDI_315, registry=registry) + segments = parser.parse() + + self.assertEqual(len(segments), 20) + + def test_parse_full_document_to_json(self): + registry = _make_edi315_registry() + parser = X12Parser(self.FULL_EDI_315, registry=registry) + json_output = parser.to_json() + + data = json.loads(json_output) + self.assertEqual(len(data['segments']), 20) + + def test_parse_b4_from_document(self): + registry = _make_edi315_registry() + parser = X12Parser(self.FULL_EDI_315, registry=registry) + segments = parser.parse() + + b4 = segments[3] + self.assertEqual(b4.segment_id, 'B4') + b4_dict = b4.to_dict() + ref_map = {e['reference_designator']: e['value'] for e in b4_dict['elements']} + self.assertEqual(ref_map['B403'], 'VD') + self.assertEqual(ref_map['B407'], 'HASU') + self.assertEqual(ref_map['B408'], '431617') + + def test_parse_n9_segments_from_document(self): + registry = _make_edi315_registry() + parser = X12Parser(self.FULL_EDI_315, registry=registry) + segments = 
parser.parse() + + n9_segments = [s for s in segments if s.segment_id == 'N9'] + self.assertEqual(len(n9_segments), 4) + + qualifiers = [ + s.to_dict()['elements'][0]['value'] for s in n9_segments + ] + self.assertEqual(qualifiers, ['BM', 'BN', 'EQ', 'SCA']) + + def test_parse_r4_dtm_pairs_from_document(self): + registry = _make_edi315_registry() + parser = X12Parser(self.FULL_EDI_315, registry=registry) + segments = parser.parse() + + r4_segments = [s for s in segments if s.segment_id == 'R4'] + dtm_segments = [s for s in segments if s.segment_id == 'DTM'] + self.assertEqual(len(r4_segments), 4) + self.assertEqual(len(dtm_segments), 4) + + port_functions = [ + s.to_dict()['elements'][0]['value'] for s in r4_segments + ] + self.assertEqual(port_functions, ['R', 'L', 'D', 'E']) + + def test_parse_two_transaction_document(self): + """Parse the two-transaction document.""" + x12 = ( + 'ISA*00* *00* *ZZ*GOFREIGHT *ZZ*PARTNERID *230207*2022*U*00401*000000001*0*P*^~' + 'GS*QO*GOFREIGHT*PARTNERID*20230207*2022*1*X*004010~' + 'ST*315*0001~' + 'B4***VD*20221217*0000*VNHPH*HASU*431617*L*40HC*VNHPH*UN*0~' + 'N9*BM*BL_NO~' + 'N9*BN*BOOKING_NO~' + 'N9*EQ*HASU4316170~' + 'N9*SCA*SCAC_CODE~' + 'Q2*******0*G*371S***L*CAP SAN MARCO***K~' + 'R4*R*CI*HAIPHONG*HAIPHONG*VN~' + 'DTM*139*20221217*0000*LT~' + 'R4*L*UN*VNHPH*HAIPHONG*VN~' + 'DTM*139*20221217*0000*LT~' + 'R4*D*UN*SGSIN*SINGAPORE*SG~' + 'DTM*139*20221226*0000*LT~' + 'R4*E*UN*SGSIN*SINGAPORE*SG~' + 'DTM*139*20221226*0000*LT~' + 'SE*16*0001~' + 'ST*315*0001~' + 'B4***AE*20221226*0000*SGSIN*TRHU*693609*L*40HC*SGSIN*UN*0~' + 'N9*BM*BL_NO~' + 'N9*EQ*TRHU6936090~' + 'Q2*******0*G********K~' + 'R4*R*UN*SGSIN*SINGAPORE*SG~' + 'DTM*139*20221226*0000*LT~' + 'R4*L*UN*SGSIN*SINGAPORE*SG~' + 'DTM*139*20221226*0000*LT~' + 'SE*10*0001~' + 'GE*2*1~' + 'IEA*1*000000001~' + ) + + registry = _make_edi315_registry() + parser = X12Parser(x12, registry=registry) + segments = parser.parse() + + counts = {} + for s in segments: + 
counts[s.segment_id] = counts.get(s.segment_id, 0) + 1 + + self.assertEqual(counts['ISA'], 1) + self.assertEqual(counts['GS'], 1) + self.assertEqual(counts['ST'], 2) + self.assertEqual(counts['B4'], 2) + self.assertEqual(counts['N9'], 6) + self.assertEqual(counts['Q2'], 2) + self.assertEqual(counts['R4'], 6) + self.assertEqual(counts['DTM'], 6) + self.assertEqual(counts['SE'], 2) + self.assertEqual(counts['GE'], 1) + self.assertEqual(counts['IEA'], 1) + + self.assertEqual(len(segments), 30) + + def test_headers_only_document(self): + """Parse envelope-only document (no transactions).""" + x12 = ( + 'ISA*00* *00* *ZZ*GOFREIGHT *ZZ*PARTNERID *230207*2022*U*00401*000000001*0*P*^~' + 'GS*QO*GOFREIGHT*PARTNERID*20230207*2022*1*X*004010~' + 'GE*0*1~' + 'IEA*1*000000001~' + ) + + registry = _make_edi315_registry() + parser = X12Parser(x12, registry=registry) + result = parser.to_dict() + + segment_ids = [s['segment_id'] for s in result['segments']] + self.assertEqual(segment_ids, ['ISA', 'GS', 'GE', 'IEA']) From a25e26c8638328ade251303c92d6bdc517cf66e3 Mon Sep 17 00:00:00 2001 From: Hoss Date: Fri, 30 Jan 2026 16:16:31 +0800 Subject: [PATCH 5/8] build: bump version to v0.4 and update docs [why] X12 parsing is a new feature warranting a minor version bump. [how] - Bump version 0.3 -> 0.4 in setup.py - Add changelog entry listing all new parsing APIs - Add README examples for parse_x12, SegmentParser, GrammarRegistry, and auto-detect delimiters --- CHANGELOG.md | 13 ++++++++++ README.md | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++ setup.py | 2 +- 3 files changed, 84 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 443f94d..c543828 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,19 @@ Changelog ========= +0.4 +----- + +Changes: + +- Add X12 parsing support (X12 -> JSON/Python dict). +- Add `SegmentParser` for parsing individual segments against a grammar. 
+- Add `X12Parser` for parsing complete X12 documents with auto-detection. +- Add `GrammarRegistry` for mapping segment IDs to grammar definitions. +- Add `ParsedSegment`, `ParsedElement`, `ParsedCompositeElement`, `ParsedComponent`, `ParsedLoop` data structures. +- Add `detect_delimiters()` for automatic ISA delimiter detection. +- Add `parse_x12()` and `parse_x12_to_json()` convenience functions. + 0.3 ----- diff --git a/README.md b/README.md index 5b07a58..89e5c09 100644 --- a/README.md +++ b/README.md @@ -70,6 +70,76 @@ class StRenderer(SegmentRenderer): return '{:04d}'.format(data.transaction_set_no) # return value should always be strings ``` +### Parsing X12 to JSON + +* Quick parse using the top-level API. +```python +from pyx12lib import parse_x12, parse_x12_to_json + +x12_data = "ST*997*0001~SE*1*0001~" + +# Parse to Python dict +data = parse_x12(x12_data) +# {'segments': [{'segment_id': 'ST', 'elements': [...]}, ...]} + +# Parse to JSON string +json_str = parse_x12_to_json(x12_data) +``` + +* Parse a single segment with explicit grammar. +```python +from pyx12lib.core.parser import SegmentParser +from pyx12lib.common.envelope.grammar import StSegment + +parser = SegmentParser("ST*997*0001~", grammar=StSegment) +result = parser.to_dict() +# {'segment_id': 'ST', 'elements': [ +# {'reference_designator': 'ST01', 'name': 'Transaction Set Identifier Code', 'value': '997', ...}, +# {'reference_designator': 'ST02', 'name': 'Transaction Set Control Number', 'value': '0001', ...}, +# ]} +``` + +* Register custom segment grammars for parsing. 
+```python +from pyx12lib import GrammarRegistry, X12Parser +from pyx12lib.core.grammar import BaseSegment, Element, element, segment + +class MySegment(BaseSegment): + segment_id = 'MY' + usage = segment.USAGE_MANDATORY + max_use = 1 + elements = ( + Element( + reference_designator='MY01', + name='My Field', + usage=element.USAGE_MANDATORY, + element_type=element.ELEMENT_TYPE_STRING, + minimum=1, + maximum=10, + ), + ) + +registry = GrammarRegistry() +registry.register(MySegment) + +parser = X12Parser("MY*hello~MY*world~", registry=registry) +data = parser.to_dict() +``` + +* Auto-detect delimiters from ISA header. +```python +from pyx12lib import parse_x12 + +# Delimiters are automatically detected from the ISA segment +x12_data = ( + "ISA*00* *00* *ZZ*SENDER " + "*ZZ*RECEIVER *210101*1200*^*00501*000000001*0*P*>~" + "GS*FA*SENDER*RECEIVER*20210101*1200*1*X*005010~" + "ST*997*0001~SE*1*0001~GE*1*1~IEA*1*000000001~" +) +data = parse_x12(x12_data) +``` + --- ## Test ```bash diff --git a/setup.py b/setup.py index 957d39a..757f2be 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setuptools.setup( name="pyx12lib", - version="0.3", + version="0.4", author="CJHwong", author_email="pypi@hardcoretech.co", url="https://github.com/hardcoretech/pyx12-lib", From aed455be182a25e03b907d4980cf5503477a064c Mon Sep 17 00:00:00 2001 From: Hoss Date: Wed, 11 Feb 2026 21:07:32 +0800 Subject: [PATCH 6/8] feat: add loop-aware parsing with LoopDefinition [why] X12Parser produces a flat segment list. For loops (e.g., N1 followed by N2/N3/N4/G61 children), clients cannot determine which children belong to which parent without re-implementing grouping logic. 
[how] - LoopDefinition(start_segment, children) defines loop structure with type validation and recursive grammar enumeration - GrammarRegistry gains register_loop() (auto-registers all grammars), register_all(), get_loop(), and has_loops property - X12Parser._organize_into_loops() groups flat segments into ParsedLoop objects in a single O(n) pass when loops are registered - Without loops registered, output is identical to before (backward compat) - 44 new tests: LoopDefinition unit (9), registry (8), parser edge cases (10), EDI 304 full-document integration (17) --- pyx12lib/__init__.py | 1 + pyx12lib/core/grammar/__init__.py | 1 + pyx12lib/core/grammar/loop.py | 90 ++++++++++ pyx12lib/core/parser.py | 46 ++++- pyx12lib/core/registry.py | 41 +++++ tests/test_edi304_validation.py | 175 ++++++++++++++++++ tests/test_loop_definition.py | 102 +++++++++++ tests/test_registry.py | 84 +++++++++ tests/test_x12_parser.py | 287 ++++++++++++++++++++++++++++++ 9 files changed, 823 insertions(+), 4 deletions(-) create mode 100644 pyx12lib/core/grammar/loop.py create mode 100644 tests/test_loop_definition.py diff --git a/pyx12lib/__init__.py b/pyx12lib/__init__.py index c868e55..d69472d 100644 --- a/pyx12lib/__init__.py +++ b/pyx12lib/__init__.py @@ -1,3 +1,4 @@ +from pyx12lib.core.grammar.loop import LoopDefinition from pyx12lib.core.parser import SegmentParser, X12Parser from pyx12lib.core.registry import GrammarRegistry, create_default_registry from pyx12lib.core.delimiters import detect_delimiters, Delimiters diff --git a/pyx12lib/core/grammar/__init__.py b/pyx12lib/core/grammar/__init__.py index 9c0f9f2..91ec47b 100644 --- a/pyx12lib/core/grammar/__init__.py +++ b/pyx12lib/core/grammar/__init__.py @@ -1,2 +1,3 @@ from .element import Component, CompositeElement, Element, NotUsedElement +from .loop import LoopDefinition from .segment import BaseSegment diff --git a/pyx12lib/core/grammar/loop.py b/pyx12lib/core/grammar/loop.py new file mode 100644 index 0000000..5296ebc --- 
/dev/null +++ b/pyx12lib/core/grammar/loop.py @@ -0,0 +1,90 @@ +from pyx12lib.core.grammar.segment import BaseSegment + + +class LoopDefinition(object): + """Defines a loop structure for X12 parsing. + + A loop starts with a specific segment type and contains child segments + and/or nested loop definitions. + + Args: + start_segment: BaseSegment subclass that triggers a new loop instance. + children: List of BaseSegment subclasses or nested LoopDefinition objects. + + Raises: + TypeError: If start_segment is not a BaseSegment subclass. + TypeError: If any child is neither a BaseSegment subclass nor LoopDefinition. + """ + + def __init__(self, start_segment, children=None): + if not _is_segment_class(start_segment): + raise TypeError( + "start_segment must be a BaseSegment subclass, got {}".format( + type(start_segment) + ) + ) + + self._start_segment = start_segment + self._children = children or [] + + self._child_segment_grammars = [] + self._child_loops = [] + + for child in self._children: + if isinstance(child, LoopDefinition): + self._child_loops.append(child) + elif _is_segment_class(child): + self._child_segment_grammars.append(child) + else: + raise TypeError( + "Each child must be a BaseSegment subclass or " + "LoopDefinition, got {}".format(type(child)) + ) + + self._child_segment_ids = frozenset( + g.segment_id for g in self._child_segment_grammars + ) + + @property + def loop_id(self): + return self._start_segment.segment_id + + @property + def start_segment_id(self): + return self._start_segment.segment_id + + @property + def start_segment_grammar(self): + return self._start_segment + + @property + def child_segment_ids(self): + return self._child_segment_ids + + @property + def child_segment_grammars(self): + return list(self._child_segment_grammars) + + @property + def child_loops(self): + return list(self._child_loops) + + def is_start(self, segment_id): + return segment_id == self._start_segment.segment_id + + def is_child(self, segment_id): + 
return segment_id in self._child_segment_ids + + def all_segment_grammars(self): + """Yield all grammar classes recursively (start + children + nested).""" + yield self._start_segment + for grammar in self._child_segment_grammars: + yield grammar + for child_loop in self._child_loops: + for grammar in child_loop.all_segment_grammars(): + yield grammar + + +def _is_segment_class(obj): + """Check if obj is a class that is a subclass of BaseSegment.""" + return isinstance(obj, type) and issubclass(obj, BaseSegment) diff --git a/pyx12lib/core/parser.py b/pyx12lib/core/parser.py index 0eb417c..edbf88d 100644 --- a/pyx12lib/core/parser.py +++ b/pyx12lib/core/parser.py @@ -4,6 +4,7 @@ ParsedElement, ParsedComponent, ParsedCompositeElement, + ParsedLoop, ParsedSegment, ) from pyx12lib.core.grammar.element import ( @@ -129,7 +130,7 @@ def parse(self): if self._parsed_segments is not None: return self._parsed_segments - segments = [] + flat_segments = [] raw_segments = self._x12_string.split(self._segment_terminator) for raw in raw_segments: @@ -149,10 +150,47 @@ def parse(self): element_delimiter=self._element_delimiter, component_delimiter=self._component_delimiter, ) - segments.append(parser.parse()) + flat_segments.append(parser.parse()) - self._parsed_segments = segments - return segments + if self._registry.has_loops: + self._parsed_segments = self._organize_into_loops(flat_segments) + else: + self._parsed_segments = flat_segments + + return self._parsed_segments + + def _organize_into_loops(self, flat_segments): + """Group flat segments into ParsedLoop objects based on registry loop definitions.""" + result = [] + current_loop = None + current_loop_def = None + + for segment in flat_segments: + seg_id = segment.segment_id + loop_def = self._registry.get_loop(seg_id) + + if loop_def is not None: + # This segment starts a new loop. 
+ if current_loop is not None: + result.append(current_loop) + current_loop = ParsedLoop(loop_id=loop_def.loop_id) + current_loop.add_segment(segment) + current_loop_def = loop_def + + elif current_loop is not None and current_loop_def.is_child(seg_id): + current_loop.add_segment(segment) + + else: + if current_loop is not None: + result.append(current_loop) + current_loop = None + current_loop_def = None + result.append(segment) + + if current_loop is not None: + result.append(current_loop) + + return result def to_dict(self): return { diff --git a/pyx12lib/core/registry.py b/pyx12lib/core/registry.py index 0badcde..26cf692 100644 --- a/pyx12lib/core/registry.py +++ b/pyx12lib/core/registry.py @@ -3,6 +3,7 @@ GsSegment, GeSegment, StSegment, SeSegment, ) +from pyx12lib.core.grammar.loop import LoopDefinition class GrammarRegistry(object): @@ -10,6 +11,7 @@ class GrammarRegistry(object): def __init__(self): self._registry = {} + self._loop_definitions = {} def register(self, segment_grammar): segment_id = segment_grammar.segment_id @@ -19,12 +21,51 @@ def register(self, segment_grammar): ) self._registry[segment_id] = segment_grammar + def register_all(self, segment_grammars): + """Register a list of segment grammar classes.""" + for grammar in segment_grammars: + self.register(grammar) + + def register_loop(self, loop_definition): + """Register a loop definition and auto-register all its segment grammars. + + Args: + loop_definition: A LoopDefinition instance. + + Raises: + TypeError: If loop_definition is not a LoopDefinition. + ValueError: If a loop with the same start_segment_id is already registered. 
+ """ + if not isinstance(loop_definition, LoopDefinition): + raise TypeError( + "Expected LoopDefinition, got {}".format(type(loop_definition)) + ) + + start_id = loop_definition.start_segment_id + if start_id in self._loop_definitions: + raise ValueError( + "Loop with start segment '{}' already registered".format(start_id) + ) + + self._loop_definitions[start_id] = loop_definition + + for grammar in loop_definition.all_segment_grammars(): + if not self.has(grammar.segment_id): + self.register(grammar) + def get(self, segment_id): return self._registry.get(segment_id) + def get_loop(self, start_segment_id): + return self._loop_definitions.get(start_segment_id) + def has(self, segment_id): return segment_id in self._registry + @property + def has_loops(self): + return bool(self._loop_definitions) + def create_default_registry(): """Create a registry with standard envelope segments pre-registered.""" diff --git a/tests/test_edi304_validation.py b/tests/test_edi304_validation.py index b0d5c73..4b66220 100644 --- a/tests/test_edi304_validation.py +++ b/tests/test_edi304_validation.py @@ -12,6 +12,8 @@ import json from unittest import TestCase +from pyx12lib.core.grammar.loop import LoopDefinition +from pyx12lib.core.parsed import ParsedLoop, ParsedSegment from pyx12lib.core.parser import SegmentParser, X12Parser from pyx12lib.core.registry import GrammarRegistry, create_default_registry @@ -981,3 +983,176 @@ def test_document_all_segments_valid(self): seg.is_valid(), msg='Segment {} is not valid'.format(seg.segment_id), ) + + +def _make_edi304_loop_registry(): + """Create a registry with EDI 304 segments and loop definitions.""" + registry = create_default_registry() + registry.register_all([ + B2Segment, B2aSegment, N9Segment, V1Segment, + R4Segment, R2Segment, L3Segment, PWKSegment, + ]) + registry.register_loop( + LoopDefinition(N1Segment, [N2Segment, N3Segment, N4Segment, G61Segment]) + ) + registry.register_loop( + LoopDefinition(LxSegment, [N7Segment, 
QtySegment, M7Segment, L0Segment, L5Segment]) + ) + return registry + + +class TestFullEdi304LoopParsing(TestCase): + """Parse an EDI 304 document with loop definitions applied. + + Uses a different X12 string from TestFullEdi304DocumentParsing to + provide complementary coverage — N1 with N2 children, single LX + with multiple containers, PWK segments instead of K1. + """ + + FULL_EDI_304_WITH_LOOPS = ( + 'ISA*00* *00* *ZZ*GOFREIGHT *ZZ*CARGOSMART *240115*1030*^*00401*000000001*0*P*^~' + 'GS*SO*CARGOSMART*GOFREIGHT*20240115*1030*1*X*004010~' + 'ST*304*123456789~' + 'B2*PP*EGLV**OE-25120002**MX**2~' + 'B2A*00~' + 'N9*BM*123324~' + 'N9*BN*12334~' + 'N9*CT*QWER~' + 'N9*SI*OE-25120002~' + 'N9*TN*12345~' + 'V1**PACIFIC VOYAGER**1234*EGLV~' + 'N1*CA*EGLV~' + 'N1*SH*ACME EXPORTS INC~' + 'N1*CN*TERMINAL WEST PORT~' + 'N2*100 DOCK STREET~' + 'N2*PORTSIDE 100 BC CA~' + 'N1*FW*TERMINAL WEST PORT~' + 'N2*100 DOCK STREET~' + 'N2*PORTSIDE 100 BC CA~' + 'N1*SI*ACME EXPORTS INC*25*12345~' + 'G61*IC*JANE SMITH*EM*contact@acme-exports.test~' + 'R4*R*UN*US2AA~' + 'R4*D*UN*US267~' + 'R4*L*UN*US267~' + 'R4*E*UN*US2AA~' + 'R4*W*UN*US267~' + 'R2*EGLV*O**********02~' + 'LX*000001~' + 'N7**12*1*G*4200***22*E*S*******K*7****22G0~' + 'QTY*39*100~' + 'M7*12321*1231***SH~' + 'N7**2234*100*G*4200***22*E*S*******K*7****22G0~' + 'QTY*39*100~' + 'M7*32123*12345***SH~' + 'L0*001***101*G*100*X*200*CTN**K~' + 'L5*001*TEST DESC****TEST MARK~' + 'L3*101*G*******100*X*200*K~' + 'PWK*BC*EI*5~' + 'PWK*BL*EI*3~' + 'SE*38*123456789~' + 'GE*1*123456789~' + 'IEA*1*000000001~' + ) + + def setUp(self): + self.registry = _make_edi304_loop_registry() + self.parser = X12Parser(self.FULL_EDI_304_WITH_LOOPS, registry=self.registry) + self.result = self.parser.parse() + + def test_loop_parse_top_level_item_count(self): + # ISA, GS, ST, B2, B2A, 5x N9, V1 = 11 flat + # 5x N1 loops = 5 loops + # 5x R4, R2 = 6 flat + # 1x LX loop = 1 loop + # L3, 2x PWK, SE, GE, IEA = 6 flat + # Total = 11 + 5 + 6 + 1 + 6 = 29 + 
self.assertEqual(len(self.result), 29) + + def test_loop_parse_n1_loop_count(self): + n1_loops = [item for item in self.result if isinstance(item, ParsedLoop) and item.loop_id == 'N1'] + self.assertEqual(len(n1_loops), 5) + + def test_loop_n1_ca_has_no_children(self): + n1_loops = [item for item in self.result if isinstance(item, ParsedLoop) and item.loop_id == 'N1'] + ca_loop = n1_loops[0] + self.assertEqual(len(ca_loop.segments), 1) + self.assertEqual(ca_loop.segments[0].segment_id, 'N1') + + def test_loop_n1_cn_has_two_n2(self): + n1_loops = [item for item in self.result if isinstance(item, ParsedLoop) and item.loop_id == 'N1'] + cn_loop = n1_loops[2] # CA, SH, CN + self.assertEqual(len(cn_loop.segments), 3) + self.assertEqual(cn_loop.segments[0].segment_id, 'N1') + self.assertEqual(cn_loop.segments[1].segment_id, 'N2') + self.assertEqual(cn_loop.segments[2].segment_id, 'N2') + + def test_loop_n1_si_has_g61(self): + n1_loops = [item for item in self.result if isinstance(item, ParsedLoop) and item.loop_id == 'N1'] + si_loop = n1_loops[4] # CA, SH, CN, FW, SI + self.assertEqual(len(si_loop.segments), 2) + self.assertEqual(si_loop.segments[0].segment_id, 'N1') + self.assertEqual(si_loop.segments[1].segment_id, 'G61') + + def test_loop_n1_entity_codes(self): + n1_loops = [item for item in self.result if isinstance(item, ParsedLoop) and item.loop_id == 'N1'] + entity_codes = [] + for loop in n1_loops: + n1_seg = loop.segments[0] + code = n1_seg.to_dict()['elements'][0]['value'] + entity_codes.append(code) + self.assertEqual(entity_codes, ['CA', 'SH', 'CN', 'FW', 'SI']) + + def test_loop_lx_count(self): + lx_loops = [item for item in self.result if isinstance(item, ParsedLoop) and item.loop_id == 'LX'] + self.assertEqual(len(lx_loops), 1) + + def test_loop_lx_contains_all_children(self): + lx_loops = [item for item in self.result if isinstance(item, ParsedLoop) and item.loop_id == 'LX'] + lx_loop = lx_loops[0] + self.assertEqual(len(lx_loop.segments), 9) + seg_ids = 
[s.segment_id for s in lx_loop.segments] + self.assertEqual(seg_ids, ['LX', 'N7', 'QTY', 'M7', 'N7', 'QTY', 'M7', 'L0', 'L5']) + + def test_loop_lx_two_containers(self): + lx_loops = [item for item in self.result if isinstance(item, ParsedLoop) and item.loop_id == 'LX'] + lx_loop = lx_loops[0] + n7_segments = [s for s in lx_loop.segments if s.segment_id == 'N7'] + self.assertEqual(len(n7_segments), 2) + + def test_r4_segments_are_flat(self): + r4_items = [item for item in self.result if isinstance(item, ParsedSegment) and item.segment_id == 'R4'] + self.assertEqual(len(r4_items), 5) + + def test_l3_after_lx_loop_is_flat(self): + l3_items = [item for item in self.result if isinstance(item, ParsedSegment) and item.segment_id == 'L3'] + self.assertEqual(len(l3_items), 1) + + def test_pwk_after_l3_are_flat(self): + pwk_items = [item for item in self.result if isinstance(item, ParsedSegment) and item.segment_id == 'PWK'] + self.assertEqual(len(pwk_items), 2) + + def test_n9_segments_before_loops_are_flat(self): + n9_items = [item for item in self.result if isinstance(item, ParsedSegment) and item.segment_id == 'N9'] + self.assertEqual(len(n9_items), 5) + + def test_envelope_segments_unaffected(self): + envelope_ids = ['ISA', 'GS', 'ST', 'SE', 'GE', 'IEA'] + for env_id in envelope_ids: + matches = [item for item in self.result if isinstance(item, ParsedSegment) and item.segment_id == env_id] + self.assertEqual(len(matches), 1, msg='Expected 1 {} segment, got {}'.format(env_id, len(matches))) + + def test_to_dict_loop_items_have_loop_id(self): + result_dict = self.parser.to_dict() + loop_items = [item for item in result_dict['segments'] if 'loop_id' in item] + self.assertEqual(len(loop_items), 6) # 5 N1 + 1 LX + + def test_to_dict_flat_items_have_segment_id(self): + result_dict = self.parser.to_dict() + flat_items = [item for item in result_dict['segments'] if 'segment_id' in item] + self.assertEqual(len(flat_items), 23) # 29 total - 6 loops + + def 
test_to_json_round_trip(self): + json_output = self.parser.to_json() + data = json.loads(json_output) + self.assertIn('segments', data) + self.assertEqual(len(data['segments']), 29) diff --git a/tests/test_loop_definition.py b/tests/test_loop_definition.py new file mode 100644 index 0000000..d384f6b --- /dev/null +++ b/tests/test_loop_definition.py @@ -0,0 +1,102 @@ +from unittest import TestCase + +from pyx12lib.core.grammar import BaseSegment, element, segment +from pyx12lib.core.grammar.loop import LoopDefinition + + +class _SegA(BaseSegment): + segment_id = "A" + usage = segment.USAGE_MANDATORY + max_use = 1 + elements = () + + +class _SegB(BaseSegment): + segment_id = "B" + usage = segment.USAGE_OPTIONAL + max_use = 99 + elements = () + + +class _SegC(BaseSegment): + segment_id = "C" + usage = segment.USAGE_OPTIONAL + max_use = 99 + elements = () + + +class _SegD(BaseSegment): + segment_id = "D" + usage = segment.USAGE_OPTIONAL + max_use = 99 + elements = () + + +class TestLoopDefinition(TestCase): + def test_basic_construction(self): + loop = LoopDefinition(_SegA, [_SegB, _SegC]) + + self.assertEqual(loop.loop_id, "A") + self.assertEqual(loop.start_segment_id, "A") + self.assertIs(loop.start_segment_grammar, _SegA) + + def test_children_segments_stored(self): + loop = LoopDefinition(_SegA, [_SegB, _SegC]) + + self.assertEqual(loop.child_segment_ids, frozenset(["B", "C"])) + self.assertEqual(loop.child_segment_grammars, [_SegB, _SegC]) + + def test_children_with_nested_loop(self): + nested = LoopDefinition(_SegC, [_SegD]) + loop = LoopDefinition(_SegA, [_SegB, nested]) + + # child_segment_ids excludes nested loop's segments + self.assertEqual(loop.child_segment_ids, frozenset(["B"])) + self.assertEqual(loop.child_segment_grammars, [_SegB]) + self.assertEqual(len(loop.child_loops), 1) + self.assertIs(loop.child_loops[0], nested) + + def test_is_start(self): + loop = LoopDefinition(_SegA, [_SegB]) + + self.assertTrue(loop.is_start("A")) + 
self.assertFalse(loop.is_start("B")) + self.assertFalse(loop.is_start("X")) + + def test_is_child(self): + loop = LoopDefinition(_SegA, [_SegB, _SegC]) + + self.assertTrue(loop.is_child("B")) + self.assertTrue(loop.is_child("C")) + self.assertFalse(loop.is_child("A")) + self.assertFalse(loop.is_child("X")) + + def test_all_segment_grammars(self): + nested = LoopDefinition(_SegC, [_SegD]) + loop = LoopDefinition(_SegA, [_SegB, nested]) + + grammars = list(loop.all_segment_grammars()) + + self.assertEqual(grammars, [_SegA, _SegB, _SegC, _SegD]) + + def test_no_children(self): + loop = LoopDefinition(_SegA) + + self.assertEqual(loop.child_segment_ids, frozenset()) + self.assertEqual(loop.child_segment_grammars, []) + self.assertEqual(loop.child_loops, []) + self.assertEqual(list(loop.all_segment_grammars()), [_SegA]) + + def test_invalid_start_segment_raises(self): + with self.assertRaises(TypeError): + LoopDefinition("not_a_segment", [_SegB]) + + with self.assertRaises(TypeError): + LoopDefinition(_SegA(), [_SegB]) # instance, not class + + def test_invalid_child_raises(self): + with self.assertRaises(TypeError): + LoopDefinition(_SegA, ["not_a_segment"]) + + with self.assertRaises(TypeError): + LoopDefinition(_SegA, [42]) diff --git a/tests/test_registry.py b/tests/test_registry.py index 4b7b78d..25c0527 100644 --- a/tests/test_registry.py +++ b/tests/test_registry.py @@ -1,6 +1,7 @@ from unittest import TestCase from pyx12lib.core.grammar import BaseSegment, Element, element, segment +from pyx12lib.core.grammar.loop import LoopDefinition from pyx12lib.core.registry import GrammarRegistry, create_default_registry @@ -20,6 +21,27 @@ class _CustomSegment(BaseSegment): ) +class _LoopStart(BaseSegment): + segment_id = "LS" + usage = segment.USAGE_MANDATORY + max_use = 1 + elements = () + + +class _LoopChild1(BaseSegment): + segment_id = "LC1" + usage = segment.USAGE_OPTIONAL + max_use = 99 + elements = () + + +class _LoopChild2(BaseSegment): + segment_id = "LC2" + usage 
= segment.USAGE_OPTIONAL + max_use = 99 + elements = () + + class TestGrammarRegistry(TestCase): def test_register_and_get(self): registry = GrammarRegistry() @@ -65,3 +87,65 @@ def test_default_registry_returns_correct_grammar(self): self.assertIs(registry.get("ISA"), IsaSegment) self.assertIs(registry.get("ST"), StSegment) + + def test_register_all(self): + registry = GrammarRegistry() + registry.register_all([_LoopStart, _LoopChild1, _LoopChild2]) + + self.assertTrue(registry.has("LS")) + self.assertTrue(registry.has("LC1")) + self.assertTrue(registry.has("LC2")) + + def test_register_loop(self): + registry = GrammarRegistry() + loop_def = LoopDefinition(_LoopStart, [_LoopChild1, _LoopChild2]) + registry.register_loop(loop_def) + + result = registry.get_loop("LS") + self.assertIs(result, loop_def) + + def test_register_loop_auto_registers_grammars(self): + registry = GrammarRegistry() + loop_def = LoopDefinition(_LoopStart, [_LoopChild1, _LoopChild2]) + registry.register_loop(loop_def) + + self.assertTrue(registry.has("LS")) + self.assertTrue(registry.has("LC1")) + self.assertTrue(registry.has("LC2")) + + def test_has_loops(self): + registry = GrammarRegistry() + self.assertFalse(registry.has_loops) + + loop_def = LoopDefinition(_LoopStart, [_LoopChild1]) + registry.register_loop(loop_def) + self.assertTrue(registry.has_loops) + + def test_duplicate_loop_start_raises(self): + registry = GrammarRegistry() + loop_def = LoopDefinition(_LoopStart, [_LoopChild1]) + registry.register_loop(loop_def) + + loop_def2 = LoopDefinition(_LoopStart, [_LoopChild2]) + with self.assertRaises(ValueError) as ctx: + registry.register_loop(loop_def2) + self.assertIn("LS", str(ctx.exception)) + + def test_register_loop_type_error(self): + registry = GrammarRegistry() + with self.assertRaises(TypeError): + registry.register_loop("not_a_loop_definition") + + def test_get_loop_unknown_returns_none(self): + registry = GrammarRegistry() + self.assertIsNone(registry.get_loop("UNKNOWN")) + + 
def test_register_loop_skips_already_registered_grammars(self): + registry = GrammarRegistry() + registry.register(_LoopStart) + loop_def = LoopDefinition(_LoopStart, [_LoopChild1]) + # Should not raise even though _LoopStart is already registered + registry.register_loop(loop_def) + + self.assertIs(registry.get("LS"), _LoopStart) + self.assertTrue(registry.has("LC1")) diff --git a/tests/test_x12_parser.py b/tests/test_x12_parser.py index 24b6ee6..5f5834d 100644 --- a/tests/test_x12_parser.py +++ b/tests/test_x12_parser.py @@ -2,6 +2,8 @@ from unittest import TestCase from pyx12lib.core.grammar import BaseSegment, Element, element, segment +from pyx12lib.core.grammar.loop import LoopDefinition +from pyx12lib.core.parsed import ParsedLoop, ParsedSegment from pyx12lib.core.parser import X12Parser from pyx12lib.core.registry import GrammarRegistry, create_default_registry @@ -22,6 +24,118 @@ class _CustomSegment(BaseSegment): ) +class _SegA(BaseSegment): + segment_id = "A" + usage = segment.USAGE_OPTIONAL + max_use = 99 + elements = ( + Element( + reference_designator="A01", + name="A Element", + usage=element.USAGE_MANDATORY, + element_type=element.ELEMENT_TYPE_STRING, + minimum=1, + maximum=10, + ), + ) + + +class _SegB(BaseSegment): + segment_id = "B" + usage = segment.USAGE_OPTIONAL + max_use = 99 + elements = ( + Element( + reference_designator="B01", + name="B Element", + usage=element.USAGE_MANDATORY, + element_type=element.ELEMENT_TYPE_STRING, + minimum=1, + maximum=10, + ), + ) + + +class _SegN1(BaseSegment): + segment_id = "N1" + usage = segment.USAGE_OPTIONAL + max_use = 99 + elements = ( + Element( + reference_designator="N101", + name="Entity Identifier Code", + usage=element.USAGE_MANDATORY, + element_type=element.ELEMENT_TYPE_STRING, + minimum=1, + maximum=10, + ), + ) + + +class _SegN2(BaseSegment): + segment_id = "N2" + usage = segment.USAGE_OPTIONAL + max_use = 99 + elements = ( + Element( + reference_designator="N201", + name="Name", + 
usage=element.USAGE_MANDATORY, + element_type=element.ELEMENT_TYPE_STRING, + minimum=1, + maximum=60, + ), + ) + + +class _SegN3(BaseSegment): + segment_id = "N3" + usage = segment.USAGE_OPTIONAL + max_use = 99 + elements = ( + Element( + reference_designator="N301", + name="Address", + usage=element.USAGE_MANDATORY, + element_type=element.ELEMENT_TYPE_STRING, + minimum=1, + maximum=55, + ), + ) + + +class _SegLX(BaseSegment): + segment_id = "LX" + usage = segment.USAGE_OPTIONAL + max_use = 99 + elements = ( + Element( + reference_designator="LX01", + name="Assigned Number", + usage=element.USAGE_MANDATORY, + element_type=element.ELEMENT_TYPE_NUMERIC, + minimum=1, + maximum=6, + ), + ) + + +class _SegN7(BaseSegment): + segment_id = "N7" + usage = segment.USAGE_OPTIONAL + max_use = 99 + elements = ( + Element( + reference_designator="N701", + name="Equipment Initial", + usage=element.USAGE_OPTIONAL, + element_type=element.ELEMENT_TYPE_STRING, + minimum=1, + maximum=4, + ), + ) + + class TestX12Parser(TestCase): def test_parse_multiple_envelope_segments(self): # arrange @@ -158,3 +272,176 @@ def test_mixed_registry_with_defaults(self): self.assertEqual(result['segments'][0]['segment_id'], 'ST') self.assertEqual(result['segments'][1]['segment_id'], 'CUS') self.assertEqual(result['segments'][2]['segment_id'], 'SE') + + +class TestX12ParserWithLoops(TestCase): + """Test loop-aware parsing behavior.""" + + def _make_registry(self, loop_defs=None, extra_grammars=None): + registry = GrammarRegistry() + if extra_grammars: + registry.register_all(extra_grammars) + if loop_defs: + for ld in loop_defs: + registry.register_loop(ld) + return registry + + def test_no_loops_returns_flat_list(self): + registry = GrammarRegistry() + registry.register_all([_SegA, _SegB]) + x12_string = "A*x~B*y~" + + parser = X12Parser(x12_string, registry=registry) + result = parser.parse() + + self.assertEqual(len(result), 2) + self.assertIsInstance(result[0], ParsedSegment) + 
self.assertIsInstance(result[1], ParsedSegment) + + def test_single_loop_start_no_children(self): + # [A, N1, B] → [A, Loop(N1), B] + registry = self._make_registry( + loop_defs=[LoopDefinition(_SegN1, [_SegN2, _SegN3])], + extra_grammars=[_SegA, _SegB], + ) + x12_string = "A*x~N1*CA~B*y~" + + parser = X12Parser(x12_string, registry=registry) + result = parser.parse() + + self.assertEqual(len(result), 3) + self.assertIsInstance(result[0], ParsedSegment) + self.assertIsInstance(result[1], ParsedLoop) + self.assertEqual(result[1].loop_id, "N1") + self.assertEqual(len(result[1].segments), 1) + self.assertIsInstance(result[2], ParsedSegment) + + def test_single_loop_with_children(self): + # [N1, N2, N3] → [Loop(N1, N2, N3)] + registry = self._make_registry( + loop_defs=[LoopDefinition(_SegN1, [_SegN2, _SegN3])], + ) + x12_string = "N1*CA~N2*Name~N3*Addr~" + + parser = X12Parser(x12_string, registry=registry) + result = parser.parse() + + self.assertEqual(len(result), 1) + self.assertIsInstance(result[0], ParsedLoop) + self.assertEqual(result[0].loop_id, "N1") + self.assertEqual(len(result[0].segments), 3) + self.assertEqual(result[0].segments[0].segment_id, "N1") + self.assertEqual(result[0].segments[1].segment_id, "N2") + self.assertEqual(result[0].segments[2].segment_id, "N3") + + def test_consecutive_loop_starts(self): + # [N1, N1] → [Loop(N1), Loop(N1)] + registry = self._make_registry( + loop_defs=[LoopDefinition(_SegN1, [_SegN2])], + ) + x12_string = "N1*CA~N1*SH~" + + parser = X12Parser(x12_string, registry=registry) + result = parser.parse() + + self.assertEqual(len(result), 2) + self.assertIsInstance(result[0], ParsedLoop) + self.assertIsInstance(result[1], ParsedLoop) + self.assertEqual(len(result[0].segments), 1) + self.assertEqual(len(result[1].segments), 1) + + def test_loop_terminated_by_non_child(self): + # [N1, N2, B] → [Loop(N1, N2), B] + registry = self._make_registry( + loop_defs=[LoopDefinition(_SegN1, [_SegN2, _SegN3])], + extra_grammars=[_SegB], + 
) + x12_string = "N1*CA~N2*Name~B*y~" + + parser = X12Parser(x12_string, registry=registry) + result = parser.parse() + + self.assertEqual(len(result), 2) + self.assertIsInstance(result[0], ParsedLoop) + self.assertEqual(len(result[0].segments), 2) + self.assertIsInstance(result[1], ParsedSegment) + self.assertEqual(result[1].segment_id, "B") + + def test_loop_terminated_by_different_loop(self): + # [N1, N2, LX, N7] → [Loop(N1, N2), Loop(LX, N7)] + registry = self._make_registry( + loop_defs=[ + LoopDefinition(_SegN1, [_SegN2, _SegN3]), + LoopDefinition(_SegLX, [_SegN7]), + ], + ) + x12_string = "N1*CA~N2*Name~LX*1~N7*HLCU~" + + parser = X12Parser(x12_string, registry=registry) + result = parser.parse() + + self.assertEqual(len(result), 2) + self.assertIsInstance(result[0], ParsedLoop) + self.assertEqual(result[0].loop_id, "N1") + self.assertEqual(len(result[0].segments), 2) + self.assertIsInstance(result[1], ParsedLoop) + self.assertEqual(result[1].loop_id, "LX") + self.assertEqual(len(result[1].segments), 2) + + def test_orphan_child_emitted_flat(self): + # [N2, N1, N2] → [N2, Loop(N1, N2)] + registry = self._make_registry( + loop_defs=[LoopDefinition(_SegN1, [_SegN2])], + ) + x12_string = "N2*orphan~N1*CA~N2*child~" + + parser = X12Parser(x12_string, registry=registry) + result = parser.parse() + + self.assertEqual(len(result), 2) + self.assertIsInstance(result[0], ParsedSegment) + self.assertEqual(result[0].segment_id, "N2") + self.assertIsInstance(result[1], ParsedLoop) + self.assertEqual(len(result[1].segments), 2) + + def test_to_dict_mixed_output(self): + registry = self._make_registry( + loop_defs=[LoopDefinition(_SegN1, [_SegN2])], + extra_grammars=[_SegA], + ) + x12_string = "A*x~N1*CA~N2*Name~" + + parser = X12Parser(x12_string, registry=registry) + result = parser.to_dict() + + self.assertIn('segment_id', result['segments'][0]) + self.assertEqual(result['segments'][0]['segment_id'], 'A') + self.assertIn('loop_id', result['segments'][1]) + 
self.assertEqual(result['segments'][1]['loop_id'], 'N1') + + def test_to_json_with_loops(self): + registry = self._make_registry( + loop_defs=[LoopDefinition(_SegN1, [_SegN2])], + extra_grammars=[_SegA], + ) + x12_string = "A*x~N1*CA~N2*Name~" + + parser = X12Parser(x12_string, registry=registry) + json_output = parser.to_json() + + data = json.loads(json_output) + self.assertIn('segments', data) + self.assertEqual(len(data['segments']), 2) + self.assertEqual(data['segments'][1]['loop_id'], 'N1') + + def test_parse_caches_with_loops(self): + registry = self._make_registry( + loop_defs=[LoopDefinition(_SegN1, [_SegN2])], + ) + x12_string = "N1*CA~N2*Name~" + + parser = X12Parser(x12_string, registry=registry) + result1 = parser.parse() + result2 = parser.parse() + + self.assertIs(result1, result2) From 5c00a403273b22bac5d573b44e43942d1a4361a3 Mon Sep 17 00:00:00 2001 From: Hoss Date: Wed, 11 Feb 2026 21:07:38 +0800 Subject: [PATCH 7/8] build: update docs for loop-aware parsing [why] New LoopDefinition and loop-aware parsing APIs need documentation. [how] - Add 4 changelog entries under v0.4 for LoopDefinition, loop-aware parsing, register_loop(), and register_all() - Add README example showing LoopDefinition usage with register_loop() --- CHANGELOG.md | 4 ++++ README.md | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c543828..a2358ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,10 @@ Changes: - Add `ParsedSegment`, `ParsedElement`, `ParsedCompositeElement`, `ParsedComponent`, `ParsedLoop` data structures. - Add `detect_delimiters()` for automatic ISA delimiter detection. - Add `parse_x12()` and `parse_x12_to_json()` convenience functions. +- Add `LoopDefinition` for defining loop structures (start segment + children). +- Add loop-aware parsing: `X12Parser` groups segments into `ParsedLoop` objects when loop definitions are registered. 
+- Add `GrammarRegistry.register_loop()` which auto-registers all segment grammars in a loop definition. +- Add `GrammarRegistry.register_all()` for batch segment registration. 0.3 ----- diff --git a/README.md b/README.md index 89e5c09..dfcc4d3 100644 --- a/README.md +++ b/README.md @@ -126,6 +126,46 @@ parser = X12Parser("MY*hello~MY*world~", registry=registry) data = parser.to_dict() ``` +* Parse with loop definitions to group related segments. +```python +from pyx12lib import X12Parser, LoopDefinition, GrammarRegistry +from pyx12lib.core.grammar import BaseSegment, Element, element, segment + +class N1Segment(BaseSegment): + segment_id = 'N1' + usage = segment.USAGE_OPTIONAL + max_use = 99 + elements = ( + Element(reference_designator='N101', name='Entity Identifier Code', + usage=element.USAGE_MANDATORY, element_type=element.ELEMENT_TYPE_ID, + minimum=2, maximum=3), + ) + +class N2Segment(BaseSegment): + segment_id = 'N2' + usage = segment.USAGE_OPTIONAL + max_use = 2 + elements = ( + Element(reference_designator='N201', name='Name', + usage=element.USAGE_MANDATORY, element_type=element.ELEMENT_TYPE_STRING, + minimum=1, maximum=35), + ) + +registry = GrammarRegistry() +registry.register_loop(LoopDefinition(N1Segment, [N2Segment])) + +parser = X12Parser("N1*CA~N1*SH~N2*ACME CORP~N1*CN~", registry=registry) +data = parser.to_dict() +# {'segments': [ +# {'loop_id': 'N1', 'segments': [{'segment_id': 'N1', 'elements': [...]}]}, +# {'loop_id': 'N1', 'segments': [ +# {'segment_id': 'N1', 'elements': [...]}, +# {'segment_id': 'N2', 'elements': [...]}, +# ]}, +# {'loop_id': 'N1', 'segments': [{'segment_id': 'N1', 'elements': [...]}]}, +# ]} +``` + * Auto-detect delimiters from ISA header. 
```python from pyx12lib import parse_x12 From 02877eea291fcf453a5b82f5d86387265294aecc Mon Sep 17 00:00:00 2001 From: Hoss Date: Wed, 11 Feb 2026 21:28:25 +0800 Subject: [PATCH 8/8] refactor: separate X12Parser config from input data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [why] X12Parser mixed configuration (registry, delimiter settings) with input data (x12_string) in the constructor, making the parser single-use. Also addresses two smaller PR review items: missing type hint on ParsedCompositeElement and indent=2 default on parse_x12_to_json. [how] - Move x12_string from X12Parser.__init__() to parse(x12_string), making the parser reusable across multiple inputs - Add X12ParseResult class as return type of parse() with .segments, .to_dict(), and .to_json() — replaces to_dict/to_json on X12Parser - Move delimiter auto-detection from constructor into parse() - Add type hints to ParsedCompositeElement.__init__ - Change parse_x12_to_json() indent default from 2 to None - Add detailed docstring with examples to _organize_into_loops() - Replace cache tests with reusability tests - Update all test files and README examples for new API --- README.md | 8 +- pyx12lib/__init__.py | 12 +-- pyx12lib/core/parsed.py | 5 +- pyx12lib/core/parser.py | 105 ++++++++++++------ tests/test_delimiters.py | 12 +-- tests/test_edi304_validation.py | 59 +++++----- tests/test_edi315_validation.py | 32 +++--- tests/test_integration.py | 3 +- tests/test_x12_parser.py | 186 +++++++++++++++++--------------- 9 files changed, 236 insertions(+), 186 deletions(-) diff --git a/README.md b/README.md index dfcc4d3..54ca0b1 100644 --- a/README.md +++ b/README.md @@ -122,8 +122,8 @@ class MySegment(BaseSegment): registry = GrammarRegistry() registry.register(MySegment) -parser = X12Parser("MY*hello~MY*world~", registry=registry) -data = parser.to_dict() +parser = X12Parser(registry=registry) +data = parser.parse("MY*hello~MY*world~").to_dict() ``` * Parse 
with loop definitions to group related segments. @@ -154,8 +154,8 @@ class N2Segment(BaseSegment): registry = GrammarRegistry() registry.register_loop(LoopDefinition(N1Segment, [N2Segment])) -parser = X12Parser("N1*CA~N1*SH~N2*ACME CORP~N1*CN~", registry=registry) -data = parser.to_dict() +parser = X12Parser(registry=registry) +data = parser.parse("N1*CA~N1*SH~N2*ACME CORP~N1*CN~").to_dict() # {'segments': [ # {'loop_id': 'N1', 'segments': [{'segment_id': 'N1', 'elements': [...]}]}, # {'loop_id': 'N1', 'segments': [ diff --git a/pyx12lib/__init__.py b/pyx12lib/__init__.py index d69472d..d1e5b4d 100644 --- a/pyx12lib/__init__.py +++ b/pyx12lib/__init__.py @@ -1,5 +1,5 @@ from pyx12lib.core.grammar.loop import LoopDefinition -from pyx12lib.core.parser import SegmentParser, X12Parser +from pyx12lib.core.parser import SegmentParser, X12Parser, X12ParseResult from pyx12lib.core.registry import GrammarRegistry, create_default_registry from pyx12lib.core.delimiters import detect_delimiters, Delimiters @@ -15,11 +15,11 @@ def parse_x12(x12_string, registry=None): Returns: Dict with 'segments' key containing list of parsed segment dicts. """ - parser = X12Parser(x12_string, registry=registry) - return parser.to_dict() + parser = X12Parser(registry=registry) + return parser.parse(x12_string).to_dict() -def parse_x12_to_json(x12_string, indent=2, registry=None): +def parse_x12_to_json(x12_string, indent=None, registry=None): """Parse an X12 string into a JSON string. Args: @@ -31,5 +31,5 @@ def parse_x12_to_json(x12_string, indent=2, registry=None): Returns: JSON string representation of the parsed X12 data. 
""" - parser = X12Parser(x12_string, registry=registry) - return parser.to_json(indent=indent) + parser = X12Parser(registry=registry) + return parser.parse(x12_string).to_json(indent=indent) diff --git a/pyx12lib/core/parsed.py b/pyx12lib/core/parsed.py index c321133..5299133 100644 --- a/pyx12lib/core/parsed.py +++ b/pyx12lib/core/parsed.py @@ -1,10 +1,11 @@ import json +from typing import Iterable from pyx12lib.core.grammar.element import ( - NotUsedElement, USAGE_MANDATORY, ELEMENT_TYPE_DECIMAL, ELEMENT_TYPE_NUMERIC, + CompositeElement, ) @@ -69,7 +70,7 @@ class ParsedComponent(ParsedElement): class ParsedCompositeElement: """Represents a parsed composite element containing components.""" - def __init__(self, grammar, components): + def __init__(self, grammar: CompositeElement, components: Iterable[ParsedComponent]) -> None: self._grammar = grammar self._components = components diff --git a/pyx12lib/core/parser.py b/pyx12lib/core/parser.py index edbf88d..a03a0a5 100644 --- a/pyx12lib/core/parser.py +++ b/pyx12lib/core/parser.py @@ -31,7 +31,7 @@ def __init__( self._element_delimiter = element_delimiter self._component_delimiter = component_delimiter - def parse(self): + def parse(self, x12_string: str): raise NotImplementedError @@ -100,8 +100,27 @@ def to_json(self, indent=2): return self.parse().to_json(indent=indent) +class X12ParseResult(object): + """Result of parsing an X12 string.""" + + def __init__(self, segments): + self._segments = segments + + @property + def segments(self): + return self._segments + + def to_dict(self): + return { + 'segments': [s.to_dict() for s in self._segments], + } + + def to_json(self, indent=None): + return json.dumps(self.to_dict(), indent=indent) + + class X12Parser(BaseSegmentParser): - """Parse a complete X12 string containing multiple segments. + """Parse complete X12 strings containing multiple segments. Uses a GrammarRegistry to auto-detect segment types by their ID. Unknown segments (not in the registry) are skipped. 
@@ -110,57 +129,71 @@ class X12Parser(BaseSegmentParser): delimiters are automatically detected from the ISA header. """ - def __init__(self, x12_string, registry=None, auto_detect_delimiters=True, **kwargs): - self._x12_string = x12_string - - if auto_detect_delimiters and x12_string.lstrip().startswith('ISA'): - detected = detect_delimiters(x12_string) - kwargs.setdefault('segment_terminator', detected.segment_terminator) - kwargs.setdefault('element_delimiter', detected.element_delimiter) - kwargs.setdefault('component_delimiter', detected.component_delimiter) - + def __init__(self, registry=None, auto_detect_delimiters=True, **kwargs): super(X12Parser, self).__init__(**kwargs) if registry is None: registry = create_default_registry() self._registry = registry - self._parsed_segments = None + self._auto_detect_delimiters = auto_detect_delimiters - def parse(self): - if self._parsed_segments is not None: - return self._parsed_segments + def parse(self, x12_string): + segment_terminator = self._segment_terminator + element_delimiter = self._element_delimiter + component_delimiter = self._component_delimiter + + if self._auto_detect_delimiters and x12_string.lstrip().startswith('ISA'): + detected = detect_delimiters(x12_string) + segment_terminator = detected.segment_terminator + element_delimiter = detected.element_delimiter + component_delimiter = detected.component_delimiter flat_segments = [] - raw_segments = self._x12_string.split(self._segment_terminator) + raw_segments = x12_string.split(segment_terminator) for raw in raw_segments: raw = raw.strip() if not raw: continue - segment_id = raw.split(self._element_delimiter)[0] + segment_id = raw.split(element_delimiter)[0] grammar = self._registry.get(segment_id) if grammar is None: continue parser = SegmentParser( - raw + self._segment_terminator, + raw + segment_terminator, grammar=grammar, - segment_terminator=self._segment_terminator, - element_delimiter=self._element_delimiter, - 
component_delimiter=self._component_delimiter, + segment_terminator=segment_terminator, + element_delimiter=element_delimiter, + component_delimiter=component_delimiter, ) flat_segments.append(parser.parse()) if self._registry.has_loops: - self._parsed_segments = self._organize_into_loops(flat_segments) + segments = self._organize_into_loops(flat_segments) else: - self._parsed_segments = flat_segments + segments = flat_segments - return self._parsed_segments + return X12ParseResult(segments) def _organize_into_loops(self, flat_segments): - """Group flat segments into ParsedLoop objects based on registry loop definitions.""" + """Group flat segments into ParsedLoop objects based on registry loop definitions. + + Single O(n) pass. Maintains two pieces of state: + - current_loop: the ParsedLoop being built (None when not inside a loop) + - current_loop_def: the LoopDefinition that governs valid children + + For each segment, exactly one of three cases applies: + 1. Segment starts a new loop (its ID matches a registered LoopDefinition). + 2. Segment is a child of the currently open loop. + 3. Segment is unrelated — emitted flat, closing any open loop first. + + Examples (N1 loop with N2 child): + [A, N1, N2, B] → [A, Loop(N1,N2), B] + [N1*CA, N1*SH, N2] → [Loop(N1*CA), Loop(N1*SH, N2)] + [N2, N1, N2] → [N2, Loop(N1, N2)] (orphan N2 emitted flat) + """ result = [] current_loop = None current_loop_def = None @@ -170,32 +203,34 @@ def _organize_into_loops(self, flat_segments): loop_def = self._registry.get_loop(seg_id) if loop_def is not None: - # This segment starts a new loop. + # Case 1: This segment starts a new loop. + # Finalize the previous loop if one is open, then start fresh. + # This also handles consecutive starts (e.g. N1*CA then N1*SH) + # — each start closes the prior loop. 
if current_loop is not None: result.append(current_loop) current_loop = ParsedLoop(loop_id=loop_def.loop_id) current_loop.add_segment(segment) current_loop_def = loop_def - elif current_loop is not None and current_loop_def.is_child(seg_id): + elif current_loop is not None and current_loop_def is not None and current_loop_def.is_child(seg_id): + # Case 2: Segment belongs to the currently open loop. + # is_child() checks against the LoopDefinition's child_segment_ids + # (direct children only, not nested loop members). current_loop.add_segment(segment) else: + # Case 3: Segment is not a loop start and not a child. + # If a loop is open, it's terminated by this unrelated segment. + # The segment itself is emitted as a flat ParsedSegment. if current_loop is not None: result.append(current_loop) current_loop = None current_loop_def = None result.append(segment) + # Flush any loop still open after the last segment. if current_loop is not None: result.append(current_loop) return result - - def to_dict(self): - return { - 'segments': [s.to_dict() for s in self.parse()], - } - - def to_json(self, indent=2): - return json.dumps(self.to_dict(), indent=indent) diff --git a/tests/test_delimiters.py b/tests/test_delimiters.py index 99b0f50..11c55aa 100644 --- a/tests/test_delimiters.py +++ b/tests/test_delimiters.py @@ -128,8 +128,8 @@ def test_auto_detect_with_standard_isa(self): "IEA*1*000000001~" ) - parser = X12Parser(x12_string) - result = parser.to_dict() + parser = X12Parser() + result = parser.parse(x12_string).to_dict() self.assertEqual(len(result['segments']), 6) @@ -138,8 +138,8 @@ def test_auto_detect_disabled_uses_defaults(self): x12_string = "ST*997*0001~SE*1*0001~" - parser = X12Parser(x12_string, auto_detect_delimiters=False) - result = parser.to_dict() + parser = X12Parser(auto_detect_delimiters=False) + result = parser.parse(x12_string).to_dict() self.assertEqual(len(result['segments']), 2) @@ -149,7 +149,7 @@ def test_no_isa_falls_through_to_defaults(self): # 
No ISA present, auto_detect=True but should use defaults gracefully x12_string = "ST*997*0001~SE*1*0001~" - parser = X12Parser(x12_string, auto_detect_delimiters=True) - result = parser.to_dict() + parser = X12Parser(auto_detect_delimiters=True) + result = parser.parse(x12_string).to_dict() self.assertEqual(len(result['segments']), 2) diff --git a/tests/test_edi304_validation.py b/tests/test_edi304_validation.py index 4b66220..6e8aaaa 100644 --- a/tests/test_edi304_validation.py +++ b/tests/test_edi304_validation.py @@ -830,8 +830,8 @@ class TestFullEdi304DocumentParsing(TestCase): def test_parse_full_document(self): registry = _make_edi304_registry() - parser = X12Parser(self.FULL_EDI_304, registry=registry) - result = parser.to_dict() + parser = X12Parser(registry=registry) + result = parser.parse(self.FULL_EDI_304).to_dict() segment_ids = [s['segment_id'] for s in result['segments']] self.assertEqual(segment_ids, [ @@ -852,23 +852,23 @@ def test_parse_full_document(self): def test_parse_full_document_segment_count(self): registry = _make_edi304_registry() - parser = X12Parser(self.FULL_EDI_304, registry=registry) - segments = parser.parse() + parser = X12Parser(registry=registry) + segments = parser.parse(self.FULL_EDI_304).segments self.assertEqual(len(segments), 39) def test_parse_full_document_to_json(self): registry = _make_edi304_registry() - parser = X12Parser(self.FULL_EDI_304, registry=registry) - json_output = parser.to_json() + parser = X12Parser(registry=registry) + json_output = parser.parse(self.FULL_EDI_304).to_json() data = json.loads(json_output) self.assertEqual(len(data['segments']), 39) def test_parse_b2_from_document(self): registry = _make_edi304_registry() - parser = X12Parser(self.FULL_EDI_304, registry=registry) - segments = parser.parse() + parser = X12Parser(registry=registry) + segments = parser.parse(self.FULL_EDI_304).segments b2 = segments[3] self.assertEqual(b2.segment_id, 'B2') @@ -880,8 +880,8 @@ def 
test_parse_b2_from_document(self): def test_parse_v1_from_document(self): registry = _make_edi304_registry() - parser = X12Parser(self.FULL_EDI_304, registry=registry) - segments = parser.parse() + parser = X12Parser(registry=registry) + segments = parser.parse(self.FULL_EDI_304).segments v1 = [s for s in segments if s.segment_id == 'V1'][0] ref_map = {e['reference_designator']: e['value'] for e in v1.to_dict()['elements']} @@ -893,8 +893,8 @@ def test_parse_v1_from_document(self): def test_parse_n9_segments_from_document(self): registry = _make_edi304_registry() - parser = X12Parser(self.FULL_EDI_304, registry=registry) - segments = parser.parse() + parser = X12Parser(registry=registry) + segments = parser.parse(self.FULL_EDI_304).segments n9_segments = [s for s in segments if s.segment_id == 'N9'] self.assertEqual(len(n9_segments), 4) @@ -906,8 +906,8 @@ def test_parse_n9_segments_from_document(self): def test_parse_n1_segments_from_document(self): registry = _make_edi304_registry() - parser = X12Parser(self.FULL_EDI_304, registry=registry) - segments = parser.parse() + parser = X12Parser(registry=registry) + segments = parser.parse(self.FULL_EDI_304).segments n1_segments = [s for s in segments if s.segment_id == 'N1'] self.assertEqual(len(n1_segments), 4) @@ -919,8 +919,8 @@ def test_parse_n1_segments_from_document(self): def test_parse_r4_ports_from_document(self): registry = _make_edi304_registry() - parser = X12Parser(self.FULL_EDI_304, registry=registry) - segments = parser.parse() + parser = X12Parser(registry=registry) + segments = parser.parse(self.FULL_EDI_304).segments r4_segments = [s for s in segments if s.segment_id == 'R4'] self.assertEqual(len(r4_segments), 4) @@ -932,8 +932,8 @@ def test_parse_r4_ports_from_document(self): def test_parse_lx_containers_from_document(self): registry = _make_edi304_registry() - parser = X12Parser(self.FULL_EDI_304, registry=registry) - segments = parser.parse() + parser = X12Parser(registry=registry) + segments = 
parser.parse(self.FULL_EDI_304).segments lx_segments = [s for s in segments if s.segment_id == 'LX'] self.assertEqual(len(lx_segments), 2) @@ -942,8 +942,8 @@ def test_parse_lx_containers_from_document(self): def test_parse_n7_equipment_from_document(self): registry = _make_edi304_registry() - parser = X12Parser(self.FULL_EDI_304, registry=registry) - segments = parser.parse() + parser = X12Parser(registry=registry) + segments = parser.parse(self.FULL_EDI_304).segments n7_segments = [s for s in segments if s.segment_id == 'N7'] self.assertEqual(len(n7_segments), 2) @@ -961,8 +961,8 @@ def test_parse_n7_equipment_from_document(self): def test_parse_l3_totals_from_document(self): registry = _make_edi304_registry() - parser = X12Parser(self.FULL_EDI_304, registry=registry) - segments = parser.parse() + parser = X12Parser(registry=registry) + segments = parser.parse(self.FULL_EDI_304).segments l3 = [s for s in segments if s.segment_id == 'L3'][0] ref_map = {e['reference_designator']: e['value'] for e in l3.to_dict()['elements']} @@ -975,8 +975,8 @@ def test_parse_l3_totals_from_document(self): def test_document_all_segments_valid(self): registry = _make_edi304_registry() - parser = X12Parser(self.FULL_EDI_304, registry=registry) - segments = parser.parse() + parser = X12Parser(registry=registry) + segments = parser.parse(self.FULL_EDI_304).segments for seg in segments: self.assertTrue( @@ -1056,8 +1056,9 @@ class TestFullEdi304LoopParsing(TestCase): def setUp(self): self.registry = _make_edi304_loop_registry() - self.parser = X12Parser(self.FULL_EDI_304_WITH_LOOPS, registry=self.registry) - self.result = self.parser.parse() + parser = X12Parser(registry=self.registry) + self.parse_result = parser.parse(self.FULL_EDI_304_WITH_LOOPS) + self.result = self.parse_result.segments def test_loop_parse_top_level_item_count(self): # ISA, GS, ST, B2, B2A, 5x N9, V1 = 11 flat @@ -1142,17 +1143,17 @@ def test_envelope_segments_unaffected(self): self.assertEqual(len(matches), 1, 
msg='Expected 1 {} segment, got {}'.format(env_id, len(matches))) def test_to_dict_loop_items_have_loop_id(self): - result_dict = self.parser.to_dict() + result_dict = self.parse_result.to_dict() loop_items = [item for item in result_dict['segments'] if 'loop_id' in item] self.assertEqual(len(loop_items), 6) # 5 N1 + 1 LX def test_to_dict_flat_items_have_segment_id(self): - result_dict = self.parser.to_dict() + result_dict = self.parse_result.to_dict() flat_items = [item for item in result_dict['segments'] if 'segment_id' in item] self.assertEqual(len(flat_items), 23) # 29 total - 6 loops def test_to_json_round_trip(self): - json_output = self.parser.to_json() + json_output = self.parse_result.to_json() data = json.loads(json_output) self.assertIn('segments', data) self.assertEqual(len(data['segments']), 29) diff --git a/tests/test_edi315_validation.py b/tests/test_edi315_validation.py index ae9208d..f915bb6 100644 --- a/tests/test_edi315_validation.py +++ b/tests/test_edi315_validation.py @@ -319,8 +319,8 @@ class TestFullEdi315DocumentParsing(TestCase): def test_parse_full_document(self): registry = _make_edi315_registry() - parser = X12Parser(self.FULL_EDI_315, registry=registry) - result = parser.to_dict() + parser = X12Parser(registry=registry) + result = parser.parse(self.FULL_EDI_315).to_dict() segment_ids = [s['segment_id'] for s in result['segments']] self.assertEqual(segment_ids, [ @@ -334,23 +334,23 @@ def test_parse_full_document(self): def test_parse_full_document_segment_count(self): registry = _make_edi315_registry() - parser = X12Parser(self.FULL_EDI_315, registry=registry) - segments = parser.parse() + parser = X12Parser(registry=registry) + segments = parser.parse(self.FULL_EDI_315).segments self.assertEqual(len(segments), 20) def test_parse_full_document_to_json(self): registry = _make_edi315_registry() - parser = X12Parser(self.FULL_EDI_315, registry=registry) - json_output = parser.to_json() + parser = X12Parser(registry=registry) + json_output 
= parser.parse(self.FULL_EDI_315).to_json() data = json.loads(json_output) self.assertEqual(len(data['segments']), 20) def test_parse_b4_from_document(self): registry = _make_edi315_registry() - parser = X12Parser(self.FULL_EDI_315, registry=registry) - segments = parser.parse() + parser = X12Parser(registry=registry) + segments = parser.parse(self.FULL_EDI_315).segments b4 = segments[3] self.assertEqual(b4.segment_id, 'B4') @@ -362,8 +362,8 @@ def test_parse_b4_from_document(self): def test_parse_n9_segments_from_document(self): registry = _make_edi315_registry() - parser = X12Parser(self.FULL_EDI_315, registry=registry) - segments = parser.parse() + parser = X12Parser(registry=registry) + segments = parser.parse(self.FULL_EDI_315).segments n9_segments = [s for s in segments if s.segment_id == 'N9'] self.assertEqual(len(n9_segments), 4) @@ -375,8 +375,8 @@ def test_parse_n9_segments_from_document(self): def test_parse_r4_dtm_pairs_from_document(self): registry = _make_edi315_registry() - parser = X12Parser(self.FULL_EDI_315, registry=registry) - segments = parser.parse() + parser = X12Parser(registry=registry) + segments = parser.parse(self.FULL_EDI_315).segments r4_segments = [s for s in segments if s.segment_id == 'R4'] dtm_segments = [s for s in segments if s.segment_id == 'DTM'] @@ -424,8 +424,8 @@ def test_parse_two_transaction_document(self): ) registry = _make_edi315_registry() - parser = X12Parser(x12, registry=registry) - segments = parser.parse() + parser = X12Parser(registry=registry) + segments = parser.parse(x12).segments counts = {} for s in segments: @@ -455,8 +455,8 @@ def test_headers_only_document(self): ) registry = _make_edi315_registry() - parser = X12Parser(x12, registry=registry) - result = parser.to_dict() + parser = X12Parser(registry=registry) + result = parser.parse(x12).to_dict() segment_ids = [s['segment_id'] for s in result['segments']] self.assertEqual(segment_ids, ['ISA', 'GS', 'GE', 'IEA']) diff --git a/tests/test_integration.py 
b/tests/test_integration.py index ef2c287..fa6b78f 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -3,8 +3,7 @@ from unittest import TestCase from pyx12lib import parse_x12, parse_x12_to_json -from pyx12lib.core.parser import SegmentParser, X12Parser -from pyx12lib.core.registry import create_default_registry +from pyx12lib.core.parser import SegmentParser class TestIntegrationFullEnvelope(TestCase): diff --git a/tests/test_x12_parser.py b/tests/test_x12_parser.py index 5f5834d..0e3ca6b 100644 --- a/tests/test_x12_parser.py +++ b/tests/test_x12_parser.py @@ -4,7 +4,7 @@ from pyx12lib.core.grammar import BaseSegment, Element, element, segment from pyx12lib.core.grammar.loop import LoopDefinition from pyx12lib.core.parsed import ParsedLoop, ParsedSegment -from pyx12lib.core.parser import X12Parser +from pyx12lib.core.parser import X12Parser, X12ParseResult from pyx12lib.core.registry import GrammarRegistry, create_default_registry @@ -142,8 +142,8 @@ def test_parse_multiple_envelope_segments(self): x12_string = "ST*997*0001~SE*1*0001~" # action - parser = X12Parser(x12_string) - result = parser.to_dict() + parser = X12Parser() + result = parser.parse(x12_string).to_dict() # assert self.assertEqual(len(result['segments']), 2) @@ -163,8 +163,8 @@ def test_parse_full_envelope(self): ) # action - parser = X12Parser(x12_string) - result = parser.to_dict() + parser = X12Parser() + result = parser.parse(x12_string).to_dict() # assert self.assertEqual(len(result['segments']), 6) @@ -176,8 +176,8 @@ def test_unknown_segments_are_skipped(self): x12_string = "ST*997*0001~BOGUS*DATA~SE*1*0001~" # action - parser = X12Parser(x12_string) - result = parser.to_dict() + parser = X12Parser() + result = parser.parse(x12_string).to_dict() # assert self.assertEqual(len(result['segments']), 2) @@ -191,8 +191,8 @@ def test_custom_registry(self): x12_string = "CUS*hello~CUS*world~" # action - parser = X12Parser(x12_string, registry=registry) - result = 
parser.to_dict() + parser = X12Parser(registry=registry) + result = parser.parse(x12_string).to_dict() # assert self.assertEqual(len(result['segments']), 2) @@ -204,8 +204,8 @@ def test_to_json_produces_valid_json(self): x12_string = "ST*997*0001~SE*1*0001~" # action - parser = X12Parser(x12_string) - json_output = parser.to_json() + parser = X12Parser() + json_output = parser.parse(x12_string).to_json() # assert data = json.loads(json_output) @@ -217,8 +217,8 @@ def test_empty_string_returns_no_segments(self): x12_string = "" # action - parser = X12Parser(x12_string) - result = parser.to_dict() + parser = X12Parser() + result = parser.parse(x12_string).to_dict() # assert self.assertEqual(len(result['segments']), 0) @@ -228,31 +228,43 @@ def test_whitespace_only_returns_no_segments(self): x12_string = " \n \n " # action - parser = X12Parser(x12_string) - result = parser.to_dict() + parser = X12Parser() + result = parser.parse(x12_string).to_dict() # assert self.assertEqual(len(result['segments']), 0) - def test_parse_caches_result(self): + def test_parse_returns_x12_parse_result(self): # arrange x12_string = "ST*997*0001~SE*1*0001~" # action - parser = X12Parser(x12_string) - result1 = parser.parse() - result2 = parser.parse() + parser = X12Parser() + result = parser.parse(x12_string) # assert - self.assertIs(result1, result2) + self.assertIsInstance(result, X12ParseResult) + self.assertEqual(len(result.segments), 2) + + def test_parser_is_reusable(self): + # arrange + parser = X12Parser() + + # action + result1 = parser.parse("ST*997*0001~SE*1*0001~") + result2 = parser.parse("ST*997*0002~") + + # assert + self.assertEqual(len(result1.segments), 2) + self.assertEqual(len(result2.segments), 1) def test_newline_separated_segments(self): # arrange: segments separated by newlines x12_string = "ST*997*0001~\nSE*1*0001~\n" # action - parser = X12Parser(x12_string) - result = parser.to_dict() + parser = X12Parser() + result = parser.parse(x12_string).to_dict() # assert 
self.assertEqual(len(result['segments']), 2) @@ -264,8 +276,8 @@ def test_mixed_registry_with_defaults(self): x12_string = "ST*997*0001~CUS*test~SE*1*0001~" # action - parser = X12Parser(x12_string, registry=registry) - result = parser.to_dict() + parser = X12Parser(registry=registry) + result = parser.parse(x12_string).to_dict() # assert self.assertEqual(len(result['segments']), 3) @@ -291,12 +303,12 @@ def test_no_loops_returns_flat_list(self): registry.register_all([_SegA, _SegB]) x12_string = "A*x~B*y~" - parser = X12Parser(x12_string, registry=registry) - result = parser.parse() + parser = X12Parser(registry=registry) + result = parser.parse(x12_string) - self.assertEqual(len(result), 2) - self.assertIsInstance(result[0], ParsedSegment) - self.assertIsInstance(result[1], ParsedSegment) + self.assertEqual(len(result.segments), 2) + self.assertIsInstance(result.segments[0], ParsedSegment) + self.assertIsInstance(result.segments[1], ParsedSegment) def test_single_loop_start_no_children(self): # [A, N1, B] → [A, Loop(N1), B] @@ -306,15 +318,15 @@ def test_single_loop_start_no_children(self): ) x12_string = "A*x~N1*CA~B*y~" - parser = X12Parser(x12_string, registry=registry) - result = parser.parse() + parser = X12Parser(registry=registry) + result = parser.parse(x12_string) - self.assertEqual(len(result), 3) - self.assertIsInstance(result[0], ParsedSegment) - self.assertIsInstance(result[1], ParsedLoop) - self.assertEqual(result[1].loop_id, "N1") - self.assertEqual(len(result[1].segments), 1) - self.assertIsInstance(result[2], ParsedSegment) + self.assertEqual(len(result.segments), 3) + self.assertIsInstance(result.segments[0], ParsedSegment) + self.assertIsInstance(result.segments[1], ParsedLoop) + self.assertEqual(result.segments[1].loop_id, "N1") + self.assertEqual(len(result.segments[1].segments), 1) + self.assertIsInstance(result.segments[2], ParsedSegment) def test_single_loop_with_children(self): # [N1, N2, N3] → [Loop(N1, N2, N3)] @@ -323,16 +335,16 @@ def 
test_single_loop_with_children(self): ) x12_string = "N1*CA~N2*Name~N3*Addr~" - parser = X12Parser(x12_string, registry=registry) - result = parser.parse() + parser = X12Parser(registry=registry) + result = parser.parse(x12_string) - self.assertEqual(len(result), 1) - self.assertIsInstance(result[0], ParsedLoop) - self.assertEqual(result[0].loop_id, "N1") - self.assertEqual(len(result[0].segments), 3) - self.assertEqual(result[0].segments[0].segment_id, "N1") - self.assertEqual(result[0].segments[1].segment_id, "N2") - self.assertEqual(result[0].segments[2].segment_id, "N3") + self.assertEqual(len(result.segments), 1) + self.assertIsInstance(result.segments[0], ParsedLoop) + self.assertEqual(result.segments[0].loop_id, "N1") + self.assertEqual(len(result.segments[0].segments), 3) + self.assertEqual(result.segments[0].segments[0].segment_id, "N1") + self.assertEqual(result.segments[0].segments[1].segment_id, "N2") + self.assertEqual(result.segments[0].segments[2].segment_id, "N3") def test_consecutive_loop_starts(self): # [N1, N1] → [Loop(N1), Loop(N1)] @@ -341,14 +353,14 @@ def test_consecutive_loop_starts(self): ) x12_string = "N1*CA~N1*SH~" - parser = X12Parser(x12_string, registry=registry) - result = parser.parse() + parser = X12Parser(registry=registry) + result = parser.parse(x12_string) - self.assertEqual(len(result), 2) - self.assertIsInstance(result[0], ParsedLoop) - self.assertIsInstance(result[1], ParsedLoop) - self.assertEqual(len(result[0].segments), 1) - self.assertEqual(len(result[1].segments), 1) + self.assertEqual(len(result.segments), 2) + self.assertIsInstance(result.segments[0], ParsedLoop) + self.assertIsInstance(result.segments[1], ParsedLoop) + self.assertEqual(len(result.segments[0].segments), 1) + self.assertEqual(len(result.segments[1].segments), 1) def test_loop_terminated_by_non_child(self): # [N1, N2, B] → [Loop(N1, N2), B] @@ -358,14 +370,14 @@ def test_loop_terminated_by_non_child(self): ) x12_string = "N1*CA~N2*Name~B*y~" - parser = 
X12Parser(x12_string, registry=registry) - result = parser.parse() + parser = X12Parser(registry=registry) + result = parser.parse(x12_string) - self.assertEqual(len(result), 2) - self.assertIsInstance(result[0], ParsedLoop) - self.assertEqual(len(result[0].segments), 2) - self.assertIsInstance(result[1], ParsedSegment) - self.assertEqual(result[1].segment_id, "B") + self.assertEqual(len(result.segments), 2) + self.assertIsInstance(result.segments[0], ParsedLoop) + self.assertEqual(len(result.segments[0].segments), 2) + self.assertIsInstance(result.segments[1], ParsedSegment) + self.assertEqual(result.segments[1].segment_id, "B") def test_loop_terminated_by_different_loop(self): # [N1, N2, LX, N7] → [Loop(N1, N2), Loop(LX, N7)] @@ -377,16 +389,16 @@ def test_loop_terminated_by_different_loop(self): ) x12_string = "N1*CA~N2*Name~LX*1~N7*HLCU~" - parser = X12Parser(x12_string, registry=registry) - result = parser.parse() + parser = X12Parser(registry=registry) + result = parser.parse(x12_string) - self.assertEqual(len(result), 2) - self.assertIsInstance(result[0], ParsedLoop) - self.assertEqual(result[0].loop_id, "N1") - self.assertEqual(len(result[0].segments), 2) - self.assertIsInstance(result[1], ParsedLoop) - self.assertEqual(result[1].loop_id, "LX") - self.assertEqual(len(result[1].segments), 2) + self.assertEqual(len(result.segments), 2) + self.assertIsInstance(result.segments[0], ParsedLoop) + self.assertEqual(result.segments[0].loop_id, "N1") + self.assertEqual(len(result.segments[0].segments), 2) + self.assertIsInstance(result.segments[1], ParsedLoop) + self.assertEqual(result.segments[1].loop_id, "LX") + self.assertEqual(len(result.segments[1].segments), 2) def test_orphan_child_emitted_flat(self): # [N2, N1, N2] → [N2, Loop(N1, N2)] @@ -395,14 +407,14 @@ def test_orphan_child_emitted_flat(self): ) x12_string = "N2*orphan~N1*CA~N2*child~" - parser = X12Parser(x12_string, registry=registry) - result = parser.parse() + parser = X12Parser(registry=registry) + 
result = parser.parse(x12_string) - self.assertEqual(len(result), 2) - self.assertIsInstance(result[0], ParsedSegment) - self.assertEqual(result[0].segment_id, "N2") - self.assertIsInstance(result[1], ParsedLoop) - self.assertEqual(len(result[1].segments), 2) + self.assertEqual(len(result.segments), 2) + self.assertIsInstance(result.segments[0], ParsedSegment) + self.assertEqual(result.segments[0].segment_id, "N2") + self.assertIsInstance(result.segments[1], ParsedLoop) + self.assertEqual(len(result.segments[1].segments), 2) def test_to_dict_mixed_output(self): registry = self._make_registry( @@ -411,8 +423,8 @@ def test_to_dict_mixed_output(self): ) x12_string = "A*x~N1*CA~N2*Name~" - parser = X12Parser(x12_string, registry=registry) - result = parser.to_dict() + parser = X12Parser(registry=registry) + result = parser.parse(x12_string).to_dict() self.assertIn('segment_id', result['segments'][0]) self.assertEqual(result['segments'][0]['segment_id'], 'A') @@ -426,22 +438,24 @@ def test_to_json_with_loops(self): ) x12_string = "A*x~N1*CA~N2*Name~" - parser = X12Parser(x12_string, registry=registry) - json_output = parser.to_json() + parser = X12Parser(registry=registry) + json_output = parser.parse(x12_string).to_json() data = json.loads(json_output) self.assertIn('segments', data) self.assertEqual(len(data['segments']), 2) self.assertEqual(data['segments'][1]['loop_id'], 'N1') - def test_parse_caches_with_loops(self): + def test_parser_reusable_with_loops(self): registry = self._make_registry( loop_defs=[LoopDefinition(_SegN1, [_SegN2])], ) - x12_string = "N1*CA~N2*Name~" - parser = X12Parser(x12_string, registry=registry) - result1 = parser.parse() - result2 = parser.parse() + parser = X12Parser(registry=registry) + result1 = parser.parse("N1*CA~N2*Name~") + result2 = parser.parse("N1*SH~") - self.assertIs(result1, result2) + self.assertEqual(len(result1.segments), 1) + self.assertEqual(len(result1.segments[0].segments), 2) + self.assertEqual(len(result2.segments), 
1) + self.assertEqual(len(result2.segments[0].segments), 1)