cdisc-org · swhume · Jun 23, 2026 · Jun 16, 2026 · Jun 16, 2026 · Jun 16, 2026
diff --git a/src/define-xml/define.json b/src/define-xml/define.json
diff --git a/src/generators/define/itemGroups.py b/src/generators/define/itemGroups.py
@@ -6,6 +6,8 @@
 import valueLevel as VL
 from constants import TRIAL_DESIGN_DOMAINS, NON_REPEATING_DOMAINS, DEFAULT_PURPOSE
 
+MAX_SUBCLASS_DEPTH = 4  # deepest subClasses nesting level that _append_subclasses will emit
+
 
 class ItemGroups(define_object.DefineObject):
     """ create a Define-XML v2.1 ItemGroupDef element template """
@@ -51,23 +53,47 @@ def _generate_dataset(self, dataset, define_objects, lang, acrf):
         slices = dataset.get("slices")
         items.Items().create_define_objects(items_list, define_objects, lang, acrf, slice=slices)
 
-        # assumption: list of subclasses, but no nested subclasses - may need to revisit this for ADaM
-        if dataset.get("observationClass", {}).get("name", ""):
-            ds_class = dataset["observationClass"]["name"].upper().replace("-", " ")
+        observation_class = dataset.get("observationClass") or {}
+        if observation_class.get("name"):
+            ds_class = observation_class["name"].upper().replace("-", " ")
             itg.Class = DEFINE.Class(Name=ds_class)
-            sub_classes = dataset.get("observationClass").get("subClasses", [])
-            for sub_class in sub_classes:
-                sub_class_name = sub_class.get("name")
-                if sub_class_name.get("parentClass", ""):
-                    itg.Class.SubClass.append(DEFINE.SubClass(Name=sub_class["name"], ParentClass=sub_class["parentClass"]))
-                else:
-                    itg.Class.SubClass.append(DEFINE.SubClass(Name=sub_class["name"]))
+            self._append_subclasses(
+                itg.Class,
+                observation_class.get("subClasses") or [],
+                parent_name=None,
+                depth=1,
+            )
 
         # default is Dataset-JSON .ndjson datasets - will be overridden in post-processing if is_xpt CL arg is True
         leaf = DEFINE.leaf(ID="LF." + dataset_name, href=dataset_name.lower() + ".ndjson")
         leaf.title = DEFINE.title(_content=dataset_name.lower() + ".ndjson")
         itg.leaf = leaf
 
+    def _append_subclasses(self, class_obj, sub_classes, parent_name, depth):
+        """
+        Emit a nested subClasses JSON tree as flat SubClass siblings on class_obj.
+
+        Define-XML v2.1 does not nest SubClass elements; every level sits directly under
+        def:Class and points at its parent SubClass through the ParentClass attribute.
+        Levels deeper than MAX_SUBCLASS_DEPTH are dropped without any warning.
+        """
+        if depth > MAX_SUBCLASS_DEPTH:
+            return
+        for entry in sub_classes:
+            entry_name = entry.get("name")
+            if not entry_name:
+                continue
+            # An explicit parentClass in the JSON takes precedence over the parent implied
+            # by nesting; top-level entries with neither get no ParentClass attribute.
+            sub_class_attrs = {"Name": entry_name}
+            owner = entry.get("parentClass") or parent_name
+            if owner:
+                sub_class_attrs["ParentClass"] = owner
+            class_obj.SubClass.append(DEFINE.SubClass(**sub_class_attrs))
+            children = entry.get("subClasses") or []
+            if children:
+                self._append_subclasses(class_obj, children, parent_name=entry_name, depth=depth + 1)
+
     def _create_itemgroupdef_object(self, obj):
         name = self.require_key(obj, "name", "ItemGroupDef")
         oid = self.generate_oid(["IG", name])

diff --git a/src/generators/define/items.py b/src/generators/define/items.py
@@ -69,6 +69,7 @@ def _create_itemdef_object(self, obj, oid, slice):
     @staticmethod
     def _new_itemdef(attr: dict[str, Any]) -> Any:
         """Instantiate an ItemDef without triggering descriptor validation."""
+        # TODO replace this with v0.2.0 odmlib permissive mode
         item = object.__new__(DEFINE.ItemDef)
         for key, value in attr.items():
             item.__dict__[key] = value
@@ -88,7 +89,23 @@ def _add_optional_itemdef_elements(self, item, obj, it_oid, slice):
             self._add_origin(item, obj)
 
     def _add_origin(self, item, obj):
-        origin_in = obj["origin"]
+        raw_origins = obj["origin"]
+        if isinstance(raw_origins, dict):
+            # Current DDS shape: one origin dict, with predecessor/pages as item-level siblings.
+            # Planned shape: a list of origin dicts, each carrying its own predecessor/pages.
+            # Work on a shallow copy of the single dict and copy in the item-level fields it
+            # does not already define, so _build_origin only ever reads from the origin dict.
+            single = dict(raw_origins)
+            for field in ("predecessor", "pages"):
+                if obj.get(field) and field not in single:
+                    single[field] = obj[field]
+            raw_origins = [single]
+
+        # One Origin is built and appended per entry, in input order.
+        for origin_entry in raw_origins:
+            item.Origin.append(self._build_origin(origin_entry))
+
+    def _build_origin(self, origin_in):
         origin_type = origin_in.get("type")
         origin_source = origin_in.get("source")
         attr: dict[str, Any] = {}
@@ -108,17 +125,16 @@ def _add_origin(self, item, obj):
             dr.PDFPageRef.append(DEFINE.PDFPageRef(PageRefs="__PLACEHOLDER__", Type="PhysicalRef"))
             origin.DocumentRef.append(dr)
 
-        if obj.get("predecessor"):
+        if origin_in.get("predecessor"):
             origin.Description = DEFINE.Description()
             origin.Description.TranslatedText.append(
-                DEFINE.TranslatedText(_content=obj["predecessor"], lang=self.lang)
+                DEFINE.TranslatedText(_content=origin_in["predecessor"], lang=self.lang)
             )
-        if obj.get("pages"):
+        if origin_in.get("pages"):
             dr = DEFINE.DocumentRef(leafID=self.acrf)
-            dr.PDFPageRef.append(DEFINE.PDFPageRef(PageRefs=obj["pages"], Type="PhysicalRef"))
+            dr.PDFPageRef.append(DEFINE.PDFPageRef(PageRefs=origin_in["pages"], Type="PhysicalRef"))
             origin.DocumentRef.append(dr)
-
-        item.Origin.append(origin)
+        return origin
 
     @staticmethod
     def _add_optional_itemdef_attributes(attr, obj):
@@ -131,6 +147,13 @@ def _add_optional_itemdef_attributes(attr, obj):
             attr["Length"] = "__PLACEHOLDER__"
         if obj.get("significantDigits"):
             attr["SignificantDigits"] = obj["significantDigits"]
+        elif obj.get("displayFormat") and obj.get("dataType") == "float":
+            # TODO work around issue 78 - missing significant digits - remove after USDM updated
+            length_str, sep, sig_str = str(obj["displayFormat"]).partition(".")
+            if sep and sig_str.isdigit():
+                attr["SignificantDigits"] = int(sig_str)
+            if length_str.isdigit() and attr.get("Length") == "__PLACEHOLDER__":
+                attr["Length"] = int(length_str)
         if obj.get("displayFormat"):
             attr["DisplayFormat"] = obj["displayFormat"]
         if obj.get("comment"):

diff --git a/src/generators/define/post_processing.py b/src/generators/define/post_processing.py
@@ -36,8 +36,7 @@ def _add_derived_methods(self) -> None:
         Add methods to ItemDefs where the def:Origin Type="Derived".
         """
         for item_def in self.define_objects['ItemDef']:
-            # assumes we're interested in the first origin; define.json defines origin as an object, not a list
-            if  item_def.Origin and item_def.Origin[0].Type == 'Derived':
+            if any(getattr(o, "Type", None) == "Derived" for o in (item_def.Origin or [])):
                 # generate the MethodOID
                 method_oid = self._generate_method_oid(item_def.OID)
                 # find the ItemRef and add a MethodOID attribute

diff --git a/src/generators/define/tests/fixtures/define-360i.json b/src/generators/define/tests/fixtures/define-360i.json
@@ -36635,7 +36635,7 @@
           "decode": "Chemiluminescent Magnetic Microparticle Immunoassay",
           "coding": {
             "code": "C172557",
-            "codeSystem": "nci:ExtCodeID"Class
+            "codeSystem": "nci:ExtCodeID"
           }
         },
         {

diff --git a/src/generators/define/tests/fixtures/define-360i.xml b/src/generators/define/tests/fixtures/define-360i.xml
diff --git a/src/generators/define/tests/test_itemdef_attributes.py b/src/generators/define/tests/test_itemdef_attributes.py
@@ -0,0 +1,76 @@
+"""
+Unit tests for Items._add_optional_itemdef_attributes, focused on the
+displayFormat -> SignificantDigits/Length fallback (the issue-78 workaround).
+
+The fallback parses obj['displayFormat'] (e.g. "8.3") into a Length /
+SignificantDigits pair when the study JSON omits them. The hardened version must:
+  * never raise when the format lacks exactly one dot (the old split(".")
+    unpack raised ValueError for "8", "1.2.3", etc.), and
+  * yield ints, matching the type of values that arrive straight from the JSON
+    (the old code left the derived values as strings).
+"""
+import pytest
+
+
+@pytest.fixture
+def add_attrs():
+    from items import Items  # imported inside the fixture, i.e. only when a test requests it
+    return Items._add_optional_itemdef_attributes
+
+
+class TestExplicitValuesWin:  # values given explicitly in the JSON beat displayFormat-derived ones
+    def test_explicit_significant_digits_preserved_as_int(self, add_attrs):
+        # Explicit significantDigits (2) must be kept; the 3 implied by "8.3" is never consulted.
+        attr = {}
+        add_attrs(attr, {"significantDigits": 2, "dataType": "float", "displayFormat": "8.3"})
+        assert attr["SignificantDigits"] == 2
+        assert isinstance(attr["SignificantDigits"], int)
+
+    def test_explicit_length_not_overridden_by_displayformat(self, add_attrs):
+        attr = {}  # no significantDigits in the input, so the displayFormat fallback does run
+        add_attrs(attr, {"dataType": "float", "length": 10, "displayFormat": "8.3"})
+        assert attr["Length"] == 10            # explicit length wins over the "8" in the format
+        assert attr["SignificantDigits"] == 3  # sig digits still derived from format
+        assert isinstance(attr["SignificantDigits"], int)
+
+
+class TestDisplayFormatFallback:  # float items without significantDigits: derive from displayFormat
+    def test_float_displayformat_yields_int_significant_digits_and_length(self, add_attrs):
+        attr = {}
+        add_attrs(attr, {"dataType": "float", "displayFormat": "8.3"})
+        # Core regression: derived values must be ints, not the strings the old
+        # split(".") branch produced.
+        assert attr["SignificantDigits"] == 3
+        assert isinstance(attr["SignificantDigits"], int)
+        assert attr["Length"] == 8
+        assert isinstance(attr["Length"], int)
+
+    def test_displayformat_without_dot_does_not_raise(self, add_attrs):
+        # "8" has no dot; old split(".") -> single element -> ValueError on unpack.
+        attr = {}
+        add_attrs(attr, {"dataType": "float", "displayFormat": "8"})
+        assert "SignificantDigits" not in attr
+        # With no dot the whole string is the length part; it replaces the Length placeholder.
+        assert attr["Length"] == 8
+        assert isinstance(attr["Length"], int)
+
+    def test_displayformat_with_multiple_dots_does_not_raise(self, add_attrs):
+        # "1.2.3" -> old split(".") -> three elements -> ValueError on unpack.
+        attr = {}
+        add_attrs(attr, {"dataType": "float", "displayFormat": "1.2.3"})
+        # Splitting at the first dot leaves "2.3", which is not all digits: no SignificantDigits.
+        assert "SignificantDigits" not in attr
+        assert attr["Length"] == 1
+
+    def test_non_numeric_displayformat_leaves_placeholder(self, add_attrs):
+        attr = {}  # "DATE9." splits into "DATE9" and "": neither part is a digit string
+        add_attrs(attr, {"dataType": "float", "displayFormat": "DATE9."})
+        assert "SignificantDigits" not in attr
+        assert attr["Length"] == "__PLACEHOLDER__"  # nothing numeric to recover
+        assert attr["DisplayFormat"] == "DATE9."
+
+    def test_fallback_only_applies_to_float_datatype(self, add_attrs):
+        attr = {}  # dataType "text": the fallback branch requires dataType == "float"
+        add_attrs(attr, {"dataType": "text", "displayFormat": "8.3"})
+        assert "SignificantDigits" not in attr
+        assert attr["DisplayFormat"] == "8.3"