Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14,638 changes: 14,638 additions & 0 deletions src/define-xml/define.json

Large diffs are not rendered by default.

46 changes: 36 additions & 10 deletions src/generators/define/itemGroups.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import valueLevel as VL
from constants import TRIAL_DESIGN_DOMAINS, NON_REPEATING_DOMAINS, DEFAULT_PURPOSE

MAX_SUBCLASS_DEPTH = 4


class ItemGroups(define_object.DefineObject):
""" create a Define-XML v2.1 ItemGroupDef element template """
Expand Down Expand Up @@ -51,23 +53,47 @@ def _generate_dataset(self, dataset, define_objects, lang, acrf):
slices = dataset.get("slices")
items.Items().create_define_objects(items_list, define_objects, lang, acrf, slice=slices)

# assumption: list of subclasses, but no nested subclasses - may need to revisit this for ADaM
if dataset.get("observationClass", {}).get("name", ""):
ds_class = dataset["observationClass"]["name"].upper().replace("-", " ")
observation_class = dataset.get("observationClass") or {}
if observation_class.get("name"):
ds_class = observation_class["name"].upper().replace("-", " ")
itg.Class = DEFINE.Class(Name=ds_class)
sub_classes = dataset.get("observationClass").get("subClasses", [])
for sub_class in sub_classes:
sub_class_name = sub_class.get("name")
if sub_class_name.get("parentClass", ""):
itg.Class.SubClass.append(DEFINE.SubClass(Name=sub_class["name"], ParentClass=sub_class["parentClass"]))
else:
itg.Class.SubClass.append(DEFINE.SubClass(Name=sub_class["name"]))
self._append_subclasses(
itg.Class,
observation_class.get("subClasses") or [],
parent_name=None,
depth=1,
)

# default is Dataset-JSON .ndjson datasets - will be overridden in post-processing if is_xpt CL arg is True
leaf = DEFINE.leaf(ID="LF." + dataset_name, href=dataset_name.lower() + ".ndjson")
leaf.title = DEFINE.title(_content=dataset_name.lower() + ".ndjson")
itg.leaf = leaf

def _append_subclasses(self, class_obj, sub_classes, parent_name, depth):
    """
    Walk a nested subClasses JSON tree and emit each entry as a flat SubClass.

    Every named entry in sub_classes is appended to class_obj.SubClass, parents
    before their children. An entry's ParentClass is its own "parentClass" value
    when that is truthy, otherwise the name of the entry it was nested under
    (parent_name); when neither is available the SubClass is created with a Name
    only. Entries without a truthy "name" are skipped together with anything
    nested beneath them.

    :param class_obj: object exposing a SubClass list to append to
    :param sub_classes: iterable of dicts with optional "name", "parentClass"
        and "subClasses" keys
    :param parent_name: Name of the enclosing SubClass, or None at the top level
    :param depth: 1-based nesting level of sub_classes; levels beyond
        MAX_SUBCLASS_DEPTH are dropped without any error or warning
    """
    if depth > MAX_SUBCLASS_DEPTH:
        return
    for entry in sub_classes:
        entry_name = entry.get("name")
        if entry_name:
            # an explicit parentClass on the entry wins over the inherited parent
            sub_class_kwargs = {"Name": entry_name}
            parent = entry.get("parentClass") or parent_name
            if parent:
                sub_class_kwargs["ParentClass"] = parent
            class_obj.SubClass.append(DEFINE.SubClass(**sub_class_kwargs))

            # children become siblings under the same Class, parented to this entry
            children = entry.get("subClasses") or []
            if children:
                self._append_subclasses(
                    class_obj,
                    children,
                    parent_name=entry_name,
                    depth=depth + 1,
                )

def _create_itemgroupdef_object(self, obj):
name = self.require_key(obj, "name", "ItemGroupDef")
oid = self.generate_oid(["IG", name])
Expand Down
37 changes: 30 additions & 7 deletions src/generators/define/items.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def _create_itemdef_object(self, obj, oid, slice):
@staticmethod
def _new_itemdef(attr: dict[str, Any]) -> Any:
"""Instantiate an ItemDef without triggering descriptor validation."""
# TODO replace this with v0.2.0 odmlib permissive mode
item = object.__new__(DEFINE.ItemDef)
for key, value in attr.items():
item.__dict__[key] = value
Expand All @@ -88,7 +89,23 @@ def _add_optional_itemdef_elements(self, item, obj, it_oid, slice):
self._add_origin(item, obj)

def _add_origin(self, item, obj):
origin_in = obj["origin"]
origins_in = obj["origin"]
if isinstance(origins_in, dict):
# Today's DDS shape: a single origin dict with item-level predecessor/pages siblings.
# Future shape: a list of origin dicts, each carrying its own predecessor/pages.
# Normalize today's shape by folding the item-level fields into the wrapped dict so
# the per-origin builder only ever reads from the origin dict.
wrapped = dict(origins_in)
if obj.get("predecessor") and "predecessor" not in wrapped:
wrapped["predecessor"] = obj["predecessor"]
if obj.get("pages") and "pages" not in wrapped:
wrapped["pages"] = obj["pages"]
origins_in = [wrapped]
Comment thread
swhume marked this conversation as resolved.

for origin_in in origins_in:
item.Origin.append(self._build_origin(origin_in))

def _build_origin(self, origin_in):
origin_type = origin_in.get("type")
origin_source = origin_in.get("source")
attr: dict[str, Any] = {}
Expand All @@ -108,17 +125,16 @@ def _add_origin(self, item, obj):
dr.PDFPageRef.append(DEFINE.PDFPageRef(PageRefs="__PLACEHOLDER__", Type="PhysicalRef"))
origin.DocumentRef.append(dr)

if obj.get("predecessor"):
if origin_in.get("predecessor"):
origin.Description = DEFINE.Description()
origin.Description.TranslatedText.append(
DEFINE.TranslatedText(_content=obj["predecessor"], lang=self.lang)
DEFINE.TranslatedText(_content=origin_in["predecessor"], lang=self.lang)
)
if obj.get("pages"):
if origin_in.get("pages"):
dr = DEFINE.DocumentRef(leafID=self.acrf)
dr.PDFPageRef.append(DEFINE.PDFPageRef(PageRefs=obj["pages"], Type="PhysicalRef"))
dr.PDFPageRef.append(DEFINE.PDFPageRef(PageRefs=origin_in["pages"], Type="PhysicalRef"))
origin.DocumentRef.append(dr)

item.Origin.append(origin)
return origin

@staticmethod
def _add_optional_itemdef_attributes(attr, obj):
Expand All @@ -131,6 +147,13 @@ def _add_optional_itemdef_attributes(attr, obj):
attr["Length"] = "__PLACEHOLDER__"
if obj.get("significantDigits"):
attr["SignificantDigits"] = obj["significantDigits"]
elif obj.get("displayFormat") and obj.get("dataType") == "float":
# TODO work around issue 78 - missing significant digits - remove after USDM updated
length_str, sep, sig_str = str(obj["displayFormat"]).partition(".")
if sep and sig_str.isdigit():
attr["SignificantDigits"] = int(sig_str)
if length_str.isdigit() and attr.get("Length") == "__PLACEHOLDER__":
attr["Length"] = int(length_str)
if obj.get("displayFormat"):
attr["DisplayFormat"] = obj["displayFormat"]
if obj.get("comment"):
Expand Down
3 changes: 1 addition & 2 deletions src/generators/define/post_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,7 @@ def _add_derived_methods(self) -> None:
Add methods to ItemDefs where the def:Origin Type="Derived".
"""
for item_def in self.define_objects['ItemDef']:
# assumes we're interested in the first origin; define.json defines origin as an object, not a list
if item_def.Origin and item_def.Origin[0].Type == 'Derived':
if any(getattr(o, "Type", None) == "Derived" for o in (item_def.Origin or [])):
# generate the MethodOID
method_oid = self._generate_method_oid(item_def.OID)
# find the ItemRef and add a MethodOID attribute
Expand Down
2 changes: 1 addition & 1 deletion src/generators/define/tests/fixtures/define-360i.json
Original file line number Diff line number Diff line change
Expand Up @@ -36635,7 +36635,7 @@
"decode": "Chemiluminescent Magnetic Microparticle Immunoassay",
"coding": {
"code": "C172557",
"codeSystem": "nci:ExtCodeID"Class
"codeSystem": "nci:ExtCodeID"
}
},
{
Expand Down
8,491 changes: 8,490 additions & 1 deletion src/generators/define/tests/fixtures/define-360i.xml

Large diffs are not rendered by default.

76 changes: 76 additions & 0 deletions src/generators/define/tests/test_itemdef_attributes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
"""
Unit tests for Items._add_optional_itemdef_attributes, focused on the
displayFormat -> SignificantDigits/Length fallback (the issue-78 workaround).

The fallback parses obj['displayFormat'] (e.g. "8.3") into a Length /
SignificantDigits pair when the study JSON omits them. The hardened version must:
* never raise when the format lacks exactly one dot (the old split(".")
unpack raised ValueError for "8", "1.2.3", etc.), and
* yield ints, matching the type of values that arrive straight from the JSON
(the old code left the derived values as strings).
"""
import pytest


@pytest.fixture
def add_attrs():
    """Return Items._add_optional_itemdef_attributes, importing items only when requested."""
    from items import Items
    return Items._add_optional_itemdef_attributes


class TestExplicitValuesWin:
    """Values given explicitly in the item must not be replaced by displayFormat parsing."""

    def test_explicit_significant_digits_preserved_as_int(self, add_attrs):
        # When significantDigits is supplied, the displayFormat fallback is bypassed.
        study_item = {"significantDigits": 2, "dataType": "float", "displayFormat": "8.3"}
        result = {}
        add_attrs(result, study_item)
        assert result["SignificantDigits"] == 2
        assert isinstance(result["SignificantDigits"], int)

    def test_explicit_length_not_overridden_by_displayformat(self, add_attrs):
        study_item = {"dataType": "float", "length": 10, "displayFormat": "8.3"}
        result = {}
        add_attrs(result, study_item)
        # the supplied length is kept as-is
        assert result["Length"] == 10
        # significant digits are still taken from the fractional part of the format
        assert result["SignificantDigits"] == 3
        assert isinstance(result["SignificantDigits"], int)


class TestDisplayFormatFallback:
    """Behaviour of the displayFormat -> SignificantDigits/Length fallback."""

    @staticmethod
    def _attrs_for(add_attrs, display_format, data_type="float"):
        """Run add_attrs on a minimal item and hand back the populated attr dict."""
        collected = {}
        add_attrs(collected, {"dataType": data_type, "displayFormat": display_format})
        return collected

    def test_float_displayformat_yields_int_significant_digits_and_length(self, add_attrs):
        result = self._attrs_for(add_attrs, "8.3")
        # Core regression: both derived values have to be ints rather than the
        # strings that the earlier split(".") branch left behind.
        assert result["SignificantDigits"] == 3
        assert isinstance(result["SignificantDigits"], int)
        assert result["Length"] == 8
        assert isinstance(result["Length"], int)

    def test_displayformat_without_dot_does_not_raise(self, add_attrs):
        # No dot in "8": the earlier split(".") unpack raised ValueError here.
        result = self._attrs_for(add_attrs, "8")
        assert "SignificantDigits" not in result
        # The integer part still fills the Length placeholder.
        assert result["Length"] == 8
        assert isinstance(result["Length"], int)

    def test_displayformat_with_multiple_dots_does_not_raise(self, add_attrs):
        # Three parts in "1.2.3": the earlier split(".") unpack raised ValueError here.
        result = self._attrs_for(add_attrs, "1.2.3")
        # The remainder "2.3" is not all digits, so SignificantDigits stays unset.
        assert "SignificantDigits" not in result
        assert result["Length"] == 1

    def test_non_numeric_displayformat_leaves_placeholder(self, add_attrs):
        result = self._attrs_for(add_attrs, "DATE9.")
        assert "SignificantDigits" not in result
        # no numeric portion to recover, so the placeholder survives
        assert result["Length"] == "__PLACEHOLDER__"
        assert result["DisplayFormat"] == "DATE9."

    def test_fallback_only_applies_to_float_datatype(self, add_attrs):
        result = self._attrs_for(add_attrs, "8.3", data_type="text")
        assert "SignificantDigits" not in result
        assert result["DisplayFormat"] == "8.3"
Loading