diff --git a/docs/source/identifiers.rst b/docs/source/identifiers.rst index 312749e1..408c7abe 100644 --- a/docs/source/identifiers.rst +++ b/docs/source/identifiers.rst @@ -11,13 +11,16 @@ enumerated list of options for defining, for example: a neutron, or a photon. - Plasma heating may come from neutral beam injection, electron cyclotron heating, ion cyclotron heating, lower hybrid heating, alpha particles. +- These may have alternative naming conventions supported through aliases + (e.g., "235U" and "U_235" for Uranium 235). -Identifiers are a list of possible valid labels. Each label has three +Identifiers are a list of possible valid labels. Each label has up to four representations: 1. An index (integer) 2. A name (short string) 3. A description (long string) +4. List of aliases (list of short strings) Identifiers in IMAS-Python @@ -44,6 +47,15 @@ the available identifiers is stored as ``imas.identifiers.identifiers``. print(csid.total.index) print(csid.total.description) + # Access identifiers with aliases (when available) + mid = imas.identifiers.materials_identifier + print(mid["235U"].name) # Access by canonical name + print(mid["U_235"].name) # Access by alias + + # Both return the same object + assert mid["235U"].name is mid["U_235"].name + assert mid["235U"].name is mid.U_235.name + # Item access is also possible print(identifiers["edge_source_identifier"]) @@ -64,8 +76,8 @@ Assigning identifiers in IMAS-Python IMAS-Python implements smart assignment of identifiers. 
You may assign an identifier enum value (for example ``imas.identifiers.core_source_identifier.total``), a -string (for example ``"total"``) or an integer (for example ``"1"``) to an -identifier structure (for example ``core_profiles.source[0].identifier``) to set +string (for example ``"total"`` or its alias), or an integer (for example ``1``) +to an identifier structure (for example ``core_sources.source[0].identifier``) to set all three child nodes ``name``, ``index`` and ``description`` in one go. See below example: @@ -86,6 +98,20 @@ below example: # 3. Assign an integer. This looks up the index in the identifier enum: core_sources.source[0].identifier = 1 + # Identifiers can still be assigned with the old alias name for backward compatibility: + wallids = imas.IDSFactory().wall() + wallids.description_ggd.resize(1) + wallids.description_ggd[0].material.resize(1) + wallids.description_ggd[0].material[0].grid_subset.resize(1) + mat = wallids.description_ggd[0].material[0].grid_subset[0].identifiers + mat.names.extend([""] * 1) + mid = imas.identifiers.materials_identifier + # Assign using canonical name + mat.names[0] = "235U" + # Or assign using alias (equivalent to above) + mat.names[0] = mid["U_235"].name + mat.names[0] = mid.U_235.name + # Inspect the contents of the structure imas.util.inspect(core_sources.source[0].identifier) @@ -101,6 +127,65 @@ below example: imas.util.inspect(core_sources.source[1].identifier) +Identifier aliases +------------------ + +Some identifiers may have multiple aliases defined in the Data Dictionary. Aliases are +former names kept as an option to ensure better backward compatibility after a change +and support multiple naming conventions. An identifier can have any number of +comma-separated aliases. + +Aliases can be accessed in the same ways as canonical names, and all aliases for an +identifier point to the same object. 
+ +Names or aliases that begin with a digit (e.g., 235U) cannot be accessed using dot notation +(e.g., materials_identifier.235U) due to Python's syntax restrictions. Instead, such +names must be accessed using dictionary-style indexing, for example: +materials_identifier["235U"]. + +.. code-block:: python + :caption: Working with identifier aliases + + import imas + + # Get materials identifier which has some aliases defined + mid = imas.identifiers.materials_identifier + + # Access by canonical name + uranium235_by_name = mid["235U"] + print(f"Name: {uranium235_by_name.name}") + print(f"Aliases: {uranium235_by_name.aliases}") # List of all aliases + print(f"First alias: {uranium235_by_name.aliases[0]}") # First alias in the list + print(f"Index: {uranium235_by_name.index}") + print(f"Description: {uranium235_by_name.description}") + + # Access by any alias - all return the same object + uranium235_by_alias1 = mid["U_235"] + uranium235_by_alias2 = mid["Uranium_235"] + print(f"Same objects: {uranium235_by_name is uranium235_by_alias1 is uranium235_by_alias2}") + + # You can also use attribute access for aliases (when valid Python identifiers) + uranium235_by_attr = mid.U_235 + print(f"Same object: {uranium235_by_name is uranium235_by_attr}") + + # When assigning to IDS structures, alias works the following way + wallids = imas.IDSFactory().wall() + wallids.description_ggd.resize(1) + wallids.description_ggd[0].material.resize(1) + wallids.description_ggd[0].material[0].grid_subset.resize(1) + mat = wallids.description_ggd[0].material[0].grid_subset[0].identifiers + mat.names.extend([""] * 1) + mat.indices.resize(1) + mat.descriptions.extend([""] * 1) + mat.indices[0] = 20 + mat.descriptions[0] = "Uranium 235 isotope" + + # These assignments are all equivalent: + mat.names[0] = "235U" # canonical name + mat.names[0] = mid["235U"].name # enum value + mat.names[0] = mid.U_235.name # enum value via alias + mat.names[0] = mid["U_235"].name # enum value via alias 
+ Compare identifiers ------------------- @@ -108,11 +193,12 @@ Identifier structures can be compared against the identifier enum as well. They compare equal when: 1. ``index`` is an exact match -2. ``name`` is an exact match, or ``name`` is not filled in the IDS node +2. ``name`` is an exact match, or ``name`` matches an alias, or ``name`` is not filled in the IDS node The ``description`` does not have to match with the Data Dictionary definition, but a warning is logged if the description in the IDS node does not match with -the Data Dictionary description: +the Data Dictionary description. The comparison also takes aliases into account, +so an identifier will match both its canonical name and any defined alias: .. code-block:: python :caption: Comparing identifiers @@ -139,6 +225,15 @@ the Data Dictionary description: >>> core_sources.source[0].identifier.name = "totalX" >>> core_sources.source[0].identifier == csid.total False + >>> # Alias comparison example with materials identifier + >>> mid = imas.identifiers.materials_identifier + >>> cxr = imas.IDSFactory().camera_x_rays() + >>> mat = cxr.filter_window.material + >>> mat.index = 20 + >>> mat.name = "U_235" # Using alias + >>> # Compares equal to the canonical identifier even though name is alias + >>> mat == mid["235U"] + True .. seealso:: diff --git a/imas/ids_identifiers.py b/imas/ids_identifiers.py index a64dd87f..1525a070 100644 --- a/imas/ids_identifiers.py +++ b/imas/ids_identifiers.py @@ -1,7 +1,6 @@ # This file is part of IMAS-Python. # You should have received the IMAS-Python LICENSE file with this project. -"""IMAS-Python module to support Data Dictionary identifiers. 
-""" +"""IMAS-Python module to support Data Dictionary identifiers.""" import logging from enum import Enum @@ -16,16 +15,18 @@ class IDSIdentifier(Enum): """Base class for all identifier enums.""" - def __new__(self, value: int, description: str): - obj = object.__new__(self) + def __new__(cls, value: int, description: str, aliases=()): + obj = object.__new__(cls) obj._value_ = value return obj - def __init__(self, value: int, description: str) -> None: + def __init__(self, value: int, description: str, aliases=()) -> None: self.index = value """Unique index for this identifier value.""" self.description = description """Description for this identifier value.""" + self.aliases = list(aliases) + """Alternative names for this identifier value (a list owned by this member).""" def __eq__(self, other): if self is other: @@ -37,35 +38,49 @@ def __eq__(self, other): except (AttributeError, TypeError, ValueError): # Attribute doesn't exist, or failed to convert return NotImplemented + # Index must match if other_index == self.index: - # Name may be left empty - if other_name == self.name or other_name == "": + # Name may be left empty, or match name or alias + if ( + other_name == self.name + or other_name == "" + or other_name in self.aliases + ): # Description doesn't have to match, though we will warn when it doesn't - if other_description != self.description and other_description != "": + if other_description not in (self.description, ""): logger.warning( "Description of %r does not match identifier description %r", other.description, self.description, ) return True - else: - logger.warning( - "Name %r does not match identifier name %r, but indexes are equal.", - other.name, - self.name, - ) + + # If we get here with matching indexes but no name/alias match, warn + logger.warning( + "Name %r does not match identifier name %r, but indexes are equal.", + other.name, + self.name, + ) return False @classmethod def _from_xml(cls, identifier_name, xml) -> Type["IDSIdentifier"]: element = 
fromstring(xml) enum_values = {} + aliases = {} for int_element in element.iterfind("int"): name = int_element.get("name") value = int_element.text description = int_element.get("description") - enum_values[name] = (int(value), description) + # alias attribute may contain multiple comma-separated aliases + alias_attr = int_element.get("alias", "") + aliases = [a.strip() for a in alias_attr.split(",") if a.strip()] + # Canonical entry: use the canonical 'name' as key + enum_values[name] = (int(value), description, aliases) + # Also add alias names as enum *aliases* (they become enum attributes) + for alias in aliases: + enum_values[alias] = (int(value), description, aliases) # Create the enumeration enum = cls( identifier_name, diff --git a/imas/test/test_identifiers.py b/imas/test/test_identifiers.py index 263a6ccf..119e0e88 100644 --- a/imas/test/test_identifiers.py +++ b/imas/test/test_identifiers.py @@ -1,9 +1,18 @@ -import pytest +import importlib.metadata +from packaging.version import Version +import pytest from imas.dd_zip import dd_identifiers from imas.ids_factory import IDSFactory from imas.ids_identifiers import IDSIdentifier, identifiers +has_aliases = Version(importlib.metadata.version("imas_data_dictionaries")) >= Version( + "4.1.0" +) +requires_aliases = pytest.mark.skipif( + not has_aliases, reason="Requires DD 4.1.0 for identifier aliases" +) + def test_list_identifiers(): assert identifiers.identifiers == dd_identifiers() @@ -70,6 +79,66 @@ def test_identifier_struct_assignment(caplog): assert source.identifier != csid.total +def test_identifiers_with_aliases(): + # Custom identifier XML, based on materials identifier, with some more features + custom_identifier_xml = """\ + + +
+Materials used in the device mechanical structures +
+20 +21 +22 +23 +
+""" + identifier = IDSIdentifier._from_xml("custom_identifier", custom_identifier_xml) + + assert len(identifier) == 4 + + # no aliases + assert identifier.Diamond.aliases == [] + # 1 alias + assert identifier["235U"] is identifier.U_235 + assert identifier["235U"].aliases == ["U_235"] + # 3 aliases + assert ( + identifier.CxHy + is identifier.alias1 + is identifier.alias2 + is identifier["3alias"] + ) + assert identifier.CxHy.aliases == ["alias1", "alias2", "3alias"] + + +@requires_aliases +def test_identifier_struct_assignment_with_aliases(): + """Test identifier struct assignment with aliases using materials_identifier.""" + mid = identifiers.materials_identifier + + # Create an actual IDS structure + wallids = IDSFactory().wall() + wallids.description_ggd.resize(1) + wallids.description_ggd[0].material.resize(1) + wallids.description_ggd[0].material[0].grid_subset.resize(1) + mat = wallids.description_ggd[0].material[0].grid_subset[0].identifiers + mat.names.extend([""] * 1) + mat.indices.resize(1) + mat.descriptions.extend([""] * 1) + mat.names[0] = mid.U_235.name + mat.indices[0] = 20 + mat.descriptions[0] = "Uranium 235 isotope" + + # Basic attribute checks + assert mat.names[0] == mid["235U"].name + assert mat.indices[0] == mid.U_235.index + + # Modify material properties and test equality + mat.names[0] = "some_name" + assert mat.names[0] != mid.U_235.name + + def test_identifier_aos_assignment(): cfid = identifiers.pf_active_coil_function_identifier pfa = IDSFactory("3.39.0").pf_active() @@ -103,3 +172,124 @@ def test_invalid_identifier_assignment(): with pytest.raises(ValueError): # negative identifiers are reserved for user-defined identifiers cs.source[0].identifier = -1 + + +@requires_aliases +def test_identifier_aliases(): + """Test identifier enum aliases functionality.""" + mid = identifiers.materials_identifier + + # Test that alias points to the same object as the canonical name + assert mid.U_235 is mid["235U"] + assert mid.U_238 is mid["238U"] 
+ assert mid.In_115 is mid["115In"] + assert mid.He_4 is mid["4He"] + + # Test that both name and alias have the same properties + assert mid.U_235.name == "235U" + assert mid.U_235.index == mid["235U"].index + assert mid.U_235.description == mid["235U"].description + assert "U_235" in mid.U_235.aliases + assert isinstance(mid.U_235.aliases, list) + + # Test accessing by any alias via bracket notation + for alias in mid.U_235.aliases: + assert mid[alias] is mid.U_235 + + +@requires_aliases +def test_identifier_alias_equality(): + """Test that identifiers with aliases are equal when comparing names and aliases.""" + mid = identifiers.materials_identifier + target = mid.U_235 + + # Test equality with canonical name + wallids = IDSFactory().wall() + wallids.description_ggd.resize(1) + wallids.description_ggd[0].material.resize(1) + wallids.description_ggd[0].material[0].grid_subset.resize(1) + mat = wallids.description_ggd[0].material[0].grid_subset[0].identifiers + mat.names.extend([""] * 1) + mat.names[0] = "235U" + assert mat.names[0] == target.name + + # Test equality with alias name + wallids2 = IDSFactory().wall() + wallids2.description_ggd.resize(1) + wallids2.description_ggd[0].material.resize(1) + wallids2.description_ggd[0].material[0].grid_subset.resize(1) + mat2 = wallids2.description_ggd[0].material[0].grid_subset[0].identifiers + mat2.names.extend([""] * 1) + mat2.names[0] = mid["U_235"].name # Use alias as name + assert mat2.names[0] == target.name + + # Test inequality when material has alias not matching canonical name + wallids3 = IDSFactory().wall() + wallids3.description_ggd.resize(1) + wallids3.description_ggd[0].material.resize(1) + wallids3.description_ggd[0].material[0].grid_subset.resize(1) + mat3 = wallids3.description_ggd[0].material[0].grid_subset[0].identifiers + mat3.names.extend([""] * 1) + mat3.names[0] = "test_name" + assert mat3.names[0] != target.name + + # Test equality when index doesn't match + wallids4 = IDSFactory().wall() + 
wallids4.description_ggd.resize(1) + wallids4.description_ggd[0].material.resize(1) + wallids4.description_ggd[0].material[0].grid_subset.resize(1) + mat4 = wallids4.description_ggd[0].material[0].grid_subset[0].identifiers + mat4.names.extend([""] * 1) + mat4.indices.resize(1) + mat4.names[0] = "235U" + mat4.indices[0] = 999 + assert mat4.indices[0] != target.index + assert mat4.names[0] == target.name + + # Test equality for multiple names,indices and descriptions + wallids5 = IDSFactory().wall() + wallids5.description_ggd.resize(1) + wallids5.description_ggd[0].material.resize(1) + wallids5.description_ggd[0].material[0].grid_subset.resize(1) + mat5 = wallids5.description_ggd[0].material[0].grid_subset[0].identifiers + mat5.names.extend([""] * 3) + mat5.indices.resize(3) + mat5.descriptions.extend([""] * 3) + mat5.names[0] = "235U" + mat5.names[1] = "238U" + mat5.names[2] = mid.U_235.name # Use alias as name + mat5.indices[0] = 20 + mat5.indices[1] = 21 + mat5.indices[2] = 20 + mat5.descriptions[0] = "Uranium 235 isotope" + mat5.descriptions[1] = "Uranium 238 isotope" + mat5.descriptions[2] = "Uranium 235 isotope" + + assert mat5.names[0] == mid["235U"].name + assert mat5.names[1] == mid["238U"].name + assert mat5.names[2] == mid["U_235"].name + assert mat5.indices[0] == mid["235U"].index + assert mat5.indices[1] == mid["238U"].index + assert mat5.indices[2] == mid["U_235"].index + assert mat5.descriptions[0] == mid["235U"].description + assert mat5.descriptions[1] == mid["238U"].description + assert mat5.descriptions[2] == mid["U_235"].description + + +@requires_aliases +def test_identifier_alias_equality_non_ggd(): + """Test identifier aliases functionality on non-ggd material""" + mid = identifiers.materials_identifier + + summary_ids = IDSFactory().summary() + summary_ids.wall.material = mid.U_235 # Use alias as enum + assert summary_ids.wall.material == mid["235U"] + assert summary_ids.wall.material == mid["U_235"] + + summary_ids.wall.material.name = 
"U_235" # Use alias as name + assert summary_ids.wall.material == mid["235U"] + assert summary_ids.wall.material == mid["U_235"] + + summary_ids.wall.material.name = "235U" # Use canonical name + assert summary_ids.wall.material == mid["235U"] + assert summary_ids.wall.material == mid["U_235"]