From 181d19815edf8b8d1c0fc5f613d753ec72d5befb Mon Sep 17 00:00:00 2001 From: Olivier Hoenen Date: Thu, 20 Mar 2025 14:56:47 +0100 Subject: [PATCH 01/74] Update install doc and readme with info on optional deps --- README.md | 15 +++++++++++---- docs/source/installing.rst | 22 ++++++++++++++++++++++ 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 35c1ae46..d0ded727 100644 --- a/README.md +++ b/README.md @@ -8,13 +8,20 @@ Data Model. ## Install -Install steps are described in the documentation generated from `/docs/source/installing.rst`. +Simply install IMAS-Python with ``pip``: +```bash +pip install imas-python +``` +or with optional dependencies for netCDF and xarray support: +```bash +pip install imas-python[netcdf,xarray] +``` -Documentation is autogenerated from the source using [Sphinx](http://sphinx-doc.org/) +More details are described in the documentation generated from `/docs/source/installing.rst`. +The documentation is autogenerated from the source using [Sphinx](http://sphinx-doc.org/) and can be found at the [readthedocs](https://imas-python.readthedocs.io/en/latest/) -The documentation can be manually generated by installing sphinx and running: - +To generate the documentation yourself, install the ``docs`` optional dependencies and do: ```bash make -C docs html ``` diff --git a/docs/source/installing.rst b/docs/source/installing.rst index 0f2129ca..a843ff6f 100644 --- a/docs/source/installing.rst +++ b/docs/source/installing.rst @@ -13,6 +13,28 @@ To get started, you can install it from `pypi.org Date: Tue, 1 Apr 2025 09:22:22 +0200 Subject: [PATCH 02/74] Also apply COCOS 11/17 sign conversion to `dodpsi_like` when converting between DD 3 and 4. Fixes #31.
--- imas/ids_convert.py | 16 ++++++++-------- imas/test/test_ids_convert.py | 20 ++++++++++++++++++++ 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/imas/ids_convert.py b/imas/ids_convert.py index a52db521..e5dc0911 100644 --- a/imas/ids_convert.py +++ b/imas/ids_convert.py @@ -1,7 +1,6 @@ # This file is part of IMAS-Python. # You should have received the IMAS-Python LICENSE file with this project. -"""Functionality for converting IDSToplevels between DD versions. -""" +"""Functionality for converting IDSToplevels between DD versions.""" import copy import datetime @@ -334,12 +333,13 @@ def add_rename(old_path: str, new_path: str): # Additional conversion rules for DDv3 to DDv4 if self.version_old.major == 3 and new_version and new_version.major == 4: # Postprocessing for COCOS definition change: - xpath_query = ".//field[@cocos_label_transformation='psi_like']" - for old_item in old.iterfind(xpath_query): - old_path = old_item.get("path") - new_path = self.old_to_new.path.get(old_path, old_path) - self.new_to_old.post_process[new_path] = _cocos_change - self.old_to_new.post_process[old_path] = _cocos_change + for psi_like in ["psi_like", "dodpsi_like"]: + xpath_query = f".//field[@cocos_label_transformation='{psi_like}']" + for old_item in old.iterfind(xpath_query): + old_path = old_item.get("path") + new_path = self.old_to_new.path.get(old_path, old_path) + self.new_to_old.post_process[new_path] = _cocos_change + self.old_to_new.post_process[old_path] = _cocos_change # Definition change for pf_active circuit/connections if self.ids_name == "pf_active": path = "circuit/connections" diff --git a/imas/test/test_ids_convert.py b/imas/test/test_ids_convert.py index 750c44e4..55045bbc 100644 --- a/imas/test/test_ids_convert.py +++ b/imas/test/test_ids_convert.py @@ -368,6 +368,26 @@ def test_3to4_cocos_change(dd4factory): cp3 = convert_ids(cp4, "3.39.0") compare_children(cp, cp3) + eq = IDSFactory("3.39.0").equilibrium() + 
eq.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS + eq.time = [1.0] + eq.time_slice.resize(1) + eq.time_slice[0].profiles_1d.psi = numpy.linspace(0, 1, 11) + eq.time_slice[0].profiles_1d.dpressure_dpsi = numpy.linspace(1, 2, 11) + + eq4 = convert_ids(eq, None, factory=dd4factory) + assert numpy.array_equal( + eq4.time_slice[0].profiles_1d.psi, + -eq.time_slice[0].profiles_1d.psi, + ) + assert numpy.array_equal( + eq4.time_slice[0].profiles_1d.dpressure_dpsi, + -eq.time_slice[0].profiles_1d.dpressure_dpsi, + ) + + eq3 = convert_ids(eq4, "3.39.0") + compare_children(eq, eq3) + def test_3to4_circuit_connections(dd4factory, caplog): pfa = IDSFactory("3.39.0").pf_active() From 3f46e3f2317ce68ee7ed075aed61daec0d84025a Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 3 Apr 2025 15:34:57 +0200 Subject: [PATCH 03/74] Update backend selection logic Select `imas_core` when the URI starts with `imas:`, otherwise use the netCDF backend. This change allows using NCZarr [1] as storage engine (when it is enabled in the netCDF4 python module) by using `file://`, `s3://` or `https://` style URIs. [1] https://docs.unidata.ucar.edu/nug/current/nczarr_head.html --- imas/db_entry.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/imas/db_entry.py b/imas/db_entry.py index d7d74574..b218ad60 100644 --- a/imas/db_entry.py +++ b/imas/db_entry.py @@ -1,7 +1,6 @@ # This file is part of IMAS-Python. # You should have received the IMAS-Python LICENSE file with this project. -"""Logic for interacting with IMAS Data Entries. 
-""" +"""Logic for interacting with IMAS Data Entries.""" import logging import os @@ -189,10 +188,10 @@ def __init__( @staticmethod def _select_implementation(uri: Optional[str]) -> Type[DBEntryImpl]: """Select which DBEntry implementation to use based on the URI.""" - if uri and uri.endswith(".nc") and not uri.startswith("imas:"): - from imas.backends.netcdf.db_entry_nc import NCDBEntryImpl as impl - else: + if not uri or uri.startswith("imas:"): from imas.backends.imas_core.db_entry_al import ALDBEntryImpl as impl + else: # Assume it's a netCDF file or NCZarr URI + from imas.backends.netcdf.db_entry_nc import NCDBEntryImpl as impl return impl def __enter__(self): From e27d2565525958ee30291e757db2af36510a9dbf Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 2 Apr 2025 16:52:19 +0200 Subject: [PATCH 04/74] Initial logic to implement iterorganization/IMAS-Python#21 --- imas/ids_convert.py | 187 ++++++++++++++++++++++++++++------ imas/test/test_ids_convert.py | 95 ++++++++++++----- 2 files changed, 225 insertions(+), 57 deletions(-) diff --git a/imas/ids_convert.py b/imas/ids_convert.py index e5dc0911..5d3ef601 100644 --- a/imas/ids_convert.py +++ b/imas/ids_convert.py @@ -12,19 +12,16 @@ import numpy from packaging.version import InvalidVersion, Version +from scipy.interpolate import interp1d import imas from imas.dd_zip import parse_dd_version from imas.ids_base import IDSBase from imas.ids_data_type import IDSDataType +from imas.ids_defs import IDS_TIME_MODE_HETEROGENEOUS from imas.ids_factory import IDSFactory from imas.ids_path import IDSPath -from imas.ids_primitive import ( - IDSNumeric0D, - IDSNumericArray, - IDSPrimitive, - IDSString0D, -) +from imas.ids_primitive import IDSNumeric0D, IDSNumericArray, IDSPrimitive, IDSString0D from imas.ids_struct_array import IDSStructArray from imas.ids_structure import IDSStructure from imas.ids_toplevel import IDSToplevel @@ -495,7 +492,17 @@ def convert_ids( else: version_map = _DDVersionMap(ids_name, 
source_tree, target_tree, source_version) - _copy_structure(toplevel, target_ids, deepcopy, source_is_new, version_map) + # Special case for DD3to4 pulse_schedule conversion + if ( + toplevel.metadata.name == "pulse_schedule" + and toplevel.ids_properties.homogeneous_time == IDS_TIME_MODE_HETEROGENEOUS + and source_version < Version("3.40.0") + and target_version.major == 4 + ): + _pulse_schedule_3to4(toplevel, target_ids, deepcopy, version_map) + else: + _copy_structure(toplevel, target_ids, deepcopy, source_is_new, version_map) + logger.info("Conversion of IDS %s finished.", ids_name) if provenance_origin_uri: _add_provenance_entry(target_ids, toplevel._version, provenance_origin_uri) @@ -541,6 +548,41 @@ def _add_provenance_entry( node.sources.append(source_txt) # sources is a STR_1D (=list of strings) +def _get_target_item( + item: IDSBase, target: IDSStructure, rename_map: NBCPathMap +) -> Optional[IDSBase]: + """Find and return the corresponding target item if it exists. + + This method follows NBC renames (as stored in the rename map). It returns None if + there is no corresponding target item in the target structure. + """ + path = item.metadata.path_string + + # Follow NBC renames: + if path in rename_map: + if rename_map.path[path] is None: + if path not in rename_map.ignore_missing_paths: + if path in rename_map.type_change: + msg = "Element %r changed type in the target IDS." + else: + msg = "Element %r does not exist in the target IDS." + logger.warning(msg + " Data is not copied.", path) + return None + else: + return IDSPath(rename_map.path[path]).goto(target) + + # No NBC renames: + try: + return target[item.metadata.name] + except AttributeError: + # In exceptional cases the item does not exist in the target. Example: + # neutron_diagnostic IDS between DD 3.40.1 and 3.41.0. has renamed + # synthetic_signals/fusion_power -> fusion_power. 
The synthetic_signals + # structure no longer exists but we need to descend into it to get the + # total_neutron_flux. + return target + + def _copy_structure( source: IDSStructure, target: IDSStructure, @@ -561,27 +603,10 @@ def _copy_structure( rename_map = version_map.new_to_old if source_is_new else version_map.old_to_new for item in source.iter_nonempty_(): path = item.metadata.path_string - if path in rename_map: - if rename_map.path[path] is None: - if path not in rename_map.ignore_missing_paths: - if path in rename_map.type_change: - msg = "Element %r changed type in the target IDS." - else: - msg = "Element %r does not exist in the target IDS." - logger.warning(msg + " Data is not copied.", path) - continue - else: - target_item = IDSPath(rename_map.path[path]).goto(target) - else: - try: - target_item = target[item.metadata.name] - except AttributeError: - # In exceptional cases the item does not exist in the target. Example: - # neutron_diagnostic IDS between DD 3.40.1 and 3.41.0. has renamed - # synthetic_signals/fusion_power -> fusion_power. The synthetic_signals - # structure no longer exists but we need to descend into it to get the - # total_neutron_flux. 
- target_item = target + target_item = _get_target_item(item, target, rename_map) + if target_item is None: + continue + if path in rename_map.type_change: # Handle type change new_items = rename_map.type_change[path](item, target_item) @@ -600,11 +625,7 @@ def _copy_structure( elif isinstance(item, IDSStructure): _copy_structure(item, target_item, deepcopy, source_is_new, version_map) else: - if deepcopy: - # No nested types are used as data, so a shallow copy is sufficient - target_item.value = copy.copy(item.value) - else: - target_item.value = item.value + target_item.value = copy.copy(item.value) if deepcopy else item.value # Post-process the node: if path in rename_map.post_process: @@ -919,3 +940,103 @@ def _ids_properties_source(source: IDSString0D, provenance: IDSStructure) -> Non provenance.node.resize(1) provenance.node[0].reference.resize(1) provenance.node[0].reference[0].name = source.value + + +def _pulse_schedule_3to4( + source: IDSStructure, + target: IDSStructure, + deepcopy: bool, + version_map: DDVersionMap, +): + """Recursively copy data, following NBC renames, and converting time bases for the + pulse_schedule IDS. + + Args: + source: Source structure. + target: Target structure. + deepcopy: See :func:`convert_ids`. + version_map: Version map containing NBC renames. 
+ """ + # All prerequisites are checked before calling this function: + # - source and target are pulse_schedule IDSs + # - source has DD version < 3.40.0 + # - target has DD version >= 4.0.0, < 5.0 + # - IDS is using heterogeneous time + rename_map = version_map.old_to_new + + def copy_and_interpolate( + source: IDSStructure, target: IDSStructure, timebase: numpy.ndarray + ): + """Reimplementation of _copy_structure that can interpolate nodes to the common + timebase.""" + for item in source.iter_nonempty_(): + path = item.metadata.path_string + if path.endswith("/time"): + continue # Skip time bases + + target_item = _get_target_item(item, target, rename_map) + if target_item is None: + continue + # We don't implement type changes and post process in this conversion: + assert path not in rename_map.type_change + assert path not in rename_map.post_process + + if isinstance(item, IDSStructArray): + size = len(item) + target_item.resize(size) + for i in range(size): + copy_and_interpolate(item[i], target_item[i], timebase) + elif isinstance(item, IDSStructure): + copy_and_interpolate(item, target_item, timebase) + elif ( + item.metadata.ndim == 1 + and item.metadata.coordinates[0].is_time_coordinate + ): + # Interpolate 1D dynamic quantities to the common time base + time = item.coordinates[0] # TODO, this can fail? + if len(item) != len(time): + raise ValueError( + f"Array {item} has a different size than its time base {time}." 
+ ) + is_integer = item.metadata.data_type is IDSDataType.INT + value = interp1d( + time.value, + item.value, + "previous" if is_integer else "linear", + copy=False, + bounds_error=False, + fill_value=(item[0], item[-1]), + assume_sorted=True, + )(timebase) + target_item.value = value.astype(numpy.int32) if is_integer else value + else: # Default copy + target_item.value = copy.copy(item.value) if deepcopy else item.value + + for item in source.iter_nonempty_(): + # Special cases for non-dynamic stuff + name = item.metadata.name + target_item = _get_target_item(item, target, rename_map) + if target_item is None: + continue + + if name in ["ids_properties", "code"]: + _copy_structure(item, target_item, deepcopy, False, version_map) + elif name == "time": + target_item.value = item.value if not deepcopy else copy.copy(item.value) + elif name == "event": + size = len(item) + target_item.resize(size) + for i in range(size): + _copy_structure(item[i], target_item[i], deepcopy, False, version_map) + else: + # Find all time bases + time_bases = [ + node.value + for node in imas.util.tree_iter(item) + if node.metadata.name == "time" + ] + # Construct the common time base + timebase = numpy.unique(numpy.concatenate(time_bases)) if time_bases else [] + target_item.time = timebase + # Do the conversion + copy_and_interpolate(item, target_item, timebase) diff --git a/imas/test/test_ids_convert.py b/imas/test/test_ids_convert.py index 55045bbc..f2b9b7f7 100644 --- a/imas/test/test_ids_convert.py +++ b/imas/test/test_ids_convert.py @@ -7,6 +7,7 @@ from unittest.mock import MagicMock import numpy +from numpy import array_equal import pytest from imas import identifiers @@ -27,7 +28,7 @@ from imas.ids_factory import IDSFactory from imas.ids_struct_array import IDSStructArray from imas.ids_structure import IDSStructure -from imas.test.test_helpers import compare_children, open_dbentry +from imas.test.test_helpers import compare_children, fill_consistent, open_dbentry UTC = 
timezone.utc @@ -287,22 +288,22 @@ def test_3to4_repeat_children_first_point_conditional(dd4factory): for i in range(2): outline_inner = wall4.description_2d[0].vessel.unit[i].annular.outline_inner if i == 0: # open outline, first point not repeated: - assert numpy.array_equal(outline_inner.r, [1.0, 2.0, 3.0]) - assert numpy.array_equal(outline_inner.z, [-1.0, -2.0, -3.0]) + assert array_equal(outline_inner.r, [1.0, 2.0, 3.0]) + assert array_equal(outline_inner.z, [-1.0, -2.0, -3.0]) else: # closed outline, first point repeated: - assert numpy.array_equal(outline_inner.r, [1.0, 2.0, 3.0, 1.0]) - assert numpy.array_equal(outline_inner.z, [-1.0, -2.0, -3.0, -1.0]) + assert array_equal(outline_inner.r, [1.0, 2.0, 3.0, 1.0]) + assert array_equal(outline_inner.z, [-1.0, -2.0, -3.0, -1.0]) # Test conversion for case 2: assert len(wall4.description_2d[0].limiter.unit) == 2 for i in range(2): unit = wall4.description_2d[0].limiter.unit[i] if i == 0: # open outline, first point not repeated: - assert numpy.array_equal(unit.outline.r, [1.0, 2.0, 3.0]) - assert numpy.array_equal(unit.outline.z, [-1.0, -2.0, -3.0]) + assert array_equal(unit.outline.r, [1.0, 2.0, 3.0]) + assert array_equal(unit.outline.z, [-1.0, -2.0, -3.0]) else: # closed outline, first point repeated: - assert numpy.array_equal(unit.outline.r, [1.0, 2.0, 3.0, 1.0]) - assert numpy.array_equal(unit.outline.z, [-1.0, -2.0, -3.0, -1.0]) + assert array_equal(unit.outline.r, [1.0, 2.0, 3.0, 1.0]) + assert array_equal(unit.outline.z, [-1.0, -2.0, -3.0, -1.0]) # Test conversion for case 3: assert len(wall4.description_2d[0].mobile.unit) == 2 @@ -310,11 +311,11 @@ def test_3to4_repeat_children_first_point_conditional(dd4factory): unit = wall4.description_2d[0].mobile.unit[i] for j in range(3): if i == 0: # open outline, first point not repeated: - assert numpy.array_equal(unit.outline[j].r, [1.0, 2.0, 3.0]) - assert numpy.array_equal(unit.outline[j].z, [-1.0, -2.0, -3.0]) + assert array_equal(unit.outline[j].r, [1.0, 
2.0, 3.0]) + assert array_equal(unit.outline[j].z, [-1.0, -2.0, -3.0]) else: # closed outline, first point repeated: - assert numpy.array_equal(unit.outline[j].r, [1.0, 2.0, 3.0, 1.0]) - assert numpy.array_equal(unit.outline[j].z, [-1.0, -2.0, -3.0, -1.0]) + assert array_equal(unit.outline[j].r, [1.0, 2.0, 3.0, 1.0]) + assert array_equal(unit.outline[j].z, [-1.0, -2.0, -3.0, -1.0]) assert unit.outline[j].time == pytest.approx(j / 5) # Test conversion for case 4: @@ -322,9 +323,9 @@ def test_3to4_repeat_children_first_point_conditional(dd4factory): for i in range(2): thickness = wall4.description_2d[1].vessel.unit[i].annular.thickness if i == 0: # open outline, there was one value too many, drop the last one - assert numpy.array_equal(thickness, [1, 0.9]) + assert array_equal(thickness, [1, 0.9]) else: # closed outline, thickness values kept - assert numpy.array_equal(thickness, [1, 0.9, 0.9]) + assert array_equal(thickness, [1, 0.9, 0.9]) # Test conversion back wall3 = convert_ids(wall4, "3.39.0") @@ -340,8 +341,8 @@ def test_3to4_repeat_children_first_point(dd4factory): iron_core4 = convert_ids(iron_core, None, factory=dd4factory) geometry = iron_core4.segment[0].geometry - assert numpy.array_equal(geometry.outline.r, [1.0, 2.0, 3.0, 1.0]) - assert numpy.array_equal(geometry.outline.z, [-1.0, -2.0, -3.0, -1.0]) + assert array_equal(geometry.outline.r, [1.0, 2.0, 3.0, 1.0]) + assert array_equal(geometry.outline.z, [-1.0, -2.0, -3.0, -1.0]) iron_core3 = convert_ids(iron_core4, "3.39.0") compare_children(iron_core, iron_core3) @@ -356,11 +357,11 @@ def test_3to4_cocos_change(dd4factory): cp.profiles_1d[0].grid.psi = numpy.linspace(10, 20, 11) cp4 = convert_ids(cp, None, factory=dd4factory) - assert numpy.array_equal( + assert array_equal( cp4.profiles_1d[0].grid.rho_tor_norm, cp.profiles_1d[0].grid.rho_tor_norm, ) - assert numpy.array_equal( + assert array_equal( cp4.profiles_1d[0].grid.psi, -cp.profiles_1d[0].grid.psi, ) @@ -376,11 +377,11 @@ def 
test_3to4_cocos_change(dd4factory): eq.time_slice[0].profiles_1d.dpressure_dpsi = numpy.linspace(1, 2, 11) eq4 = convert_ids(eq, None, factory=dd4factory) - assert numpy.array_equal( + assert array_equal( eq4.time_slice[0].profiles_1d.psi, -eq.time_slice[0].profiles_1d.psi, ) - assert numpy.array_equal( + assert array_equal( eq4.time_slice[0].profiles_1d.dpressure_dpsi, -eq.time_slice[0].profiles_1d.dpressure_dpsi, ) @@ -400,7 +401,7 @@ def test_3to4_circuit_connections(dd4factory, caplog): ] pfa4 = convert_ids(pfa, None, factory=dd4factory) - assert numpy.array_equal( + assert array_equal( pfa4.circuit[0].connections, [[-1, 0, 1], [0, 1, -1], [1, -1, 0]] ) @@ -417,7 +418,7 @@ def test_3to4_circuit_connections(dd4factory, caplog): with caplog.at_level(logging.ERROR): pfa4 = convert_ids(pfa, None, factory=dd4factory) # Incorrect shape, data is not converted: - assert numpy.array_equal(pfa.circuit[0].connections, pfa4.circuit[0].connections) + assert array_equal(pfa.circuit[0].connections, pfa4.circuit[0].connections) # Check that a message with ERROR severity was logged assert len(caplog.record_tuples) == 1 assert caplog.record_tuples[0][1] == logging.ERROR @@ -430,7 +431,53 @@ def test_3to4_cocos_magnetics_workaround(dd4factory): mag.flux_loop[0].flux.data = [1.0, 2.0] mag4 = convert_ids(mag, None, factory=dd4factory) - assert numpy.array_equal(mag4.flux_loop[0].flux.data, [-1.0, -2.0]) + assert array_equal(mag4.flux_loop[0].flux.data, [-1.0, -2.0]) mag3 = convert_ids(mag4, "3.39.0") compare_children(mag, mag3) + + +def test_3to4_pulse_schedule(): + ps = IDSFactory("3.39.0").pulse_schedule() + ps.ids_properties.homogeneous_time = IDS_TIME_MODE_HETEROGENEOUS + + ps.ec.launcher.resize(3) + ps.ec.launcher[0].power.reference.data = [1.0, 2.0, 3.0] + ps.ec.launcher[0].power.reference.time = [1.0, 2.0, 3.0] + ps.ec.launcher[1].power.reference.data = [0.0, 2.0, 5.0] + ps.ec.launcher[1].power.reference.time = [0.0, 2.0, 5.0] + ps.ec.launcher[2].power.reference.data = [1.0, 
1.5] + ps.ec.launcher[2].power.reference.time = [1.0, 1.5] + + ps.ec.mode.data = [1, 2, 5] + ps.ec.mode.time = [1.0, 2.0, 5.0] + + ps4 = convert_ids(ps, "4.0.0") + assert array_equal(ps4.ec.time, [0.0, 1.0, 1.5, 2.0, 3.0, 5.0]) + item = "power_launched/reference" + assert array_equal(ps4.ec.beam[0][item], [1.0, 1.0, 1.5, 2.0, 3.0, 3.0]) + assert array_equal(ps4.ec.beam[1][item], [0.0, 1.0, 1.5, 2.0, 3.0, 5.0]) + assert array_equal(ps4.ec.beam[2][item], [1.0, 1.0, 1.5, 1.5, 1.5, 1.5]) + assert array_equal(ps4.ec.mode, [1, 1, 1, 2, 2, 5]) + + +def test_3to4_pulse_schedule_exceptions(): + ps = IDSFactory("3.39.0").pulse_schedule() + ps.ids_properties.homogeneous_time = IDS_TIME_MODE_HETEROGENEOUS + + ps.ec.launcher.resize(3) + ps.ec.launcher[0].power.reference.data = [1.0, 2.0, 3.0] + with pytest.raises(ValueError): # missing time base + convert_ids(ps, "4.0.0") + + ps.ec.launcher[0].power.reference.time = [1.0, 2.0] + with pytest.raises(ValueError): # incorrect size of time base + convert_ids(ps, "4.0.0") + + +def test_3to4_pulse_schedule_fuzz(): + ps = IDSFactory("3.39.0").pulse_schedule() + ps.ids_properties.homogeneous_time = IDS_TIME_MODE_HETEROGENEOUS + + fill_consistent(ps) + convert_ids(ps, "4.0.0") From c26bf0b86b6e1a5d751112fecd575b1396db1a85 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 3 Apr 2025 09:30:04 +0200 Subject: [PATCH 05/74] Restructure pulse_schedule resampling to reuse more existing logic --- imas/ids_convert.py | 97 ++++++++++++++++++++------------------------- 1 file changed, 44 insertions(+), 53 deletions(-) diff --git a/imas/ids_convert.py b/imas/ids_convert.py index 5d3ef601..6f76f812 100644 --- a/imas/ids_convert.py +++ b/imas/ids_convert.py @@ -5,7 +5,7 @@ import copy import datetime import logging -from functools import lru_cache +from functools import lru_cache, partial from pathlib import Path from typing import Callable, Dict, Iterator, Optional, Set, Tuple from xml.etree.ElementTree import Element, ElementTree @@ -589,6 
+589,7 @@ def _copy_structure( deepcopy: bool, source_is_new: bool, version_map: DDVersionMap, + callback: Optional[Callable] = None, ): """Recursively copy data, following NBC renames. @@ -599,6 +600,7 @@ def _copy_structure( source_is_new: True iff the DD version of the source is newer than that of the target. version_map: Version map containing NBC renames. + callback: Optional callback that is called for every copied node. """ rename_map = version_map.new_to_old if source_is_new else version_map.old_to_new for item in source.iter_nonempty_(): @@ -620,16 +622,25 @@ def _copy_structure( target_item.resize(size) for i in range(size): _copy_structure( - item[i], target_item[i], deepcopy, source_is_new, version_map + item[i], + target_item[i], + deepcopy, + source_is_new, + version_map, + callback, ) elif isinstance(item, IDSStructure): - _copy_structure(item, target_item, deepcopy, source_is_new, version_map) + _copy_structure( + item, target_item, deepcopy, source_is_new, version_map, callback + ) else: target_item.value = copy.copy(item.value) if deepcopy else item.value # Post-process the node: if path in rename_map.post_process: rename_map.post_process[path](target_item) + if callback is not None: + callback(item, target_item) ######################################################################################## @@ -964,61 +975,13 @@ def _pulse_schedule_3to4( # - IDS is using heterogeneous time rename_map = version_map.old_to_new - def copy_and_interpolate( - source: IDSStructure, target: IDSStructure, timebase: numpy.ndarray - ): - """Reimplementation of _copy_structure that can interpolate nodes to the common - timebase.""" - for item in source.iter_nonempty_(): - path = item.metadata.path_string - if path.endswith("/time"): - continue # Skip time bases - - target_item = _get_target_item(item, target, rename_map) - if target_item is None: - continue - # We don't implement type changes and post process in this conversion: - assert path not in 
rename_map.type_change - assert path not in rename_map.post_process - - if isinstance(item, IDSStructArray): - size = len(item) - target_item.resize(size) - for i in range(size): - copy_and_interpolate(item[i], target_item[i], timebase) - elif isinstance(item, IDSStructure): - copy_and_interpolate(item, target_item, timebase) - elif ( - item.metadata.ndim == 1 - and item.metadata.coordinates[0].is_time_coordinate - ): - # Interpolate 1D dynamic quantities to the common time base - time = item.coordinates[0] # TODO, this can fail? - if len(item) != len(time): - raise ValueError( - f"Array {item} has a different size than its time base {time}." - ) - is_integer = item.metadata.data_type is IDSDataType.INT - value = interp1d( - time.value, - item.value, - "previous" if is_integer else "linear", - copy=False, - bounds_error=False, - fill_value=(item[0], item[-1]), - assume_sorted=True, - )(timebase) - target_item.value = value.astype(numpy.int32) if is_integer else value - else: # Default copy - target_item.value = copy.copy(item.value) if deepcopy else item.value - for item in source.iter_nonempty_(): - # Special cases for non-dynamic stuff name = item.metadata.name target_item = _get_target_item(item, target, rename_map) if target_item is None: continue + # Special cases for non-dynamic stuff if name in ["ids_properties", "code"]: _copy_structure(item, target_item, deepcopy, False, version_map) elif name == "time": @@ -1039,4 +1002,32 @@ def copy_and_interpolate( timebase = numpy.unique(numpy.concatenate(time_bases)) if time_bases else [] target_item.time = timebase # Do the conversion - copy_and_interpolate(item, target_item, timebase) + _copy_structure( + item, + target_item, + deepcopy, + False, + version_map, + partial(_pulse_schedule_resample_callback, timebase), + ) + + +def _pulse_schedule_resample_callback(timebase, item: IDSBase, target_item: IDSBase): + if item.metadata.ndim == 1 and item.metadata.coordinates[0].is_time_coordinate: + # Interpolate 1D 
dynamic quantities to the common time base + time = item.coordinates[0] + if len(item) != len(time): + raise ValueError( + f"Array {item} has a different size than its time base {time}." + ) + is_integer = item.metadata.data_type is IDSDataType.INT + value = interp1d( + time.value, + item.value, + "previous" if is_integer else "linear", + copy=False, + bounds_error=False, + fill_value=(item[0], item[-1]), + assume_sorted=True, + )(timebase) + target_item.value = value.astype(numpy.int32) if is_integer else value From 25bc4fdf200af08484254a5f53de075f3fc277df Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 3 Apr 2025 10:22:03 +0200 Subject: [PATCH 06/74] Refactoring and suppress some log messages --- imas/ids_convert.py | 52 ++++++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/imas/ids_convert.py b/imas/ids_convert.py index 6f76f812..bce8c4ac 100644 --- a/imas/ids_convert.py +++ b/imas/ids_convert.py @@ -489,8 +489,10 @@ def convert_ids( target_tree = target_ids._parent._etree if source_is_new: version_map = _DDVersionMap(ids_name, target_tree, source_tree, target_version) + rename_map = version_map.new_to_old else: version_map = _DDVersionMap(ids_name, source_tree, target_tree, source_version) + rename_map = version_map.old_to_new # Special case for DD3to4 pulse_schedule conversion if ( @@ -499,9 +501,14 @@ def convert_ids( and source_version < Version("3.40.0") and target_version.major == 4 ): - _pulse_schedule_3to4(toplevel, target_ids, deepcopy, version_map) + try: + # Suppress "'.../time' does not exist in the target IDS." log messages. 
+ logger.addFilter(_pulse_schedule_3to4_logfilter) + _pulse_schedule_3to4(toplevel, target_ids, deepcopy, rename_map) + finally: + logger.removeFilter(_pulse_schedule_3to4_logfilter) else: - _copy_structure(toplevel, target_ids, deepcopy, source_is_new, version_map) + _copy_structure(toplevel, target_ids, deepcopy, rename_map) logger.info("Conversion of IDS %s finished.", ids_name) if provenance_origin_uri: @@ -587,8 +594,7 @@ def _copy_structure( source: IDSStructure, target: IDSStructure, deepcopy: bool, - source_is_new: bool, - version_map: DDVersionMap, + rename_map: NBCPathMap, callback: Optional[Callable] = None, ): """Recursively copy data, following NBC renames. @@ -602,7 +608,6 @@ def _copy_structure( version_map: Version map containing NBC renames. callback: Optional callback that is called for every copied node. """ - rename_map = version_map.new_to_old if source_is_new else version_map.old_to_new for item in source.iter_nonempty_(): path = item.metadata.path_string target_item = _get_target_item(item, target, rename_map) @@ -621,18 +626,9 @@ def _copy_structure( size = len(item) target_item.resize(size) for i in range(size): - _copy_structure( - item[i], - target_item[i], - deepcopy, - source_is_new, - version_map, - callback, - ) + _copy_structure(item[i], target_item[i], deepcopy, rename_map, callback) elif isinstance(item, IDSStructure): - _copy_structure( - item, target_item, deepcopy, source_is_new, version_map, callback - ) + _copy_structure(item, target_item, deepcopy, rename_map, callback) else: target_item.value = copy.copy(item.value) if deepcopy else item.value @@ -957,7 +953,7 @@ def _pulse_schedule_3to4( source: IDSStructure, target: IDSStructure, deepcopy: bool, - version_map: DDVersionMap, + rename_map: NBCPathMap, ): """Recursively copy data, following NBC renames, and converting time bases for the pulse_schedule IDS. @@ -966,14 +962,13 @@ def _pulse_schedule_3to4( source: Source structure. target: Target structure. 
deepcopy: See :func:`convert_ids`. - version_map: Version map containing NBC renames. + rename_map: Map containing NBC renames. """ # All prerequisites are checked before calling this function: # - source and target are pulse_schedule IDSs # - source has DD version < 3.40.0 # - target has DD version >= 4.0.0, < 5.0 # - IDS is using heterogeneous time - rename_map = version_map.old_to_new for item in source.iter_nonempty_(): name = item.metadata.name @@ -983,14 +978,14 @@ def _pulse_schedule_3to4( # Special cases for non-dynamic stuff if name in ["ids_properties", "code"]: - _copy_structure(item, target_item, deepcopy, False, version_map) + _copy_structure(item, target_item, deepcopy, rename_map) elif name == "time": target_item.value = item.value if not deepcopy else copy.copy(item.value) elif name == "event": size = len(item) target_item.resize(size) for i in range(size): - _copy_structure(item[i], target_item[i], deepcopy, False, version_map) + _copy_structure(item[i], target_item[i], deepcopy, rename_map) else: # Find all time bases time_bases = [ @@ -1002,14 +997,13 @@ def _pulse_schedule_3to4( timebase = numpy.unique(numpy.concatenate(time_bases)) if time_bases else [] target_item.time = timebase # Do the conversion - _copy_structure( - item, - target_item, - deepcopy, - False, - version_map, - partial(_pulse_schedule_resample_callback, timebase), - ) + callback = partial(_pulse_schedule_resample_callback, timebase) + _copy_structure(item, target_item, deepcopy, rename_map, callback) + + +def _pulse_schedule_3to4_logfilter(logrecord: logging.LogRecord) -> bool: + """Suppress "'.../time' does not exist in the target IDS." 
log messages.""" + return not (logrecord.args and str(logrecord.args[0]).endswith("/time")) def _pulse_schedule_resample_callback(timebase, item: IDSBase, target_item: IDSBase): From 55bd0c6784045535607056bfa45ba347d9aa19cb Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 3 Apr 2025 11:07:51 +0200 Subject: [PATCH 07/74] More refactoring --- imas/ids_convert.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/imas/ids_convert.py b/imas/ids_convert.py index bce8c4ac..47512d2f 100644 --- a/imas/ids_convert.py +++ b/imas/ids_convert.py @@ -471,12 +471,10 @@ def convert_ids( raise RuntimeError( f"There is no IDS with name {ids_name} in DD version {version}." ) - target_ids = factory.new(ids_name) - else: - target_ids = target + target = factory.new(ids_name) source_version = parse_dd_version(toplevel._version) - target_version = parse_dd_version(target_ids._version) + target_version = parse_dd_version(target._version) logger.info( "Starting conversion of IDS %s from version %s to version %s.", ids_name, @@ -484,10 +482,9 @@ def convert_ids( target_version, ) - source_is_new = source_version > target_version source_tree = toplevel._parent._etree - target_tree = target_ids._parent._etree - if source_is_new: + target_tree = target._parent._etree + if source_version > target_version: version_map = _DDVersionMap(ids_name, target_tree, source_tree, target_version) rename_map = version_map.new_to_old else: @@ -504,16 +501,16 @@ def convert_ids( try: # Suppress "'.../time' does not exist in the target IDS." log messages. 
logger.addFilter(_pulse_schedule_3to4_logfilter) - _pulse_schedule_3to4(toplevel, target_ids, deepcopy, rename_map) + _pulse_schedule_3to4(toplevel, target, deepcopy, rename_map) finally: logger.removeFilter(_pulse_schedule_3to4_logfilter) else: - _copy_structure(toplevel, target_ids, deepcopy, rename_map) + _copy_structure(toplevel, target, deepcopy, rename_map) logger.info("Conversion of IDS %s finished.", ids_name) if provenance_origin_uri: - _add_provenance_entry(target_ids, toplevel._version, provenance_origin_uri) - return target_ids + _add_provenance_entry(target, toplevel._version, provenance_origin_uri) + return target def _add_provenance_entry( From 06347103fafdaaaeb8fb7a3191e3d1feba15c2dc Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 3 Apr 2025 11:29:18 +0200 Subject: [PATCH 08/74] Add documentation --- docs/source/multi-dd.rst | 10 ++++++++++ imas/ids_convert.py | 1 + 2 files changed, 11 insertions(+) diff --git a/docs/source/multi-dd.rst b/docs/source/multi-dd.rst index 6ddd7cd1..6585ed2c 100644 --- a/docs/source/multi-dd.rst +++ b/docs/source/multi-dd.rst @@ -146,6 +146,7 @@ explicit conversion mechanisms. Changed definition of open/closed contours, Yes, No Changed definition of ``space/coordinates_type`` in GGD grids, Yes, No Migrate obsolescent ``ids_properties/source`` to ``ids_properties/provenance``, Yes, No + Convert the multiple time-bases in the ``pulse_schedule`` IDS [#ps3to4]_, Yes, No .. [#rename] Quantities which have been renamed between the two DD versions. For example, the ``ec/beam`` Array of Structures in the ``pulse_schedule`` IDS, @@ -175,6 +176,15 @@ explicit conversion mechanisms. .. [#ignore_type_change] These type changes are not supported. Quantities in the destination IDS will remain empty. +.. [#ps3to4] In Data Dictionary 3.39.0 and older, all dynamic quantities in the + ``pulse_schedule`` IDS had their own time array. 
In DD 4.0.0 this was + restructured to one time array per component (for example `ec/time + `__). + This migration constructs a common time base per subroup, and interpolates + the dynamic quantities within the group to the new time base. Resampling + uses `previous neighbour` interpolation for integer quantities, and linear + interpolation otherwise. See also: + https://github.com/iterorganization/IMAS-Python/issues/21. .. _`DD background`: diff --git a/imas/ids_convert.py b/imas/ids_convert.py index 47512d2f..a6d3d3cf 100644 --- a/imas/ids_convert.py +++ b/imas/ids_convert.py @@ -1004,6 +1004,7 @@ def _pulse_schedule_3to4_logfilter(logrecord: logging.LogRecord) -> bool: def _pulse_schedule_resample_callback(timebase, item: IDSBase, target_item: IDSBase): + """Callback from _copy_structure to resample dynamic data on the new timebase""" if item.metadata.ndim == 1 and item.metadata.coordinates[0].is_time_coordinate: # Interpolate 1D dynamic quantities to the common time base time = item.coordinates[0] From 639added787d2b8929a0c5a0a1b1dcb5c20ae8be Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 3 Apr 2025 14:21:32 +0200 Subject: [PATCH 09/74] Fix typo --- docs/source/multi-dd.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/multi-dd.rst b/docs/source/multi-dd.rst index 6585ed2c..b63d18e4 100644 --- a/docs/source/multi-dd.rst +++ b/docs/source/multi-dd.rst @@ -180,7 +180,7 @@ explicit conversion mechanisms. ``pulse_schedule`` IDS had their own time array. In DD 4.0.0 this was restructured to one time array per component (for example `ec/time `__). - This migration constructs a common time base per subroup, and interpolates + This migration constructs a common time base per subgroup, and interpolates the dynamic quantities within the group to the new time base. Resampling uses `previous neighbour` interpolation for integer quantities, and linear interpolation otherwise. 
See also: From 111f6506a681d422d2bc710d6529686f7cb6c2a2 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Fri, 4 Apr 2025 09:43:34 +0200 Subject: [PATCH 10/74] Extend DD version range to which the pulse schedule conversion applies --- imas/ids_convert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imas/ids_convert.py b/imas/ids_convert.py index a6d3d3cf..295a87be 100644 --- a/imas/ids_convert.py +++ b/imas/ids_convert.py @@ -496,7 +496,7 @@ def convert_ids( toplevel.metadata.name == "pulse_schedule" and toplevel.ids_properties.homogeneous_time == IDS_TIME_MODE_HETEROGENEOUS and source_version < Version("3.40.0") - and target_version.major == 4 + and target_version >= Version("3.40.0") ): try: # Suppress "'.../time' does not exist in the target IDS." log messages. From 4e42d1139efdac1d908ee3e83d06b51508957a85 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 15 Apr 2025 17:10:25 +0200 Subject: [PATCH 11/74] Remove numpy<2 restriction Pytest runs successfully with numpy 2.x, so we should be able to remove this restriction. --- pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 56e6dc1b..db5111c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,8 +56,7 @@ classifiers = [ ] dynamic = ["version"] dependencies = [ - # FIXME: numpy 2.0 compatibility - "numpy>=1.15.4,<2", + "numpy>=1.15.4", "rich", "scipy", "click", From f4ebddcf48cfd3645ca96937e8f6ec42d509a023 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 23 Apr 2025 10:29:46 +0200 Subject: [PATCH 12/74] Use datapath with UDA when fetching IDS properties We request `ids_properties/homogeneous_time` and `ids_properties/version_put/data_dictionary` in two separate calls to the backend before actually getting all data. This is fine for local backends, but UDA would fetch the data three times: 1. When determining the DD version and if the IDS exists 2. 
When determining whether the IDS uses homogeneous time 3. When actually reading the data This commit adds a `datapath="ids_properties"` to the first two cases. This results in UDA only fetching the IDS properties in points 1 and 2. The full IDS is now requested once by UDA, in point 3. --- imas/backends/imas_core/al_context.py | 11 +++++++---- imas/backends/imas_core/db_entry_al.py | 6 ++++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/imas/backends/imas_core/al_context.py b/imas/backends/imas_core/al_context.py index 3341121b..1685e384 100644 --- a/imas/backends/imas_core/al_context.py +++ b/imas/backends/imas_core/al_context.py @@ -1,7 +1,6 @@ # This file is part of IMAS-Python. # You should have received the IMAS-Python LICENSE file with this project. -"""Object-oriented interface to the IMAS lowlevel. -""" +"""Object-oriented interface to the IMAS lowlevel.""" import logging import weakref @@ -61,17 +60,21 @@ def __enter__(self) -> "ALContext": def __exit__(self, exc_type, exc_value, traceback) -> None: ll_interface.end_action(self.ctx) - def global_action(self, path: str, rwmode: int) -> "ALContext": + def global_action(self, path: str, rwmode: int, datapath: str = "") -> "ALContext": """Begin a new global action for use in a ``with`` context. Args: path: access layer path for this global action: ``[/]`` rwmode: read-only or read-write operation mode: ``READ_OP``/``WRITE_OP`` + datapath: used by UDA backend to fetch only part of the data. Returns: The created context. 
""" - status, ctx = ll_interface.begin_global_action(self.ctx, path, rwmode) + args = [self.ctx, path, rwmode] + if datapath: # AL4 compatibility: datapath arg was added in AL5 + args.append(datapath) + status, ctx = ll_interface.begin_global_action(*args) if status != 0: raise LowlevelError("global_action", status) return ALContext(ctx) diff --git a/imas/backends/imas_core/db_entry_al.py b/imas/backends/imas_core/db_entry_al.py index 52d82fe6..b3240ebd 100644 --- a/imas/backends/imas_core/db_entry_al.py +++ b/imas/backends/imas_core/db_entry_al.py @@ -257,7 +257,8 @@ def get( if occurrence != 0: ll_path += f"/{occurrence}" - with self._db_ctx.global_action(ll_path, READ_OP) as read_ctx: + datapath = "ids_properties" if self.backend == "uda" else "" + with self._db_ctx.global_action(ll_path, READ_OP, datapath) as read_ctx: time_mode_path = "ids_properties/homogeneous_time" time_mode = read_ctx.read_data(time_mode_path, "", INTEGER_DATA, 0) # This is already checked by read_dd_version, but ensure: @@ -314,7 +315,8 @@ def read_dd_version(self, ids_name: str, occurrence: int) -> str: if occurrence != 0: ll_path += f"/{occurrence}" - with self._db_ctx.global_action(ll_path, READ_OP) as read_ctx: + datapath = "ids_properties" if self.backend == "uda" else "" + with self._db_ctx.global_action(ll_path, READ_OP, datapath) as read_ctx: time_mode_path = "ids_properties/homogeneous_time" time_mode = read_ctx.read_data(time_mode_path, "", INTEGER_DATA, 0) dd_version_path = "ids_properties/version_put/data_dictionary" From 12b52e37cfe83fc6078b23fc29461b30b7c651e9 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Fri, 2 May 2025 09:00:33 +0200 Subject: [PATCH 13/74] Revert "Update backend selection logic" This reverts commit 33e6b7e28516ebd9b276ad98e7e8a76e9f54a145. 
---
 imas/db_entry.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/imas/db_entry.py b/imas/db_entry.py
index b218ad60..d7d74574 100644
--- a/imas/db_entry.py
+++ b/imas/db_entry.py
@@ -1,6 +1,7 @@
 # This file is part of IMAS-Python.
 # You should have received the IMAS-Python LICENSE file with this project.
-"""Logic for interacting with IMAS Data Entries."""
+"""Logic for interacting with IMAS Data Entries.
+"""
 
 import logging
 import os
@@ -188,10 +189,10 @@ def __init__(
     @staticmethod
     def _select_implementation(uri: Optional[str]) -> Type[DBEntryImpl]:
         """Select which DBEntry implementation to use based on the URI."""
-        if not uri or uri.startswith("imas:"):
-            from imas.backends.imas_core.db_entry_al import ALDBEntryImpl as impl
-        else:  # Assume it's a netCDF file or NCZarr URI
+        if uri and uri.endswith(".nc") and not uri.startswith("imas:"):
             from imas.backends.netcdf.db_entry_nc import NCDBEntryImpl as impl
+        else:
+            from imas.backends.imas_core.db_entry_al import ALDBEntryImpl as impl
         return impl
 
     def __enter__(self):

From 7d810dd48c2b55a24964200ae3239b5ddadf4b24 Mon Sep 17 00:00:00 2001
From: Maarten Sebregts
Date: Wed, 21 May 2025 09:43:01 +0200
Subject: [PATCH 14/74] Warn only once for paths that do not exist in the
 target IDS per convert_ids call

This prevents polluting the output of the CLI `imas convert` and
explicit `imas.convert_ids` calls with lots of repeated warnings.
---
 imas/ids_convert.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/imas/ids_convert.py b/imas/ids_convert.py
index 295a87be..c4e752e0 100644
--- a/imas/ids_convert.py
+++ b/imas/ids_convert.py
@@ -27,6 +27,9 @@ from imas.ids_toplevel import IDSToplevel
 
 logger = logging.getLogger(__name__)
 
+# Store for which paths we already emitted a warning that the target could not be found
+# to prevent polluting the output with lots of repeated items.
+_missing_paths_warning = set() def iter_parents(path: str) -> Iterator[str]: @@ -481,6 +484,8 @@ def convert_ids( source_version, target_version, ) + global _missing_paths_warning + _missing_paths_warning = set() # clear for which paths we emitted a warning source_tree = toplevel._parent._etree target_tree = target._parent._etree @@ -566,11 +571,14 @@ def _get_target_item( if path in rename_map: if rename_map.path[path] is None: if path not in rename_map.ignore_missing_paths: - if path in rename_map.type_change: - msg = "Element %r changed type in the target IDS." - else: - msg = "Element %r does not exist in the target IDS." - logger.warning(msg + " Data is not copied.", path) + # Only warn the first time that we encounter this path: + if path not in _missing_paths_warning: + if path in rename_map.type_change: + msg = "Element %r changed type in the target IDS." + else: + msg = "Element %r does not exist in the target IDS." + logger.warning(msg + " Data is not copied.", path) + _missing_paths_warning.add(path) return None else: return IDSPath(rename_map.path[path]).goto(target) From 09104ac4360cc8cf220db15507b789d25ca977ba Mon Sep 17 00:00:00 2001 From: Olivier Hoenen Date: Tue, 3 Jun 2025 14:23:38 +0200 Subject: [PATCH 15/74] Add CODEOWNERS --- .github/CODEOWNERS | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 .github/CODEOWNERS diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 00000000..18fdbbff --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,10 @@ +# This file allows setting automatically reviewers for pull requests. +# Each line is a file pattern followed by one or more owners. +# The last match takes precedence over previous ones. +# Do not edit unless specifically mandated to do so. + +# Global/fallback and technical modifications. 
+* @maarten-ic @prasad-sawantdesai @olivhoenen + +# Modifications to CODEOWNERS and action workflows +.github/ @SimonPinches @olivhoenen From ff0f628e6c35f5db5d3fd760d9da87115da90e23 Mon Sep 17 00:00:00 2001 From: Olivier Hoenen Date: Fri, 6 Jun 2025 11:36:24 +0200 Subject: [PATCH 16/74] Add example in doc to list available IDS+occ in a data-entry (#35) --- docs/source/courses/basic/analyze.rst | 17 +++++++++++++++++ docs/source/courses/basic/explore.rst | 4 ++-- .../basic/imas_snippets/explore_data_entry.py | 9 +++++++++ 3 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 docs/source/courses/basic/imas_snippets/explore_data_entry.py diff --git a/docs/source/courses/basic/analyze.rst b/docs/source/courses/basic/analyze.rst index d1ae1434..6317cb04 100644 --- a/docs/source/courses/basic/analyze.rst +++ b/docs/source/courses/basic/analyze.rst @@ -246,3 +246,20 @@ Exercise 5 A plot of :math:`T_e` vs :math:`t`. .. seealso:: :ref:`Lazy loading` + + +Explore the DBEntry +''''''''''''''''''' + +You may not know apriori which types of IDSs are available within an IMAS database entry. +It can also happen that several IDSs objects of the same type are stored within +this entry, in that case each IDS is stored as a separate `occurrence` +(occurrences are identified with an integer value, 0 being the default). + +In IMAS-Python, the function :meth:`~imas.db_entry.DBEntry.list_all_occurrences()` will +help you finding which occurrences are available in a given database entry and for a given +IDS type. + +The following snippet shows how to list the available IDSs in a given database entry: + +.. literalinclude:: imas_snippets/explore_data_entry.py diff --git a/docs/source/courses/basic/explore.rst b/docs/source/courses/basic/explore.rst index e3395eda..348d9ab4 100644 --- a/docs/source/courses/basic/explore.rst +++ b/docs/source/courses/basic/explore.rst @@ -7,8 +7,8 @@ In this part of the training, we will learn how to use Python to explore data saved in IDSs. 
-Explore which IDSs are available --------------------------------- +Explore which IDS structures are available +------------------------------------------ Most codes will touch multiple IDSs inside a single IMAS data entry. For example a heating code using a magnetic equilibrium from the ``equilibrium`` IDS with a diff --git a/docs/source/courses/basic/imas_snippets/explore_data_entry.py b/docs/source/courses/basic/imas_snippets/explore_data_entry.py new file mode 100644 index 00000000..cabaf148 --- /dev/null +++ b/docs/source/courses/basic/imas_snippets/explore_data_entry.py @@ -0,0 +1,9 @@ +import imas + +# Open input data entry +entry = imas.DBEntry("imas:hdf5?path=<...>","r") + +# Print the list of available IDSs with their occurrence +print([(idsname,occ) for idsname in imas.IDSFactory().ids_names() for occ in entry.list_all_occurrences(idsname)]) + +entry.close() From d0c5c8e3bc1462136bb1909276a1c923641d3059 Mon Sep 17 00:00:00 2001 From: Olivier Hoenen Date: Fri, 6 Jun 2025 11:52:16 +0200 Subject: [PATCH 17/74] Minor edit --- docs/source/courses/basic/analyze.rst | 4 ++-- .../source/courses/basic/imas_snippets/explore_data_entry.py | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/source/courses/basic/analyze.rst b/docs/source/courses/basic/analyze.rst index 6317cb04..00b6dd15 100644 --- a/docs/source/courses/basic/analyze.rst +++ b/docs/source/courses/basic/analyze.rst @@ -248,8 +248,8 @@ Exercise 5 .. seealso:: :ref:`Lazy loading` -Explore the DBEntry -''''''''''''''''''' +Explore the DBEntry and occurrences +''''''''''''''''''''''''''''''''''' You may not know apriori which types of IDSs are available within an IMAS database entry. 
It can also happen that several IDSs objects of the same type are stored within diff --git a/docs/source/courses/basic/imas_snippets/explore_data_entry.py b/docs/source/courses/basic/imas_snippets/explore_data_entry.py index cabaf148..f49df8d4 100644 --- a/docs/source/courses/basic/imas_snippets/explore_data_entry.py +++ b/docs/source/courses/basic/imas_snippets/explore_data_entry.py @@ -1,9 +1,10 @@ import imas -# Open input data entry +# Open input data entry entry = imas.DBEntry("imas:hdf5?path=<...>","r") # Print the list of available IDSs with their occurrence -print([(idsname,occ) for idsname in imas.IDSFactory().ids_names() for occ in entry.list_all_occurrences(idsname)]) +print([(idsname,occ) for idsname in imas.IDSFactory().ids_names() + for occ in entry.list_all_occurrences(idsname)]) entry.close() From a03fea7bc95f3ccaa1a3c5cc86547bbf2d79d284 Mon Sep 17 00:00:00 2001 From: Olivier Hoenen Date: Fri, 6 Jun 2025 13:56:44 +0200 Subject: [PATCH 18/74] Apply suggestions on code and text of the documentation Co-authored-by: Maarten Sebregts <110895564+maarten-ic@users.noreply.github.com> --- docs/source/courses/basic/analyze.rst | 2 +- .../courses/basic/imas_snippets/explore_data_entry.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/source/courses/basic/analyze.rst b/docs/source/courses/basic/analyze.rst index 00b6dd15..21a7c68b 100644 --- a/docs/source/courses/basic/analyze.rst +++ b/docs/source/courses/basic/analyze.rst @@ -251,7 +251,7 @@ Exercise 5 Explore the DBEntry and occurrences ''''''''''''''''''''''''''''''''''' -You may not know apriori which types of IDSs are available within an IMAS database entry. +You may not know a priori which types of IDSs are available within an IMAS database entry. It can also happen that several IDSs objects of the same type are stored within this entry, in that case each IDS is stored as a separate `occurrence` (occurrences are identified with an integer value, 0 being the default). 
diff --git a/docs/source/courses/basic/imas_snippets/explore_data_entry.py b/docs/source/courses/basic/imas_snippets/explore_data_entry.py index f49df8d4..2ec02698 100644 --- a/docs/source/courses/basic/imas_snippets/explore_data_entry.py +++ b/docs/source/courses/basic/imas_snippets/explore_data_entry.py @@ -1,10 +1,11 @@ import imas # Open input data entry -entry = imas.DBEntry("imas:hdf5?path=<...>","r") +entry = imas.DBEntry("imas:hdf5?path=<...>", "r") # Print the list of available IDSs with their occurrence -print([(idsname,occ) for idsname in imas.IDSFactory().ids_names() - for occ in entry.list_all_occurrences(idsname)]) +for idsname in imas.IDSFactory().ids_names(): + for occ in entry.list_all_occurrences(idsname): + print(idsname, occ) entry.close() From e35187a5a31ef5765403c365da1262dfe683fa81 Mon Sep 17 00:00:00 2001 From: Anushan Fernando Date: Mon, 9 Jun 2025 14:18:16 +0000 Subject: [PATCH 19/74] Modify lazy loading test to generate random data without complex numbers. --- imas/test/test_lazy_loading.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imas/test/test_lazy_loading.py b/imas/test/test_lazy_loading.py index 9023a795..ff241016 100644 --- a/imas/test/test_lazy_loading.py +++ b/imas/test/test_lazy_loading.py @@ -78,7 +78,7 @@ def test_lazy_loading_distributions_random_netcdf(tmp_path): def run_lazy_loading_distributions_random(dbentry): ids = IDSFactory().new("distributions") - fill_consistent(ids) + fill_consistent(ids, skip_complex=True) dbentry.put(ids) def iterate(structure): From cec6fddeb0dfe56e94a687e6ea55f2928e768088 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Fri, 6 Jun 2025 16:38:10 +0200 Subject: [PATCH 20/74] Remove logic for building the Data Dictionary The Data Dictionary definitions are now provided by the `imas-data-dictionaries` package. 
--- docs/source/multi-dd.rst | 23 ++-- imas/__init__.py | 1 - imas/dd_helpers.py | 167 ---------------------------- imas/dd_zip.py | 206 ++--------------------------------- imas/exception.py | 23 +--- imas/test/test_dd_helpers.py | 53 --------- pyproject.toml | 4 +- setup.py | 140 ------------------------ 8 files changed, 25 insertions(+), 592 deletions(-) delete mode 100644 imas/dd_helpers.py delete mode 100644 imas/test/test_dd_helpers.py delete mode 100644 setup.py diff --git a/docs/source/multi-dd.rst b/docs/source/multi-dd.rst index b63d18e4..bef1fe54 100644 --- a/docs/source/multi-dd.rst +++ b/docs/source/multi-dd.rst @@ -207,21 +207,14 @@ Automated tests have been provided that check the loading of all of the DD versions tagged in the data-dictionary git repository. -Extending the DD set -'''''''''''''''''''' +Data Dictionary definitions +''''''''''''''''''''''''''' -Use the command ``python setup.py build_DD`` to build a new ``IDSDef.zip``. This -fetches all tags from the data dictionary git repository and builds the ``IDSDef.zip``. +The Data Dictionary definitions used by IMAS-Python are provided by the `IMAS Data +Dictionaries `__ package. +Please update this package if you need a more recent version of the data dictionary. For +example, using ``pip``: -IMAS-Python searches for an ``IDSDef.zip`` in the following locations: +.. code-block:: bash -1. The environment variable ``$IMAS_DDZIP`` (path to a zip file) -2. The file ``./IDSDef.zip`` in the current working directory -3. In the local configuration folder: ``~/.config/imas/IDSDef.zip``, or - ``$XDG_CONFIG_DIR/imas/IDSDef.zip`` (if the environment variable - ``$XDG_CONFIG_DIR`` is set) -4. The zipfile bundled with the IMAS-Python installation: ``assets/IDSDef.zip`` - -All paths are searched in order when loading the definitions of a specific data -dictionary version: the first zip file that contains the definitions of the requested -version is used. 
+ pip install --upgrade imas-data-dictionaries diff --git a/imas/__init__.py b/imas/__init__.py index 0ed10404..58a66994 100644 --- a/imas/__init__.py +++ b/imas/__init__.py @@ -20,7 +20,6 @@ # Load the IMAS-Python IMAS AL/DD core from . import ( db_entry, - dd_helpers, dd_zip, util, ) diff --git a/imas/dd_helpers.py b/imas/dd_helpers.py deleted file mode 100644 index 446a9991..00000000 --- a/imas/dd_helpers.py +++ /dev/null @@ -1,167 +0,0 @@ -# This file is part of IMAS-Python. -# You should have received the IMAS-Python LICENSE file with this project. -"""Helper functions to build IDSDef.xml""" - -import logging -import os -import shutil -from pathlib import Path -from typing import Tuple -from zipfile import ZIP_DEFLATED, ZipFile - -from packaging.version import Version as V -from saxonche import PySaxonProcessor - -logger = logging.getLogger(__name__) - -_idsdef_zip_relpath = Path("imas/assets/IDSDef.zip") -_build_dir = Path("build") - - -def prepare_data_dictionaries(): - """Build IMAS IDSDef.xml files for each tagged version in the DD repository - 1. Use saxonche for transformations - 2. Clone the DD repository (ask for user/pass unless ssh key access is available) - 3. Generate IDSDef.xml and rename to IDSDef_${version}.xml - 4. 
Zip all these IDSDefs together and include in wheel - """ - from git import Repo - - repo: Repo = get_data_dictionary_repo() - if repo: - newest_version_and_tag = (V("0"), None) - for tag in repo.tags: - version_and_tag = (V(str(tag)), tag) - if V(str(tag)) > V("3.21.1"): - newest_version_and_tag = max(newest_version_and_tag, version_and_tag) - logger.debug("Building data dictionary version %s", tag) - build_data_dictionary(repo, tag) - - logger.info("Creating zip file of DD versions") - - if _idsdef_zip_relpath.is_file(): - logger.warning("Overwriting '%s'", _idsdef_zip_relpath) - - with ZipFile( - _idsdef_zip_relpath, - mode="w", # this needs w, since zip can have multiple same entries - compression=ZIP_DEFLATED, - ) as dd_zip: - for filename in _build_dir.glob("[0-9]*.xml"): - arcname = Path("data-dictionary").joinpath(*filename.parts[1:]) - dd_zip.write(filename, arcname=arcname) - # Include identifiers from latest tag in zip file - repo.git.checkout(newest_version_and_tag[1], force=True) - # DD layout <= 4.0.0 - for filename in Path("data-dictionary").glob("*/*identifier.xml"): - arcname = Path("identifiers").joinpath(*filename.parts[1:]) - dd_zip.write(filename, arcname=arcname) - # DD layout > 4.0.0 - for filename in Path("data-dictionary").glob("schemas/*/*identifier.xml"): - arcname = Path("identifiers").joinpath(*filename.parts[2:]) - dd_zip.write(filename, arcname=arcname) - - -def get_data_dictionary_repo() -> Tuple[bool, bool]: - try: - import git # Import git here, the user might not have it! - except ModuleNotFoundError: - raise RuntimeError( - "Could not find 'git' module, try 'pip install gitpython'. \ - Will not build Data Dictionaries!" 
- ) - - # We need the actual source code (for now) so grab it from ITER - dd_repo_path = "data-dictionary" - - if "DD_DIRECTORY" in os.environ: - logger.info("Found DD_DIRECTORY, copying") - try: - shutil.copytree(os.environ["DD_DIRECTORY"], dd_repo_path) - except FileExistsError: - pass - else: - logger.info("Trying to pull data dictionary git repo from ITER") - - # Set up a bare repo and fetch the data-dictionary repository in it - os.makedirs(dd_repo_path, exist_ok=True) - try: - repo = git.Repo(dd_repo_path) - except git.exc.InvalidGitRepositoryError: - repo = git.Repo.init(dd_repo_path) - logger.info("Set up local git repository {!s}".format(repo)) - - try: - origin = repo.remote() - except ValueError: - dd_repo_url = "https://github.com/iterorganization/imas-data-dictionary.git" - origin = repo.create_remote("origin", url=dd_repo_url) - logger.info("Set up remote '{!s}' linking to '{!s}'".format(origin, origin.url)) - - try: - origin.fetch(tags=True) - except git.exc.GitCommandError as ee: - logger.warning( - "Could not fetch tags from %s. Git reports:\n %s." "\nTrying to continue", - list(origin.urls), - ee, - ) - else: - logger.info("Remote tags fetched") - return repo - - -def _run_xsl_transformation( - xsd_file: Path, xsl_file: Path, tag: str, output_file: Path -) -> None: - """ - This function performs an XSL transformation using Saxon-HE (saxonche) - with the provided XSD file, XSL file, tag, and output file. 
- - Args: - xsd_file (Path): XML Schema Definition (XSD) file - xsl_file (Path): The `xsl_file` parameter - tag (str): tag name to provide to 'DD_GIT_DESCRIBE' parameter - output_file (Path): The `output_file` parameter for resulting xml - """ - with PySaxonProcessor(license=False) as proc: - logger.debug("Initializing Saxon Processor") - xsltproc = proc.new_xslt30_processor() - xdm_ddgit = proc.make_string_value(tag) - xsltproc.set_parameter("DD_GIT_DESCRIBE", xdm_ddgit) - xsltproc.transform_to_file( - source_file=str(xsd_file), - stylesheet_file=str(xsl_file), - output_file=str(output_file), - ) - - -def build_data_dictionary(repo, tag: str, rebuild=False) -> None: - """Build a single version of the data dictionary given by the tag argument - if the IDS does not already exist. - - In the data-dictionary repository sometimes IDSDef.xml is stored - directly, in which case we do not call make. - - Args: - repo: Repository object containing the DD source code - tag: The DD version tag that will be build - rebuild: If true, overwrites existing pre-build tagged DD version - """ - _build_dir.mkdir(exist_ok=True) - result_xml = _build_dir / f"{tag}.xml" - - if result_xml.exists() and not rebuild: - logger.debug(f"XML for tag '{tag}' already exists, skipping") - return - - repo.git.checkout(tag, force=True) - - # Perform the XSL transformation with saxonche - dd_xsd = Path("data-dictionary/dd_data_dictionary.xml.xsd") - dd_xsl = Path("data-dictionary/dd_data_dictionary.xml.xsl") - _run_xsl_transformation(dd_xsd, dd_xsl, tag.name, result_xml) - - -if __name__ == "__main__": - prepare_data_dictionaries() diff --git a/imas/dd_zip.py b/imas/dd_zip.py index 2d62224a..e4cce369 100644 --- a/imas/dd_zip.py +++ b/imas/dd_zip.py @@ -1,103 +1,27 @@ # This file is part of IMAS-Python. # You should have received the IMAS-Python LICENSE file with this project. -""" Extract DD versions from a zip file. 
+"""Extract DD versions from the imas-data-dictionaries distribution.""" -The zip file contains files as -* `data-dictionary/3.30.0.xml` -* `data-dictionary/3.29.0.xml` - -multiple paths are checked. See `ZIPFILE_LOCATIONS`. -First the environment variable IMAS_DDZIP is checked. -If that exists and points to a file we will attempt to open it. -Then, IDSDef.zip is searched in site-packages, the current folder, -in .config/imas/ (`$$XDG_CONFIG_HOME`) and in -the assets/ folder within the IMAS-Python package. - -1. `$$IMAS_DDZIP` -2. The virtual environment -3. USER_BASE`imas/IDSDef.zip` -4. All `site-packages/imas/IDSDef.zip` -5. `./IDSDef.zip` -6. `~/.config/imas/IDSDef.zip` -7. `__file__/../../imas/assets/IDSDef.zip` - -All files are checked, i.e. if your .config/imas/IDSDef.zip is outdated -the IMAS-Python-packaged version will be used. - -The `assets/IDSDef.zip` provided with the package can be updated -with the `python setup.py build_DD` command, which is also performed on install -if you have access to the ITER data-dictionary git repo. -Reinstalling imas thus also will give you access to the latest DD versions. -""" import logging import os -import re import xml.etree.ElementTree as ET -from contextlib import contextmanager, nullcontext from functools import lru_cache from pathlib import Path -from typing import Dict, Iterator, List, Tuple, Union -from zipfile import ZipFile - -try: - from importlib.resources import as_file, files - - try: - from importlib.resources.abc import Traversable - except ModuleNotFoundError: # Python 3.9/3.10 support - from importlib.abc import Traversable - -except ImportError: # Python 3.8 support - from importlib_resources import as_file, files - from importlib_resources.abc import Traversable -from packaging.version import InvalidVersion, Version +# These methods in imas_data_dictionaries used to be defined here. 
We import them here +# for backwards compatibility: +from imas_data_dictionaries import dd_identifiers # noqa: F401 +from imas_data_dictionaries import get_dd_xml_crc # noqa: F401 +from imas_data_dictionaries import get_identifier_xml # noqa: F401 +from imas_data_dictionaries import dd_xml_versions, get_dd_xml, parse_dd_version +from packaging.version import InvalidVersion import imas -from imas.exception import UnknownDDVersion +from imas.exception import UnknownDDVersion # noqa: F401 logger = logging.getLogger(__name__) -def _get_xdg_config_dir(): - """ - Return the XDG config directory, according to the XDG base directory spec: - - https://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html - """ - return os.environ.get("XDG_CONFIG_HOME") or str(Path.home() / ".config") - - -def _generate_zipfile_locations() -> Iterator[Union[Path, Traversable]]: - """Build a list of potential data dictionary locations. - We start with the path (if any) of the IMAS_DDZIP env var. - Then we look for IDSDef.zip in the current folder, in the - default XDG config dir (~/.config/imas/IDSDef.zip) and - finally in the assets distributed with this package. - """ - zip_name = "IDSDef.zip" - - environ = os.environ.get("IMAS_DDZIP") - if environ: - yield Path(environ).resolve() - - yield Path(zip_name).resolve() - yield Path(_get_xdg_config_dir()).resolve() / "imas" / zip_name - yield files(imas) / "assets" / zip_name - - -def parse_dd_version(version: str) -> Version: - try: - return Version(version) - except InvalidVersion: - # This is probably a dev build of the DD, of which the version is obtained with - # `git describe` in the format X.Y.Z--g with X.Y.Z the previous - # released version: try again after converting the first dash to a + and treat - # it like a `local` version specifier, which is recognized as newer. 
- # https://packaging.python.org/en/latest/specifications/version-specifiers/ - return Version(version.replace("-", "+", 1)) - - # Expected use case is one, maximum two DD versions # Cache is bigger than that: in pytest we currently use the following DD versions: # - 3.22.0 @@ -112,7 +36,6 @@ def parse_dd_version(version: str) -> Version: # - IDS_minimal_struct_array.xml # - IDS_minimal_types.xml _DD_CACHE_SIZE = 8 -ZIPFILE_LOCATIONS = list(_generate_zipfile_locations()) def dd_etree(version=None, xml_path=None): @@ -168,117 +91,6 @@ def _load_etree(version, xml_path): return tree -@contextmanager -def _open_zipfile(path: Union[Path, Traversable]) -> Iterator[ZipFile]: - """Open a zipfile, given a Path or Traversable.""" - if isinstance(path, Path): - ctx = nullcontext(path) - else: - ctx = as_file(path) - with ctx as file: - with ZipFile(file) as zipfile: - yield zipfile - - -@lru_cache -def _read_dd_versions() -> Dict[str, Tuple[Union[Path, Traversable], str]]: - """Traverse all possible DD zip files and return a map of known versions. - - Returns: - version_map: version -> (zipfile path, filename) - """ - versions = {} - xml_re = re.compile(r"^data-dictionary/([0-9.]+)\.xml$") - for path in ZIPFILE_LOCATIONS: - if not path.is_file(): - continue - with _open_zipfile(path) as zipfile: - for fname in zipfile.namelist(): - match = xml_re.match(fname) - if match: - version = match.group(1) - if version not in versions: - versions[version] = (path, fname) - if not versions: - raise RuntimeError( - "Could not find any data dictionary definitions. " - f"Looked in: {', '.join(map(repr, ZIPFILE_LOCATIONS))}." - ) - return versions - - -@lru_cache -def _read_identifiers() -> Dict[str, Tuple[Union[Path, Traversable], str]]: - """Traverse all possible DD zip files and return a map of known identifiers. 
- - Returns: - identifier_map: identifier -> (zipfile path, filename) - """ - identifiers = {} - xml_re = re.compile(r"^identifiers/\w+/(\w+_identifier).xml$") - for path in ZIPFILE_LOCATIONS: - if not path.is_file(): - continue - with _open_zipfile(path) as zipfile: - for fname in zipfile.namelist(): - match = xml_re.match(fname) - if match: - identifier_name = match.group(1) - if identifier_name not in identifiers: - identifiers[identifier_name] = (path, fname) - return identifiers - - -@lru_cache -def dd_xml_versions() -> List[str]: - """Parse IDSDef.zip to find version numbers available""" - - def sort_key(version): - try: - return parse_dd_version(version) - except InvalidVersion: - # Don't fail when a malformatted version is present in the DD zip - logger.error( - f"Could not convert DD XML version {version} to a Version.", exc_info=1 - ) - return Version(0) - - return sorted(_read_dd_versions(), key=sort_key) - - -@lru_cache -def dd_identifiers() -> List[str]: - """Parse IDSDef.zip to find available identifiers""" - - return sorted(_read_identifiers()) - - -def get_dd_xml(version): - """Read XML file for the given data dictionary version.""" - dd_versions = dd_xml_versions() - if version not in dd_versions: - raise UnknownDDVersion(version, dd_versions) - path, fname = _read_dd_versions()[version] - with _open_zipfile(path) as zipfile: - return zipfile.read(fname) - - -def get_dd_xml_crc(version): - """Given a version string, return its CRC checksum""" - # Note, by this time get_dd_xml is already called, so we don't need to check if the - # version is known - path, fname = _read_dd_versions()[version] - with _open_zipfile(path) as zipfile: - return zipfile.getinfo(fname).CRC - - -def get_identifier_xml(identifier_name): - """Get identifier XML for the given identifier name""" - path, fname = _read_identifiers()[identifier_name] - with _open_zipfile(path) as zipfile: - return zipfile.read(fname) - - def print_supported_version_warning(version): try: if 
parse_dd_version(version) < imas.OLDEST_SUPPORTED_VERSION: diff --git a/imas/exception.py b/imas/exception.py index 513c2caa..737680c2 100644 --- a/imas/exception.py +++ b/imas/exception.py @@ -1,11 +1,14 @@ # This file is part of IMAS-Python. # You should have received the IMAS-Python LICENSE file with this project. -"""Exception classes used in IMAS-Python. -""" +"""Exception classes used in IMAS-Python.""" import difflib import logging -from typing import TYPE_CHECKING, List +from typing import TYPE_CHECKING + +# This exception from imas_data_dictionaries used to be defined here. We import it here +# for backwards compatibility: +from imas_data_dictionaries import UnknownDDVersion # noqa: F401 from imas.backends.imas_core import imas_interface as _imas_interface @@ -23,20 +26,6 @@ ALException = None -class UnknownDDVersion(ValueError): - """Error raised when an unknown DD version is specified.""" - - def __init__(self, version: str, available: List[str], note: str = "") -> None: - close_matches = difflib.get_close_matches(version, available, n=1) - if close_matches: - suggestions = f"Did you mean {close_matches[0]!r}?" - else: - suggestions = f"Available versions are {', '.join(reversed(available))}" - super().__init__( - f"Data dictionary version {version!r} cannot be found. 
{suggestions}{note}" - ) - - class IDSNameError(ValueError): """Error raised by DBEntry.get(_slice) when providing an invalid IDS name.""" diff --git a/imas/test/test_dd_helpers.py b/imas/test/test_dd_helpers.py deleted file mode 100644 index 07d1d2b0..00000000 --- a/imas/test/test_dd_helpers.py +++ /dev/null @@ -1,53 +0,0 @@ -from pathlib import Path -import shutil -import pytest -import os -import zipfile - -from imas.dd_helpers import prepare_data_dictionaries, _idsdef_zip_relpath, _build_dir - -_idsdef_unzipped_relpath = Path("idsdef_unzipped") - - -@pytest.mark.skip(reason="skipping IDSDef.zip generation") -def test_prepare_data_dictionaries(): - prepare_data_dictionaries() - assert os.path.exists( - _idsdef_zip_relpath - ), f"IDSDef.zip file does not exist at path: {_idsdef_zip_relpath}" - - expected_xml_files = [ - _build_dir / "3.40.0.xml", - _build_dir / "3.41.0.xml", - _build_dir / "3.42.0.xml", - _build_dir / "4.0.0.xml", - ] - - for xml_file in expected_xml_files: - assert os.path.exists(xml_file), f"{xml_file} does not exist" - - with zipfile.ZipFile(_idsdef_zip_relpath, "r") as zip_ref: - zip_ref.extractall(_idsdef_unzipped_relpath) - - expected_ids_directories = [ - _idsdef_unzipped_relpath / "data-dictionary" / "3.40.0.xml", - _idsdef_unzipped_relpath / "data-dictionary" / "3.41.0.xml", - _idsdef_unzipped_relpath / "data-dictionary" / "3.42.0.xml", - _idsdef_unzipped_relpath / "data-dictionary" / "4.0.0.xml", - _idsdef_unzipped_relpath - / "identifiers" - / "core_sources" - / "core_source_identifier.xml", - _idsdef_unzipped_relpath - / "identifiers" - / "equilibrium" - / "equilibrium_profiles_2d_identifier.xml", - ] - - for file_path in expected_ids_directories: - assert os.path.exists( - file_path - ), f"Expected_ids_directories {file_path} does not exist" - - if _build_dir.exists(): - shutil.rmtree(_idsdef_unzipped_relpath) diff --git a/pyproject.toml b/pyproject.toml index db5111c4..6134366c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ 
-64,7 +64,8 @@ dependencies = [ "packaging", "xxhash >= 2", "saxonche", - "gitpython" + "gitpython", + "imas_data_dictionaries", ] [project.optional-dependencies] @@ -108,7 +109,6 @@ test = [ ] [project.scripts] -build_DD = "imas.dd_helpers:prepare_data_dictionaries" imas = "imas.command.cli:cli" [project.urls] diff --git a/setup.py b/setup.py deleted file mode 100644 index 486b56d6..00000000 --- a/setup.py +++ /dev/null @@ -1,140 +0,0 @@ -# pylint: disable=wrong-import-position -# This file is part of IMAS-Python. -# You should have received the IMAS-Python LICENSE file with this project. -""" -Packaging settings. Inspired by a minimal setup.py file, the Pandas cython build -and the access-layer setup template. - -The installable IMAS-Python package tries to follow in the following order: -- The style guide for Python code [PEP8](https://www.python.org/dev/peps/pep-0008/) -- The [PyPA guide on packaging projects]( - https://packaging.python.org/guides/distributing-packages-using-setuptools/#distributing-packages) -- The [PyPA tool recommendations]( - https://packaging.python.org/guides/tool-recommendations/), specifically: - * Installing: [pip](https://pip.pypa.io/en/stable/) - * Environment management: [venv](https://docs.python.org/3/library/venv.html) - * Dependency management: [pip-tools](https://github.com/jazzband/pip-tools) - * Packaging source distributions: [setuptools](https://setuptools.readthedocs.io/) - * Packaging built distributions: [wheels](https://pythonwheels.com/) - -On the ITER cluster we handle the environment by using the `IMAS` module load. -So instead, we install packages to the `USER_SITE` there, and do not use -`pip`s `build-isolation`. 
See [IMAS-584](https://jira.iter.org/browse/IMAS-584) -""" -import importlib -import importlib.util -import site -import traceback -# Allow importing local files, see https://snarky.ca/what-the-heck-is-pyproject-toml/ -import sys -import warnings -# Import other stdlib packages -from pathlib import Path - -# Use setuptools to build packages. Advised to import setuptools before distutils -import setuptools -from packaging.version import Version as V -from setuptools import __version__ as setuptools_version -from setuptools import setup -from setuptools.command.build_ext import build_ext -from setuptools.command.build_py import build_py -from setuptools.command.sdist import sdist - -try: - from wheel.bdist_wheel import bdist_wheel -except ImportError: - bdist_wheel = None - -# Ensure the current folder is on the import path: -sys.path.append(str(Path(__file__).parent.resolve())) - -cannonical_python_command = "module load Python/3.8.6-GCCcore-10.2.0" - -if sys.version_info < (3, 7): - sys.exit( - "Sorry, Python < 3.7 is not supported. Use a different" - f" python e.g. '{cannonical_python_command}'" - ) -if sys.version_info < (3, 8): - warnings.warn("Python < 3.8 support on best-effort basis", FutureWarning) - - -# Check setuptools version before continuing for legacy builds -# Version 61 is required for pyproject.toml support -if V(setuptools_version) < V("61"): - raise RuntimeError( - "Setuptools version outdated. 
Found" - f" {V(setuptools_version)} need at least {V('61')}" - ) - -# Workaround for https://github.com/pypa/pip/issues/7953 -# Cannot install into user site directory with editable source -site.ENABLE_USER_SITE = "--user" in sys.argv[1:] - - -# We need to know where we are for many things -this_file = Path(__file__) -this_dir = this_file.parent.resolve() - -# Start: Load dd_helpers -dd_helpers_file = this_dir / "imas/dd_helpers.py" -assert dd_helpers_file.is_file() -spec = importlib.util.spec_from_file_location("dd_helpers", dd_helpers_file) -module = importlib.util.module_from_spec(spec) -spec.loader.exec_module(module) -sys.modules["imas.dd_helpers"] = module -from imas.dd_helpers import prepare_data_dictionaries # noqa - -# End: Load dd_helpers - - -# Define building of the Data Dictionary as custom build step -class BuildDDCommand(setuptools.Command): - """A custom command to build the data dictionaries.""" - - description = "build IDSDef.zip" - user_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - """Prepare DDs if they can be git pulled""" - prepare_data_dictionaries() - - -# Inject prepare_data_dictionaries() into the setuptool's build steps. So far it covers -# all installation cases: -# - `pip install -e .`` (from git clone) -# - `python -m build`` -# - Source tarball from git-archive. 
Note: version only picked up when doing git-archive -# from a tagged release, -# `git archive HEAD -v -o imas.tar.gz && pip install imas.tar.gz` -cmd_class = {} -build_overrides = {"build_ext": build_ext, "build_py": build_py, "sdist": sdist} -if bdist_wheel: - build_overrides["bdist_wheel"] = bdist_wheel -for name, cls in build_overrides.items(): - - class build_DD_before(cls): - """Build DD before executing original distutils command""" - - def run(self): - try: - prepare_data_dictionaries() - except Exception: - traceback.print_exc() - print("Failed to build DD during setup, continuing without.") - super().run() - - cmd_class[name] = build_DD_before - - -if __name__ == "__main__": - setup( - zip_safe=False, # https://mypy.readthedocs.io/en/latest/installed_packages.html - cmdclass={"build_DD": BuildDDCommand, **cmd_class} - ) \ No newline at end of file From da8b1a6d1eab217183ff44c3a0e1536c66e011a1 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Fri, 6 Jun 2025 17:14:17 +0200 Subject: [PATCH 21/74] Update documentation --- docs/source/courses/advanced/dd_versions.rst | 8 ++++---- docs/source/imas_architecture.rst | 4 ---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/docs/source/courses/advanced/dd_versions.rst b/docs/source/courses/advanced/dd_versions.rst index 3f7f19fa..ab87097e 100644 --- a/docs/source/courses/advanced/dd_versions.rst +++ b/docs/source/courses/advanced/dd_versions.rst @@ -60,7 +60,7 @@ Bundled Data Dictionary definitions IMAS-Python comes bundled [#DDdefs]_ with many versions of the Data Dictionary definitions. You can find out which versions are available by calling -:py:meth:`imas.dd_zip.dd_xml_versions`. +``imas.dd_zip.dd_xml_versions``. Converting an IDS between Data Dictionary versions @@ -290,6 +290,6 @@ build, you can use them like you normally would. .. rubric:: Footnotes -.. [#DDdefs] To be more precise, the Data Dictionary definitions are generated when the - IMAS-Python package is created. 
See :ref:`this reference
` for more - details. +.. [#DDdefs] To be more precise, the Data Dictionary definitions are provided by the + `IMAS Data Dictionaries `__ + package. diff --git a/docs/source/imas_architecture.rst b/docs/source/imas_architecture.rst index b1764bed..182d2a0c 100644 --- a/docs/source/imas_architecture.rst +++ b/docs/source/imas_architecture.rst @@ -72,11 +72,7 @@ Data Dictionary building and loading The following submodules are responsible for building the Data Dictionary and loading DD definitions at runtime. -- :py:mod:`imas.dd_helpers` handles building the ``IDSDef.zip`` file, containing all - versions of the Data Dictionary since ``3.22.0``. - - :py:mod:`imas.dd_zip` handles loading the Data Dictionary definitions at run time. - These definitions can be loaded from an ``IDSDef.zip`` or from a custom XML file. .. _imas_architecture/IDS_nodes: From fbbedd3cc6e15c8785a0f5ca5a1f363df1971c8c Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 11 Jun 2025 09:25:33 +0200 Subject: [PATCH 22/74] Make saxonche an optional dependency --- imas/backends/imas_core/mdsplus_model.py | 12 ++++++++++-- pyproject.toml | 11 ++++++----- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/imas/backends/imas_core/mdsplus_model.py b/imas/backends/imas_core/mdsplus_model.py index 48864346..3c91cefb 100644 --- a/imas/backends/imas_core/mdsplus_model.py +++ b/imas/backends/imas_core/mdsplus_model.py @@ -12,7 +12,6 @@ import time import uuid from pathlib import Path -from saxonche import PySaxonProcessor from subprocess import CalledProcessError, check_output from zlib import crc32 @@ -244,11 +243,20 @@ def transform_with_xslt(xslt_processor, source, xslfile, output_file): def create_model_ids_xml(cache_dir_path, fname, version): """Use Saxon/C to compile an ids.xml suitable for creating an MDSplus model.""" + try: + import saxonche + except ImportError: + raise RuntimeError( + "Building mdsplus models requires the 'saxonche' python package. 
" + "Please install this package (for example with 'pip install saxonche') " + "and try again." + ) + try: with as_file(files("imas") / "assets" / "IDSDef2MDSpreTree.xsl") as xslfile: output_file = Path(cache_dir_path) / "ids.xml" - with PySaxonProcessor(license=False) as proc: + with saxonche.PySaxonProcessor(license=False) as proc: xslt_processor = proc.new_xslt30_processor() xdm_ddgit = proc.make_string_value(str(version) or fname) xslt_processor.set_parameter("DD_GIT_DESCRIBE", xdm_ddgit) diff --git a/pyproject.toml b/pyproject.toml index 6134366c..066e0ea9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [build-system] # Minimum requirements for the build system to execute. # Keep this on a single line for the grep magic of build scripts to work -requires = ["setuptools>=61", "wheel", "numpy", "gitpython", "saxonche","packaging", "tomli;python_version<'3.11'", "setuptools_scm>8"] +requires = ["setuptools>=61", "wheel", "numpy", "packaging", "tomli;python_version<'3.11'", "setuptools_scm>8"] build-backend = "setuptools.build_meta" @@ -63,8 +63,6 @@ dependencies = [ "importlib_resources;python_version<'3.9'", "packaging", "xxhash >= 2", - "saxonche", - "gitpython", "imas_data_dictionaries", ] @@ -92,6 +90,9 @@ h5py = [ xarray = [ "xarray", ] +saxonche = [ + "saxonche", +] test = [ "pytest>=5.4.1", "pytest-cov>=0.6", @@ -101,11 +102,11 @@ test = [ "asv == 0.6.1", # virtualenv is a dependency of asv "virtualenv", - # Pint and xarray are used in training snippets + # Pint is used in training snippets "pint", # Optional dependencies # TODO add imas-core when it is available on pypi - "imas-python[netcdf,h5py,xarray]", + "imas-python[netcdf,h5py,xarray,saxonche]", ] [project.scripts] From 0eed64b836c51d32467b97764cd857275dde5bd9 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 11 Jun 2025 10:06:03 +0200 Subject: [PATCH 23/74] Remove Bamboo CI scripts --- ci/build_dd_zip.sh | 30 --------------- ci/build_docs_and_dist.sh | 45 
---------------------- ci/linting.sh | 35 ----------------- ci/run_benchmark.sh | 79 --------------------------------------- ci/run_pytest.sh | 46 ----------------------- 5 files changed, 235 deletions(-) delete mode 100755 ci/build_dd_zip.sh delete mode 100755 ci/build_docs_and_dist.sh delete mode 100755 ci/linting.sh delete mode 100755 ci/run_benchmark.sh delete mode 100755 ci/run_pytest.sh diff --git a/ci/build_dd_zip.sh b/ci/build_dd_zip.sh deleted file mode 100755 index 1b95bc4b..00000000 --- a/ci/build_dd_zip.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash -# Bamboo CI script to build IDSDef.zip -# Note: this script should be run from the root of the git repository - -# Debuggging: -if [[ "$(uname -n)" == *"bamboo"* ]]; then - set -e -o pipefail -fi -echo "Loading modules..." - -# Set up environment such that module files can be loaded -source /etc/profile.d/modules.sh -module purge -# Modules are supplied as arguments in the CI job: -if [ -z "$@" ]; then - module load Python -else - module load $@ -fi - -# Debuggging: -echo "Done loading modules" - -# Build the DD zip -rm -rf venv # Environment should be clean, but remove directory to be sure -python -m venv venv -source venv/bin/activate -pip install gitpython saxonche packaging -python imas/dd_helpers.py -deactivate diff --git a/ci/build_docs_and_dist.sh b/ci/build_docs_and_dist.sh deleted file mode 100755 index f0084b8a..00000000 --- a/ci/build_docs_and_dist.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -# Bamboo CI script to install imas Python module and run all tests -# Note: this script should be run from the root of the git repository - -# Debuggging: -if [[ "$(uname -n)" == *"bamboo"* ]]; then - set -e -o pipefail -fi -echo "Loading modules:" $@ - -# Set up environment such that module files can be loaded -source /etc/profile.d/modules.sh -module purge -# Modules are supplied as arguments in the CI job: -module load $@ - -# Debuggging: -echo "Done loading modules" - -# Set up the testing venv -rm 
-rf venv # Environment should be clean, but remove directory to be sure -python -m venv venv -source venv/bin/activate - -# Create sdist and wheel -pip install --upgrade pip setuptools wheel build -rm -rf dist -python -m build . - -# Install imas Python module and documentation dependencies from the just-built wheel -pip install "`readlink -f dist/*.whl`[docs,netcdf]" - -# Debugging: -pip freeze - -# Enable sphinx options: -# - `-W`: turn warnings into errors -# - `-n`: nit-picky mode, warn about all missing references -# - `--keep-going`: with -W, keep going when getting warnings -export SPHINXOPTS='-W -n --keep-going' - -# Run sphinx to create the documentation -make -C docs clean html - -deactivate diff --git a/ci/linting.sh b/ci/linting.sh deleted file mode 100755 index d9164777..00000000 --- a/ci/linting.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash -# Bamboo CI script for linting -# Note: this script should be run from the root of the git repository - -# Debuggging: -if [[ "$(uname -n)" == *"bamboo"* ]]; then - set -e -o pipefail -fi -echo "Loading modules..." - -# Set up environment such that module files can be loaded -source /etc/profile.d/modules.sh -module purge -# Modules are supplied as arguments in the CI job: -if [ -z "$@" ]; then - module load Python -else - module load $@ -fi - -# Debuggging: -echo "Done loading modules" - -# Create a venv -rm -rf venv -python -m venv venv -. 
venv/bin/activate - -# Install and run linters -pip install --upgrade 'black >=24,<25' flake8 - -black --check imas -flake8 imas - -deactivate \ No newline at end of file diff --git a/ci/run_benchmark.sh b/ci/run_benchmark.sh deleted file mode 100755 index ae24ce2d..00000000 --- a/ci/run_benchmark.sh +++ /dev/null @@ -1,79 +0,0 @@ -#!/bin/bash -# Bamboo CI script to install imas Python module and run all tests -# Note: this script should be run from the root of the git repository - -# Debuggging: - -echo "Loading modules:" $@ -BENCHMARKS_DIR=$(realpath "$PWD/imas_benchmarks") -if [[ "$(uname -n)" == *"bamboo"* ]]; then - set -e -o pipefail - # create - BENCHMARKS_DIR=$(realpath "/mnt/bamboo_deploy/imas/benchmarks/") -fi - -# Set up environment such that module files can be loaded -source /etc/profile.d/modules.sh -module purge -# Modules are supplied as arguments in the CI job: -# IMAS-AL-Python/5.2.1-intel-2023b-DD-3.41.0 Saxon-HE/12.4-Java-21 -if [ -z "$@" ]; then - module load IMAS-AL-Core -else - module load $@ -fi - - - -# Debuggging: -echo "Done loading modules" - -# Export current PYTHONPATH so ASV benchmarks can import imas -export ASV_PYTHONPATH="$PYTHONPATH" - -# Set up the testing venv -rm -rf venv # Environment should be clean, but remove directory to be sure -python -m venv venv -source venv/bin/activate - -# Install asv and imas -pip install --upgrade pip setuptools wheel -pip install virtualenv .[test] - -# Generate MDS+ models cache -python -c 'import imas.backends.imas_core.mdsplus_model; print(imas.backends.imas_core.mdsplus_model.mdsplus_model_dir(imas.IDSFactory()))' - -# Copy previous results (if any) -mkdir -p "$BENCHMARKS_DIR/results" -mkdir -p .asv -cp -rf "$BENCHMARKS_DIR/results" .asv/ - -# Ensure numpy won't do multi-threading -export OPENBLAS_NUM_THREADS=1 -export MKL_NUM_THREADS=1 -export OMP_NUM_THREADS=1 - -# Ensure there is a machine configuration -asv machine --yes - -# Run ASV for the current commit, develop and main -asv run 
--skip-existing-successful HEAD^! -asv run --skip-existing-successful develop^! -asv run --skip-existing-successful main^! - -# Compare results -if [ `git rev-parse --abbrev-ref HEAD` == develop ] -then - asv compare main develop --machine $(hostname) || echo "asv compare failed" -else - asv compare develop HEAD --machine $(hostname) || echo "asv compare failed" -fi - -# Publish results -asv publish - -# And persistently store them -cp -rf .asv/{results,html} "$BENCHMARKS_DIR" - - - diff --git a/ci/run_pytest.sh b/ci/run_pytest.sh deleted file mode 100755 index 4af184dc..00000000 --- a/ci/run_pytest.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash -# Bamboo CI script to install imas Python module and run all tests -# Note: this script should be run from the root of the git repository - -# Debuggging: -if [[ "$(uname -n)" == *"bamboo"* ]]; then - set -e -o pipefail -fi -echo "Loading modules:" $@ - -# Set up environment such that module files can be loaded -source /etc/profile.d/modules.sh -module purge -# Modules are supplied as arguments in the CI job: -if [ -z "$@" ]; then - module load IMAS-AL-Core Java MDSplus -else - module load $@ -fi - -# Debuggging: -echo "Done loading modules" - -# Set up the testing venv -rm -rf venv # Environment should be clean, but remove directory to be sure -python -m venv venv -source venv/bin/activate - -# Install imas and test dependencies -pip install --upgrade pip setuptools wheel -pip install .[h5py,netcdf,test] - -# Debugging: -pip freeze - -# Run pytest -# Clean artifacts created by pytest -rm -f junit.xml -rm -rf htmlcov - -# setups local directory to not to full /tmp directory with pytest temporary files -# mkdir -p ~/tmp -# export PYTEST_DEBUG_TEMPROOT=~/tmp -python -m pytest -n=auto --cov=imas --cov-report=term-missing --cov-report=html --junit-xml=junit.xml - - From 19922182c5de24dc3855a6d8b1f7dde06c5edd56 Mon Sep 17 00:00:00 2001 From: Olivier Hoenen Date: Wed, 18 Jun 2025 12:16:41 +0200 Subject: [PATCH 24/74] Changelog 
for release 2.0.1 --- docs/source/changelog.rst | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index ae995b0e..11841840 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -3,6 +3,22 @@ Changelog ========= +What's new in IMAS-Python 2.0.1 +------------------------------- + +Improvements +'''''''''''' + +- improve DD3-->DD4 conversion (sign conversion to dodpsi_like) +- improve conversion of pulse_schedule IDS >= 3.39.0 +- allow using nzcarr for storing netCDF format +- numpy 2 compatibility +- improve UDA data fetch +- improve documentation +- new dependency on `imas-data-dictionaries package `__ (remove internal build via saxonche, except for the optional MDSplus models) +- full compatibility of tests with netCDF<1.7 (no complex numbers) + + What's new in IMAS-Python 2.0.0 ------------------------------- From 60e35d404de8665d7c983888a6e028089250835a Mon Sep 17 00:00:00 2001 From: Olivier Hoenen Date: Wed, 18 Jun 2025 13:39:00 +0200 Subject: [PATCH 25/74] Fixup release notes and install doc --- docs/source/changelog.rst | 1 - docs/source/installing.rst | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 11841840..f99e24d2 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -11,7 +11,6 @@ Improvements - improve DD3-->DD4 conversion (sign conversion to dodpsi_like) - improve conversion of pulse_schedule IDS >= 3.39.0 -- allow using nzcarr for storing netCDF format - numpy 2 compatibility - improve UDA data fetch - improve documentation diff --git a/docs/source/installing.rst b/docs/source/installing.rst index a843ff6f..800a42ff 100644 --- a/docs/source/installing.rst +++ b/docs/source/installing.rst @@ -28,6 +28,7 @@ List of optional dependencies - ``h5py``: enables ``analyze-db`` CLI option - ``docs``: installs required packages to build the Sphinx documentation - ``test``: 
installs required packages to run the tests with ``pytest`` and ``asv`` +- ``saxonche``: installs saxonche to enable creation of MDSplus models for the selected versions of the IMAS Data Dictionary (only relevant when working with ``imas_core``) .. note:: From 40de236844dd9bc9b4655da8f7104e613ce96641 Mon Sep 17 00:00:00 2001 From: Olivier Hoenen Date: Wed, 18 Jun 2025 13:40:06 +0200 Subject: [PATCH 26/74] Fix typo in readme Co-authored-by: Maarten Sebregts <110895564+maarten-ic@users.noreply.github.com> --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d0ded727..42d8d486 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ More details are described in the documentation generated from `/docs/source/ins The documentation is autogenerated from the source using [Sphinx](http://sphinx-doc.org/) and can be found at the [readthedocs](https://imas-python.readthedocs.io/en/latest/) -To generated the documentation yourself, install the ``docs`` optional dependencies and do: +To generate the documentation yourself, install the ``docs`` optional dependencies and do: ```bash make -C docs html ``` From e4d5a91495409274c191618ce53496aaf7fbe2ff Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 25 Jun 2025 13:09:11 +0200 Subject: [PATCH 27/74] Add example for em_coupling conversion Fixes #22. --- docs/source/examples.rst | 14 ++ .../examples/custom_conversion_em_coupling.py | 168 ++++++++++++++++++ .../custom_conversion_em_coupling.rst | 13 ++ docs/source/index.rst | 1 + 4 files changed, 196 insertions(+) create mode 100644 docs/source/examples.rst create mode 100644 docs/source/examples/custom_conversion_em_coupling.py create mode 100644 docs/source/examples/custom_conversion_em_coupling.rst diff --git a/docs/source/examples.rst b/docs/source/examples.rst new file mode 100644 index 00000000..41fa0388 --- /dev/null +++ b/docs/source/examples.rst @@ -0,0 +1,14 @@ +.. 
_`IMAS-Python Examples`: + +IMAS-Python Examples +==================== + +Most IMAS-Python usage examples can be found throughout the documentation pages. On this +page we collect some examples that are too big or too generic to include in specific +pages. Currently this is a short list, but we expect that it will grow over time. + +.. toctree:: + :caption: IMAS-Python examples + :maxdepth: 1 + + examples/custom_conversion_em_coupling diff --git a/docs/source/examples/custom_conversion_em_coupling.py b/docs/source/examples/custom_conversion_em_coupling.py new file mode 100644 index 00000000..5b5b8628 --- /dev/null +++ b/docs/source/examples/custom_conversion_em_coupling.py @@ -0,0 +1,168 @@ +"""IMAS-Python example for custom conversion logic. + +This example script loads a Data Entry (in Data Dictionary 3.38.1) created by +DINA and converts the em_coupling IDS to DD 4.0.0. +""" + +import imas +from imas.ids_defs import IDS_TIME_MODE_INDEPENDENT + +input_uri = "imas:hdf5?path=/work/imas/shared/imasdb/ITER_SCENARIOS/3/105013/1" +# An error is reported when there's already data at the output_uri! 
+output_uri = "imas:hdf5?path=105013-1-converted" +target_dd_version = "4.0.0" + + +# Mapping of DD 3.38.1 em_coupling data to DD 4.0.0 +# Map the name of the matrix in DD 3.38.1 to the identifier and coordinate URIs +COUPLING_MAPS = { + "field_probes_active": dict( + coupling_quantity=2, + rows_uri="#magnetics/b_field_pol_probe", + columns_uri="#pf_active/coil", + ), + "field_probes_grid": dict( + coupling_quantity=2, + rows_uri="#magnetics/b_field_pol_probe", + columns_uri="#pf_plasma/element", + ), + "field_probes_passive": dict( + coupling_quantity=2, + rows_uri="#magnetics/b_field_pol_probe", + columns_uri="#pf_passive/loop", + ), + "mutual_active_active": dict( + coupling_quantity=1, + rows_uri="#pf_active/coil", + columns_uri="#pf_active/coil", + ), + "mutual_grid_active": dict( + coupling_quantity=1, + rows_uri="#pf_plasma/element", + columns_uri="#pf_active/coil", + ), + "mutual_grid_grid": dict( + coupling_quantity=1, + rows_uri="#pf_plasma/element", + columns_uri="#pf_plasma/element", + ), + "mutual_grid_passive": dict( + coupling_quantity=1, + rows_uri="#pf_plasma/element", + columns_uri="#pf_passive/loop", + ), + "mutual_loops_active": dict( + coupling_quantity=1, + rows_uri="#magnetics/flux_loop", + columns_uri="#pf_active/coil", + ), + "mutual_loops_passive": dict( + coupling_quantity=1, + rows_uri="#magnetics/flux_loop", + columns_uri="#pf_passive/loop", + ), + "mutual_loops_grid": dict( + coupling_quantity=1, + rows_uri="#magnetics/flux_loop", + columns_uri="#pf_plasma/element", + ), + "mutual_passive_active": dict( + coupling_quantity=1, + rows_uri="#pf_passive/loop", + columns_uri="#pf_active/coil", + ), + "mutual_passive_passive": dict( + coupling_quantity=1, + rows_uri="#pf_passive/loop", + columns_uri="#pf_passive/loop", + ), +} + + +with ( + imas.DBEntry(input_uri, "r") as entry, + imas.DBEntry(output_uri, "x", dd_version=target_dd_version) as out, +): + print("Loaded IMAS Data Entry:", input_uri) + + print("This data entry contains the 
following IDSs:") + filled_idss = [] + for idsname in entry.factory.ids_names(): + occurrences = entry.list_all_occurrences(idsname) + if occurrences: + filled_idss.append(idsname) + print(f"- {idsname}, occurrences: {occurrences}") + print("") + + # Load and convert all IDSs (except em_coupling) with imas.convert_ids() + # N.B. we know that the input URI doesn't have multiple occurrences, so + # we do not need to worry about them: + for idsname in filled_idss: + if idsname == "em_coupling": + continue + + print(f"Loading IDS: {idsname}...") + ids = entry.get(idsname, autoconvert=False) + print(f"Converting IDS {idsname} to DD {target_dd_version}...") + ids4 = imas.convert_ids( + ids, + target_dd_version, + provenance_origin_uri=input_uri, + ) + print(f"Storing IDS {idsname} to output data entry...") + out.put(ids4) + + print("Conversion for em_coupling:") + emc = entry.get("em_coupling", autoconvert=False) + print("Using standard convert, this may log warnings about discarding data") + emc4 = imas.convert_ids( + emc, + target_dd_version, + provenance_origin_uri=input_uri, + ) + + print("Starting custom conversion of the coupling matrices") + for matrix_name, mapping in COUPLING_MAPS.items(): + # Skip empty matrices + if not emc[matrix_name].has_value: + continue + + # Allocate a new coupling_matrix AoS element + emc4.coupling_matrix.resize(len(emc4.coupling_matrix) + 1, keep=True) + # And fill it + + emc4.coupling_matrix[-1].name = matrix_name + # Assigning an integer to the identifier will automatically fill the + # index/name/description. See documentation: + # https://imas-python.readthedocs.io/en/latest/identifiers.html + emc4.coupling_matrix[-1].quantity = mapping["coupling_quantity"] + emc4.coupling_matrix[-1].rows_uri = [mapping["rows_uri"]] + emc4.coupling_matrix[-1].columns_uri = [mapping["columns_uri"]] + emc4.coupling_matrix[-1].data = emc[matrix_name].value + # N.B. 
the original data has no error_upper/error_lower so we skip these + # Store em_coupling IDS + out.put(emc4) + + print("Generating pf_plasma IDS...") + # N.B. This logic is specific to DINA + # Create a new pf_plasma IDS and set basic properties + pf_plasma = out.factory.pf_plasma() + pf_plasma.ids_properties.homogeneous_time = IDS_TIME_MODE_INDEPENDENT + pf_plasma.ids_properties.comment = "PF Plasma generated from equilibrium" + + equilibrium = entry.get("equilibrium", lazy=True, autoconvert=False) + r = equilibrium.time_slice[0].profiles_2d[0].grid.dim1 + z = equilibrium.time_slice[0].profiles_2d[0].grid.dim2 + nr, nz = len(r), len(z) + # Generate a pf_plasma element for each grid point: + pf_plasma.element.resize(nr * nz) + for ir, rval in enumerate(r): + for iz, zval in enumerate(z): + element = pf_plasma.element[ir * nr + iz] + element.geometry.geometry_type = 2 # rectangle + element.geometry.rectangle.r = rval + element.geometry.rectangle.z = zval + # Store pf_plasma IDS + out.put(pf_plasma) + +print("Conversion finished") diff --git a/docs/source/examples/custom_conversion_em_coupling.rst b/docs/source/examples/custom_conversion_em_coupling.rst new file mode 100644 index 00000000..96f6ec53 --- /dev/null +++ b/docs/source/examples/custom_conversion_em_coupling.rst @@ -0,0 +1,13 @@ +Custom conversion of the ``em_coupling`` IDS +============================================ + +The ``em_coupling`` IDS has had a big change between Data Dictionary 3.x and Data +Dictionary 4.x. These changes are not covered by the automatic conversions of +:py:meth:`imas.convert_ids ` because these are too +code-specific. + +Instead we show on this page an example to convert a DINA dataset from DD 3.38.1 to DD +4.0.0, which can be used as a starting point for converting output data from other codes +as well. + +.. 
literalinclude:: custom_conversion_em_coupling.py diff --git a/docs/source/index.rst b/docs/source/index.rst index 7aa06277..8388f5b5 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -51,6 +51,7 @@ Manual cli netcdf changelog + examples .. toctree:: :caption: IMAS-Python training courses From 7b01446bc9aed7d3ed9a5eabe85c87608bd8ea34 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 2 Jul 2025 17:03:14 +0200 Subject: [PATCH 28/74] Add DD 3 to 4 conversion logic To migrate deprecated fields. See #55 for more details. --- imas/ids_convert.py | 67 +++++++++++++++++++++++------------ imas/test/test_ids_convert.py | 44 +++++++++++++++++++++++ 2 files changed, 89 insertions(+), 22 deletions(-) diff --git a/imas/ids_convert.py b/imas/ids_convert.py index c4e752e0..f244a9dd 100644 --- a/imas/ids_convert.py +++ b/imas/ids_convert.py @@ -15,7 +15,7 @@ from scipy.interpolate import interp1d import imas -from imas.dd_zip import parse_dd_version +from imas.dd_zip import parse_dd_version, dd_etree from imas.ids_base import IDSBase from imas.ids_data_type import IDSDataType from imas.ids_defs import IDS_TIME_MODE_HETEROGENEOUS @@ -332,27 +332,50 @@ def add_rename(old_path: str, new_path: str): new_version = parse_dd_version(new_version_node.text) # Additional conversion rules for DDv3 to DDv4 if self.version_old.major == 3 and new_version and new_version.major == 4: - # Postprocessing for COCOS definition change: - for psi_like in ["psi_like", "dodpsi_like"]: - xpath_query = f".//field[@cocos_label_transformation='{psi_like}']" - for old_item in old.iterfind(xpath_query): - old_path = old_item.get("path") - new_path = self.old_to_new.path.get(old_path, old_path) - self.new_to_old.post_process[new_path] = _cocos_change - self.old_to_new.post_process[old_path] = _cocos_change - # Definition change for pf_active circuit/connections - if self.ids_name == "pf_active": - path = "circuit/connections" - self.new_to_old.post_process[path] = 
_circuit_connections_4to3 - self.old_to_new.post_process[path] = _circuit_connections_3to4 - # Migrate ids_properties/source to ids_properties/provenance - # Only implement forward conversion (DD3 -> 4): - # - Pretend that this is a rename from ids_properties/source -> provenance - # - And register type_change handler which will be called with the source - # element and the new provenance structure - path = "ids_properties/source" - self.old_to_new.path[path] = "ids_properties/provenance" - self.old_to_new.type_change[path] = _ids_properties_source + self._apply_3to4_conversion(old, new) + + def _apply_3to4_conversion(self, old: Element, new: Element) -> None: + # Postprocessing for COCOS definition change: + for psi_like in ["psi_like", "dodpsi_like"]: + xpath_query = f".//field[@cocos_label_transformation='{psi_like}']" + for old_item in old.iterfind(xpath_query): + old_path = old_item.get("path") + new_path = self.old_to_new.path.get(old_path, old_path) + self.new_to_old.post_process[new_path] = _cocos_change + self.old_to_new.post_process[old_path] = _cocos_change + # Definition change for pf_active circuit/connections + if self.ids_name == "pf_active": + path = "circuit/connections" + self.new_to_old.post_process[path] = _circuit_connections_4to3 + self.old_to_new.post_process[path] = _circuit_connections_3to4 + + # Migrate ids_properties/source to ids_properties/provenance + # Only implement forward conversion (DD3 -> 4): + # - Pretend that this is a rename from ids_properties/source -> provenance + # - And register type_change handler which will be called with the source + # element and the new provenance structure + path = "ids_properties/source" + self.old_to_new.path[path] = "ids_properties/provenance" + self.old_to_new.type_change[path] = _ids_properties_source + + # GH#55: add logic to migrate some obsolete nodes in DD3.42.0 -> 4.0 + # These nodes (e.g. equilibrium profiles_1d/j_tor) have an NBC rename rule + # (to e.g. 
equilibrium profiles_1d/j_phi) applying to DD 3.41 and older. + # In DD 3.42, both the old AND new node names are present. + if self.version_old.minor >= 42: # Only apply for DD 3.42+ -> DD 4 + # Get a rename map for 3.41 -> new version + dd341_map = _DDVersionMap( + self.ids_name, + dd_etree("3.41.0"), + self.new_version, + Version("3.41.0"), + ) + for path, newpath in self.old_to_new.path.items(): + # Find all nodes that have disappeared in DD 4.x, and apply the rename + # rule from DD3.41 -> DD 4.x + if newpath is None and path in dd341_map.old_to_new: + # Apply the rename available in 3.41.0 + self.old_to_new.path[path] = dd341_map.old_to_new.path[path] def _map_missing(self, is_new: bool, missing_paths: Set[str]): rename_map = self.new_to_old if is_new else self.old_to_new diff --git a/imas/test/test_ids_convert.py b/imas/test/test_ids_convert.py index f2b9b7f7..a43edfe5 100644 --- a/imas/test/test_ids_convert.py +++ b/imas/test/test_ids_convert.py @@ -481,3 +481,47 @@ def test_3to4_pulse_schedule_fuzz(): fill_consistent(ps) convert_ids(ps, "4.0.0") + + +def test_3to4_migrate_deprecated_fields(): # GH#55 + # Test j_phi -> j_tor rename + eq342 = IDSFactory("3.42.0").equilibrium() + eq342.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS + eq342.time = [0.0] + eq342.time_slice.resize(1) + eq342.time_slice[0].profiles_1d.j_tor = [0.3, 0.2, 0.1] + eq342.time_slice[0].profiles_1d.psi = [1.0, 0.5, 0.0] + + # Basic case, check that j_tor (although deprecated) is migrated to j_phi: + eq4 = convert_ids(eq342, "4.0.0") + assert array_equal(eq4.time_slice[0].profiles_1d.j_phi.value, [0.3, 0.2, 0.1]) + + # When both j_tor and j_phi are present in the source IDS, we expect that j_phi + # takes precedence. 
This is a happy accident with how the DD defines both attributes + eq342.time_slice[0].profiles_1d.j_phi = [0.6, 0.4, 0.2] + eq4 = convert_ids(eq342, "4.0.0") + assert array_equal(eq4.time_slice[0].profiles_1d.j_phi.value, [0.6, 0.4, 0.2]) + + # Just to be sure, when j_tor has no value, it should also still work + del eq342.time_slice[0].profiles_1d.j_tor + eq4 = convert_ids(eq342, "4.0.0") + assert array_equal(eq4.time_slice[0].profiles_1d.j_phi.value, [0.6, 0.4, 0.2]) + + # Same applies to label -> name renames + cp342 = IDSFactory("3.42.0").core_profiles() + cp342.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS + cp342.time = [0.0] + cp342.profiles_1d.resize(1) + cp342.profiles_1d[0].ion.resize(1) + cp342.profiles_1d[0].ion[0].label = "x" + + cp4 = convert_ids(cp342, "4.0.0") + assert cp4.profiles_1d[0].ion[0].name == "x" + + cp342.profiles_1d[0].ion[0].name = "y" + cp4 = convert_ids(cp342, "4.0.0") + assert cp4.profiles_1d[0].ion[0].name == "y" + + del cp342.profiles_1d[0].ion[0].label + cp4 = convert_ids(cp342, "4.0.0") + assert cp4.profiles_1d[0].ion[0].name == "y" From ddd141f4213994b09b3d9260a8c89bc91ac54b05 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 3 Jul 2025 10:24:20 +0200 Subject: [PATCH 29/74] Fix bugs with ids_convert - Catch when an IDS is new in 3.42.0 before attempting to migrate obsolete nodes - Fix issue with migrating obsolete structures and AoS ( --- imas/ids_convert.py | 33 +++++++++++++++++++++------------ imas/test/test_ids_convert.py | 4 ++++ 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/imas/ids_convert.py b/imas/ids_convert.py index f244a9dd..75359f8f 100644 --- a/imas/ids_convert.py +++ b/imas/ids_convert.py @@ -364,18 +364,27 @@ def _apply_3to4_conversion(self, old: Element, new: Element) -> None: # In DD 3.42, both the old AND new node names are present. 
if self.version_old.minor >= 42: # Only apply for DD 3.42+ -> DD 4 # Get a rename map for 3.41 -> new version - dd341_map = _DDVersionMap( - self.ids_name, - dd_etree("3.41.0"), - self.new_version, - Version("3.41.0"), - ) - for path, newpath in self.old_to_new.path.items(): - # Find all nodes that have disappeared in DD 4.x, and apply the rename - # rule from DD3.41 -> DD 4.x - if newpath is None and path in dd341_map.old_to_new: - # Apply the rename available in 3.41.0 - self.old_to_new.path[path] = dd341_map.old_to_new.path[path] + factory341 = imas.IDSFactory("3.41.0") + if self.ids_name in factory341.ids_names(): # Ensure the IDS exists in 3.41 + dd341_map = _DDVersionMap( + self.ids_name, + dd_etree("3.41.0"), + self.new_version, + Version("3.41.0"), + ) + to_update = {} + for path, newpath in self.old_to_new.path.items(): + # Find all nodes that have disappeared in DD 4.x, and apply the + # rename rule from DD3.41 -> DD 4.x + if newpath is None and path in dd341_map.old_to_new: + self.old_to_new.path[path] = dd341_map.old_to_new.path[path] + # Note: path could be a structure or AoS, so we also put all + # child paths in our map: + path = path + "/" # All child nodes will start with this + for p, v in dd341_map.old_to_new.path.items(): + if p.startswith(path): + to_update[p] = v + self.old_to_new.path.update(to_update) def _map_missing(self, is_new: bool, missing_paths: Set[str]): rename_map = self.new_to_old if is_new else self.old_to_new diff --git a/imas/test/test_ids_convert.py b/imas/test/test_ids_convert.py index a43edfe5..826a7979 100644 --- a/imas/test/test_ids_convert.py +++ b/imas/test/test_ids_convert.py @@ -490,11 +490,15 @@ def test_3to4_migrate_deprecated_fields(): # GH#55 eq342.time = [0.0] eq342.time_slice.resize(1) eq342.time_slice[0].profiles_1d.j_tor = [0.3, 0.2, 0.1] + eq342.time_slice[0].profiles_1d.j_tor_error_upper = [1.0] + eq342.time_slice[0].profiles_1d.j_tor_error_lower = [2.0] eq342.time_slice[0].profiles_1d.psi = [1.0, 0.5, 0.0] # 
Basic case, check that j_tor (although deprecated) is migrated to j_phi: eq4 = convert_ids(eq342, "4.0.0") assert array_equal(eq4.time_slice[0].profiles_1d.j_phi.value, [0.3, 0.2, 0.1]) + assert array_equal(eq4.time_slice[0].profiles_1d.j_phi_error_upper.value, [1.0]) + assert array_equal(eq4.time_slice[0].profiles_1d.j_phi_error_lower.value, [2.0]) # When both j_tor and j_phi are present in the source IDS, we expect that j_phi # takes precedence. This is a happy accident with how the DD defines both attributes From 4c50b0f93a931ef7ae67a89af0bd1fece4b996c1 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Fri, 4 Jul 2025 16:56:24 +0200 Subject: [PATCH 30/74] Use debug log level when multiple alternative coordinates are set --- imas/ids_coordinates.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/imas/ids_coordinates.py b/imas/ids_coordinates.py index 29e62a87..f8b4f59d 100644 --- a/imas/ids_coordinates.py +++ b/imas/ids_coordinates.py @@ -1,7 +1,6 @@ # This file is part of IMAS-Python. # You should have received the IMAS-Python LICENSE file with this project. -"""Logic for interpreting coordinates in an IDS. 
-""" +"""Logic for interpreting coordinates in an IDS.""" import logging from contextlib import contextmanager @@ -235,7 +234,9 @@ def __getitem__(self, key: int) -> Union["IDSPrimitive", np.ndarray]: f"matching sizes:\n{sizes}" ) if len(nonzero_alternatives) > 1: - logger.info("Multiple alternative coordinates are set, using the first") + logger.debug( + "Multiple alternative coordinates are set, using the first" + ) return nonzero_alternatives[0] # Handle alternative coordinates, currently (DD 3.38.1) the `coordinate in From 2d260928ed66a17e4cb35c59fa2e5236aa3e8993 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 8 Jul 2025 14:24:23 +0200 Subject: [PATCH 31/74] Ensure that 0D numpy arrays are unpacked when filling lazy loaded IDS from netCDF files ```python >>> entry = imas.DBEntry("ascii.nc", "r") >>> eq = entry.get("equilibrium", autoconvert=False, lazy=True) >>> # Previous behaviour: >>> eq.ids_properties.homogeneous_time ndarray(array(1, dtype=int32)) >>> # New behaviour, matches with non-lazy IDS: >>> eq.ids_properties.homogeneous_time int(1) ``` --- imas/backends/netcdf/nc2ids.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/imas/backends/netcdf/nc2ids.py b/imas/backends/netcdf/nc2ids.py index 306c128e..1b1dbfe8 100644 --- a/imas/backends/netcdf/nc2ids.py +++ b/imas/backends/netcdf/nc2ids.py @@ -366,9 +366,12 @@ def get_child(self, child): if value is not None: if isinstance(value, np.ndarray): - # Convert the numpy array to a read-only view - value = value.view() - value.flags.writeable = False + if value.ndim == 0: # Unpack 0D numpy arrays: + value = value.item() + else: + # Convert the numpy array to a read-only view + value = value.view() + value.flags.writeable = False # NOTE: bypassing IDSPrimitive.value.setter logic child._IDSPrimitive__value = value From 9446e62f6ce19cd7c64a30e320aa4627041abeaa Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Fri, 1 Aug 2025 10:01:52 +0200 Subject: [PATCH 32/74] Set 
access_layer_language to IMAS-Python on put() `access_layer_language` used to be set to `imaspy {version}`, which got renamed to the non-specific `imas {version}`. This commit updates it to `IMAS-Python {version}` to be clear which HLI the IDS was put with. --- imas/db_entry.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/imas/db_entry.py b/imas/db_entry.py index d7d74574..eab91d5e 100644 --- a/imas/db_entry.py +++ b/imas/db_entry.py @@ -1,7 +1,6 @@ # This file is part of IMAS-Python. # You should have received the IMAS-Python LICENSE file with this project. -"""Logic for interacting with IMAS Data Entries. -""" +"""Logic for interacting with IMAS Data Entries.""" import logging import os @@ -734,7 +733,7 @@ def _put(self, ids: IDSToplevel, occurrence: int, is_slice: bool): version_put = ids.ids_properties.version_put version_put.data_dictionary = self._ids_factory._version version_put.access_layer = self._dbe_impl.access_layer_version() - version_put.access_layer_language = f"imas {imas.__version__}" + version_put.access_layer_language = f"IMAS-Python {imas.__version__}" self._dbe_impl.put(ids, occurrence, is_slice) From a09186a772899fc013401e3fe3b53f4b8bca7d1d Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 25 Aug 2025 14:33:11 +0200 Subject: [PATCH 33/74] Initial implementation of equilibrium DD3to4 conversion for boundary_separatrix --- imas/ids_convert.py | 70 +++++++++++++++++++++++++++- imas/test/test_ids_convert.py | 88 +++++++++++++++++++++++++++++++++++ 2 files changed, 157 insertions(+), 1 deletion(-) diff --git a/imas/ids_convert.py b/imas/ids_convert.py index 75359f8f..9fa52ac4 100644 --- a/imas/ids_convert.py +++ b/imas/ids_convert.py @@ -7,7 +7,7 @@ import logging from functools import lru_cache, partial from pathlib import Path -from typing import Callable, Dict, Iterator, Optional, Set, Tuple +from typing import Callable, Dict, Iterator, List, Optional, Set, Tuple from xml.etree.ElementTree import Element, 
ElementTree import numpy @@ -87,6 +87,15 @@ def __init__(self) -> None: converted. """ + self.post_process_ids: List[ + Callable[[IDSToplevel, IDSToplevel, bool], None] + ] = [] + """Postprocess functions to be applied to the whole IDS. + + These postprocess functions should be applied to the whole IDS after all data is + converted. The arguments supplied are: source IDS, target IDS, deepcopy boolean. + """ + self.ignore_missing_paths: Set[str] = set() """Set of paths that should not be logged when data is present.""" @@ -343,6 +352,13 @@ def _apply_3to4_conversion(self, old: Element, new: Element) -> None: new_path = self.old_to_new.path.get(old_path, old_path) self.new_to_old.post_process[new_path] = _cocos_change self.old_to_new.post_process[old_path] = _cocos_change + # Convert equilibrium boundary_separatrix and populate contour_tree + if self.ids_name == "equilibrium": + self.old_to_new.post_process_ids.append(_equilibrium_boundary_3to4) + self.old_to_new.ignore_missing_paths |= { + "time_slice/boundary_separatrix", + "time_slice/boundary_secondary_separatrix", + } # Definition change for pf_active circuit/connections if self.ids_name == "pf_active": path = "circuit/connections" @@ -544,6 +560,10 @@ def convert_ids( else: _copy_structure(toplevel, target, deepcopy, rename_map) + # Global post-processing functions + for callback in rename_map.post_process_ids: + callback(toplevel, target, deepcopy) + logger.info("Conversion of IDS %s finished.", ids_name) if provenance_origin_uri: _add_provenance_entry(target, toplevel._version, provenance_origin_uri) @@ -1063,3 +1083,51 @@ def _pulse_schedule_resample_callback(timebase, item: IDSBase, target_item: IDSB assume_sorted=True, )(timebase) target_item.value = value.astype(numpy.int32) if is_integer else value + + +def _equilibrium_boundary_3to4(eq3: IDSToplevel, eq4: IDSToplevel, deepcopy: bool): + """Convert DD3 boundary[[_secondary]_separatrix] to DD4 contour_tree""" + # Implement 
https://github.com/iterorganization/IMAS-Python/issues/60 + copy = numpy.copy if deepcopy else lambda x: x + for ts3, ts4 in zip(eq3.time_slice, eq4.time_slice): + n_nodes = 1 # magnetic axis + if ts3.boundary_separatrix.psi.has_value: + n_nodes = 2 + if ( # boundary_secondary_separatrix is introduced in DD 3.32.0 + hasattr(ts3, "boundary_secondary_separatrix") + and ts3.boundary_secondary_separatrix.psi.has_value + ): + n_nodes = 3 + ts4.contour_tree.node.resize(n_nodes) + # Magnetic axis (primary O-point) + node = ts4.contour_tree.node + node[0].critical_type = 0 # minimum (?) + node[0].r = ts3.global_quantities.magnetic_axis.r + node[0].z = ts3.global_quantities.magnetic_axis.z + node[0].psi = -ts3.global_quantities.psi_axis # COCOS change + + # X-points + if n_nodes >= 2: + if ts3.boundary_separatrix.type == 0: # limiter plasma + node[1].critical_type = 2 # maximum (?) + node[1].r = ts3.boundary_separatrix.active_limiter_point.r + node[1].z = ts3.boundary_separatrix.active_limiter_point.z + else: + node[1].critical_type = 1 # saddle-point (x-point) + if len(ts3.boundary_separatrix.x_point): + node[1].r = ts3.boundary_separatrix.x_point[0].r + node[1].z = ts3.boundary_separatrix.x_point[0].z + # TODO: what if there are multiple x-points? + node[1].psi = -ts3.boundary_separatrix.psi # COCOS change + node[1].levelset.r = copy(ts3.boundary_separatrix.outline.r) + node[1].levelset.z = copy(ts3.boundary_separatrix.outline.z) + + if n_nodes >= 3: + node[2].critical_type = 1 # saddle-point (x-point) + if len(ts3.boundary_secondary_separatrix.x_point): + node[2].r = ts3.boundary_secondary_separatrix.x_point[0].r + node[2].z = ts3.boundary_secondary_separatrix.x_point[0].z + # TODO: what if there are multiple x-points? 
+ node[2].psi = -ts3.boundary_secondary_separatrix.psi # COCOS change + node[2].levelset.r = copy(ts3.boundary_secondary_separatrix.outline.r) + node[2].levelset.z = copy(ts3.boundary_secondary_separatrix.outline.z) diff --git a/imas/test/test_ids_convert.py b/imas/test/test_ids_convert.py index 826a7979..f5e78061 100644 --- a/imas/test/test_ids_convert.py +++ b/imas/test/test_ids_convert.py @@ -529,3 +529,91 @@ def test_3to4_migrate_deprecated_fields(): # GH#55 del cp342.profiles_1d[0].ion[0].label cp4 = convert_ids(cp342, "4.0.0") assert cp4.profiles_1d[0].ion[0].name == "y" + + +def test_3to4_equilibrium_boundary(): + eq342 = IDSFactory("3.42.0").equilibrium() + eq342.time_slice.resize(5) + + for i, ts in enumerate(eq342.time_slice): + # Always fill boundary and magnetic axis + ts.boundary.psi = 1 + ts.boundary.outline.r = [1.0, 3.0, 2.0, 1.0] + ts.boundary.outline.z = [1.0, 2.0, 3.0, 1.0] + ts.global_quantities.psi_axis = 1.0 + ts.global_quantities.magnetic_axis.r = 2.0 + ts.global_quantities.magnetic_axis.z = 2.0 + + if i > 0: + # Fill separatrix + ts.boundary_separatrix.psi = -1.0 + # Use limiter for time_slice[1], otherwise divertor: + if i == 1: + ts.boundary_separatrix.type = 0 + ts.boundary_separatrix.active_limiter_point.r = 3.0 + ts.boundary_separatrix.active_limiter_point.z = 2.0 + else: + ts.boundary_separatrix.type = 1 + ts.boundary_separatrix.outline.r = [1.0, 3.0, 2.0, 1.0] + ts.boundary_separatrix.outline.z = [1.0, 2.0, 3.0, 1.0] + ts.boundary_separatrix.x_point.resize(1) + ts.boundary_separatrix.x_point[0].r = 1.0 + ts.boundary_separatrix.x_point[0].z = 1.0 + # These are not part of the conversion: + ts.boundary_separatrix.strike_point.resize(2) + ts.boundary_separatrix.closest_wall_point.r = 1.0 + ts.boundary_separatrix.closest_wall_point.z = 1.0 + ts.boundary_separatrix.closest_wall_point.distance = 0.2 + ts.boundary_separatrix.dr_dz_zero_point.r = 3.0 + ts.boundary_separatrix.dr_dz_zero_point.z = 2.0 + ts.boundary_separatrix.gap.resize(1) + if 
i == 3: + # Fill second_separatrix + ts.boundary_secondary_separatrix.psi = -1.0 + # Use limiter for time_slice[1], otherwise divertor: + ts.boundary_secondary_separatrix.outline.r = [0.9, 3.1, 2.1, 0.9] + ts.boundary_secondary_separatrix.outline.z = [0.9, 2.1, 3.1, 0.9] + ts.boundary_secondary_separatrix.x_point.resize(1) + ts.boundary_secondary_separatrix.x_point[0].r = 2.1 + ts.boundary_secondary_separatrix.x_point[0].z = 3.1 + # These are not part of the conversion: + ts.boundary_secondary_separatrix.distance_inner_outer = 0.1 + ts.boundary_secondary_separatrix.strike_point.resize(2) + if i == 4: + ts.boundary_separatrix.x_point.resize(2, keep=True) + ts.boundary_separatrix.x_point[1].r = 2.0 + ts.boundary_separatrix.x_point[1].z = 3.0 + + eq4 = convert_ids(eq342, "4.0.0") + assert len(eq4.time_slice) == 5 + for i, ts in enumerate(eq4.time_slice): + node = ts.contour_tree.node + assert len(node) == [1, 2, 2, 3, 2][i] + # Test magnetic axis + assert node[0].critical_type == 0 + assert node[0].r == node[0].z == 2.0 + assert len(node[0].levelset.r) == len(node[0].levelset.z) == 0 + # boundary_separatrix + if i == 1: # node[1] is boundary for limiter plasma + assert node[1].critical_type == 2 + assert node[1].r == 3.0 + assert node[1].z == 2.0 + elif i > 1: # node[1] is boundary for divertor plasma + assert node[1].critical_type == 1 + assert node[1].r == node[1].z == 1.0 + if i > 0: + assert numpy.array_equal(node[1].levelset.r, [1.0, 3.0, 2.0, 1.0]) + assert numpy.array_equal(node[1].levelset.z, [1.0, 2.0, 3.0, 1.0]) + # boundary_secondary_separatrix + if i == 3: + assert node[2].critical_type == 1 + assert node[2].r == 2.1 + assert node[2].z == 3.1 + assert numpy.array_equal(node[2].levelset.r, [0.9, 3.1, 2.1, 0.9]) + assert numpy.array_equal(node[2].levelset.z, [0.9, 2.1, 3.1, 0.9]) + + # not deepcopied, should share numpy arrays + assert ( + eq342.time_slice[1].boundary_separatrix.outline.r.value + is eq4.time_slice[1].contour_tree.node[1].levelset.r.value + ) 
From 9a7f8f74598a569ddf24da251d36d9a0518cfb50 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 25 Aug 2025 14:43:40 +0200 Subject: [PATCH 34/74] Update python versions used in github actions - Remove Python 3.8 (which is already end-of-life for ~10 months) and add Python 3.13. - Update OS to `ubuntu-latest` instead of a fixed version --- .github/workflows/test_with_pytest.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test_with_pytest.yml b/.github/workflows/test_with_pytest.yml index 7a345c7f..705e7a73 100644 --- a/.github/workflows/test_with_pytest.yml +++ b/.github/workflows/test_with_pytest.yml @@ -7,10 +7,10 @@ on: jobs: test: - runs-on: ubuntu-22.04 + runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] # Test on multiple Python versions + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] # Test on multiple Python versions steps: - name: Checkout repository @@ -20,8 +20,6 @@ jobs: uses: actions/setup-python@v4 with: - # until saxonche is available in 3.13 - # https://saxonica.plan.io/issues/6561 python-version: ${{ matrix.python-version }} - name: Display Python version run: python -c "import sys; print(sys.version)" From e7e1ca5c7d6d3ca28cdf4c759fc20c4dc199093d Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 25 Aug 2025 15:57:45 +0200 Subject: [PATCH 35/74] Enable tests for Python 3.7 and 3.8 --- .github/workflows/test_with_pytest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_with_pytest.yml b/.github/workflows/test_with_pytest.yml index 705e7a73..a60a350e 100644 --- a/.github/workflows/test_with_pytest.yml +++ b/.github/workflows/test_with_pytest.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] # Test on multiple Python versions + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] # Test on multiple 
Python versions steps: - name: Checkout repository From 9b21a4f21243e99fa223a276b87a4b1a46e72013 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 25 Aug 2025 16:00:55 +0200 Subject: [PATCH 36/74] Move back to `ubuntu-22.04` as runner Python 3.7 is not available on `ubuntu-latest`. --- .github/workflows/test_with_pytest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_with_pytest.yml b/.github/workflows/test_with_pytest.yml index a60a350e..d49baa7c 100644 --- a/.github/workflows/test_with_pytest.yml +++ b/.github/workflows/test_with_pytest.yml @@ -7,7 +7,7 @@ on: jobs: test: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 strategy: matrix: python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] # Test on multiple Python versions From bcd47fe408f7bafc5c52911a40bf1918357ab443 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 25 Aug 2025 16:07:02 +0200 Subject: [PATCH 37/74] Use `ubuntu-latest`, don't test python 3.7 (build) dependencies are not available for Python 3.7 on PyPI --- .github/workflows/test_with_pytest.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_with_pytest.yml b/.github/workflows/test_with_pytest.yml index d49baa7c..9d1208ba 100644 --- a/.github/workflows/test_with_pytest.yml +++ b/.github/workflows/test_with_pytest.yml @@ -7,10 +7,10 @@ on: jobs: test: - runs-on: ubuntu-22.04 + runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] # Test on multiple Python versions + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] # Test on multiple Python versions steps: - name: Checkout repository From d3f37939c655f9a96c9154c5a501138d4e2d16f1 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 28 Aug 2025 14:17:18 +0200 Subject: [PATCH 38/74] Update conversion logic to fill equilibrium contour_tree - Handle multiple x-points in boundary_[secondary_]separatrix 
- Determine if O-point at magnetic axis is a local minimum or maximum of the psi map --- imas/ids_convert.py | 28 ++++++++++++++++++++++------ imas/test/test_ids_convert.py | 26 +++++++++++++++++++++----- 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/imas/ids_convert.py b/imas/ids_convert.py index 9fa52ac4..4d62246d 100644 --- a/imas/ids_convert.py +++ b/imas/ids_convert.py @@ -1098,10 +1098,14 @@ def _equilibrium_boundary_3to4(eq3: IDSToplevel, eq4: IDSToplevel, deepcopy: boo and ts3.boundary_secondary_separatrix.psi.has_value ): n_nodes = 3 - ts4.contour_tree.node.resize(n_nodes) - # Magnetic axis (primary O-point) node = ts4.contour_tree.node - node[0].critical_type = 0 # minimum (?) + node.resize(n_nodes) + # Magnetic axis (primary O-point) + axis_is_psi_minimum = ( + # Note the sign flip for psi due to the COCOS change between DD3 and DD4! + -ts3.global_quantities.psi_axis < -ts3.global_quantities.psi_boundary + ) + node[0].critical_type = 0 if axis_is_psi_minimum else 2 node[0].r = ts3.global_quantities.magnetic_axis.r node[0].z = ts3.global_quantities.magnetic_axis.z node[0].psi = -ts3.global_quantities.psi_axis # COCOS change @@ -1109,7 +1113,7 @@ def _equilibrium_boundary_3to4(eq3: IDSToplevel, eq4: IDSToplevel, deepcopy: boo # X-points if n_nodes >= 2: if ts3.boundary_separatrix.type == 0: # limiter plasma - node[1].critical_type = 2 # maximum (?) + node[1].critical_type = 2 if axis_is_psi_minimum else 0 node[1].r = ts3.boundary_separatrix.active_limiter_point.r node[1].z = ts3.boundary_separatrix.active_limiter_point.z else: @@ -1117,7 +1121,13 @@ def _equilibrium_boundary_3to4(eq3: IDSToplevel, eq4: IDSToplevel, deepcopy: boo if len(ts3.boundary_separatrix.x_point): node[1].r = ts3.boundary_separatrix.x_point[0].r node[1].z = ts3.boundary_separatrix.x_point[0].z - # TODO: what if there are multiple x-points? + # Additional x-points. N.B. 
levelset is only stored on the first node + for i in range(1, len(ts3.boundary_separatrix.x_point)): + node.resize(len(node) + 1, keep=True) + node[-1].critical_type = 1 + node[-1].r = ts3.boundary_separatrix.x_point[i].r + node[-1].z = ts3.boundary_separatrix.x_point[i].z + node[-1].psi = -ts3.boundary_separatrix.psi node[1].psi = -ts3.boundary_separatrix.psi # COCOS change node[1].levelset.r = copy(ts3.boundary_separatrix.outline.r) node[1].levelset.z = copy(ts3.boundary_separatrix.outline.z) @@ -1127,7 +1137,13 @@ def _equilibrium_boundary_3to4(eq3: IDSToplevel, eq4: IDSToplevel, deepcopy: boo if len(ts3.boundary_secondary_separatrix.x_point): node[2].r = ts3.boundary_secondary_separatrix.x_point[0].r node[2].z = ts3.boundary_secondary_separatrix.x_point[0].z - # TODO: what if there are multiple x-points? + # Additional x-points. N.B. levelset is only stored on the first node + for i in range(1, len(ts3.boundary_secondary_separatrix.x_point)): + node.resize(len(node) + 1, keep=True) + node[-1].critical_type = 1 + node[-1].r = ts3.boundary_secondary_separatrix.x_point[i].r + node[-1].z = ts3.boundary_secondary_separatrix.x_point[i].z + node[-1].psi = -ts3.boundary_secondary_separatrix.psi node[2].psi = -ts3.boundary_secondary_separatrix.psi # COCOS change node[2].levelset.r = copy(ts3.boundary_secondary_separatrix.outline.r) node[2].levelset.z = copy(ts3.boundary_secondary_separatrix.outline.z) diff --git a/imas/test/test_ids_convert.py b/imas/test/test_ids_convert.py index f5e78061..f51d4baa 100644 --- a/imas/test/test_ids_convert.py +++ b/imas/test/test_ids_convert.py @@ -569,7 +569,7 @@ def test_3to4_equilibrium_boundary(): ts.boundary_separatrix.gap.resize(1) if i == 3: # Fill second_separatrix - ts.boundary_secondary_separatrix.psi = -1.0 + ts.boundary_secondary_separatrix.psi = -1.1 # Use limiter for time_slice[1], otherwise divertor: ts.boundary_secondary_separatrix.outline.r = [0.9, 3.1, 2.1, 0.9] ts.boundary_secondary_separatrix.outline.z = [0.9, 2.1, 
3.1, 0.9] @@ -588,10 +588,11 @@ def test_3to4_equilibrium_boundary(): assert len(eq4.time_slice) == 5 for i, ts in enumerate(eq4.time_slice): node = ts.contour_tree.node - assert len(node) == [1, 2, 2, 3, 2][i] + assert len(node) == [1, 2, 2, 3, 3][i] # Test magnetic axis assert node[0].critical_type == 0 assert node[0].r == node[0].z == 2.0 + assert node[0].psi == -1.0 assert len(node[0].levelset.r) == len(node[0].levelset.z) == 0 # boundary_separatrix if i == 1: # node[1] is boundary for limiter plasma @@ -602,6 +603,7 @@ def test_3to4_equilibrium_boundary(): assert node[1].critical_type == 1 assert node[1].r == node[1].z == 1.0 if i > 0: + assert node[1].psi == 1.0 assert numpy.array_equal(node[1].levelset.r, [1.0, 3.0, 2.0, 1.0]) assert numpy.array_equal(node[1].levelset.z, [1.0, 2.0, 3.0, 1.0]) # boundary_secondary_separatrix @@ -609,11 +611,25 @@ def test_3to4_equilibrium_boundary(): assert node[2].critical_type == 1 assert node[2].r == 2.1 assert node[2].z == 3.1 + assert node[2].psi == 1.1 assert numpy.array_equal(node[2].levelset.r, [0.9, 3.1, 2.1, 0.9]) assert numpy.array_equal(node[2].levelset.z, [0.9, 2.1, 3.1, 0.9]) + # Second x-point from boundary_separatrix + if i == 4: + assert node[2].critical_type == 1 + assert node[2].r == 2.0 + assert node[2].z == 3.0 + assert node[2].psi == node[1].psi == 1.0 + # Levelset is only filled for the main x-point (node[1]) + assert not node[2].levelset.r.has_value + assert not node[2].levelset.z.has_value # not deepcopied, should share numpy arrays - assert ( - eq342.time_slice[1].boundary_separatrix.outline.r.value - is eq4.time_slice[1].contour_tree.node[1].levelset.r.value + slice1_outline_r = eq342.time_slice[1].boundary_separatrix.outline.r.value + assert slice1_outline_r is eq4.time_slice[1].contour_tree.node[1].levelset.r.value + + # deepcopy should create a copy of the numpy arrays + eq4_cp = convert_ids(eq342, "4.0.0", deepcopy=True) + assert not numpy.may_share_memory( + slice1_outline_r, 
eq4_cp.time_slice[1].contour_tree.node[1].levelset.r.value ) From 83ff9451290d25410a4d43f50600368c2753593d Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 28 Aug 2025 14:34:41 +0200 Subject: [PATCH 39/74] Fix formatting --- imas/ids_convert.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/imas/ids_convert.py b/imas/ids_convert.py index 4d62246d..77163a11 100644 --- a/imas/ids_convert.py +++ b/imas/ids_convert.py @@ -1101,14 +1101,14 @@ def _equilibrium_boundary_3to4(eq3: IDSToplevel, eq4: IDSToplevel, deepcopy: boo node = ts4.contour_tree.node node.resize(n_nodes) # Magnetic axis (primary O-point) - axis_is_psi_minimum = ( - # Note the sign flip for psi due to the COCOS change between DD3 and DD4! - -ts3.global_quantities.psi_axis < -ts3.global_quantities.psi_boundary - ) + gq = ts3.global_quantities + # Note the sign flip for psi due to the COCOS change between DD3 and DD4! + axis_is_psi_minimum = -gq.psi_axis < -gq.psi_boundary + node[0].critical_type = 0 if axis_is_psi_minimum else 2 - node[0].r = ts3.global_quantities.magnetic_axis.r - node[0].z = ts3.global_quantities.magnetic_axis.z - node[0].psi = -ts3.global_quantities.psi_axis # COCOS change + node[0].r = gq.magnetic_axis.r + node[0].z = gq.magnetic_axis.z + node[0].psi = -gq.psi_axis # COCOS change # X-points if n_nodes >= 2: From fb8476d9dfd946d6dd75e415358326b3a8395c7e Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 28 Aug 2025 14:57:22 +0200 Subject: [PATCH 40/74] Do not fill contour tree in conversion logic when there is no magnetic axis --- imas/ids_convert.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/imas/ids_convert.py b/imas/ids_convert.py index 77163a11..a400a31c 100644 --- a/imas/ids_convert.py +++ b/imas/ids_convert.py @@ -1090,6 +1090,9 @@ def _equilibrium_boundary_3to4(eq3: IDSToplevel, eq4: IDSToplevel, deepcopy: boo # Implement https://github.com/iterorganization/IMAS-Python/issues/60 copy = numpy.copy if deepcopy else 
lambda x: x for ts3, ts4 in zip(eq3.time_slice, eq4.time_slice): + if not ts3.global_quantities.psi_axis.has_value: + # No magnetic axis, assume no boundary either: + continue n_nodes = 1 # magnetic axis if ts3.boundary_separatrix.psi.has_value: n_nodes = 2 From 17d76165d0e5f783d572747d29d1f187053c7cc3 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 26 Aug 2025 15:44:28 +0200 Subject: [PATCH 41/74] Bypass __setattr__ logic when creating a deepcopy This improves deepcopy performance by ~40% for the ITER magnetics machine description. --- imas/ids_structure.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/imas/ids_structure.py b/imas/ids_structure.py index 27270034..fbc3042e 100644 --- a/imas/ids_structure.py +++ b/imas/ids_structure.py @@ -1,7 +1,6 @@ # This file is part of IMAS-Python. # You should have received the IMAS-Python LICENSE file with this project. -"""A structure in an IDS -""" +"""A structure in an IDS""" import logging from copy import deepcopy @@ -151,7 +150,9 @@ def __deepcopy__(self, memo): for child in self._children: if child in self.__dict__: child_copy = deepcopy(getattr(self, child), memo) - setattr(copy, child, child_copy) + # bypass __setattr__: + copy.__dict__[child] = child_copy + child_copy._parent = copy return copy def __dir__(self) -> List[str]: From 3aa96c3331c5d689a23e627e5aad0f8aa89ba54a Mon Sep 17 00:00:00 2001 From: munechika-koyo Date: Tue, 9 Sep 2025 09:51:25 +0200 Subject: [PATCH 42/74] Refactor type hints to use PEP 604 syntax and clean up import statements --- imas/db_entry.py | 49 ++++++++++++++++++++++++--------------------- imas/ids_factory.py | 13 +++++++----- 2 files changed, 34 insertions(+), 28 deletions(-) diff --git a/imas/db_entry.py b/imas/db_entry.py index eab91d5e..f19dc69a 100644 --- a/imas/db_entry.py +++ b/imas/db_entry.py @@ -2,9 +2,12 @@ # You should have received the IMAS-Python LICENSE file with this project. 
"""Logic for interacting with IMAS Data Entries.""" +from __future__ import annotations + import logging import os -from typing import Any, List, Optional, Tuple, Type, Union, overload +from pathlib import Path +from typing import Any, Type, overload import numpy @@ -33,14 +36,14 @@ logger = logging.getLogger(__name__) -def _get_uri_mode(uri, mode) -> Tuple[str, str]: +def _get_uri_mode(uri, mode) -> tuple[str, str]: """Helper method to parse arguments of DBEntry.__init__.""" return uri, mode def _get_legacy_params( backend_id, db_name, pulse, run, user_name=None, data_version=None -) -> Tuple[int, str, int, int, Optional[str], Optional[str]]: +) -> tuple[int, str, int, int, str | None, str | None]: """Helper method to parse arguments of DBEntry.__init__.""" return backend_id, db_name, pulse, run, user_name, data_version @@ -74,8 +77,8 @@ def __init__( uri: str, mode: str, *, - dd_version: Optional[str] = None, - xml_path: Optional[str] = None, + dd_version: str | None = None, + xml_path: str | Path | None = None, ) -> None: ... @overload @@ -85,19 +88,19 @@ def __init__( db_name: str, pulse: int, run: int, - user_name: Optional[str] = None, - data_version: Optional[str] = None, + user_name: str | None = None, + data_version: str | None = None, *, - shot: Optional[int] = None, - dd_version: Optional[str] = None, - xml_path: Optional[str] = None, + shot: int | None = None, + dd_version: str | None = None, + xml_path: str | Path | None = None, ) -> None: ... 
def __init__( self, *args, - dd_version: Optional[str] = None, - xml_path: Optional[str] = None, + dd_version: str | None = None, + xml_path: str | Path | None = None, **kwargs, ): """Open or create a Data Entry based on the provided URI and mode, or prepare a @@ -162,7 +165,7 @@ def __init__( ) from None # Actual intializiation - self._dbe_impl: Optional[DBEntryImpl] = None + self._dbe_impl: DBEntryImpl | None = None self._dd_version = dd_version self._xml_path = xml_path self._ids_factory = IDSFactory(dd_version, xml_path) @@ -186,7 +189,7 @@ def __init__( self._dbe_impl = cls.from_uri(self.uri, mode, self._ids_factory) @staticmethod - def _select_implementation(uri: Optional[str]) -> Type[DBEntryImpl]: + def _select_implementation(uri: str | None) -> Type[DBEntryImpl]: """Select which DBEntry implementation to use based on the URI.""" if uri and uri.endswith(".nc") and not uri.startswith("imas:"): from imas.backends.netcdf.db_entry_nc import NCDBEntryImpl as impl @@ -307,7 +310,7 @@ def get( lazy: bool = False, autoconvert: bool = True, ignore_unknown_dd_version: bool = False, - destination: Optional[IDSToplevel] = None, + destination: IDSToplevel | None = None, ) -> IDSToplevel: """Read the contents of an IDS into memory. @@ -370,7 +373,7 @@ def get_slice( lazy: bool = False, autoconvert: bool = True, ignore_unknown_dd_version: bool = False, - destination: Optional[IDSToplevel] = None, + destination: IDSToplevel | None = None, ) -> IDSToplevel: """Read a single time slice from an IDS in this Database Entry. 
@@ -434,14 +437,14 @@ def get_sample( ids_name: str, tmin: float, tmax: float, - dtime: Optional[Union[float, numpy.ndarray]] = None, - interpolation_method: Optional[int] = None, + dtime: float | numpy.ndarray | None = None, + interpolation_method: int | None = None, occurrence: int = 0, *, lazy: bool = False, autoconvert: bool = True, ignore_unknown_dd_version: bool = False, - destination: Optional[IDSToplevel] = None, + destination: IDSToplevel | None = None, ) -> IDSToplevel: """Read a range of time slices from an IDS in this Database Entry. @@ -547,8 +550,8 @@ def _get( self, ids_name: str, occurrence: int, - parameters: Union[None, GetSliceParameters, GetSampleParameters], - destination: Optional[IDSToplevel], + parameters: None | GetSliceParameters | GetSampleParameters, + destination: IDSToplevel | None, lazy: bool, autoconvert: bool, ignore_unknown_dd_version: bool, @@ -751,12 +754,12 @@ def delete_data(self, ids_name: str, occurrence: int = 0) -> None: @overload def list_all_occurrences( self, ids_name: str, node_path: None = None - ) -> List[int]: ... + ) -> list[int]: ... @overload def list_all_occurrences( self, ids_name: str, node_path: str - ) -> Tuple[List[int], List[IDSBase]]: ... + ) -> tuple[list[int], list[IDSBase]]: ... def list_all_occurrences(self, ids_name, node_path=None): """List all non-empty occurrences of an IDS diff --git a/imas/ids_factory.py b/imas/ids_factory.py index cd88952d..1dac70cf 100644 --- a/imas/ids_factory.py +++ b/imas/ids_factory.py @@ -1,11 +1,14 @@ # This file is part of IMAS-Python. # You should have received the IMAS-Python LICENSE file with this project. -"""Tools for generating IDSs from a Data Dictionary version. 
-""" +"""Tools for generating IDSs from a Data Dictionary version.""" + +from __future__ import annotations import logging +from collections.abc import Iterable, Iterator from functools import partial -from typing import Any, Iterable, Iterator, List, Optional +from pathlib import Path +from typing import Any from imas import dd_zip from imas.exception import IDSNameError @@ -27,7 +30,7 @@ class IDSFactory: """ def __init__( - self, version: Optional[str] = None, xml_path: Optional[str] = None + self, version: str | None = None, xml_path: str | Path | None = None ) -> None: """Create a new IDS Factory @@ -77,7 +80,7 @@ def __iter__(self) -> Iterator[str]: """Iterate over the IDS names defined by the loaded Data Dictionary""" return iter(self._ids_elements) - def ids_names(self) -> List[str]: + def ids_names(self) -> list[str]: """Get a list of all known IDS names in the loaded Data Dictionary""" return list(self._ids_elements) From 504b997da9d9e553d1b97a94d5a5e0d42302aae3 Mon Sep 17 00:00:00 2001 From: munechika-koyo Date: Thu, 11 Sep 2025 09:57:05 +0200 Subject: [PATCH 43/74] Refactor type hints to use pathlib.Path instead of Path for consistency --- imas/db_entry.py | 8 ++++---- imas/ids_factory.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/imas/db_entry.py b/imas/db_entry.py index f19dc69a..401659f7 100644 --- a/imas/db_entry.py +++ b/imas/db_entry.py @@ -6,7 +6,7 @@ import logging import os -from pathlib import Path +import pathlib from typing import Any, Type, overload import numpy @@ -78,7 +78,7 @@ def __init__( mode: str, *, dd_version: str | None = None, - xml_path: str | Path | None = None, + xml_path: str | pathlib.Path | None = None, ) -> None: ... @overload @@ -93,14 +93,14 @@ def __init__( *, shot: int | None = None, dd_version: str | None = None, - xml_path: str | Path | None = None, + xml_path: str | pathlib.Path | None = None, ) -> None: ... 
def __init__( self, *args, dd_version: str | None = None, - xml_path: str | Path | None = None, + xml_path: str | pathlib.Path | None = None, **kwargs, ): """Open or create a Data Entry based on the provided URI and mode, or prepare a diff --git a/imas/ids_factory.py b/imas/ids_factory.py index 1dac70cf..b840d8a8 100644 --- a/imas/ids_factory.py +++ b/imas/ids_factory.py @@ -5,9 +5,9 @@ from __future__ import annotations import logging +import pathlib from collections.abc import Iterable, Iterator from functools import partial -from pathlib import Path from typing import Any from imas import dd_zip @@ -30,7 +30,7 @@ class IDSFactory: """ def __init__( - self, version: str | None = None, xml_path: str | Path | None = None + self, version: str | None = None, xml_path: str | pathlib.Path | None = None ) -> None: """Create a new IDS Factory From 9c7bff5da38775519dde3ed2e59c1c7727f5cfa4 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 23 Sep 2025 10:48:12 +0200 Subject: [PATCH 44/74] Clarify that process-db-analysis shows usage across any occurrence of the IDS --- docs/source/cli.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/cli.rst b/docs/source/cli.rst index df6db851..9147746d 100644 --- a/docs/source/cli.rst +++ b/docs/source/cli.rst @@ -121,8 +121,8 @@ process these files with ``imas process-db-analysis``. This will: variable). 2. These results are summarized in a table, showing per IDS: - - The number of data fields [#data_fields]_ that were filled in *any* of the - analyzed data entries. + - The number of data fields [#data_fields]_ that were filled in *any* occurrence of + the IDS in *any* of the analyzed data entries. - The total number of data fields [#data_fields]_ that the Data Dictionary defines for this IDS. - The percentage of fields filled. 
From fa0a08535afee7cc82422f62591625e875f0172a Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 25 Sep 2025 10:53:30 +0200 Subject: [PATCH 45/74] Add new 3to4 conversion rules to change sign of poloidal flux Add a list of quantities that should get a sign flip on conversion, but are not covered by the generic rule that looks at DD3 `cocos_label_transformation` metadata. --- imas/ids_convert.py | 128 ++++++++++++++++++++++++++++++++-- imas/test/test_ids_convert.py | 21 ++++++ 2 files changed, 145 insertions(+), 4 deletions(-) diff --git a/imas/ids_convert.py b/imas/ids_convert.py index 75359f8f..48b105d4 100644 --- a/imas/ids_convert.py +++ b/imas/ids_convert.py @@ -336,13 +336,18 @@ def add_rename(old_path: str, new_path: str): def _apply_3to4_conversion(self, old: Element, new: Element) -> None: # Postprocessing for COCOS definition change: + cocos_paths = [] for psi_like in ["psi_like", "dodpsi_like"]: xpath_query = f".//field[@cocos_label_transformation='{psi_like}']" for old_item in old.iterfind(xpath_query): - old_path = old_item.get("path") - new_path = self.old_to_new.path.get(old_path, old_path) - self.new_to_old.post_process[new_path] = _cocos_change - self.old_to_new.post_process[old_path] = _cocos_change + cocos_paths.append(old_item.get("path")) + # Sign flips not covered by the generic rule: + cocos_paths.extend(_3to4_sign_flip_paths.get(self.ids_name, [])) + for old_path in cocos_paths: + new_path = self.old_to_new.path.get(old_path, old_path) + self.new_to_old.post_process[new_path] = _cocos_change + self.old_to_new.post_process[old_path] = _cocos_change + # Definition change for pf_active circuit/connections if self.ids_name == "pf_active": path = "circuit/connections" @@ -676,6 +681,121 @@ def _copy_structure( callback(item, target_item) +_3to4_sign_flip_paths = { + "core_instant_changes": [ + "change/profiles_1d/grid/psi_magnetic_axis", + "change/profiles_1d/grid/psi_boundary", + ], + "core_profiles": [ + 
"profiles_1d/grid/psi_magnetic_axis", + "profiles_1d/grid/psi_boundary", + ], + "core_sources": [ + "source/profiles_1d/grid/psi_magnetic_axis", + "source/profiles_1d/grid/psi_boundary", + ], + "core_transport": [ + "model/profiles_1d/grid_d/psi_magnetic_axis", + "model/profiles_1d/grid_d/psi_boundary", + "model/profiles_1d/grid_v/psi_magnetic_axis", + "model/profiles_1d/grid_v/psi_boundary", + "model/profiles_1d/grid_flux/psi_magnetic_axis", + "model/profiles_1d/grid_flux/psi_boundary", + ], + "disruption": [ + "global_quantities/psi_halo_boundary", + "profiles_1d/grid/psi_magnetic_axis", + "profiles_1d/grid/psi_boundary", + ], + "ece": [ + "channel/beam_tracing/beam/position/psi", + "psi_normalization/psi_magnetic_axis", + "psi_normalization/psi_boundary", + ], + "edge_profiles": [ + "profiles_1d/grid/psi", + "profiles_1d/grid/psi_magnetic_axis", + "profiles_1d/grid/psi_boundary", + ], + "equilibrium": [ + "time_slice/boundary/psi", + "time_slice/global_quantities/q_min/psi", + "time_slice/ggd/psi/values", + "time_slice/ggd/psi/coefficients", + ], + "mhd": [ + "ggd/psi/values", + "ggd/psi/coefficients", + ], + "pellets": ["time_slice/pellet/path_profiles/psi"], + "plasma_profiles": [ + "profiles_1d/grid/psi", + "profiles_1d/grid/psi_magnetic_axis", + "profiles_1d/grid/psi_boundary", + "ggd/psi/values", + "ggd/psi/coefficients", + ], + "plasma_sources": [ + "source/profiles_1d/grid/psi", + "source/profiles_1d/grid/psi_magnetic_axis", + "source/profiles_1d/grid/psi_boundary", + ], + "plasma_transport": [ + "model/profiles_1d/grid_d/psi", + "model/profiles_1d/grid_d/psi_magnetic_axis", + "model/profiles_1d/grid_d/psi_boundary", + "model/profiles_1d/grid_v/psi", + "model/profiles_1d/grid_v/psi_magnetic_axis", + "model/profiles_1d/grid_v/psi_boundary", + "model/profiles_1d/grid_flux/psi", + "model/profiles_1d/grid_flux/psi_magnetic_axis", + "model/profiles_1d/grid_flux/psi_boundary", + ], + "radiation": [ + "process/profiles_1d/grid/psi_magnetic_axis", + 
"process/profiles_1d/grid/psi_boundary", + ], + "reflectometer_profile": [ + "psi_normalization/psi_magnetic_axis", + "psi_normalization/psi_boundary", + ], + "reflectometer_fluctuation": [ + "psi_normalization/psi_magnetic_axis", + "psi_normalization/psi_boundary", + ], + "runaway_electrons": [ + "profiles_1d/grid/psi_magnetic_axis", + "profiles_1d/grid/psi_boundary", + ], + "sawteeth": [ + "profiles_1d/grid/psi_magnetic_axis", + "profiles_1d/grid/psi_boundary", + ], + "summary": [ + "global_quantities/psi_external_average/value", + "local/magnetic_axis/position/psi", + ], + "transport_solver_numerics": [ + "solver_1d/grid/psi_magnetic_axis", + "solver_1d/grid/psi_boundary", + "derivatives_1d/grid/psi_magnetic_axis", + "derivatives_1d/grid/psi_boundary", + ], + "wall": [ + "description_ggd/ggd/psi/values", + "description_ggd/ggd/psi/coefficients", + ], + "waves": [ + "coherent_wave/profiles_1d/grid/psi_magnetic_axis", + "coherent_wave/profiles_1d/grid/psi_boundary", + "coherent_wave/profiles_2d/grid/psi", + "coherent_wave/beam_tracing/beam/position/psi", + ], +} +"""List of paths per IDS that require a COCOS sign change, but aren't covered by the +generic rule.""" + + ######################################################################################## # Type changed handlers and post-processing functions # ######################################################################################## diff --git a/imas/test/test_ids_convert.py b/imas/test/test_ids_convert.py index 826a7979..af6f3a5d 100644 --- a/imas/test/test_ids_convert.py +++ b/imas/test/test_ids_convert.py @@ -12,12 +12,14 @@ from imas import identifiers from imas.ids_convert import ( + _3to4_sign_flip_paths, _get_ctxpath, _get_tbp, convert_ids, dd_version_map_from_factories, iter_parents, ) +from imas.ids_data_type import IDSDataType from imas.ids_defs import ( ASCII_BACKEND, IDS_TIME_MODE_HETEROGENEOUS, @@ -529,3 +531,22 @@ def test_3to4_migrate_deprecated_fields(): # GH#55 del 
cp342.profiles_1d[0].ion[0].label cp4 = convert_ids(cp342, "4.0.0") assert cp4.profiles_1d[0].ion[0].name == "y" + + +def test_3to4_cocos_hardcoded_paths(): + # Check for existence in 3.42.0 + factory = IDSFactory("3.42.0") + for ids_name, paths in _3to4_sign_flip_paths.items(): + ids = factory.new(ids_name) + for path in paths: + # Check path exists and is not a FLT + metadata = ids.metadata[path] + assert metadata.data_type is IDSDataType.FLT + + # Test a conversion + eq = factory.equilibrium() + eq.time_slice.resize(1) + eq.time_slice[0].boundary.psi = 3.141 + + eq4 = convert_ids(eq, "4.0.0") + assert eq4.time_slice[0].boundary.psi == -3.141 From e2195e4c92b38d91720915e387793940e87d72a6 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Fri, 26 Sep 2025 13:08:20 +0200 Subject: [PATCH 46/74] Remove psi/ggd/coefficients from COCOS sign flip conversion rule --- imas/ids_convert.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/imas/ids_convert.py b/imas/ids_convert.py index 48b105d4..559e8a3b 100644 --- a/imas/ids_convert.py +++ b/imas/ids_convert.py @@ -721,19 +721,14 @@ def _copy_structure( "time_slice/boundary/psi", "time_slice/global_quantities/q_min/psi", "time_slice/ggd/psi/values", - "time_slice/ggd/psi/coefficients", - ], - "mhd": [ - "ggd/psi/values", - "ggd/psi/coefficients", ], + "mhd": ["ggd/psi/values"], "pellets": ["time_slice/pellet/path_profiles/psi"], "plasma_profiles": [ "profiles_1d/grid/psi", "profiles_1d/grid/psi_magnetic_axis", "profiles_1d/grid/psi_boundary", "ggd/psi/values", - "ggd/psi/coefficients", ], "plasma_sources": [ "source/profiles_1d/grid/psi", @@ -781,10 +776,7 @@ def _copy_structure( "derivatives_1d/grid/psi_magnetic_axis", "derivatives_1d/grid/psi_boundary", ], - "wall": [ - "description_ggd/ggd/psi/values", - "description_ggd/ggd/psi/coefficients", - ], + "wall": ["description_ggd/ggd/psi/values"], "waves": [ "coherent_wave/profiles_1d/grid/psi_magnetic_axis", 
"coherent_wave/profiles_1d/grid/psi_boundary", From 14afe297ecdb38cde73c1385b7bc8a30340096bd Mon Sep 17 00:00:00 2001 From: Deepak Mewar Date: Wed, 10 Sep 2025 15:45:25 +0200 Subject: [PATCH 47/74] [GH#59] Convert name/identifier(DD3) to description/name(DD4) Signed-off-by: Deepak Mewar --- docs/source/multi-dd.rst | 19 ++++++++ imas/ids_convert.py | 85 ++++++++++++++++++++++++++++------- imas/test/test_ids_convert.py | 42 +++++++++++++++++ 3 files changed, 130 insertions(+), 16 deletions(-) diff --git a/docs/source/multi-dd.rst b/docs/source/multi-dd.rst index bef1fe54..bb71e853 100644 --- a/docs/source/multi-dd.rst +++ b/docs/source/multi-dd.rst @@ -117,6 +117,25 @@ Explicit conversion versions, the corresponding data is not copied. IMAS-Python provides logging to indicate when this happens. +.. rubric:: DD3 -> DD4 special rule: name + identifier -> description + name (GH#59) + +IMAS‑Python implements an additional explicit conversion rule (see GH#59) to improve +migration of Machine Description parts of IDSs when moving from major version 3 to 4. +The rule targets simple sibling pairs on the same parent that provide both a "name" +and an "identifier" field and that are NOT part of an "identifier structure" (the +parent must not also have an "index" sibling). When applicable the rule performs the +following renames during explicit DD3->DD4 conversion: + +- DD3: parent/name -> DD4: parent/description +- DD3: parent/identifier -> DD4: parent/name + +The conversion is applied only when the corresponding target fields exist in the +DD4 definition and when no earlier mapping already covers the same paths. This +is performed by the explicit conversion machinery (for example via +imas.convert_ids or DBEntry explicit conversion) and is not guaranteed to be +applied by automatic conversion when reading/writing from a backend. + +In some cases like the one above, reverse conversion is also allowed(DD 4.0.0 -> 3.41.1) .. 
_`Supported conversions`: diff --git a/imas/ids_convert.py b/imas/ids_convert.py index 75359f8f..eb7f6760 100644 --- a/imas/ids_convert.py +++ b/imas/ids_convert.py @@ -201,6 +201,10 @@ def _build_map(self, old: Element, new: Element) -> None: old_path_set = set(old_paths) new_path_set = set(new_paths) + # expose the path->Element maps as members so other methods can reuse them + self.old_paths = old_paths + self.new_paths = new_paths + def process_parent_renames(path: str) -> str: # Apply any parent AoS/structure rename # Loop in reverse order to find the closest parent which was renamed: @@ -222,20 +226,6 @@ def get_old_path(path: str, previous_name: str) -> str: old_path = previous_name return process_parent_renames(old_path) - def add_rename(old_path: str, new_path: str): - old_item = old_paths[old_path] - new_item = new_paths[new_path] - self.new_to_old[new_path] = ( - old_path, - _get_tbp(old_item, old_paths), - _get_ctxpath(old_path, old_paths), - ) - self.old_to_new[old_path] = ( - new_path, - _get_tbp(new_item, new_paths), - _get_ctxpath(new_path, new_paths), - ) - # Iterate through all NBC metadata and add entries for new_item in new.iterfind(".//field[@change_nbc_description]"): new_path = new_item.get("path") @@ -275,14 +265,15 @@ def add_rename(old_path: str, new_path: str): self.version_old, ) elif self._check_data_type(old_item, new_item): - add_rename(old_path, new_path) + # use class helper to register simple renames and reciprocal mappings + self._add_rename(old_path, new_path) if old_item.get("data_type") in DDVersionMap.STRUCTURE_TYPES: # Add entries for common sub-elements for path in old_paths: if path.startswith(old_path): npath = path.replace(old_path, new_path, 1) if npath in new_path_set: - add_rename(path, npath) + self._add_rename(path, npath) elif nbc_description == "type_changed": pass # We will handle this (if possible) in self._check_data_type elif nbc_description == "repeat_children_first_point": @@ -334,6 +325,28 @@ def 
add_rename(old_path: str, new_path: str): if self.version_old.major == 3 and new_version and new_version.major == 4: self._apply_3to4_conversion(old, new) + def _add_rename(self, old_path: str, new_path: str) -> None: + """Register a simple rename from old_path -> new_path using the + path->Element maps stored on the instance (self.old_paths/self.new_paths). + This will also add the reciprocal mapping when possible. + """ + old_item = self.old_paths[old_path] + new_item = self.new_paths[new_path] + + # forward mapping + self.old_to_new[old_path] = ( + new_path, + _get_tbp(new_item, self.new_paths), + _get_ctxpath(new_path, self.new_paths), + ) + + # reciprocal mapping + self.new_to_old[new_path] = ( + old_path, + _get_tbp(old_item, self.old_paths), + _get_ctxpath(old_path, self.old_paths), + ) + def _apply_3to4_conversion(self, old: Element, new: Element) -> None: # Postprocessing for COCOS definition change: for psi_like in ["psi_like", "dodpsi_like"]: @@ -386,6 +399,46 @@ def _apply_3to4_conversion(self, old: Element, new: Element) -> None: to_update[p] = v self.old_to_new.path.update(to_update) + # GH#59: To improve further the conversion of DD3 to DD4, especially the + # Machine Description part of the IDSs, we would like to add a 3to4 specific + # rule to convert any siblings name + identifier (that are not part of an + # identifier structure, meaning that there is no index sibling) into + # description + name. Meaning: + # parent/name (DD3) -> parent/description (DD4) + # parent/identifier (DD3) -> parent/name (DD4) + # Only perform the mapping if the corresponding target fields exist in the + # new DD and if we don't already have a mapping for the involved paths. 
+ # use self.old_paths and self.new_paths set in _build_map + for p in self.old_paths: + # look for name children + if not p.endswith("/name"): + continue + parent = p.rsplit("/", 1)[0] + name_path = f"{parent}/name" + id_path = f"{parent}/identifier" + index_path = f"{parent}/index" + desc_path = f"{parent}/description" + new_name_path = name_path + + # If neither 'name' nor 'identifier' existed in the old DD, skip this parent + if name_path not in self.old_paths or id_path not in self.old_paths: + continue + # exclude identifier-structure (has index sibling) + if index_path in self.old_paths: + continue + + # Ensure the candidate target fields exist in the new DD + if desc_path not in self.new_paths or new_name_path not in self.new_paths: + continue + + # Map DD3 name -> DD4 description + if name_path not in self.old_to_new.path: + self._add_rename(name_path, desc_path) + + # Map DD3 identifier -> DD4 name + if id_path in self.old_to_new.path: + self._add_rename(id_path, new_name_path) + def _map_missing(self, is_new: bool, missing_paths: Set[str]): rename_map = self.new_to_old if is_new else self.old_to_new # Find all structures which have a renamed sub-item diff --git a/imas/test/test_ids_convert.py b/imas/test/test_ids_convert.py index 826a7979..55afbddc 100644 --- a/imas/test/test_ids_convert.py +++ b/imas/test/test_ids_convert.py @@ -529,3 +529,45 @@ def test_3to4_migrate_deprecated_fields(): # GH#55 del cp342.profiles_1d[0].ion[0].label cp4 = convert_ids(cp342, "4.0.0") assert cp4.profiles_1d[0].ion[0].name == "y" + + +def test_3to4_name_identifier_mapping_magnetics(): + # Create source IDS using DD 3.40.1 + factory = IDSFactory("3.40.1") + + src = factory.magnetics() + src.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS + # Populate a parent that has name + identifier (no 'index' sibling) + src.b_field_pol_probe.resize(1) + src.b_field_pol_probe[0].name = "TEST_NAME" + src.b_field_pol_probe[0].identifier = "TEST_IDENTIFIER" + + # Convert to DD 
4.0.0 + dst = convert_ids(src, "4.0.0") + + # DD3 name -> DD4 description + assert dst.b_field_pol_probe[0].description == "TEST_NAME" + + # DD3 identifier -> DD4 name + assert dst.b_field_pol_probe[0].name == "TEST_IDENTIFIER" + + +def test_4to3_name_identifier_mapping_magnetics(): + # Create source IDS using DD 4.0.0 + factory = IDSFactory("4.0.0") + + src = factory.magnetics() + src.ids_properties.homogeneous_time = IDS_TIME_MODE_HOMOGENEOUS + # Populate a parent that has description + name (no 'index' sibling) + src.b_field_pol_probe.resize(1) + src.b_field_pol_probe[0].description = "TEST_DESCRIPTION" + src.b_field_pol_probe[0].name = "TEST_NAME" + + # Convert to DD 3.40.1 + dst = convert_ids(src, "3.40.1") + + # DD4 description -> DD3 name + assert dst.b_field_pol_probe[0].name == "TEST_DESCRIPTION" + + # DD4 name -> DD3 identifier + assert dst.b_field_pol_probe[0].identifier == "TEST_NAME" From 2d4f1dece6094f3d491834e359ff6b7cd76c8d42 Mon Sep 17 00:00:00 2001 From: Deepak Mewar Date: Mon, 29 Sep 2025 09:56:20 +0200 Subject: [PATCH 48/74] resolved the lint error for code length limit Signed-off-by: Deepak Mewar --- imas/ids_convert.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/imas/ids_convert.py b/imas/ids_convert.py index 799d788b..72318a98 100644 --- a/imas/ids_convert.py +++ b/imas/ids_convert.py @@ -265,7 +265,8 @@ def get_old_path(path: str, previous_name: str) -> str: self.version_old, ) elif self._check_data_type(old_item, new_item): - # use class helper to register simple renames and reciprocal mappings + # use class helper to register simple renames and + # reciprocal mappings self._add_rename(old_path, new_path) if old_item.get("data_type") in DDVersionMap.STRUCTURE_TYPES: # Add entries for common sub-elements From 85d90378b8ae26339a5ece9bf2007ec9db9416f1 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 29 Sep 2025 14:15:14 +0200 Subject: [PATCH 49/74] Update documentation --- docs/source/multi-dd.rst | 8 ++++++++ 
imas/db_entry.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/docs/source/multi-dd.rst b/docs/source/multi-dd.rst index bb71e853..701b4e8b 100644 --- a/docs/source/multi-dd.rst +++ b/docs/source/multi-dd.rst @@ -166,6 +166,8 @@ explicit conversion mechanisms. Changed definition of ``space/coordinates_type`` in GGD grids, Yes, No Migrate obsolescent ``ids_properties/source`` to ``ids_properties/provenance``, Yes, No Convert the multiple time-bases in the ``pulse_schedule`` IDS [#ps3to4]_, Yes, No + Convert name + identifier -> description + name, Yes, Yes + Convert equilibrium ``boundary\_[secondary\_]separatrix`` to ``contour_tree`` [#contourtree]_, Yes, No .. [#rename] Quantities which have been renamed between the two DD versions. For example, the ``ec/beam`` Array of Structures in the ``pulse_schedule`` IDS, @@ -205,6 +207,12 @@ explicit conversion mechanisms. interpolation otherwise. See also: https://github.com/iterorganization/IMAS-Python/issues/21. +.. [#contourtree] Fills the `contour_tree + `__ + in the ``equilibrium`` IDS based on data in the ``boundary_separatrix`` and + ``boundary_secondary_separatrix`` structures from DD3. See also: + https://github.com/iterorganization/IMAS-Python/issues/60. + .. _`DD background`: Background information diff --git a/imas/db_entry.py b/imas/db_entry.py index 401659f7..d9ad7f3e 100644 --- a/imas/db_entry.py +++ b/imas/db_entry.py @@ -608,7 +608,7 @@ def _get( logger.warning( "On-disk data is stored in DD %s which has a different major " "version than the requested DD version (%s). IMAS-Python will " - "convert the data automatically, but this does not cover all" + "convert the data automatically, but this does not cover all " "changes. 
" "See %s/multi-dd.html#conversion-of-idss-between-dd-versions", dd_version, From 59e5e6550e608c6fa5794b96497f307e64676c0c Mon Sep 17 00:00:00 2001 From: Olivier Hoenen Date: Tue, 30 Sep 2025 13:39:58 +0200 Subject: [PATCH 50/74] Move logged message from critical to warning when imas_core is not available --- imas/backends/imas_core/imas_interface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imas/backends/imas_core/imas_interface.py b/imas/backends/imas_core/imas_interface.py index 6e463302..064508d7 100644 --- a/imas/backends/imas_core/imas_interface.py +++ b/imas/backends/imas_core/imas_interface.py @@ -31,7 +31,7 @@ has_imas = False imasdef = None lowlevel = None - logger.critical( + logger.warning( "Could not import 'imas_core': %s. Some functionality is not available.", exc, ) From 05f858aeaddefd1f41dab9f5abbd172c780b082c Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 7 Oct 2025 11:12:13 +0200 Subject: [PATCH 51/74] Remove code for AL4 from db_entry_al Lowlevel access layer version 4.x is not supported by IMAS-Python. See also #40. 
--- imas/backends/imas_core/db_entry_al.py | 106 +++++------------------ imas/backends/imas_core/mdsplus_model.py | 22 ----- imas/test/test_time_slicing.py | 7 -- 3 files changed, 20 insertions(+), 115 deletions(-) diff --git a/imas/backends/imas_core/db_entry_al.py b/imas/backends/imas_core/db_entry_al.py index b3240ebd..a42211fc 100644 --- a/imas/backends/imas_core/db_entry_al.py +++ b/imas/backends/imas_core/db_entry_al.py @@ -13,22 +13,17 @@ from imas.exception import DataEntryException, LowlevelError from imas.ids_convert import NBCPathMap, dd_version_map_from_factories from imas.ids_defs import ( - ASCII_BACKEND, CHAR_DATA, CLOSE_PULSE, CREATE_PULSE, ERASE_PULSE, FORCE_CREATE_PULSE, FORCE_OPEN_PULSE, - HDF5_BACKEND, IDS_TIME_MODE_UNKNOWN, IDS_TIME_MODES, INTEGER_DATA, - MDSPLUS_BACKEND, - MEMORY_BACKEND, OPEN_PULSE, READ_OP, - UDA_BACKEND, UNDEFINED_INTERP, UNDEFINED_TIME, WRITE_OP, @@ -40,16 +35,9 @@ from .al_context import ALContext, LazyALContext from .db_entry_helpers import delete_children, get_children, put_children from .imas_interface import LLInterfaceError, has_imas, ll_interface -from .mdsplus_model import ensure_data_dir, mdsplus_model_dir +from .mdsplus_model import mdsplus_model_dir from .uda_support import extract_idsdef, get_dd_version_from_idsdef_xml -_BACKEND_NAME = { - ASCII_BACKEND: "ascii", - HDF5_BACKEND: "hdf5", - MEMORY_BACKEND: "memory", - MDSPLUS_BACKEND: "mdsplus", - UDA_BACKEND: "uda", -} _OPEN_MODES = { "r": OPEN_PULSE, "a": FORCE_OPEN_PULSE, @@ -71,13 +59,19 @@ def require_imas_available(): class ALDBEntryImpl(DBEntryImpl): """DBEntry implementation using imas_core as a backend.""" - """Map to the expected open_pulse (AL4) / begin_dataentry_action (AL5) argument.""" + def __init__(self, uri: str, mode: int, factory: IDSFactory): + # Setup backend and lowlevel Access Layer: + backend = urlparse(uri).path.lower().lstrip("/") + self._setup_backend(backend, mode, factory) + status, ctx = ll_interface.begin_dataentry_action(uri, mode) 
+ if status != 0: + raise LowlevelError("opening/creating data entry", status) - def __init__(self, backend: str, ctx: ALContext, factory: IDSFactory): self.backend = backend - self._db_ctx = ctx + self._db_ctx = ALContext(ctx) self._ids_factory = factory self._lazy_ctx_cache: Deque[ALContext] = deque() + self._uri = uri @classmethod def from_uri(cls, uri: str, mode: str, factory: IDSFactory) -> "ALDBEntryImpl": @@ -85,7 +79,7 @@ def from_uri(cls, uri: str, mode: str, factory: IDSFactory) -> "ALDBEntryImpl": if mode not in _OPEN_MODES: modes = list(_OPEN_MODES) raise ValueError(f"Unknown mode {mode!r}, was expecting any of {modes}") - return cls._from_uri(uri, _OPEN_MODES[mode], factory) + return cls(uri, _OPEN_MODES[mode], factory) @classmethod def from_pulse_run( @@ -108,60 +102,18 @@ def from_pulse_run( data_version = data_version or factory.dd_version options = options if options else "" - if ll_interface._al_version.major >= 5: - # We need a URI for AL 5 or later, construct from legacy parameters - status, uri = ll_interface.build_uri_from_legacy_parameters( - backend_id, pulse, run, user_name, db_name, data_version, options - ) - if status != 0: - raise LowlevelError("build URI from legacy parameters", status) - - return cls._from_uri(uri, mode, factory) - - else: - # AL4 legacy support: - backend = _BACKEND_NAME.get(backend_id, "") - cls._setup_backend(backend, mode, factory, user_name, db_name, run) - - status, ctx = ll_interface.begin_pulse_action( - backend_id, pulse, run, user_name, db_name, data_version - ) - if status != 0: - raise LowlevelError("begin pulse action", status) - - status = ll_interface.open_pulse(ctx, mode, options) - if status != 0: - raise LowlevelError("opening/creating data entry", status) - - return cls(backend, ALContext(ctx), factory) - - @classmethod - def _from_uri(cls, uri: str, mode: int, factory: IDSFactory) -> "ALDBEntryImpl": - """Helper method to actually open/create the dataentry.""" - backend = 
urlparse(uri).path.lower().lstrip("/") - cls._setup_backend(backend, mode, factory) - - status, ctx = ll_interface.begin_dataentry_action(uri, mode) + # Construct URI from legacy parameters + status, uri = ll_interface.build_uri_from_legacy_parameters( + backend_id, pulse, run, user_name, db_name, data_version, options + ) if status != 0: - raise LowlevelError("opening/creating data entry", status) + raise LowlevelError("build URI from legacy parameters", status) - return cls(backend, ALContext(ctx), factory) + return cls(uri, mode, factory) @classmethod - def _setup_backend( - cls, - backend: str, - mode: int, - factory: IDSFactory, - user_name: str = "", - db_name="", - run=1, - ) -> None: - """Custom logic for preparing some backends. - - Note: user_name, db_name and run are only used for AL 4.x, they can be - omitted when using AL 5 or later. - """ + def _setup_backend(cls, backend: str, mode: int, factory: IDSFactory) -> None: + """Custom logic for preparing some backends.""" if backend == "mdsplus": # MDSplus models: if mode != OPEN_PULSE: @@ -170,22 +122,6 @@ def _setup_backend( if ids_path: os.environ["ids_path"] = ids_path - if ll_interface._al_version.major == 4: - # Ensure the data directory exists - # Note: MDSPLUS model directory only uses the major version component of - # IMAS_VERSION, so we'll take the first character of IMAS_VERSION: - version = factory.version[0] - ensure_data_dir(user_name, db_name, version, run) - - elif backend == "hdf5": - pass # nothing to set up - - elif backend == "memory": - pass # nothing to set up - - elif backend == "ascii": - pass # nothing to set up - elif backend == "uda": # Set IDSDEF_PATH to point the UDA backend to the selected DD version idsdef_path = None @@ -193,7 +129,6 @@ def _setup_backend( if factory._xml_path is not None: # Factory was constructed with an explicit XML path, point UDA to that: idsdef_path = factory._xml_path - elif "IMAS_PREFIX" in os.environ: # Check if UDA can use the IDSDef.xml stored in 
$IMAS_PREFIX/include/ idsdef_path = os.environ["IMAS_PREFIX"] + "/include/IDSDef.xml" @@ -203,7 +138,6 @@ def _setup_backend( if idsdef_path is None: # Extract XML from the DD zip and point UDA to it idsdef_path = extract_idsdef(factory.version) - os.environ["IDSDEF_PATH"] = idsdef_path logger.warning( "The UDA backend is not tested with " @@ -211,7 +145,7 @@ def _setup_backend( "Please raise any issues you find." ) - elif backend == "flexbuffers": + elif backend in ["hdf5", "memory", "ascii", "flexbuffers"]: pass # nothing to set up else: diff --git a/imas/backends/imas_core/mdsplus_model.py b/imas/backends/imas_core/mdsplus_model.py index 3c91cefb..c5f09e29 100644 --- a/imas/backends/imas_core/mdsplus_model.py +++ b/imas/backends/imas_core/mdsplus_model.py @@ -364,25 +364,3 @@ def jTraverser_jar() -> Path: return jar_path else: raise MDSPlusModelError("jTraverser.jar not found. Is MDSplus-Java available?") - - -def ensure_data_dir(user: str, tokamak: str, version: str, run: int) -> None: - """Ensure that a data dir exists with a similar algorithm that - the MDSplus backend uses to set the data path. - See also mdsplus_backend.cpp:751 (setDataEnv)""" - if user == "public": - if "IMAS_HOME" not in os.environ: - raise RuntimeError( - "Environment variable IMAS_HOME must be set to access " - "the public database." 
- ) - dbdir = Path(os.environ["IMAS_HOME"]) / "shared" / "imasdb" / tokamak / version - elif user[0] == "/": - dbdir = Path(user) / tokamak / version - else: - dbdir = Path.home() / "public" / "imasdb" / tokamak / version - - # Check subfolder based on run - assert 0 <= run <= 99_999 - index = run // 10_000 - (dbdir / str(index)).mkdir(parents=True, exist_ok=True) diff --git a/imas/test/test_time_slicing.py b/imas/test/test_time_slicing.py index 60788689..21c689f4 100644 --- a/imas/test/test_time_slicing.py +++ b/imas/test/test_time_slicing.py @@ -3,18 +3,15 @@ """ import logging -import os import numpy as np import pytest -from imas.backends.imas_core.mdsplus_model import ensure_data_dir, mdsplus_model_dir from imas.ids_defs import ( ASCII_BACKEND, CLOSEST_INTERP, IDS_TIME_MODE_HETEROGENEOUS, IDS_TIME_MODE_HOMOGENEOUS, - MDSPLUS_BACKEND, ) from imas.ids_factory import IDSFactory from imas.test.test_helpers import open_dbentry @@ -97,10 +94,6 @@ def test_hli_time_slicing_put(backend, worker_id, tmp_path, time_mode): else: pulse = int(worker_id[2:]) + 1 - # ensure presence of mdsplus model dir - if backend == MDSPLUS_BACKEND: - os.environ["ids_path"] = mdsplus_model_dir(IDSFactory()) - ensure_data_dir(str(tmp_path), "test", "3", 9999) db_entry = imas.DBEntry(backend, "test", pulse, 9999, user_name=str(tmp_path)) status, ctx = db_entry.create() if status != 0: From 82f07eb7075f47f00f1fd8586aa16a54ff356f2c Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 7 Oct 2025 11:12:31 +0200 Subject: [PATCH 52/74] Update log messages when using UDA backend --- imas/backends/imas_core/db_entry_al.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/imas/backends/imas_core/db_entry_al.py b/imas/backends/imas_core/db_entry_al.py index a42211fc..835de9cc 100644 --- a/imas/backends/imas_core/db_entry_al.py +++ b/imas/backends/imas_core/db_entry_al.py @@ -5,6 +5,7 @@ import logging import os from collections import deque +import re from 
typing import Any, Deque, List, Optional, Union from urllib.parse import urlparse @@ -73,6 +74,13 @@ def __init__(self, uri: str, mode: int, factory: IDSFactory): self._lazy_ctx_cache: Deque[ALContext] = deque() self._uri = uri + # Parse query options, mimic logic in AL-Core instead of using + # urllib.parse.parse_qs(..). See https://github.com/jholloc/simple-uri-parser + self._querydict = {} + for option in re.split("[&;?]", urlparse(self._uri).query): + name, _, value = option.partition("=") + self._querydict[name] = value + @classmethod def from_uri(cls, uri: str, mode: str, factory: IDSFactory) -> "ALDBEntryImpl": require_imas_available() @@ -139,11 +147,6 @@ def _setup_backend(cls, backend: str, mode: int, factory: IDSFactory) -> None: # Extract XML from the DD zip and point UDA to it idsdef_path = extract_idsdef(factory.version) os.environ["IDSDEF_PATH"] = idsdef_path - logger.warning( - "The UDA backend is not tested with " - "IMAS-Python and may not work properly. " - "Please raise any issues you find." - ) elif backend in ["hdf5", "memory", "ascii", "flexbuffers"]: pass # nothing to set up @@ -183,6 +186,14 @@ def get( raise RuntimeError("Database entry is not open.") if lazy and self.backend == "ascii": raise RuntimeError("Lazy loading is not supported by the ASCII backend.") + if self.backend == "uda": + # cache_mode=none doesn't work right now, so the warning won't recommend it + # See: https://jira.iter.org/browse/IMAS-5644 + if lazy and self._querydict.get("cache_mode") != "none": + logger.warning( + "The UDA backend will load all IDS data from the remote server. " + "Lazy loading with the UDA backend may therefore still be slow." 
+ ) # Mixing contexts can be problematic, ensure all lazy contexts are closed: self._clear_lazy_ctx_cache() From 898ead2eb869e3af160b2db2cc710902a48a7c13 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 7 Oct 2025 11:54:56 +0200 Subject: [PATCH 53/74] Simplify benchmark logic --- benchmarks/core_profiles.py | 103 +++++++++++++++++------------------- benchmarks/edge_profiles.py | 45 +++++++--------- benchmarks/utils.py | 32 +++-------- 3 files changed, 77 insertions(+), 103 deletions(-) diff --git a/benchmarks/core_profiles.py b/benchmarks/core_profiles.py index d7ab54cd..0bdeef8b 100644 --- a/benchmarks/core_profiles.py +++ b/benchmarks/core_profiles.py @@ -10,8 +10,6 @@ available_serializers, available_slicing_backends, create_dbentry, - factory, - hlis, ) N_SLICES = 32 @@ -26,13 +24,13 @@ def fill_slices(core_profiles, times): times: time values to fill a slice for """ core_profiles.ids_properties.homogeneous_time = 1 # HOMOGENEOUS - core_profiles.ids_properties.comment = "Generated for the IMAS-Python benchmark suite" + core_profiles.ids_properties.comment = ( + "Generated for the IMAS-Python benchmark suite" + ) core_profiles.ids_properties.creation_date = datetime.date.today().isoformat() core_profiles.code.name = "IMAS-Python ASV benchmark" core_profiles.code.version = imas.__version__ - core_profiles.code.repository = ( - "https://github.com/iterorganization/IMAS-Python" - ) + core_profiles.code.repository = "https://github.com/iterorganization/IMAS-Python" core_profiles.time = np.array(times) core_profiles.profiles_1d.resize(len(times)) @@ -50,13 +48,13 @@ def fill_slices(core_profiles, times): profiles_1d.ion.resize(len(ions)) profiles_1d.neutral.resize(len(ions)) for i, ion in enumerate(ions): - if hasattr(profiles_1d.ion[i], 'label'): + if hasattr(profiles_1d.ion[i], "label"): profiles_1d.ion[i].label = ion profiles_1d.neutral[i].label = ion - if hasattr(profiles_1d.ion[i], 'name'): + if hasattr(profiles_1d.ion[i], "name"): 
profiles_1d.ion[i].name = ion profiles_1d.neutral[i].name = ion - + # profiles_1d.ion[i].label = profiles_1d.neutral[i].label = ion profiles_1d.ion[i].z_ion = 1.0 profiles_1d.ion[i].neutral_index = profiles_1d.neutral[i].ion_index = i + 1 @@ -70,31 +68,31 @@ def fill_slices(core_profiles, times): class GetSlice: - params = [hlis, available_slicing_backends] - param_names = ["hli", "backend"] + params = [available_slicing_backends] + param_names = ["backend"] - def setup(self, hli, backend): - self.dbentry = create_dbentry(hli, backend) - core_profiles = factory[hli].core_profiles() + def setup(self, backend): + self.dbentry = create_dbentry(backend) + core_profiles = imas.IDSFactory().core_profiles() fill_slices(core_profiles, TIME) self.dbentry.put(core_profiles) - def time_get_slice(self, hli, backend): + def time_get_slice(self, backend): for t in TIME: self.dbentry.get_slice("core_profiles", t, imas.ids_defs.CLOSEST_INTERP) - def teardown(self, hli, backend): + def teardown(self, backend): if hasattr(self, "dbentry"): # imas + netCDF has no dbentry self.dbentry.close() class Get: - params = [hlis, available_backends] - param_names = ["hli", "backend"] + params = [available_backends] + param_names = ["backend"] setup = GetSlice.setup teardown = GetSlice.teardown - def time_get(self, hli, backend): + def time_get(self, backend): self.dbentry.get("core_profiles") @@ -103,8 +101,8 @@ class LazyGet: param_names = ["lazy", "backend"] def setup(self, lazy, backend): - self.dbentry = create_dbentry("imas", backend) - core_profiles = factory["imas"].core_profiles() + self.dbentry = create_dbentry(backend) + core_profiles = imas.IDSFactory().core_profiles() fill_slices(core_profiles, TIME) self.dbentry.put(core_profiles) @@ -118,75 +116,72 @@ def teardown(self, lazy, backend): class Generate: - params = [hlis] - param_names = ["hli"] + def setup(self): + self.core_profiles = imas.IDSFactory().core_profiles() - def setup(self, hli): - self.core_profiles = 
factory[hli].core_profiles() - - def time_generate(self, hli): + def time_generate(self): fill_slices(self.core_profiles, TIME) - def time_generate_slices(self, hli): + def time_generate_slices(self): for t in TIME: fill_slices(self.core_profiles, [t]) - def time_create_core_profiles(self, hli): - factory[hli].core_profiles() + def time_create_core_profiles(self): + imas.IDSFactory().core_profiles() class Put: - params = [["0", "1"], hlis, available_backends] + params = [["0", "1"], available_backends] param_names = ["disable_validate", "hli", "backend"] - def setup(self, disable_validate, hli, backend): - create_dbentry(hli, backend).close() # catch unsupported combinations - self.core_profiles = factory[hli].core_profiles() + def setup(self, disable_validate, backend): + create_dbentry(backend).close() # catch unsupported combinations + self.core_profiles = imas.IDSFactory().core_profiles() fill_slices(self.core_profiles, TIME) os.environ["IMAS_AL_DISABLE_VALIDATE"] = disable_validate - def time_put(self, disable_validate, hli, backend): - with create_dbentry(hli, backend) as dbentry: + def time_put(self, disable_validate, backend): + with create_dbentry(backend) as dbentry: dbentry.put(self.core_profiles) class PutSlice: - params = [["0", "1"], hlis, available_slicing_backends] - param_names = ["disable_validate", "hli", "backend"] + params = [["0", "1"], available_slicing_backends] + param_names = ["disable_validate", "backend"] - def setup(self, disable_validate, hli, backend): - create_dbentry(hli, backend).close() # catch unsupported combinations - self.core_profiles = factory[hli].core_profiles() + def setup(self, disable_validate, backend): + create_dbentry(backend).close() # catch unsupported combinations + self.core_profiles = imas.IDSFactory().core_profiles() os.environ["IMAS_AL_DISABLE_VALIDATE"] = disable_validate - def time_put_slice(self, disable_validate, hli, backend): - with create_dbentry(hli, backend) as dbentry: + def time_put_slice(self, 
disable_validate, backend): + with create_dbentry(backend) as dbentry: for t in TIME: fill_slices(self.core_profiles, [t]) dbentry.put_slice(self.core_profiles) class Serialize: - params = [hlis, available_serializers] - param_names = ["hli", "serializer"] + params = [available_serializers] + param_names = ["serializer"] - def setup(self, hli, serializer): - self.core_profiles = factory[hli].core_profiles() + def setup(self, serializer): + self.core_profiles = imas.IDSFactory().core_profiles() fill_slices(self.core_profiles, TIME) - def time_serialize(self, hli, serializer): + def time_serialize(self, serializer): self.core_profiles.serialize(serializer) class Deserialize: - params = [hlis, available_serializers] - param_names = ["hli", "serializer"] + params = [available_serializers] + param_names = ["serializer"] - def setup(self, hli, serializer): - self.core_profiles = factory[hli].core_profiles() + def setup(self, serializer): + self.core_profiles = imas.IDSFactory().core_profiles() fill_slices(self.core_profiles, TIME) self.data = self.core_profiles.serialize(serializer) - self.core_profiles = factory[hli].core_profiles() + self.core_profiles = imas.IDSFactory().core_profiles() - def time_deserialize(self, hli, serializer): + def time_deserialize(self, serializer): self.core_profiles.deserialize(self.data) diff --git a/benchmarks/edge_profiles.py b/benchmarks/edge_profiles.py index cb78629f..f1ec7fd7 100644 --- a/benchmarks/edge_profiles.py +++ b/benchmarks/edge_profiles.py @@ -5,7 +5,7 @@ import imas -from .utils import available_backends, create_dbentry, factory, hlis +from .utils import available_backends, create_dbentry N_POINTS = 600 # number of random R,Z points N_LINES = 1200 # number of random lines in R,Z plane @@ -27,9 +27,7 @@ def fill_ggd(edge_profiles, times): edge_profiles.ids_properties.creation_date = datetime.date.today().isoformat() edge_profiles.code.name = "IMAS-Python ASV benchmark" edge_profiles.code.version = imas.__version__ - 
edge_profiles.code.repository = ( - "https://github.com/iterorganization/IMAS-Python" - ) + edge_profiles.code.repository = "https://github.com/iterorganization/IMAS-Python" # This GGD grid is not a valid description, but it's a good stress test for the # typical access patterns that exist in GGD grids @@ -124,45 +122,42 @@ def fill_ggd(edge_profiles, times): class Get: - params = [hlis, available_backends] - param_names = ["hli", "backend"] + params = [available_backends] + param_names = ["backend"] - def setup(self, hli, backend): - self.dbentry = create_dbentry(hli, backend) - edge_profiles = factory[hli].edge_profiles() + def setup(self, backend): + self.dbentry = create_dbentry(backend) + edge_profiles = imas.IDSFactory().edge_profiles() fill_ggd(edge_profiles, TIME) self.dbentry.put(edge_profiles) - def time_get(self, hli, backend): + def time_get(self, backend): self.dbentry.get("edge_profiles") - def teardown(self, hli, backend): + def teardown(self, backend): if hasattr(self, "dbentry"): # imas + netCDF has no dbentry self.dbentry.close() class Generate: - params = [hlis] - param_names = ["hli"] - - def time_generate(self, hli): - edge_profiles = factory[hli].edge_profiles() + def time_generate(self): + edge_profiles = imas.IDSFactory().edge_profiles() fill_ggd(edge_profiles, TIME) - def time_create_edge_profiles(self, hli): - factory[hli].edge_profiles() + def time_create_edge_profiles(self): + imas.IDSFactory().edge_profiles() class Put: - params = [["0", "1"], hlis, available_backends] - param_names = ["disable_validate", "hli", "backend"] + params = [["0", "1"], available_backends] + param_names = ["disable_validate", "backend"] - def setup(self, disable_validate, hli, backend): - create_dbentry(hli, backend).close() # catch unsupported combinations - self.edge_profiles = factory[hli].edge_profiles() + def setup(self, disable_validate, backend): + create_dbentry(backend).close() # catch unsupported combinations + self.edge_profiles = 
imas.IDSFactory().edge_profiles() fill_ggd(self.edge_profiles, TIME) os.environ["IMAS_AL_DISABLE_VALIDATE"] = disable_validate - def time_put(self, disable_validate, hli, backend): - with create_dbentry(hli, backend) as dbentry: + def time_put(self, disable_validate, backend): + with create_dbentry(backend) as dbentry: dbentry.put(self.edge_profiles) diff --git a/benchmarks/utils.py b/benchmarks/utils.py index 47ae2576..3ff30fd3 100644 --- a/benchmarks/utils.py +++ b/benchmarks/utils.py @@ -1,10 +1,10 @@ -import importlib import logging import uuid from pathlib import Path import imas import imas.exception +import imas.ids_defs # Backend constants HDF5 = "HDF5" @@ -56,28 +56,12 @@ def backend_exists(backend): backend for backend in available_backends if backend not in [ASCII, NETCDF] ] -hlis = ["imas"] -DBEntry = { - "imas": imas.DBEntry, -} -factory = { - "imas": imas.IDSFactory(), -} -available_serializers = [imas.ids_defs.ASCII_SERIALIZER_PROTOCOL] - +available_serializers = [ + imas.ids_defs.ASCII_SERIALIZER_PROTOCOL, + imas.ids_defs.FLEXBUFFERS_SERIALIZER_PROTOCOL, +] -def create_dbentry(hli, backend): - if backend == NETCDF: - if hli == "imas": # check if netcdf backend is available - try: - assert ( - imas.DBEntry._select_implementation("x.nc").__name__ - == "NCDBEntryImpl" - ) - except (AttributeError, AssertionError): - raise NotImplementedError( - "This version of IMAS-Python doesn't implement netCDF." 
- ) from None - path = Path.cwd() / f"DB-{hli}-{backend}" - return DBEntry[hli](create_uri(backend, path), "w") +def create_dbentry(backend): + path = Path.cwd() / f"DB-{backend}" + return imas.DBEntry(create_uri(backend, path), "w") From 39045fd3c37cf103ad00ed6749b71b20d125f828 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 7 Oct 2025 12:04:01 +0200 Subject: [PATCH 54/74] Further remove logic for AL4 support Closes #40 --- imas/backends/imas_core/al_context.py | 5 +--- imas/backends/imas_core/imas_interface.py | 36 ++++------------------- imas/ids_toplevel.py | 25 +++++----------- imas/test/test_cli.py | 11 +------ imas/test/test_dbentry.py | 7 +---- imas/training.py | 29 ++++-------------- 6 files changed, 21 insertions(+), 92 deletions(-) diff --git a/imas/backends/imas_core/al_context.py b/imas/backends/imas_core/al_context.py index 1685e384..ede33bac 100644 --- a/imas/backends/imas_core/al_context.py +++ b/imas/backends/imas_core/al_context.py @@ -71,10 +71,7 @@ def global_action(self, path: str, rwmode: int, datapath: str = "") -> "ALContex Returns: The created context. """ - args = [self.ctx, path, rwmode] - if datapath: # AL4 compatibility: datapath arg was added in AL5 - args.append(datapath) - status, ctx = ll_interface.begin_global_action(*args) + status, ctx = ll_interface.begin_global_action(self.ctx, path, rwmode, datapath) if status != 0: raise LowlevelError("global_action", status) return ALContext(ctx) diff --git a/imas/backends/imas_core/imas_interface.py b/imas/backends/imas_core/imas_interface.py index 064508d7..8fa3963b 100644 --- a/imas/backends/imas_core/imas_interface.py +++ b/imas/backends/imas_core/imas_interface.py @@ -6,6 +6,7 @@ This module tries to abstract away most API incompatibilities between the supported Access Layer versions (for example the rename of _ual_lowlevel to _al_lowlevel). 
""" + import inspect import logging @@ -61,9 +62,6 @@ class LowlevelInterface: - If the lowlevel drops methods, we need to update the implementation fo the method to provide a proper error message or a workaround. - - Renamed methods (if this will ever happen) are perhaps best handled in the - ``__init__`` by providing a mapping of new to old name, so far this was only - relevant for the ``ual_`` to ``al_`` rename. """ def __init__(self, lowlevel): @@ -84,23 +82,13 @@ def __init__(self, lowlevel): # Introduced after 5.0.0 self._al_version_str = self._lowlevel.get_al_version() self._al_version = Version(self._al_version_str) - elif hasattr(lowlevel, "al_read_data"): - # In AL 5.0.0, all `ual_` methods were renamed to `al_` + else: self._al_version_str = "5.0.0" self._al_version = Version(self._al_version_str) - else: - # AL 4, don't try to determine in more detail - self._al_version_str = "4.?.?" - self._al_version = Version("4") - public_methods.remove("close_pulse") - if self._al_version < Version("5"): - method_prefix = "ual_" - else: - method_prefix = "al_" # Overwrite all of our methods that are implemented in the lowlevel for method in public_methods: - ll_method = getattr(lowlevel, method_prefix + method, None) + ll_method = getattr(lowlevel, f"al_{method}", None) if ll_method is not None: setattr(self, method, ll_method) @@ -115,24 +103,10 @@ def _minimal_version(self, minversion): f"but the current version is {self._al_version_str}" ) - # AL 4 lowlevel API - - def begin_pulse_action(self, backendID, shot, run, user, tokamak, version): - # Removed in AL5, compatibility handled in DBEntry - raise LLInterfaceError(f"{__name__} is not implemented") - - def open_pulse(self, pulseCtx, mode, options): - # Removed in AL5, compatibility handled in DBEntry - raise LLInterfaceError(f"{__name__} is not implemented") - def close_pulse(self, pulseCtx, mode): - # options argument (mandatory in AL4) was removed in AL5 - # This method is overwritten in AL5, but for AL4 we 
need to do this: - return lowlevel.ual_close_pulse(pulseCtx, mode, None) + raise LLInterfaceError(f"{__name__} is not implemented") - def begin_global_action(self, pulseCtx, dataobjectname, rwmode, datapath=""): - # datapath was added in AL5 to support more efficient partial_get in the - # UDA backend. TODO: figure out if this is useful for lazy loading. + def begin_global_action(self, pulseCtx, dataobjectname, rwmode, datapath): raise LLInterfaceError(f"{__name__} is not implemented") def begin_slice_action(self, pulseCtx, dataobjectname, rwmode, time, interpmode): diff --git a/imas/ids_toplevel.py b/imas/ids_toplevel.py index 15ae0970..947bf72f 100644 --- a/imas/ids_toplevel.py +++ b/imas/ids_toplevel.py @@ -1,7 +1,6 @@ # This file is part of IMAS-Python. # You should have received the IMAS-Python LICENSE file with this project. -"""Represents a Top-level IDS (like ``core_profiles``, ``equilibrium``, etc) -""" +"""Represents a Top-level IDS (like ``core_profiles``, ``equilibrium``, etc)""" import logging import os @@ -12,11 +11,10 @@ import numpy import imas -from imas.backends.imas_core.imas_interface import ll_interface, lowlevel +from imas.backends.imas_core.imas_interface import lowlevel from imas.exception import ValidationError from imas.ids_base import IDSDoc from imas.ids_defs import ( - ASCII_BACKEND, ASCII_SERIALIZER_PROTOCOL, CHAR_DATA, DEFAULT_SERIALIZER_PROTOCOL, @@ -47,19 +45,12 @@ def _serializer_tmpdir() -> str: def _create_serialization_dbentry(filepath: str, dd_version: str) -> "DBEntry": """Create a temporary DBEntry for use in the ASCII serialization protocol.""" - if ll_interface._al_version.major == 4: # AL4 compatibility - dbentry = imas.DBEntry( - ASCII_BACKEND, "serialize", 1, 1, "serialize", dd_version=dd_version - ) - dbentry.create(options=f"-fullpath {filepath}") - return dbentry - else: # AL5 - path = Path(filepath) - return imas.DBEntry( - f"imas:ascii?path={path.parent};filename={path.name}", - "w", - dd_version=dd_version, - ) + 
path = Path(filepath) + return imas.DBEntry( + f"imas:ascii?path={path.parent};filename={path.name}", + "w", + dd_version=dd_version, + ) class IDSToplevel(IDSStructure): diff --git a/imas/test/test_cli.py b/imas/test/test_cli.py index 6ff09c23..c6ddbc0e 100644 --- a/imas/test/test_cli.py +++ b/imas/test/test_cli.py @@ -2,10 +2,7 @@ import pytest from click.testing import CliRunner -from packaging.version import Version -from imas.backends.imas_core.imas_interface import has_imas -from imas.backends.imas_core.imas_interface import ll_interface from imas.command.cli import print_version from imas.command.db_analysis import analyze_db, process_db_analysis from imas.db_entry import DBEntry @@ -20,13 +17,7 @@ def test_imas_version(): @pytest.mark.cli -@pytest.mark.skipif( - not has_imas or ll_interface._al_version < Version("5.0"), - reason="Needs AL >= 5 AND Requires IMAS Core.", -) -def test_db_analysis( - tmp_path, -): +def test_db_analysis(tmp_path, requires_imas): # This only tests the happy flow, error handling is not tested db_path = tmp_path / "test_db_analysis" with DBEntry(f"imas:hdf5?path={db_path}", "w") as entry: diff --git a/imas/test/test_dbentry.py b/imas/test/test_dbentry.py index a1380101..e13d82a4 100644 --- a/imas/test/test_dbentry.py +++ b/imas/test/test_dbentry.py @@ -2,7 +2,6 @@ import imas import imas.ids_defs -from imas.backends.imas_core.imas_interface import has_imas, ll_interface from imas.exception import UnknownDDVersion from imas.test.test_helpers import compare_children, open_dbentry @@ -23,11 +22,7 @@ def test_dbentry_contextmanager(requires_imas): assert entry2._dbe_impl is None -@pytest.mark.skipif( - not has_imas or ll_interface._al_version.major < 5, - reason="URI API not available", -) -def test_dbentry_contextmanager_uri(tmp_path): +def test_dbentry_contextmanager_uri(tmp_path, requires_imas): entry = imas.DBEntry(f"imas:ascii?path={tmp_path}/testdb", "w") ids = entry.factory.core_profiles() ids.ids_properties.homogeneous_time = 0 
diff --git a/imas/training.py b/imas/training.py index 9c4df602..93e0c006 100644 --- a/imas/training.py +++ b/imas/training.py @@ -1,9 +1,7 @@ # This file is part of IMAS-Python. # You should have received the IMAS-Python LICENSE file with this project. -"""Functions that are useful for the IMAS-Python training courses. -""" +"""Functions that are useful for the IMAS-Python training courses.""" -import importlib from unittest.mock import patch try: @@ -12,34 +10,17 @@ from importlib_resources import files import imas -from imas.backends.imas_core.imas_interface import ll_interface -def _initialize_training_db(DBEntry_cls): +def get_training_db_entry() -> imas.DBEntry: + """Open and return an ``imas.DBEntry`` pointing to the training data.""" assets_path = files(imas) / "assets/" - pulse, run, user, database = 134173, 106, "public", "ITER" - if ll_interface._al_version.major == 4: - entry = DBEntry_cls(imas.ids_defs.ASCII_BACKEND, database, pulse, run, user) - entry.open(options=f"-prefix {assets_path}/") - else: - entry = DBEntry_cls(f"imas:ascii?path={assets_path}", "r") + entry = imas.DBEntry(f"imas:ascii?path={assets_path}", "r") - output_entry = DBEntry_cls(imas.ids_defs.MEMORY_BACKEND, database, pulse, run) - output_entry.create() + output_entry = imas.DBEntry("imas:memory?path=/", "w") for ids_name in ["core_profiles", "equilibrium"]: ids = entry.get(ids_name) with patch.dict("os.environ", {"IMAS_AL_DISABLE_VALIDATE": "1"}): output_entry.put(ids) entry.close() return output_entry - - -def get_training_db_entry() -> imas.DBEntry: - """Open and return an ``imas.DBEntry`` pointing to the training data.""" - return _initialize_training_db(imas.DBEntry) - - -def get_training_imas_db_entry(): - """Open and return an ``imas.DBEntry`` pointing to the training data.""" - imas = importlib.import_module("imas") - return _initialize_training_db(imas.DBEntry) From aa88658b4c665da458262812bd6c8ad8fdce8ffe Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 7 Oct 
2025 12:16:22 +0200 Subject: [PATCH 55/74] Remove `get_training_imas_db_entry` reference from documentation --- docs/source/courses/basic/analyze.rst | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/docs/source/courses/basic/analyze.rst b/docs/source/courses/basic/analyze.rst index 21a7c68b..97dd1ea3 100644 --- a/docs/source/courses/basic/analyze.rst +++ b/docs/source/courses/basic/analyze.rst @@ -25,15 +25,10 @@ can use the data. .. hint:: Use the ASCII data supplied with IMAS-Python for all exercises. It contains two IDSs (``equilibrium`` and ``core_profiles``) filled with data from three - time slices of ITER reference data. Two convenience methods are available in the - :mod:`imas.training` module to open the DBEntry for this training data. - - 1. :meth:`imas.training.get_training_db_entry()` returns an opened - ``imas.DBEntry`` object. Use this method if you want to use the IMAS-Python - interface. - 2. :meth:`imas.training.get_training_imas_db_entry()` returns an opened - ``imas.DBEntry`` object. Use this method if you want to use the Python Access - Layer interface. + time slices of ITER reference data. A convenience method is available in the + :mod:`imas.training` module to open the DBEntry for this training data: + :meth:`imas.training.get_training_db_entry()` returns an opened + ``imas.DBEntry`` object. 
Exercise 1 '''''''''' From 3230a0ded32f407b8b85c8a72a5b449f7d27075d Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 13 Oct 2025 11:47:21 +0200 Subject: [PATCH 56/74] Update UDA warnings and docs --- docs/source/lazy_loading.rst | 11 +++++++- imas/backends/imas_core/db_entry_al.py | 35 ++++++++++++++++++++------ 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/docs/source/lazy_loading.rst b/docs/source/lazy_loading.rst index 9dda19e0..745df066 100644 --- a/docs/source/lazy_loading.rst +++ b/docs/source/lazy_loading.rst @@ -90,4 +90,13 @@ Lazy loading of data may speed up your programs, but also comes with some limita more efficient to do a full :code:`get()` or :code:`get_slice()` when you intend to use most of the data stored in an IDS. 5. When using IMAS-Python with remote data access (i.e. the UDA backend), a full - :code:`get()` or :code:`get_slice()` is more efficient than lazy loading. + :code:`get()` or :code:`get_slice()` may be more efficient than using lazy loading. + + It is recommended to add the parameter ``;cache_mode=none`` [#cache_mode_none]_ to + the end of a UDA IMAS URI when using lazy loading: otherwise the UDA backend will + still load the full IDS from the remote server. + + +.. [#cache_mode_none] The option ``cache_mode=none`` requires IMAS Core version 5.5.1 or + newer, and a remote UDA server with `IMAS UDA-Plugins + `__ version 1.7.0 or newer. 
diff --git a/imas/backends/imas_core/db_entry_al.py b/imas/backends/imas_core/db_entry_al.py index 835de9cc..2500bd3c 100644 --- a/imas/backends/imas_core/db_entry_al.py +++ b/imas/backends/imas_core/db_entry_al.py @@ -9,6 +9,8 @@ from typing import Any, Deque, List, Optional, Union from urllib.parse import urlparse +from packaging.version import Version + from imas.backends.db_entry_impl import GetSampleParameters, GetSliceParameters from imas.db_entry import DBEntryImpl from imas.exception import DataEntryException, LowlevelError @@ -187,13 +189,7 @@ def get( if lazy and self.backend == "ascii": raise RuntimeError("Lazy loading is not supported by the ASCII backend.") if self.backend == "uda": - # cache_mode=none doesn't work right now, so the warning won't recommend it - # See: https://jira.iter.org/browse/IMAS-5644 - if lazy and self._querydict.get("cache_mode") != "none": - logger.warning( - "The UDA backend will load all IDS data from the remote server. " - "Lazy loading with the UDA backend may therefore still be slow." - ) + self._check_uda_warnings(lazy) # Mixing contexts can be problematic, ensure all lazy contexts are closed: self._clear_lazy_ctx_cache() @@ -350,3 +346,28 @@ def list_all_occurrences(self, ids_name: str) -> List[int]: "Access Layer 5.1 or newer is required." ) from None return occurrence_list + + def _check_uda_warnings(self, lazy: bool) -> None: + """Various checks / warnings for the UDA backend.""" + cache_mode = self._querydict.get("cache_mode") + if lazy and cache_mode != "none": + # cache_mode=none requires imas core 5.5.1 or newer, and a recent enough UDA + # server plugin (which we cannot check...) + cache_mode_hint = "" + if ll_interface._al_version >= Version("5.5.1"): + cache_mode_hint = ( + "\nYou may add the parameter ';cache_mode=none' to the IMAS URI " + "to avoid loading all of the data from the remote server." + ) + logger.warning( + "The UDA backend will load all IDS data from the remote server. 
" + "Lazy loading with the UDA backend may therefore still be slow.%s", + cache_mode_hint, + ) + + if cache_mode == "none" and ll_interface._al_version < Version("5.5.1"): + logger.warning( + "UDA option 'cache_mode=none' may not work correctly with " + "IMAS Core version %s.", + ll_interface._al_version, + ) From dc5012726152a32805e7b8cfcfbfa32d234b5bb9 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 29 Oct 2025 10:51:19 +0100 Subject: [PATCH 57/74] Add warning when a user implicitly converts an IDS between major versions on put() And fix an example in the multi-DD documentation. --- docs/source/multi-dd.rst | 2 +- imas/backends/imas_core/db_entry_al.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/docs/source/multi-dd.rst b/docs/source/multi-dd.rst index 701b4e8b..a115ce27 100644 --- a/docs/source/multi-dd.rst +++ b/docs/source/multi-dd.rst @@ -23,7 +23,7 @@ example: factory_3_32_0 = imas.IDSFactory("3.32.0") # Use DD version 3.32.0 # Will write IDSs to the backend in DD version 3.32.0 - dbentry = imas.DBEntry(imas.ids_defs.HDF5_BACKEND, "TEST", 10, 2, version="3.32.0") + dbentry = imas.DBEntry("imas:hdf5?path=dd3.32.0-output/", "w", dd_version="3.32.0") dbentry.create() ..
seealso:: :ref:`multi-dd training` diff --git a/imas/backends/imas_core/db_entry_al.py b/imas/backends/imas_core/db_entry_al.py index 2500bd3c..0336179f 100644 --- a/imas/backends/imas_core/db_entry_al.py +++ b/imas/backends/imas_core/db_entry_al.py @@ -11,6 +11,7 @@ from packaging.version import Version +import imas from imas.backends.db_entry_impl import GetSampleParameters, GetSliceParameters from imas.db_entry import DBEntryImpl from imas.exception import DataEntryException, LowlevelError @@ -280,6 +281,16 @@ def put(self, ids: IDSToplevel, occurrence: int, is_slice: bool) -> None: # Create a version conversion map, if needed nbc_map = None if ids._version != self._ids_factory._version: + if ids._version.split(".")[0] != self._ids_factory._version.split(".")[0]: + logger.warning( + "Provided IDS uses DD %s which has a different major version than " + "the Data Entry (%s). IMAS-Python will convert the data " + "automatically, but this does not cover all changes. " + "See %s/multi-dd.html#conversion-of-idss-between-dd-versions", + ids._version, + self._ids_factory._version, + imas.PUBLISHED_DOCUMENTATION_ROOT, + ) ddmap, source_is_older = dd_version_map_from_factories( ids_name, ids._parent, self._ids_factory ) From deed79d754cbdcb077c4c0d1ce1f020e0ecf7e80 Mon Sep 17 00:00:00 2001 From: Olivier Hoenen Date: Mon, 3 Nov 2025 08:21:24 +0100 Subject: [PATCH 58/74] Fix min_version_guard when AL-Core is not present --- imas/command/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imas/command/helpers.py b/imas/command/helpers.py index 8c664306..cded9ef1 100644 --- a/imas/command/helpers.py +++ b/imas/command/helpers.py @@ -36,7 +36,7 @@ def min_version_guard(al_version: Version): al_version: Minimum imas_core version required for this command. 
""" used_version = ll_interface._al_version - if used_version >= al_version: + if used_version and used_version >= al_version: return click.echo( f"This command requires at least version {al_version} of the Access Layer." From f84445ea5d318ad164a8b4492413c2b179a2e261 Mon Sep 17 00:00:00 2001 From: Olivier Hoenen Date: Mon, 3 Nov 2025 08:23:40 +0100 Subject: [PATCH 59/74] Remove guard as IMAS-Python dropped the AL4 support --- imas/command/cli.py | 1 - 1 file changed, 1 deletion(-) diff --git a/imas/command/cli.py b/imas/command/cli.py index 5e18d008..a270d834 100644 --- a/imas/command/cli.py +++ b/imas/command/cli.py @@ -102,7 +102,6 @@ def print_ids(uri, ids, occurrence, print_all): ids Name of the IDS to print (e.g. "core_profiles"). occurrence Which occurrence to print (defaults to 0). """ - min_version_guard(Version("5.0")) setup_rich_log_handler(False) with DBEntry(uri, "r") as dbentry: From 8f1db5f1572188d3e14ab024cc989d8fbcba5127 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 13 Nov 2025 09:26:38 +0100 Subject: [PATCH 60/74] Disable implicit conversion between major versions of the DD Implicit conversion between major versions of the DD is almost always giving incorrect results, so it's better to disallow this instead of emitting warnings (which people usually don't read). This commit changes the warnings that were previously emitted into runtime errors. The errors refer to the (updated) documentation, which provides an example that does work correctly. --- docs/source/multi-dd.rst | 64 ++++++++++++++++++++++++++ imas/backends/imas_core/db_entry_al.py | 15 +++--- imas/db_entry.py | 16 +++---- 3 files changed, 78 insertions(+), 17 deletions(-) diff --git a/docs/source/multi-dd.rst b/docs/source/multi-dd.rst index a115ce27..ae1175fd 100644 --- a/docs/source/multi-dd.rst +++ b/docs/source/multi-dd.rst @@ -213,6 +213,70 @@ explicit conversion mechanisms. ``boundary_secondary_separatrix`` structures from DD3. 
See also: https://github.com/iterorganization/IMAS-Python/issues/60. + +.. _`Loading IDSs from a different major version`: + +Loading IDSs from a different major version +------------------------------------------- + +If you try to load an IDS that was stored in a different major version of the DD than +you are using, IMAS-Python will raise a runtime error, for example: + +.. code-block:: text + + On-disk data is stored in DD 3.39.1 which has a different major version than the + requested DD version (4.0.0). IMAS-Python will not automatically convert this + data for you. + +You need to explicitly convert the data, which you can do as follows: + +.. code-block:: python + + # Opened data entry + entry = imas.DBEntry(...) + + # A plain get, or get_slice will raise a RuntimeError when the data is stored in + # a different major version of the DD: + # entry.get("equilibrium") + + # So instead, we'll load the IDS in the DD version it is stored on disk + tmp_eq = entry.get("equilibrium", autoconvert=False) + # And explicitly convert it to the target version + equilibrium = imas.convert_ids(tmp_eq, entry.dd_version) + + +.. _`Storing IDSs with a different major version`: + +Storing IDSs with a different major version +------------------------------------------- + +If you try to put an IDS that was created for a different major version of the DD than +the Data Entry you want to store it in, IMAS-Python will raise a runtime error, for example: + +.. code-block:: text + + Provided IDS uses DD 3.42.2 which has a different major version than the Data + Entry (4.0.0). IMAS-Python will not automatically convert this data for you. + +You need to explicitly convert the data, which you can do as follows: + +.. code-block:: python + + # IDS with data, in DD 3.42.2 + equilibrium = imas.IDSFactory("3.42.2").equilibrium() + ...
+ + # Data Entry uses DD 4.0.0 + with imas.DBEntry(uri, "w", dd_version="4.0.0") as entry: + # This put would raise a runtime error, because the major version of the IDS + # and the DBEntry don't match: + # entry.put(equilibrium) + + # So instead, we'll explicitly convert the IDS and put that one + entry.put(imas.convert_ids(equilibrium, entry.dd_version)) + + + .. _`DD background`: Background information diff --git a/imas/backends/imas_core/db_entry_al.py b/imas/backends/imas_core/db_entry_al.py index 0336179f..dad5019b 100644 --- a/imas/backends/imas_core/db_entry_al.py +++ b/imas/backends/imas_core/db_entry_al.py @@ -282,14 +282,13 @@ def put(self, ids: IDSToplevel, occurrence: int, is_slice: bool) -> None: nbc_map = None if ids._version != self._ids_factory._version: if ids._version.split(".")[0] != self._ids_factory._version.split(".")[0]: - logger.warning( - "Provided IDS uses DD %s which has a different major version than " - "the Data Entry (%s). IMAS-Python will convert the data " - "automatically, but this does not cover all changes. " - "See %s/multi-dd.html#conversion-of-idss-between-dd-versions", - ids._version, - self._ids_factory._version, - imas.PUBLISHED_DOCUMENTATION_ROOT, + raise RuntimeError( + f"Provided IDS uses DD {ids._version} which has a different major " + f"version than the Data Entry ({self._ids_factory._version}). " + "IMAS-Python will not automatically convert this data for you." 
+ " See the documentation for more details and fixes: " + f"{imas.PUBLISHED_DOCUMENTATION_ROOT}" + "/multi-dd.html#storing-idss-with-a-different-major-version" ) ddmap, source_is_older = dd_version_map_from_factories( ids_name, ids._parent, self._ids_factory diff --git a/imas/db_entry.py b/imas/db_entry.py index d9ad7f3e..471a50ad 100644 --- a/imas/db_entry.py +++ b/imas/db_entry.py @@ -605,15 +605,13 @@ def _get( nbc_map = None if dd_version and dd_version != destination._dd_version: if dd_version.split(".")[0] != destination._dd_version.split(".")[0]: - logger.warning( - "On-disk data is stored in DD %s which has a different major " - "version than the requested DD version (%s). IMAS-Python will " - "convert the data automatically, but this does not cover all " - "changes. " - "See %s/multi-dd.html#conversion-of-idss-between-dd-versions", - dd_version, - destination._dd_version, - imas.PUBLISHED_DOCUMENTATION_ROOT, + raise RuntimeError( + f"On-disk data is stored in DD {dd_version} which has a different " + "major version than the requested DD version " + f"({destination._dd_version}). IMAS-Python will not automatically " + "convert this data for you. 
See the documentation for more " + f"details and fixes: {imas.PUBLISHED_DOCUMENTATION_ROOT}" + "/multi-dd.html#loading-idss-from-a-different-major-version" ) ddmap, source_is_older = dd_version_map_from_factories( ids_name, IDSFactory(version=dd_version), self._ids_factory From d15fe1a5924da3b24754b7a01133e7a589950573 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 13 Nov 2025 09:55:10 +0100 Subject: [PATCH 61/74] Fix failing test cases --- imas/test/test_lazy_loading.py | 6 ++++-- imas/training.py | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/imas/test/test_lazy_loading.py b/imas/test/test_lazy_loading.py index ff241016..4a7c65ca 100644 --- a/imas/test/test_lazy_loading.py +++ b/imas/test/test_lazy_loading.py @@ -224,10 +224,12 @@ def test_lazy_load_with_new_structure(requires_imas): eq.time_slice.resize(1) dbentry.put(eq) - entry2 = DBEntry(MEMORY_BACKEND, "ITER", 1, 1, data_version="3", dd_version="4.0.0") + entry2 = DBEntry( + MEMORY_BACKEND, "ITER", 1, 1, data_version="3", dd_version="3.39.0" + ) entry2.open() lazy_eq = entry2.get("equilibrium", lazy=True) - assert not lazy_eq.time_slice[0].boundary.dr_dz_zero_point.r.has_value + assert not lazy_eq.time_slice[0].boundary_separatrix.dr_dz_zero_point.r.has_value def test_lazy_load_multiple_ids(backend, worker_id, tmp_path): diff --git a/imas/training.py b/imas/training.py index 93e0c006..6effcc5b 100644 --- a/imas/training.py +++ b/imas/training.py @@ -19,8 +19,8 @@ def get_training_db_entry() -> imas.DBEntry: output_entry = imas.DBEntry("imas:memory?path=/", "w") for ids_name in ["core_profiles", "equilibrium"]: - ids = entry.get(ids_name) + ids = entry.get(ids_name, autoconvert=False) with patch.dict("os.environ", {"IMAS_AL_DISABLE_VALIDATE": "1"}): - output_entry.put(ids) + output_entry.put(imas.convert_ids(ids, output_entry.dd_version)) entry.close() return output_entry From 1e268c01dbb0d7e39988a688ae9bd3baede761a5 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: 
Mon, 17 Nov 2025 14:36:59 +0100 Subject: [PATCH 62/74] fix issues in coordinate validation --- imas/test/test_helpers.py | 125 ++++++++++++++++++++++++++++++-------- 1 file changed, 101 insertions(+), 24 deletions(-) diff --git a/imas/test/test_helpers.py b/imas/test/test_helpers.py index f398d03f..6bb365ef 100644 --- a/imas/test/test_helpers.py +++ b/imas/test/test_helpers.py @@ -117,27 +117,71 @@ def maybe_set_random_value( primitive.value = random_data(primitive.metadata.data_type, ndim) return + for dim, same_as in enumerate(primitive.metadata.coordinates_same_as): + if same_as.references: + try: + ref_elem = same_as.references[0].goto(primitive) + if len(ref_elem.shape) <= dim or ref_elem.shape[dim] == 0: + return + except (ValueError, AttributeError, IndexError, RuntimeError): + return + + if primitive.metadata.name.endswith("_error_upper"): + name = primitive.metadata.name[: -len("_error_upper")] + try: + data = primitive._parent[name] + except (KeyError, AttributeError): + return + if ( + not data.has_value + or len(data.shape) == 0 + or any(s == 0 for s in data.shape) + ): + return + if any( + same_as.references for same_as in primitive.metadata.coordinates_same_as + ): + return + elif primitive.metadata.name.endswith("_error_lower"): + name = primitive.metadata.name[: -len("_error_lower")] + "_error_upper" + try: + data = primitive._parent[name] + except (KeyError, AttributeError): + return + if ( + not data.has_value + or len(data.shape) == 0 + or any(s == 0 for s in data.shape) + ): + return + if any( + same_as.references for same_as in primitive.metadata.coordinates_same_as + ): + return + shape = [] for dim, coordinate in enumerate(primitive.metadata.coordinates): same_as = primitive.metadata.coordinates_same_as[dim] - if not coordinate.has_validation and not same_as.has_validation: - if primitive.metadata.name.endswith("_error_upper"): - # _error_upper should only be filled when is - name = primitive.metadata.name[: -len("_error_upper")] - data = 
primitive._parent[name] - if not data.has_value: - return - size = data.shape[dim] - elif primitive.metadata.name.endswith("_error_lower"): - # _error_lower should only be filled when _error_upper is - name = primitive.metadata.name[: -len("_error_lower")] + "_error_upper" - data = primitive._parent[name] - if not data.has_value: - return - size = data.shape[dim] - else: - # we can independently choose a size for this dimension: - size = random.randint(1, 6) + + if primitive.metadata.name.endswith("_error_upper"): + name = primitive.metadata.name[: -len("_error_upper")] + data = primitive._parent[name] + if dim >= len(data.shape): + return + size = data.shape[dim] + if size == 0: + return + elif primitive.metadata.name.endswith("_error_lower"): + name = primitive.metadata.name[: -len("_error_lower")] + "_error_upper" + data = primitive._parent[name] + if dim >= len(data.shape): + return + size = data.shape[dim] + if size == 0: + return + elif not coordinate.has_validation and not same_as.has_validation: + # we can independently choose a size for this dimension: + size = random.randint(1, 6) elif coordinate.references or same_as.references: try: if coordinate.references: @@ -147,8 +191,8 @@ def maybe_set_random_value( coordinate_element = filled_refs[0] if filled_refs else refs[0] else: coordinate_element = same_as.references[0].goto(primitive) - except (ValueError, AttributeError): - # Ignore invalid coordinate specs + except (ValueError, AttributeError, IndexError): + # Ignore invalid coordinate specs or empty array references coordinate_element = np.ones((1,) * 6) if len(coordinate_element) == 0: @@ -269,10 +313,43 @@ def fill_consistent( elif any(len(coordinate.references) > 1 for coordinate in coordinates): exclusive_coordinates.append(child) else: - try: - maybe_set_random_value(child, leave_empty, skip_complex) - except (RuntimeError, ValueError): - pass + same_as_skip = False + for dim, same_as in enumerate(child.metadata.coordinates_same_as): + if 
same_as.references: + try: + ref_elem = same_as.references[0].goto(child) + if len(ref_elem.shape) <= dim or ref_elem.shape[dim] == 0: + same_as_skip = True + break + except (ValueError, AttributeError, IndexError, RuntimeError): + same_as_skip = True + break + + error_skip = False + if child.metadata.name.endswith("_error_upper"): + name = child.metadata.name[: -len("_error_upper")] + data = child._parent[name] + if ( + not data.has_value + or len(data.shape) == 0 + or any(s == 0 for s in data.shape) + ): + error_skip = True + elif child.metadata.name.endswith("_error_lower"): + name = child.metadata.name[: -len("_error_lower")] + "_error_upper" + data = child._parent[name] + if ( + not data.has_value + or len(data.shape) == 0 + or any(s == 0 for s in data.shape) + ): + error_skip = True + + if not same_as_skip and not error_skip: + try: + maybe_set_random_value(child, leave_empty, skip_complex) + except (RuntimeError, ValueError): + pass if isinstance(structure, IDSToplevel): # handle exclusive_coordinates From b7bbd8c10dfe993cab7f01d2b045d620362d8d56 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Mon, 17 Nov 2025 15:48:38 +0100 Subject: [PATCH 63/74] first fill the data and check later --- imas/test/test_helpers.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/imas/test/test_helpers.py b/imas/test/test_helpers.py index 6bb365ef..e8bd6702 100644 --- a/imas/test/test_helpers.py +++ b/imas/test/test_helpers.py @@ -328,21 +328,23 @@ def fill_consistent( error_skip = False if child.metadata.name.endswith("_error_upper"): name = child.metadata.name[: -len("_error_upper")] - data = child._parent[name] - if ( - not data.has_value - or len(data.shape) == 0 - or any(s == 0 for s in data.shape) - ): + try: + data = child._parent[name] + if not data.has_value: + maybe_set_random_value(data, 0.0, skip_complex) + if not data.has_value or len(data.shape) == 0 or any(s == 0 for s in data.shape): + error_skip = True + 
except (KeyError, AttributeError, RuntimeError, ValueError): error_skip = True elif child.metadata.name.endswith("_error_lower"): name = child.metadata.name[: -len("_error_lower")] + "_error_upper" - data = child._parent[name] - if ( - not data.has_value - or len(data.shape) == 0 - or any(s == 0 for s in data.shape) - ): + try: + data = child._parent[name] + if not data.has_value: + maybe_set_random_value(data, 0.0, skip_complex) + if not data.has_value or len(data.shape) == 0 or any(s == 0 for s in data.shape): + error_skip = True + except (KeyError, AttributeError, RuntimeError, ValueError): error_skip = True if not same_as_skip and not error_skip: From 36d01350159fe90fce4dc4000c8efc3bd3c028ac Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Tue, 18 Nov 2025 09:22:26 +0100 Subject: [PATCH 64/74] fixed black formatting --- imas/test/test_helpers.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/imas/test/test_helpers.py b/imas/test/test_helpers.py index e8bd6702..ae8784aa 100644 --- a/imas/test/test_helpers.py +++ b/imas/test/test_helpers.py @@ -332,7 +332,11 @@ def fill_consistent( data = child._parent[name] if not data.has_value: maybe_set_random_value(data, 0.0, skip_complex) - if not data.has_value or len(data.shape) == 0 or any(s == 0 for s in data.shape): + if ( + not data.has_value + or len(data.shape) == 0 + or any(s == 0 for s in data.shape) + ): error_skip = True except (KeyError, AttributeError, RuntimeError, ValueError): error_skip = True @@ -342,7 +346,11 @@ def fill_consistent( data = child._parent[name] if not data.has_value: maybe_set_random_value(data, 0.0, skip_complex) - if not data.has_value or len(data.shape) == 0 or any(s == 0 for s in data.shape): + if ( + not data.has_value + or len(data.shape) == 0 + or any(s == 0 for s in data.shape) + ): error_skip = True except (KeyError, AttributeError, RuntimeError, ValueError): error_skip = True From af759c32de73ccfe79b3f61cebad7fc44d9dc520 Mon Sep 17 00:00:00 
2001 From: prasad-sawantdesai Date: Thu, 20 Nov 2025 10:31:45 +0100 Subject: [PATCH 65/74] fix comments suggested by Maarten --- imas/test/test_helpers.py | 146 ++++++++++---------------------------- 1 file changed, 39 insertions(+), 107 deletions(-) diff --git a/imas/test/test_helpers.py b/imas/test/test_helpers.py index ae8784aa..a27c4ec9 100644 --- a/imas/test/test_helpers.py +++ b/imas/test/test_helpers.py @@ -126,12 +126,10 @@ def maybe_set_random_value( except (ValueError, AttributeError, IndexError, RuntimeError): return + shape = [] if primitive.metadata.name.endswith("_error_upper"): name = primitive.metadata.name[: -len("_error_upper")] - try: - data = primitive._parent[name] - except (KeyError, AttributeError): - return + data = primitive._parent[name] if ( not data.has_value or len(data.shape) == 0 @@ -142,12 +140,10 @@ def maybe_set_random_value( same_as.references for same_as in primitive.metadata.coordinates_same_as ): return + shape = list(data.shape) elif primitive.metadata.name.endswith("_error_lower"): name = primitive.metadata.name[: -len("_error_lower")] + "_error_upper" - try: - data = primitive._parent[name] - except (KeyError, AttributeError): - return + data = primitive._parent[name] if ( not data.has_value or len(data.shape) == 0 @@ -158,60 +154,39 @@ def maybe_set_random_value( same_as.references for same_as in primitive.metadata.coordinates_same_as ): return + shape = list(data.shape) + else: + for dim, coordinate in enumerate(primitive.metadata.coordinates): + same_as = primitive.metadata.coordinates_same_as[dim] - shape = [] - for dim, coordinate in enumerate(primitive.metadata.coordinates): - same_as = primitive.metadata.coordinates_same_as[dim] - - if primitive.metadata.name.endswith("_error_upper"): - name = primitive.metadata.name[: -len("_error_upper")] - data = primitive._parent[name] - if dim >= len(data.shape): - return - size = data.shape[dim] - if size == 0: - return - elif primitive.metadata.name.endswith("_error_lower"): - 
name = primitive.metadata.name[: -len("_error_lower")] + "_error_upper" - data = primitive._parent[name] - if dim >= len(data.shape): - return - size = data.shape[dim] + if not coordinate.has_validation and not same_as.has_validation: + # we can independently choose a size for this dimension: + size = random.randint(1, 6) + elif coordinate.references or same_as.references: + try: + if coordinate.references: + refs = [ref.goto(primitive) for ref in coordinate.references] + filled_refs = [ref for ref in refs if len(ref) > 0] + assert len(filled_refs) in (0, 1) + coordinate_element = filled_refs[0] if filled_refs else refs[0] + else: + coordinate_element = same_as.references[0].goto(primitive) + except (ValueError, AttributeError, IndexError): + # Ignore invalid coordinate specs or empty array references + coordinate_element = np.ones((1,) * 6) + + if len(coordinate_element) == 0: + maybe_set_random_value(coordinate_element, 0.5**ndim, skip_complex) + size = coordinate_element.shape[0 if coordinate.references else dim] + + if coordinate.size: # coordinateX = OR 1...1 + if random.random() < 0.5: + size = coordinate.size + else: + size = coordinate.size if size == 0: - return - elif not coordinate.has_validation and not same_as.has_validation: - # we can independently choose a size for this dimension: - size = random.randint(1, 6) - elif coordinate.references or same_as.references: - try: - if coordinate.references: - refs = [ref.goto(primitive) for ref in coordinate.references] - filled_refs = [ref for ref in refs if len(ref) > 0] - assert len(filled_refs) in (0, 1) - coordinate_element = filled_refs[0] if filled_refs else refs[0] - else: - coordinate_element = same_as.references[0].goto(primitive) - except (ValueError, AttributeError, IndexError): - # Ignore invalid coordinate specs or empty array references - coordinate_element = np.ones((1,) * 6) - - if len(coordinate_element) == 0: - # Scale chance of not setting a coordinate by our number of dimensions, - # such 
that overall there is roughly a 50% chance that any coordinate - # remains empty - maybe_set_random_value(coordinate_element, 0.5**ndim, skip_complex) - size = coordinate_element.shape[0 if coordinate.references else dim] - - if coordinate.size: # coordinateX = OR 1...1 - # Coin flip whether to use the size as determined by - # coordinate.references, or the size from coordinate.size - if random.random() < 0.5: - size = coordinate.size - else: - size = coordinate.size - if size == 0: - return # Leave empty - shape.append(size) + return # Leave empty + shape.append(size) if primitive.metadata.data_type is IDSDataType.STR: primitive.value = [random_string() for i in range(shape[0])] @@ -313,53 +288,10 @@ def fill_consistent( elif any(len(coordinate.references) > 1 for coordinate in coordinates): exclusive_coordinates.append(child) else: - same_as_skip = False - for dim, same_as in enumerate(child.metadata.coordinates_same_as): - if same_as.references: - try: - ref_elem = same_as.references[0].goto(child) - if len(ref_elem.shape) <= dim or ref_elem.shape[dim] == 0: - same_as_skip = True - break - except (ValueError, AttributeError, IndexError, RuntimeError): - same_as_skip = True - break - - error_skip = False - if child.metadata.name.endswith("_error_upper"): - name = child.metadata.name[: -len("_error_upper")] - try: - data = child._parent[name] - if not data.has_value: - maybe_set_random_value(data, 0.0, skip_complex) - if ( - not data.has_value - or len(data.shape) == 0 - or any(s == 0 for s in data.shape) - ): - error_skip = True - except (KeyError, AttributeError, RuntimeError, ValueError): - error_skip = True - elif child.metadata.name.endswith("_error_lower"): - name = child.metadata.name[: -len("_error_lower")] + "_error_upper" - try: - data = child._parent[name] - if not data.has_value: - maybe_set_random_value(data, 0.0, skip_complex) - if ( - not data.has_value - or len(data.shape) == 0 - or any(s == 0 for s in data.shape) - ): - error_skip = True - except 
(KeyError, AttributeError, RuntimeError, ValueError): - error_skip = True - - if not same_as_skip and not error_skip: - try: - maybe_set_random_value(child, leave_empty, skip_complex) - except (RuntimeError, ValueError): - pass + try: + maybe_set_random_value(child, leave_empty, skip_complex) + except (RuntimeError, ValueError): + pass if isinstance(structure, IDSToplevel): # handle exclusive_coordinates From 96738c8b27e516fc9e850dd82bf93a47bd213265 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 20 Nov 2025 15:33:57 +0100 Subject: [PATCH 66/74] Fix issue with fill_consistent and simplify logic - Errorbars were not cleared by `unset_coordinate()`, this is fixed now - Use imas.util.tree_iter in unset_coordinate, which is more clear than visit_children and performs slightly better - Simplify some checks in `maybe_set_random_value()` --- imas/test/test_helpers.py | 42 ++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/imas/test/test_helpers.py b/imas/test/test_helpers.py index a27c4ec9..cb6d107d 100644 --- a/imas/test/test_helpers.py +++ b/imas/test/test_helpers.py @@ -19,7 +19,7 @@ from imas.ids_struct_array import IDSStructArray from imas.ids_structure import IDSStructure from imas.ids_toplevel import IDSToplevel -from imas.util import idsdiffgen, visit_children +from imas.util import idsdiffgen, tree_iter logger = logging.getLogger(__name__) @@ -130,29 +130,13 @@ def maybe_set_random_value( if primitive.metadata.name.endswith("_error_upper"): name = primitive.metadata.name[: -len("_error_upper")] data = primitive._parent[name] - if ( - not data.has_value - or len(data.shape) == 0 - or any(s == 0 for s in data.shape) - ): - return - if any( - same_as.references for same_as in primitive.metadata.coordinates_same_as - ): + if not data.has_value: return shape = list(data.shape) elif primitive.metadata.name.endswith("_error_lower"): name = primitive.metadata.name[: -len("_error_lower")] + "_error_upper" data 
= primitive._parent[name] - if ( - not data.has_value - or len(data.shape) == 0 - or any(s == 0 for s in data.shape) - ): - return - if any( - same_as.references for same_as in primitive.metadata.coordinates_same_as - ): + if not data.has_value: return shape = list(data.shape) else: @@ -317,21 +301,29 @@ def fill_consistent( def unset_coordinate(coordinate): + def unset(element): + # Unset element value + element.value = [] + # But also its errorbars (if they exist) + try: + element._parent[element.metadata.name + "_error_upper"].value = [] + element._parent[element.metadata.name + "_error_lower"].value = [] + except AttributeError: + pass # Ignore when element has no errorbars + # Unset the coordinate quantity - coordinate.value = [] + unset(coordinate) # Find all elements that also have this as a coordinate and unset... parent = coordinate._dd_parent while parent.metadata.data_type is not IDSDataType.STRUCT_ARRAY: parent = parent._dd_parent - def callback(element): + for element in tree_iter(parent): if hasattr(element, "coordinates") and element.has_value: for ele_coor in element.coordinates: if ele_coor is coordinate: - element.value = [] - return - - visit_children(callback, parent) + unset(element) + break def compare_children(st1, st2, deleted_paths=set(), accept_lazy=False): From cf7a590c1eed9f5154545337564e164453b09b4c Mon Sep 17 00:00:00 2001 From: Deepak Mewar Date: Thu, 27 Nov 2025 16:23:09 +0100 Subject: [PATCH 67/74] [GH#71] Take into account identifier aliases (#77) --- docs/source/identifiers.rst | 105 ++++++++++++++++++- imas/ids_identifiers.py | 45 +++++--- imas/test/test_identifiers.py | 192 +++++++++++++++++++++++++++++++++- 3 files changed, 321 insertions(+), 21 deletions(-) diff --git a/docs/source/identifiers.rst b/docs/source/identifiers.rst index 312749e1..408c7abe 100644 --- a/docs/source/identifiers.rst +++ b/docs/source/identifiers.rst @@ -11,13 +11,16 @@ enumerated list of options for defining, for example: a neutron, or a photon. 
- Plasma heating may come from neutral beam injection, electron cyclotron heating, ion cyclotron heating, lower hybrid heating, alpha particles. +- These may have alternative naming conventions supported through aliases + (e.g., "235U" and "U_235" for Uranium 235). -Identifiers are a list of possible valid labels. Each label has three +Identifiers are a list of possible valid labels. Each label has up to four representations: 1. An index (integer) 2. A name (short string) 3. A description (long string) +4. List of aliases (list of short strings) Identifiers in IMAS-Python @@ -44,6 +47,15 @@ the available identifiers is stored as ``imas.identifiers.identifiers``. print(csid.total.index) print(csid.total.description) + # Access identifiers with aliases (when available) + mid = imas.identifiers.materials_identifier + print(mid["235U"].name) # Access by canonical name + print(mid["U_235"].name) # Access by alias + + # Both return the same object + assert mid["235U"].name is mid["U_235"].name + assert mid["235U"].name is mid.U_235.name + # Item access is also possible print(identifiers["edge_source_identifier"]) @@ -64,8 +76,8 @@ Assigning identifiers in IMAS-Python IMAS-Python implements smart assignment of identifiers. You may assign an identifier enum value (for example ``imas.identifiers.core_source_identifier.total``), a -string (for example ``"total"``) or an integer (for example ``"1"``) to an -identifier structure (for example ``core_profiles.source[0].identifier``) to set +string (for example ``"total"`` or its alias), or an integer (for example ``"1"``) +to an identifier structure (for example ``core_profiles.source[0].identifier``) to set all three child nodes ``name``, ``index`` and ``description`` in one go. See below example: @@ -86,6 +98,20 @@ below example: # 3. Assign an integer. 
This looks up the index in the identifier enum: core_sources.source[0].identifier = 1 + # Identifiers can still be assigned with the old alias name for backward compatibility: + wallids = imas.IDSFactory().wall() + wallids.description_ggd.resize(1) + wallids.description_ggd[0].material.resize(1) + wallids.description_ggd[0].material[0].grid_subset.resize(1) + mat = wallids.description_ggd[0].material[0].grid_subset[0].identifiers + mat.names.extend([""] * 1) + mid = imas.identifiers.materials_identifier + # Assign using canonical name + mat.names[0] = "235U" + # Or assign using alias (equivalent to above) + mat.names[0] = mid["U_235"].name + mat.names[0] = mid.U_235.name + # Inspect the contents of the structure imas.util.inspect(core_sources.source[0].identifier) @@ -101,6 +127,65 @@ below example: imas.util.inspect(core_sources.source[1].identifier) +Identifier aliases +------------------ + +Some identifiers may have multiple aliases defined in the Data Dictionary. Aliases are +former names kept as an option to ensure better backward compatibility after a change +and support multiple naming conventions. An identifier can have any number of +comma-separated aliases. + +Aliases can be accessed in the same ways as canonical names, and all aliases for an +identifier point to the same object. + +Aliases that begin with a number (e.g., 235U) cannot be accessed using dot notation +(e.g., material_identifier.235U) due to Python's syntax restrictions. Instead, such +aliases must be accessed using dictionary-style indexing, for example: +material_identifier["235U"]. + +.. 
code-block:: python + :caption: Working with identifier aliases + + import imas + + # Get materials identifier which has some aliases defined + mid = imas.identifiers.materials_identifier + + # Access by canonical name + uranium235_by_name = mid["235U"] + print(f"Name: {uranium235_by_name.name}") + print(f"Aliases: {uranium235_by_name.aliases}") # List of all aliases + print(f"First alias: {uranium235_by_name.alias}") # First alias for compatibility + print(f"Index: {uranium235_by_name.index}") + print(f"Description: {uranium235_by_name.description}") + + # Access by any alias - all return the same object + uranium235_by_alias1 = mid["U_235"].name + uranium235_by_alias2 = mid["Uranium_235"].name + print(f"Same objects: {uranium235_by_name is uranium235_by_alias1 is uranium235_by_alias2}") + + # You can also use attribute access for aliases (when valid Python identifiers) + uranium235_by_attr = mid.U_235.name + print(f"Same object: {uranium235_by_name is uranium235_by_attr}") + + # When assigning to IDS structures, alias works the following way + wallids = imas.IDSFactory().wall() + wallids.description_ggd.resize(1) + wallids.description_ggd[0].material.resize(1) + wallids.description_ggd[0].material[0].grid_subset.resize(1) + mat = wallids.description_ggd[0].material[0].grid_subset[0].identifiers + mat.names.extend([""] * 1) + mat.indices.resize(1) + mat.descriptions.extend([""] * 1) + mat.indices[0] = 20 + mat.descriptions[0] = "Uranium 235 isotope" + + # These assignments are all equivalent: + mat.names[0] = "235U" # canonical name + mat.names[0] = mid["235U"].name # enum value + mat.names[0] = mid.U_235.name # enum value via alias + mat.names[0] = mid["U_235"].name # enum value via alias + Compare identifiers ------------------- @@ -108,11 +193,12 @@ Identifier structures can be compared against the identifier enum as well. They compare equal when: 1. ``index`` is an exact match -2. ``name`` is an exact match, or ``name`` is not filled in the IDS node +2. 
``name`` is an exact match, or ``name`` matches an alias, or ``name`` is not filled in the IDS node The ``description`` does not have to match with the Data Dictionary definition, but a warning is logged if the description in the IDS node does not match with -the Data Dictionary description: +the Data Dictionary description. The comparison also takes aliases into account, +so an identifier will match both its canonical name and any defined alias: .. code-block:: python :caption: Comparing identifiers @@ -139,6 +225,15 @@ the Data Dictionary description: >>> core_sources.source[0].identifier.name = "totalX" >>> core_sources.source[0].identifier == csid.total False + >>> # Alias comparison example with materials identifier + >>> mid = imas.identifiers.materials_identifier + >>> cxr = imas.IDSFactory().camera_x_rays() + >>> mat = cxr.filter_window.material + >>> mat.index = 20 + >>> mat.name = "U_235" # Using alias + >>> # Compares equal to the canonical identifier even though name is alias + >>> mat == mid["235U"].name + True .. seealso:: diff --git a/imas/ids_identifiers.py b/imas/ids_identifiers.py index a64dd87f..1525a070 100644 --- a/imas/ids_identifiers.py +++ b/imas/ids_identifiers.py @@ -1,7 +1,6 @@ # This file is part of IMAS-Python. # You should have received the IMAS-Python LICENSE file with this project. -"""IMAS-Python module to support Data Dictionary identifiers. 
-""" +"""IMAS-Python module to support Data Dictionary identifiers.""" import logging from enum import Enum @@ -16,16 +15,18 @@ class IDSIdentifier(Enum): """Base class for all identifier enums.""" - def __new__(self, value: int, description: str): - obj = object.__new__(self) + def __new__(cls, value: int, description: str, aliases: list = []): + obj = object.__new__(cls) obj._value_ = value return obj - def __init__(self, value: int, description: str) -> None: + def __init__(self, value: int, description: str, aliases: list = []) -> None: self.index = value """Unique index for this identifier value.""" self.description = description """Description for this identifier value.""" + self.aliases = aliases + """Alternative names for this identifier value.""" def __eq__(self, other): if self is other: @@ -37,35 +38,49 @@ def __eq__(self, other): except (AttributeError, TypeError, ValueError): # Attribute doesn't exist, or failed to convert return NotImplemented + # Index must match if other_index == self.index: - # Name may be left empty - if other_name == self.name or other_name == "": + # Name may be left empty, or match name or alias + if ( + other_name == self.name + or other_name == "" + or other_name in self.aliases + ): # Description doesn't have to match, though we will warn when it doesn't - if other_description != self.description and other_description != "": + if other_description not in (self.description, ""): logger.warning( "Description of %r does not match identifier description %r", other.description, self.description, ) return True - else: - logger.warning( - "Name %r does not match identifier name %r, but indexes are equal.", - other.name, - self.name, - ) + + # If we get here with matching indexes but no name/alias match, warn + logger.warning( + "Name %r does not match identifier name %r, but indexes are equal.", + other.name, + self.name, + ) return False @classmethod def _from_xml(cls, identifier_name, xml) -> Type["IDSIdentifier"]: element = 
fromstring(xml) enum_values = {} + aliases = {} for int_element in element.iterfind("int"): name = int_element.get("name") value = int_element.text description = int_element.get("description") - enum_values[name] = (int(value), description) + # alias attribute may contain multiple comma-separated aliases + alias_attr = int_element.get("alias", "") + aliases = [a.strip() for a in alias_attr.split(",") if a.strip()] + # Canonical entry: use the canonical 'name' as key + enum_values[name] = (int(value), description, aliases) + # Also add alias names as enum *aliases* (they become enum attributes) + for alias in aliases: + enum_values[alias] = (int(value), description, aliases) # Create the enumeration enum = cls( identifier_name, diff --git a/imas/test/test_identifiers.py b/imas/test/test_identifiers.py index 263a6ccf..119e0e88 100644 --- a/imas/test/test_identifiers.py +++ b/imas/test/test_identifiers.py @@ -1,9 +1,18 @@ -import pytest +import importlib.metadata +from packaging.version import Version +import pytest from imas.dd_zip import dd_identifiers from imas.ids_factory import IDSFactory from imas.ids_identifiers import IDSIdentifier, identifiers +has_aliases = Version(importlib.metadata.version("imas_data_dictionaries")) >= Version( + "4.1.0" +) +requires_aliases = pytest.mark.skipif( + not has_aliases, reason="Requires DD 4.1.0 for identifier aliases" +) + def test_list_identifiers(): assert identifiers.identifiers == dd_identifiers() @@ -70,6 +79,66 @@ def test_identifier_struct_assignment(caplog): assert source.identifier != csid.total +def test_identifiers_with_aliases(): + # Custom identifier XML, based on materials identifier, with some more features + custom_identifier_xml = """\ + + +
+Materials used in the device mechanical structures +
+20 +21 +22 +23 +
+""" + identifier = IDSIdentifier._from_xml("custom_identifier", custom_identifier_xml) + + assert len(identifier) == 4 + + # no aliases + assert identifier.Diamond.aliases == [] + # 1 alias + assert identifier["235U"] is identifier.U_235 + assert identifier["235U"].aliases == ["U_235"] + # 3 aliases + assert ( + identifier.CxHy + is identifier.alias1 + is identifier.alias2 + is identifier["3alias"] + ) + assert identifier.CxHy.aliases == ["alias1", "alias2", "3alias"] + + +@requires_aliases +def test_identifier_struct_assignment_with_aliases(): + """Test identifier struct assignment with aliases using materials_identifier.""" + mid = identifiers.materials_identifier + + # Create an actual IDS structure + wallids = IDSFactory().wall() + wallids.description_ggd.resize(1) + wallids.description_ggd[0].material.resize(1) + wallids.description_ggd[0].material[0].grid_subset.resize(1) + mat = wallids.description_ggd[0].material[0].grid_subset[0].identifiers + mat.names.extend([""] * 1) + mat.indices.resize(1) + mat.descriptions.extend([""] * 1) + mat.names[0] = mid.U_235.name + mat.indices[0] = 20 + mat.descriptions[0] = "Uranium 235 isotope" + + # Basic attribute checks + assert mat.names[0] == mid["235U"].name + assert mat.indices[0] == mid.U_235.index + + # Modify material properties and test equality + mat.names[0] = "some_name" + assert mat.names[0] != mid.U_235.name + + def test_identifier_aos_assignment(): cfid = identifiers.pf_active_coil_function_identifier pfa = IDSFactory("3.39.0").pf_active() @@ -103,3 +172,124 @@ def test_invalid_identifier_assignment(): with pytest.raises(ValueError): # negative identifiers are reserved for user-defined identifiers cs.source[0].identifier = -1 + + +@requires_aliases +def test_identifier_aliases(): + """Test identifier enum aliases functionality.""" + mid = identifiers.materials_identifier + + # Test that alias points to the same object as the canonical name + assert mid.U_235 is mid["235U"] + assert mid.U_238 is mid["238U"] 
+ assert mid.In_115 is mid["115In"] + assert mid.He_4 is mid["4He"] + + # Test that both name and alias have the same properties + assert mid.U_235.name == "235U" + assert mid.U_235.index == mid["235U"].index + assert mid.U_235.description == mid["235U"].description + assert "U_235" in mid.U_235.aliases + assert isinstance(mid.U_235.aliases, list) + + # Test accessing by any alias via bracket notation + for alias in mid.U_235.aliases: + assert mid[alias] is mid.U_235 + + +@requires_aliases +def test_identifier_alias_equality(): + """Test that identifiers with aliases are equal when comparing names and aliases.""" + mid = identifiers.materials_identifier + target = mid.U_235 + + # Test equality with canonical name + wallids = IDSFactory().wall() + wallids.description_ggd.resize(1) + wallids.description_ggd[0].material.resize(1) + wallids.description_ggd[0].material[0].grid_subset.resize(1) + mat = wallids.description_ggd[0].material[0].grid_subset[0].identifiers + mat.names.extend([""] * 1) + mat.names[0] = "235U" + assert mat.names[0] == target.name + + # Test equality with alias name + wallids2 = IDSFactory().wall() + wallids2.description_ggd.resize(1) + wallids2.description_ggd[0].material.resize(1) + wallids2.description_ggd[0].material[0].grid_subset.resize(1) + mat2 = wallids2.description_ggd[0].material[0].grid_subset[0].identifiers + mat2.names.extend([""] * 1) + mat2.names[0] = mid["U_235"].name # Use alias as name + assert mat2.names[0] == target.name + + # Test inequality when material has alias not matching canonical name + wallids3 = IDSFactory().wall() + wallids3.description_ggd.resize(1) + wallids3.description_ggd[0].material.resize(1) + wallids3.description_ggd[0].material[0].grid_subset.resize(1) + mat3 = wallids3.description_ggd[0].material[0].grid_subset[0].identifiers + mat3.names.extend([""] * 1) + mat3.names[0] = "test_name" + assert mat3.names[0] != target.name + + # Test equality when index doesn't match + wallids4 = IDSFactory().wall() + 
wallids4.description_ggd.resize(1) + wallids4.description_ggd[0].material.resize(1) + wallids4.description_ggd[0].material[0].grid_subset.resize(1) + mat4 = wallids4.description_ggd[0].material[0].grid_subset[0].identifiers + mat4.names.extend([""] * 1) + mat4.indices.resize(1) + mat4.names[0] = "235U" + mat4.indices[0] = 999 + assert mat4.indices[0] != target.index + assert mat4.names[0] == target.name + + # Test equality for multiple names,indices and descriptions + wallids5 = IDSFactory().wall() + wallids5.description_ggd.resize(1) + wallids5.description_ggd[0].material.resize(1) + wallids5.description_ggd[0].material[0].grid_subset.resize(1) + mat5 = wallids5.description_ggd[0].material[0].grid_subset[0].identifiers + mat5.names.extend([""] * 3) + mat5.indices.resize(3) + mat5.descriptions.extend([""] * 3) + mat5.names[0] = "235U" + mat5.names[1] = "238U" + mat5.names[2] = mid.U_235.name # Use alias as name + mat5.indices[0] = 20 + mat5.indices[1] = 21 + mat5.indices[2] = 20 + mat5.descriptions[0] = "Uranium 235 isotope" + mat5.descriptions[1] = "Uranium 238 isotope" + mat5.descriptions[2] = "Uranium 235 isotope" + + assert mat5.names[0] == mid["235U"].name + assert mat5.names[1] == mid["238U"].name + assert mat5.names[2] == mid["U_235"].name + assert mat5.indices[0] == mid["235U"].index + assert mat5.indices[1] == mid["238U"].index + assert mat5.indices[2] == mid["U_235"].index + assert mat5.descriptions[0] == mid["235U"].description + assert mat5.descriptions[1] == mid["238U"].description + assert mat5.descriptions[2] == mid["U_235"].description + + +@requires_aliases +def test_identifier_alias_equality_non_ggd(): + """Test identifier aliases functionality on non-ggd material""" + mid = identifiers.materials_identifier + + summary_ids = IDSFactory().summary() + summary_ids.wall.material = mid.U_235 # Use alias as enum + assert summary_ids.wall.material == mid["235U"] + assert summary_ids.wall.material == mid["U_235"] + + summary_ids.wall.material.name = 
"U_235" # Use alias as name + assert summary_ids.wall.material == mid["235U"] + assert summary_ids.wall.material == mid["U_235"] + + summary_ids.wall.material.name = "235U" # Use canonical name + assert summary_ids.wall.material == mid["235U"] + assert summary_ids.wall.material == mid["U_235"] From 836798dcfb4a666c6ca371880f1b316bd8342cf6 Mon Sep 17 00:00:00 2001 From: Olivier Hoenen Date: Wed, 10 Dec 2025 17:28:50 +0100 Subject: [PATCH 68/74] Add imas-core dependency --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 066e0ea9..6d50034a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,6 +64,7 @@ dependencies = [ "packaging", "xxhash >= 2", "imas_data_dictionaries", + "imas_core>=5.5.3" ] [project.optional-dependencies] From bade438ecf3ad5d34f1a62f011c07c63f914f1ed Mon Sep 17 00:00:00 2001 From: Maarten Sebregts <110895564+maarten-ic@users.noreply.github.com> Date: Wed, 10 Dec 2025 17:31:33 +0100 Subject: [PATCH 69/74] Add CSV output option to `imas process-db-analysis` (#85) --- imas/command/db_analysis.py | 109 +++++++++++++++++++++++++++++++++--- imas/test/test_cli.py | 61 ++++++++++++++++++++ 2 files changed, 163 insertions(+), 7 deletions(-) diff --git a/imas/command/db_analysis.py b/imas/command/db_analysis.py index 8f262e27..f8960858 100644 --- a/imas/command/db_analysis.py +++ b/imas/command/db_analysis.py @@ -7,6 +7,8 @@ import re import readline import sys +from csv import writer as csvwriter +from collections import Counter, defaultdict from dataclasses import dataclass, field from pathlib import Path from typing import Dict, Iterable, List, Optional @@ -139,12 +141,70 @@ def ids_info(idsfile: Path): } +@dataclass +class _PathUsage: + num_occurrences: int = 0 + path_counter: Counter = field(default_factory=Counter) + + +def _write_usage_stats_to_csv( + writer, usage_per_entry, usage_per_occurrence, num_entries +): + """Write usage statistics to csv file. 
+ + Args: + writer: an instance of csv.writer + usage_per_entry: path usage statistics per data entry + usage_per_occurrence: path usage statistics per occurrence + num_entries: number of data entries + """ + # Write header + writer.writerow( + [ + "IDS", + "Path in IDS", + "Uses errorbar", + "Frequency (without occurrences)", + "Frequency (with occurences)", + ] + ) + + for ids_name in sorted(usage_per_entry): + entry_usage = usage_per_entry[ids_name] + occurrence_usage = usage_per_occurrence[ids_name] + + # Usage statistics of the IDS (# entries with this IDS / # entries) + freq = entry_usage.num_occurrences / num_entries + writer.writerow([ids_name, "", "", freq, ""]) + + for path, entry_count in sorted(entry_usage.path_counter.items()): + if "_error_" in path: + continue # Skip error nodes + occurrence_count = occurrence_usage.path_counter[path] + + uses_error = f"{path}_error_upper" in entry_usage.path_counter + # Frequency without occurrences, see GH#84 for details + freq1 = entry_count / entry_usage.num_occurrences + # Frequency with occurences + freq2 = occurrence_count / occurrence_usage.num_occurrences + + # Write data row + writer.writerow([ids_name, path, "X" if uses_error else "", freq1, freq2]) + + +_csv_help_text = ( + "Write analysis output to the provided CSV file. For details, " + "see https://github.com/iterorganization/IMAS-Python/issues/84." +) + + @click.command("process-db-analysis") @click.argument( "infiles", metavar="INPUT_FILES...", nargs=-1, type=infile_path, required=True ) @click.option("--show-empty-ids", is_flag=True, help="Show empty IDSs in the overview.") -def process_db_analysis(infiles, show_empty_ids): +@click.option("--csv", type=outfile_path, help=_csv_help_text) +def process_db_analysis(infiles, show_empty_ids, csv): """Process supplied Data Entry analyses, and display statistics. 
\b @@ -153,9 +213,10 @@ def process_db_analysis(infiles, show_empty_ids): """ setup_rich_log_handler(False) - factory = imas.IDSFactory() - filled_per_ids = {ids_name: set() for ids_name in factory.ids_names()} - logger.info("Using Data Dictionary version %s.", factory.dd_version) + usage_per_entry = defaultdict(_PathUsage) + usage_per_occurrence = defaultdict(_PathUsage) + num_entries = 0 + logger.info("Reading %d input files...", len(infiles)) # Read input data and collate usage info per IDS @@ -164,17 +225,51 @@ def process_db_analysis(infiles, show_empty_ids): data = json.load(file) for entry in data: + usage_for_this_entry = defaultdict(_PathUsage) for ids_info in entry["ids_info"]: - fill_info = filled_per_ids[ids_info["name"]] - fill_info.update(ids_info["filled_data"]) + ids_name = ids_info["name"] + filled_paths = ids_info["filled_data"] + # Update counters for this entry + usage_for_this_entry[ids_name].path_counter.update(filled_paths) + # Update counters for all occurrecnes + usage_per_occurrence[ids_name].num_occurrences += 1 + usage_per_occurrence[ids_name].path_counter.update(filled_paths) + # Update data entry usage + for ids_name, usage in usage_for_this_entry.items(): + usage_per_entry[ids_name].num_occurrences += 1 + usage_per_entry[ids_name].path_counter.update(usage.path_counter.keys()) + num_entries += 1 logger.info("Done reading input files.") + + if csv is not None: + # Output to CSV file + logger.info("Writing output to CSV file: %s", csv) + with open(csv, "w") as csvfile: + writer = csvwriter(csvfile) + _write_usage_stats_to_csv( + writer, usage_per_entry, usage_per_occurrence, num_entries + ) + logger.info("Done.") + return + logger.info("Analyzing filled data...") + factory = imas.IDSFactory() + logger.info("Using Data Dictionary version %s.", factory.dd_version) # Construct AnalysisNodes per IDS analysis_nodes: Dict[str, _AnalysisNode] = {} - for ids_name, filled in filled_per_ids.items(): + for ids_name, usage in 
usage_per_occurrence.items(): + if ids_name not in factory.ids_names(): + logger.warning( + "Founds IDS %s in data files, but this IDS is not available " + "in DD version %s. Statistics will not be tracked.", + ids_name, + factory.dd_version, + ) + continue metadata = factory.new(ids_name).metadata + filled = set(usage.path_counter.keys()) ids_analysis_node = _AnalysisNode("") def walk_metadata_tree(metadata: IDSMetadata, node: _AnalysisNode): diff --git a/imas/test/test_cli.py b/imas/test/test_cli.py index c6ddbc0e..0f4b305e 100644 --- a/imas/test/test_cli.py +++ b/imas/test/test_cli.py @@ -39,3 +39,64 @@ def test_db_analysis(tmp_path, requires_imas): ) assert process_result.exit_code == 0, process_result.output assert "core_profiles" in process_result.output + + +@pytest.mark.cli +def test_db_analysis_csv(tmp_path, requires_imas): + with DBEntry(f"imas:hdf5?path={tmp_path}/entry1", "w") as entry: + eq = entry.factory.equilibrium() + eq.ids_properties.homogeneous_time = 2 + entry.put(eq) + eq.ids_properties.comment = "filled" + entry.put(eq, 1) + eq.ids_properties.homogeneous_time = 1 + eq.time = [1.0] + eq.time_slice.resize(1) + eq.time_slice[0].boundary.psi = 1.0 + eq.time_slice[0].boundary.psi_error_upper = 0.1 + entry.put(eq, 2) + wall = entry.factory.wall() + wall.ids_properties.homogeneous_time = 2 + entry.put(wall) + wall.first_wall_surface_area = 1.0 + entry.put(wall, 1) + with DBEntry(f"imas:hdf5?path={tmp_path}/entry2", "w") as entry: + eq = entry.factory.equilibrium() + eq.ids_properties.homogeneous_time = 2 + eq.ids_properties.comment = "also filled" + entry.put(eq) + + runner = CliRunner() + with runner.isolated_filesystem(temp_dir=tmp_path) as td: + analyze_result = runner.invoke( + analyze_db, [f"{tmp_path}/entry1", f"{tmp_path}/entry2"] + ) + assert analyze_result.exit_code == 0 + + outfile = Path(td) / "imas-db-analysis.json.gz" + assert outfile.exists() + process_result = runner.invoke( + process_db_analysis, [str(outfile), "--csv", "output.csv"] 
+ ) + assert process_result.exit_code == 0 + + assert ( + Path("output.csv").read_text() + == """\ +IDS,Path in IDS,Uses errorbar,Frequency (without occurrences),Frequency (with occurences) +equilibrium,,,1.0, +equilibrium,ids_properties/comment,,1.0,0.75 +equilibrium,ids_properties/homogeneous_time,,1.0,1.0 +equilibrium,ids_properties/version_put/access_layer,,1.0,1.0 +equilibrium,ids_properties/version_put/access_layer_language,,1.0,1.0 +equilibrium,ids_properties/version_put/data_dictionary,,1.0,1.0 +equilibrium,time,,0.5,0.25 +equilibrium,time_slice/boundary/psi,X,0.5,0.25 +wall,,,0.5, +wall,first_wall_surface_area,,1.0,0.5 +wall,ids_properties/homogeneous_time,,1.0,1.0 +wall,ids_properties/version_put/access_layer,,1.0,1.0 +wall,ids_properties/version_put/access_layer_language,,1.0,1.0 +wall,ids_properties/version_put/data_dictionary,,1.0,1.0 +""" # noqa: E501 (line too long) + ) From 0c38d5739a773d2995a9bba3fe40d1677028276d Mon Sep 17 00:00:00 2001 From: Olivier Hoenen Date: Thu, 11 Dec 2025 12:24:21 +0100 Subject: [PATCH 70/74] Update changelog for release 2.1.0 --- docs/source/changelog.rst | 42 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index f99e24d2..abbd9960 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -3,6 +3,44 @@ Changelog ========= +What's new in IMAS-Python 2.1.0 +------------------------------- + +Build +''''' + +- update Python version support (remove 3.8, add 3.13) +- add dependency on `imas_core `__ + + +Improvements +'''''''''''' + +- :issue:`#84`: improve `imas process-db-analysis` +- :issue:`#71`: take into account identifier aliases (introduced in DD 4.1) +- :issue:`#78`: disable *implicit* conversion when crossing a major version update +- improve integration of UDA backend +- cleaning old AL4 deprecated code +- :issue:`#59`: convert name+identifier (DD3) into description+name (DD4) +- improve type 
hints (following PEP-585 and PEP-604) +- improve performance of IDS deepcopy +- :issue:`#60`: improve `equilibrium` DD3->4 by converting `boundary_separatrix` into `contour_tree` +- :issue:`#22`: add custom conversion example in the doc for `em_coupling` IDS + + +Bug fixes +''''''''' + +- fix testcases with coordinate validation issues +- :issue:`#80`: fix `imas print` when using netcdf and imas_core is not present +- :issue:`#61`: special DD3->4 rule to flip sign quantities missing the `cocos_label_transform attribute` in DD +- :merge:`#58`: fix unclear provenance capture +- :merge:`#57`: fix 0D arrays from lazy loading with netcdf +- :issue:`#55`: handle missing case when converting 3.42->4 (_tor->_phi) + + + + What's new in IMAS-Python 2.0.1 ------------------------------- @@ -74,9 +112,9 @@ Bug fixes Dictionary 4.0.0 and 3.42.0. In other cases, the Data Dictionary version is now explicitly indicated. -- :issue:`IMAS-5560`: Fix a bug where IMASPy would not correctly recognize that +- IMAS-5560: Fix a bug where IMASPy would not correctly recognize that the UDA backend is used. -- :issue:`IMAS-5541`: Fix a bug when converting a closed contour to Data +- IMAS-5541: Fix a bug when converting a closed contour to Data Dictionary version 4.0.0. - Work around incorrect Data Dictionary 3.x metadata when converting ``flux_loop/flux`` in the ``magnetics`` IDS to Data Dictionary version 4.0.0. 
From ec033b62a54bc9c47ba7b9ebf7429ecabe5d2882 Mon Sep 17 00:00:00 2001 From: Olivier Hoenen Date: Thu, 11 Dec 2025 12:32:15 +0100 Subject: [PATCH 71/74] fix sphinx ext_links --- docs/source/conf.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 65f5e5f4..b8a56dd0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -39,12 +39,12 @@ iter_projects = "https://github.com/iterorganization/" dd_url = urljoin(iter_projects, "imas-data-dictionary/") al_url = urljoin(iter_projects, "imas-core/") -issue_url = jira_url = "https://github.com/iterorganization/IMAS-Python/issues" # IMAS-Python repository_url = f"{iter_projects}/{src_project}/" blob_url = repository_url -mr_url = urljoin(repository_url, "/pulls") +issue_url = jira_url = urljoin(repository_url, "/issues/") +mr_url = urljoin(repository_url, "/pull/") # Configuration of sphinx.ext.extlinks @@ -53,7 +53,7 @@ extlinks = { "src": (blob_url + "%s", "%s"), "issue": (issue_url + "%s", "%s"), - "merge": (mr_url + "%s", "!%s"), + "merge": (mr_url + "%s", "%s"), "dd": (dd_url + "%s", "%s"), "al": (al_url + "%s", "%s"), "pypa": ("https://packaging.python.org/%s", None), From 3660a014c7848a825d6bfc1204eeb7917d1f0b57 Mon Sep 17 00:00:00 2001 From: Olivier Hoenen Date: Thu, 11 Dec 2025 12:38:24 +0100 Subject: [PATCH 72/74] remove EOL Python versions from tests --- .github/workflows/test_with_pytest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_with_pytest.yml b/.github/workflows/test_with_pytest.yml index 9d1208ba..03c4716e 100644 --- a/.github/workflows/test_with_pytest.yml +++ b/.github/workflows/test_with_pytest.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] # Test on multiple Python versions + python-version: ["3.10", "3.11", "3.12", "3.13"] # Test on multiple Python versions steps: - name: Checkout repository 
From ef7955966e35f2bce85253d31ec48ae4bfb6f450 Mon Sep 17 00:00:00 2001 From: Olivier Hoenen Date: Thu, 11 Dec 2025 20:19:00 +0100 Subject: [PATCH 73/74] avoid running test on MDSplus when the backend is not available --- conftest.py | 7 +++++++ docs/source/conf.py | 4 ++-- pyproject.toml | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/conftest.py b/conftest.py index b7ab1fe4..51aaa4d4 100644 --- a/conftest.py +++ b/conftest.py @@ -70,6 +70,13 @@ def pytest_addoption(parser): "hdf5": HDF5_BACKEND, "mdsplus": MDSPLUS_BACKEND, } +try: + from imas.db_entry import DBEntry + from imas_core.exception import ImasCoreBackendException + DBEntry("imas:mdsplus?path=dummy","r") +except ImasCoreBackendException as iex: + if "not available" in str(iex.message): + _BACKENDS.pop("mdsplus") try: diff --git a/docs/source/conf.py b/docs/source/conf.py index b8a56dd0..7a08634a 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -43,8 +43,8 @@ # IMAS-Python repository_url = f"{iter_projects}/{src_project}/" blob_url = repository_url -issue_url = jira_url = urljoin(repository_url, "/issues/") -mr_url = urljoin(repository_url, "/pull/") +issue_url = urljoin(repository_url, "issues/") +mr_url = urljoin(repository_url, "pull/") # Configuration of sphinx.ext.extlinks diff --git a/pyproject.toml b/pyproject.toml index 6d50034a..ed3f964e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,7 +64,7 @@ dependencies = [ "packaging", "xxhash >= 2", "imas_data_dictionaries", - "imas_core>=5.5.3" + "imas_core" ] [project.optional-dependencies] From 72d7ea383c554c742e906b332e6b51742f2d41b8 Mon Sep 17 00:00:00 2001 From: Olivier Hoenen Date: Thu, 11 Dec 2025 20:49:02 +0100 Subject: [PATCH 74/74] fixup changelog ext links to issues and PRs --- docs/source/changelog.rst | 22 +++++++++++----------- docs/source/conf.py | 4 ++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index abbd9960..0e7348c1 
100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -16,27 +16,27 @@ Build Improvements '''''''''''' -- :issue:`#84`: improve `imas process-db-analysis` -- :issue:`#71`: take into account identifier aliases (introduced in DD 4.1) -- :issue:`#78`: disable *implicit* conversion when crossing a major version update +- :issue:`84`: improve `imas process-db-analysis` +- :issue:`71`: take into account identifier aliases (introduced in DD 4.1) +- :issue:`78`: disable *implicit* conversion when crossing a major version update - improve integration of UDA backend - cleaning old AL4 deprecated code -- :issue:`#59`: convert name+identifier (DD3) into description+name (DD4) +- :issue:`59`: convert name+identifier (DD3) into description+name (DD4) - improve type hints (following PEP-585 and PEP-604) - improve performance of IDS deepcopy -- :issue:`#60`: improve `equilibrium` DD3->4 by converting `boundary_separatrix` into `contour_tree` -- :issue:`#22`: add custom conversion example in the doc for `em_coupling` IDS +- :issue:`60`: improve `equilibrium` DD3->4 by converting `boundary_separatrix` into `contour_tree` +- :issue:`22`: add custom conversion example in the doc for `em_coupling` IDS Bug fixes ''''''''' - fix testcases with coordinate validation issues -- :issue:`#80`: fix `imas print` when using netcdf and imas_core is not present -- :issue:`#61`: special DD3->4 rule to flip sign quantities missing the `cocos_label_transform attribute` in DD -- :merge:`#58`: fix unclear provenance capture -- :merge:`#57`: fix 0D arrays from lazy loading with netcdf -- :issue:`#55`: handle missing case when converting 3.42->4 (_tor->_phi) +- :issue:`80`: fix `imas print` when using netcdf and imas_core is not present +- :issue:`61`: special DD3->4 rule to flip the sign of quantities missing the `cocos_label_transformation` attribute in DD +- :merge:`58`: fix unclear provenance capture +- :merge:`57`: fix 0D arrays from lazy loading with netcdf +- :issue:`55`: handle missing 
case when converting 3.42->4 (_tor->_phi) diff --git a/docs/source/conf.py b/docs/source/conf.py index 7a08634a..06f59e76 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -52,8 +52,8 @@ # unique name: (base URL, label prefix) extlinks = { "src": (blob_url + "%s", "%s"), - "issue": (issue_url + "%s", "%s"), - "merge": (mr_url + "%s", "%s"), + "issue": (issue_url + "%s", "#%s"), + "merge": (mr_url + "%s", "#%s"), "dd": (dd_url + "%s", "%s"), "al": (al_url + "%s", "%s"), "pypa": ("https://packaging.python.org/%s", None),