From 45954a0a8dc58af2e60d1008d16132cbddfc69cd Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 2 Apr 2025 16:52:19 +0200 Subject: [PATCH 1/7] Initial logic to implement iterorganization/IMAS-Python#21 --- imas/ids_convert.py | 187 ++++++++++++++++++++++++++++------ imas/test/test_ids_convert.py | 95 ++++++++++++----- 2 files changed, 225 insertions(+), 57 deletions(-) diff --git a/imas/ids_convert.py b/imas/ids_convert.py index e5dc0911..5d3ef601 100644 --- a/imas/ids_convert.py +++ b/imas/ids_convert.py @@ -12,19 +12,16 @@ import numpy from packaging.version import InvalidVersion, Version +from scipy.interpolate import interp1d import imas from imas.dd_zip import parse_dd_version from imas.ids_base import IDSBase from imas.ids_data_type import IDSDataType +from imas.ids_defs import IDS_TIME_MODE_HETEROGENEOUS from imas.ids_factory import IDSFactory from imas.ids_path import IDSPath -from imas.ids_primitive import ( - IDSNumeric0D, - IDSNumericArray, - IDSPrimitive, - IDSString0D, -) +from imas.ids_primitive import IDSNumeric0D, IDSNumericArray, IDSPrimitive, IDSString0D from imas.ids_struct_array import IDSStructArray from imas.ids_structure import IDSStructure from imas.ids_toplevel import IDSToplevel @@ -495,7 +492,17 @@ def convert_ids( else: version_map = _DDVersionMap(ids_name, source_tree, target_tree, source_version) - _copy_structure(toplevel, target_ids, deepcopy, source_is_new, version_map) + # Special case for DD3to4 pulse_schedule conversion + if ( + toplevel.metadata.name == "pulse_schedule" + and toplevel.ids_properties.homogeneous_time == IDS_TIME_MODE_HETEROGENEOUS + and source_version < Version("3.40.0") + and target_version.major == 4 + ): + _pulse_schedule_3to4(toplevel, target_ids, deepcopy, version_map) + else: + _copy_structure(toplevel, target_ids, deepcopy, source_is_new, version_map) + logger.info("Conversion of IDS %s finished.", ids_name) if provenance_origin_uri: _add_provenance_entry(target_ids, toplevel._version, provenance_origin_uri) @@ -541,6 +548,41 @@ def _add_provenance_entry( node.sources.append(source_txt) # sources is a STR_1D (=list of strings) +def _get_target_item( + item: IDSBase, target: IDSStructure, rename_map: NBCPathMap +) -> Optional[IDSBase]: + """Find and return the corresponding target item if it exists. + + This method follows NBC renames (as stored in the rename map). It returns None if + there is no corresponding target item in the target structure. + """ + path = item.metadata.path_string + + # Follow NBC renames: + if path in rename_map: + if rename_map.path[path] is None: + if path not in rename_map.ignore_missing_paths: + if path in rename_map.type_change: + msg = "Element %r changed type in the target IDS." + else: + msg = "Element %r does not exist in the target IDS." + logger.warning(msg + " Data is not copied.", path) + return None + else: + return IDSPath(rename_map.path[path]).goto(target) + + # No NBC renames: + try: + return target[item.metadata.name] + except AttributeError: + # In exceptional cases the item does not exist in the target. Example: + # neutron_diagnostic IDS between DD 3.40.1 and 3.41.0. has renamed + # synthetic_signals/fusion_power -> fusion_power. The synthetic_signals + # structure no longer exists but we need to descend into it to get the + # total_neutron_flux. + return target + + def _copy_structure( source: IDSStructure, target: IDSStructure, @@ -561,27 +603,10 @@ def _copy_structure( rename_map = version_map.new_to_old if source_is_new else version_map.old_to_new for item in source.iter_nonempty_(): path = item.metadata.path_string - if path in rename_map: - if rename_map.path[path] is None: - if path not in rename_map.ignore_missing_paths: - if path in rename_map.type_change: - msg = "Element %r changed type in the target IDS." - else: - msg = "Element %r does not exist in the target IDS." - logger.warning(msg + " Data is not copied.", path) - continue - else: - target_item = IDSPath(rename_map.path[path]).goto(target) - else: - try: - target_item = target[item.metadata.name] - except AttributeError: - # In exceptional cases the item does not exist in the target. Example: - # neutron_diagnostic IDS between DD 3.40.1 and 3.41.0. has renamed - # synthetic_signals/fusion_power -> fusion_power. The synthetic_signals - # structure no longer exists but we need to descend into it to get the - # total_neutron_flux. - target_item = target + target_item = _get_target_item(item, target, rename_map) + if target_item is None: + continue + if path in rename_map.type_change: # Handle type change new_items = rename_map.type_change[path](item, target_item) @@ -600,11 +625,7 @@ def _copy_structure( elif isinstance(item, IDSStructure): _copy_structure(item, target_item, deepcopy, source_is_new, version_map) else: - if deepcopy: - # No nested types are used as data, so a shallow copy is sufficient - target_item.value = copy.copy(item.value) - else: - target_item.value = item.value + target_item.value = copy.copy(item.value) if deepcopy else item.value # Post-process the node: if path in rename_map.post_process: @@ -919,3 +940,103 @@ def _ids_properties_source(source: IDSString0D, provenance: IDSStructure) -> Non provenance.node.resize(1) provenance.node[0].reference.resize(1) provenance.node[0].reference[0].name = source.value + + +def _pulse_schedule_3to4( + source: IDSStructure, + target: IDSStructure, + deepcopy: bool, + version_map: DDVersionMap, +): + """Recursively copy data, following NBC renames, and converting time bases for the + pulse_schedule IDS. + + Args: + source: Source structure. + target: Target structure. + deepcopy: See :func:`convert_ids`. + version_map: Version map containing NBC renames. + """ + # All prerequisites are checked before calling this function: + # - source and target are pulse_schedule IDSs + # - source has DD version < 3.40.0 + # - target has DD version >= 4.0.0, < 5.0 + # - IDS is using heterogeneous time + rename_map = version_map.old_to_new + + def copy_and_interpolate( + source: IDSStructure, target: IDSStructure, timebase: numpy.ndarray + ): + """Reimplementation of _copy_structure that can interpolate nodes to the common + timebase.""" + for item in source.iter_nonempty_(): + path = item.metadata.path_string + if path.endswith("/time"): + continue # Skip time bases + + target_item = _get_target_item(item, target, rename_map) + if target_item is None: + continue + # We don't implement type changes and post process in this conversion: + assert path not in rename_map.type_change + assert path not in rename_map.post_process + + if isinstance(item, IDSStructArray): + size = len(item) + target_item.resize(size) + for i in range(size): + copy_and_interpolate(item[i], target_item[i], timebase) + elif isinstance(item, IDSStructure): + copy_and_interpolate(item, target_item, timebase) + elif ( + item.metadata.ndim == 1 + and item.metadata.coordinates[0].is_time_coordinate + ): + # Interpolate 1D dynamic quantities to the common time base + time = item.coordinates[0] # TODO, this can fail? + if len(item) != len(time): + raise ValueError( + f"Array {item} has a different size than its time base {time}." + ) + is_integer = item.metadata.data_type is IDSDataType.INT + value = interp1d( + time.value, + item.value, + "previous" if is_integer else "linear", + copy=False, + bounds_error=False, + fill_value=(item[0], item[-1]), + assume_sorted=True, + )(timebase) + target_item.value = value.astype(numpy.int32) if is_integer else value + else: # Default copy + target_item.value = copy.copy(item.value) if deepcopy else item.value + + for item in source.iter_nonempty_(): + # Special cases for non-dynamic stuff + name = item.metadata.name + target_item = _get_target_item(item, target, rename_map) + if target_item is None: + continue + + if name in ["ids_properties", "code"]: + _copy_structure(item, target_item, deepcopy, False, version_map) + elif name == "time": + target_item.value = item.value if not deepcopy else copy.copy(item.value) + elif name == "event": + size = len(item) + target_item.resize(size) + for i in range(size): + _copy_structure(item[i], target_item[i], deepcopy, False, version_map) + else: + # Find all time bases + time_bases = [ + node.value + for node in imas.util.tree_iter(item) + if node.metadata.name == "time" + ] + # Construct the common time base + timebase = numpy.unique(numpy.concatenate(time_bases)) if time_bases else [] + target_item.time = timebase + # Do the conversion + copy_and_interpolate(item, target_item, timebase) diff --git a/imas/test/test_ids_convert.py b/imas/test/test_ids_convert.py index 55045bbc..f2b9b7f7 100644 --- a/imas/test/test_ids_convert.py +++ b/imas/test/test_ids_convert.py @@ -7,6 +7,7 @@ from unittest.mock import MagicMock import numpy +from numpy import array_equal import pytest from imas import identifiers @@ -27,7 +28,7 @@ from imas.ids_factory import IDSFactory from imas.ids_struct_array import IDSStructArray from imas.ids_structure import IDSStructure -from imas.test.test_helpers import compare_children, open_dbentry +from imas.test.test_helpers import compare_children, fill_consistent, open_dbentry UTC = timezone.utc @@ -287,22 +288,22 @@ def test_3to4_repeat_children_first_point_conditional(dd4factory): for i in range(2): outline_inner = wall4.description_2d[0].vessel.unit[i].annular.outline_inner if i == 0: # open outline, first point not repeated: - assert numpy.array_equal(outline_inner.r, [1.0, 2.0, 3.0]) - assert numpy.array_equal(outline_inner.z, [-1.0, -2.0, -3.0]) + assert array_equal(outline_inner.r, [1.0, 2.0, 3.0]) + assert array_equal(outline_inner.z, [-1.0, -2.0, -3.0]) else: # closed outline, first point repeated: - assert numpy.array_equal(outline_inner.r, [1.0, 2.0, 3.0, 1.0]) - assert numpy.array_equal(outline_inner.z, [-1.0, -2.0, -3.0, -1.0]) + assert array_equal(outline_inner.r, [1.0, 2.0, 3.0, 1.0]) + assert array_equal(outline_inner.z, [-1.0, -2.0, -3.0, -1.0]) # Test conversion for case 2: assert len(wall4.description_2d[0].limiter.unit) == 2 for i in range(2): unit = wall4.description_2d[0].limiter.unit[i] if i == 0: # open outline, first point not repeated: - assert numpy.array_equal(unit.outline.r, [1.0, 2.0, 3.0]) - assert numpy.array_equal(unit.outline.z, [-1.0, -2.0, -3.0]) + assert array_equal(unit.outline.r, [1.0, 2.0, 3.0]) + assert array_equal(unit.outline.z, [-1.0, -2.0, -3.0]) else: # closed outline, first point repeated: - assert numpy.array_equal(unit.outline.r, [1.0, 2.0, 3.0, 1.0]) - assert numpy.array_equal(unit.outline.z, [-1.0, -2.0, -3.0, -1.0]) + assert array_equal(unit.outline.r, [1.0, 2.0, 3.0, 1.0]) + assert array_equal(unit.outline.z, [-1.0, -2.0, -3.0, -1.0]) # Test conversion for case 3: assert len(wall4.description_2d[0].mobile.unit) == 2 @@ -310,11 +311,11 @@ def test_3to4_repeat_children_first_point_conditional(dd4factory): unit = wall4.description_2d[0].mobile.unit[i] for j in range(3): if i == 0: # open outline, first point not repeated: - assert numpy.array_equal(unit.outline[j].r, [1.0, 2.0, 3.0]) - assert numpy.array_equal(unit.outline[j].z, [-1.0, -2.0, -3.0]) + assert array_equal(unit.outline[j].r, [1.0, 2.0, 3.0]) + assert array_equal(unit.outline[j].z, [-1.0, -2.0, -3.0]) else: # closed outline, first point repeated: - assert numpy.array_equal(unit.outline[j].r, [1.0, 2.0, 3.0, 1.0]) - assert numpy.array_equal(unit.outline[j].z, [-1.0, -2.0, -3.0, -1.0]) + assert array_equal(unit.outline[j].r, [1.0, 2.0, 3.0, 1.0]) + assert array_equal(unit.outline[j].z, [-1.0, -2.0, -3.0, -1.0]) assert unit.outline[j].time == pytest.approx(j / 5) # Test conversion for case 4: @@ -322,9 +323,9 @@ def test_3to4_repeat_children_first_point_conditional(dd4factory): for i in range(2): thickness = wall4.description_2d[1].vessel.unit[i].annular.thickness if i == 0: # open outline, there was one value too many, drop the last one - assert numpy.array_equal(thickness, [1, 0.9]) + assert array_equal(thickness, [1, 0.9]) else: # closed outline, thickness values kept - assert numpy.array_equal(thickness, [1, 0.9, 0.9]) + assert array_equal(thickness, [1, 0.9, 0.9]) # Test conversion back wall3 = convert_ids(wall4, "3.39.0") @@ -340,8 +341,8 @@ def test_3to4_repeat_children_first_point(dd4factory): iron_core4 = convert_ids(iron_core, None, factory=dd4factory) geometry = iron_core4.segment[0].geometry - assert numpy.array_equal(geometry.outline.r, [1.0, 2.0, 3.0, 1.0]) - assert numpy.array_equal(geometry.outline.z, [-1.0, -2.0, -3.0, -1.0]) + assert array_equal(geometry.outline.r, [1.0, 2.0, 3.0, 1.0]) + assert array_equal(geometry.outline.z, [-1.0, -2.0, -3.0, -1.0]) iron_core3 = convert_ids(iron_core4, "3.39.0") compare_children(iron_core, iron_core3) @@ -356,11 +357,11 @@ def test_3to4_cocos_change(dd4factory): cp.profiles_1d[0].grid.psi = numpy.linspace(10, 20, 11) cp4 = convert_ids(cp, None, factory=dd4factory) - assert numpy.array_equal( + assert array_equal( cp4.profiles_1d[0].grid.rho_tor_norm, cp.profiles_1d[0].grid.rho_tor_norm, ) - assert numpy.array_equal( + assert array_equal( cp4.profiles_1d[0].grid.psi, -cp.profiles_1d[0].grid.psi, ) @@ -376,11 +377,11 @@ def test_3to4_cocos_change(dd4factory): eq.time_slice[0].profiles_1d.dpressure_dpsi = numpy.linspace(1, 2, 11) eq4 = convert_ids(eq, None, factory=dd4factory) - assert numpy.array_equal( + assert array_equal( eq4.time_slice[0].profiles_1d.psi, -eq.time_slice[0].profiles_1d.psi, ) - assert numpy.array_equal( + assert array_equal( eq4.time_slice[0].profiles_1d.dpressure_dpsi, -eq.time_slice[0].profiles_1d.dpressure_dpsi, ) @@ -400,7 +401,7 @@ def test_3to4_circuit_connections(dd4factory, caplog): ] pfa4 = convert_ids(pfa, None, factory=dd4factory) - assert numpy.array_equal( + assert array_equal( pfa4.circuit[0].connections, [[-1, 0, 1], [0, 1, -1], [1, -1, 0]] ) @@ -417,7 +418,7 @@ def test_3to4_circuit_connections(dd4factory, caplog): with caplog.at_level(logging.ERROR): pfa4 = convert_ids(pfa, None, factory=dd4factory) # Incorrect shape, data is not converted: - assert numpy.array_equal(pfa.circuit[0].connections, pfa4.circuit[0].connections) + assert array_equal(pfa.circuit[0].connections, pfa4.circuit[0].connections) # Check that a message with ERROR severity was logged assert len(caplog.record_tuples) == 1 assert caplog.record_tuples[0][1] == logging.ERROR @@ -430,7 +431,53 @@ def test_3to4_cocos_magnetics_workaround(dd4factory): mag.flux_loop[0].flux.data = [1.0, 2.0] mag4 = convert_ids(mag, None, factory=dd4factory) - assert numpy.array_equal(mag4.flux_loop[0].flux.data, [-1.0, -2.0]) + assert array_equal(mag4.flux_loop[0].flux.data, [-1.0, -2.0]) mag3 = convert_ids(mag4, "3.39.0") compare_children(mag, mag3) + + +def test_3to4_pulse_schedule(): + ps = IDSFactory("3.39.0").pulse_schedule() + ps.ids_properties.homogeneous_time = IDS_TIME_MODE_HETEROGENEOUS + + ps.ec.launcher.resize(3) + ps.ec.launcher[0].power.reference.data = [1.0, 2.0, 3.0] + ps.ec.launcher[0].power.reference.time = [1.0, 2.0, 3.0] + ps.ec.launcher[1].power.reference.data = [0.0, 2.0, 5.0] + ps.ec.launcher[1].power.reference.time = [0.0, 2.0, 5.0] + ps.ec.launcher[2].power.reference.data = [1.0, 1.5] + ps.ec.launcher[2].power.reference.time = [1.0, 1.5] + + ps.ec.mode.data = [1, 2, 5] + ps.ec.mode.time = [1.0, 2.0, 5.0] + + ps4 = convert_ids(ps, "4.0.0") + assert array_equal(ps4.ec.time, [0.0, 1.0, 1.5, 2.0, 3.0, 5.0]) + item = "power_launched/reference" + assert array_equal(ps4.ec.beam[0][item], [1.0, 1.0, 1.5, 2.0, 3.0, 3.0]) + assert array_equal(ps4.ec.beam[1][item], [0.0, 1.0, 1.5, 2.0, 3.0, 5.0]) + assert array_equal(ps4.ec.beam[2][item], [1.0, 1.0, 1.5, 1.5, 1.5, 1.5]) + assert array_equal(ps4.ec.mode, [1, 1, 1, 2, 2, 5]) + + +def test_3to4_pulse_schedule_exceptions(): + ps = IDSFactory("3.39.0").pulse_schedule() + ps.ids_properties.homogeneous_time = IDS_TIME_MODE_HETEROGENEOUS + + ps.ec.launcher.resize(3) + ps.ec.launcher[0].power.reference.data = [1.0, 2.0, 3.0] + with pytest.raises(ValueError): # missing time base + convert_ids(ps, "4.0.0") + + ps.ec.launcher[0].power.reference.time = [1.0, 2.0] + with pytest.raises(ValueError): # incorrect size of time base + convert_ids(ps, "4.0.0") + + +def test_3to4_pulse_schedule_fuzz(): + ps = IDSFactory("3.39.0").pulse_schedule() + ps.ids_properties.homogeneous_time = IDS_TIME_MODE_HETEROGENEOUS + + fill_consistent(ps) + convert_ids(ps, "4.0.0") From 92f0008b4fae8e652f6ad177cc96f3c0c818905b Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 3 Apr 2025 09:30:04 +0200 Subject: [PATCH 2/7] Restructure pulse_schedule resampling to reuse more existing logic --- imas/ids_convert.py | 97 ++++++++++++++++++++------------------------- 1 file changed, 44 insertions(+), 53 deletions(-) diff --git a/imas/ids_convert.py b/imas/ids_convert.py index 5d3ef601..6f76f812 100644 --- a/imas/ids_convert.py +++ b/imas/ids_convert.py @@ -5,7 +5,7 @@ import copy import datetime import logging -from functools import lru_cache +from functools import lru_cache, partial from pathlib import Path from typing import Callable, Dict, Iterator, Optional, Set, Tuple from xml.etree.ElementTree import Element, ElementTree @@ -589,6 +589,7 @@ def _copy_structure( deepcopy: bool, source_is_new: bool, version_map: DDVersionMap, + callback: Optional[Callable] = None, ): """Recursively copy data, following NBC renames. @@ -599,6 +600,7 @@ def _copy_structure( source_is_new: True iff the DD version of the source is newer than that of the target. version_map: Version map containing NBC renames. + callback: Optional callback that is called for every copied node. """ rename_map = version_map.new_to_old if source_is_new else version_map.old_to_new for item in source.iter_nonempty_(): @@ -620,16 +622,25 @@ def _copy_structure( target_item.resize(size) for i in range(size): _copy_structure( - item[i], target_item[i], deepcopy, source_is_new, version_map + item[i], + target_item[i], + deepcopy, + source_is_new, + version_map, + callback, ) elif isinstance(item, IDSStructure): - _copy_structure(item, target_item, deepcopy, source_is_new, version_map) + _copy_structure( + item, target_item, deepcopy, source_is_new, version_map, callback + ) else: target_item.value = copy.copy(item.value) if deepcopy else item.value # Post-process the node: if path in rename_map.post_process: rename_map.post_process[path](target_item) + if callback is not None: + callback(item, target_item) ######################################################################################## @@ -964,61 +975,13 @@ def _pulse_schedule_3to4( # - IDS is using heterogeneous time rename_map = version_map.old_to_new - def copy_and_interpolate( - source: IDSStructure, target: IDSStructure, timebase: numpy.ndarray - ): - """Reimplementation of _copy_structure that can interpolate nodes to the common - timebase.""" - for item in source.iter_nonempty_(): - path = item.metadata.path_string - if path.endswith("/time"): - continue # Skip time bases - - target_item = _get_target_item(item, target, rename_map) - if target_item is None: - continue - # We don't implement type changes and post process in this conversion: - assert path not in rename_map.type_change - assert path not in rename_map.post_process - - if isinstance(item, IDSStructArray): - size = len(item) - target_item.resize(size) - for i in range(size): - copy_and_interpolate(item[i], target_item[i], timebase) - elif isinstance(item, IDSStructure): - copy_and_interpolate(item, target_item, timebase) - elif ( - item.metadata.ndim == 1 - and item.metadata.coordinates[0].is_time_coordinate - ): - # Interpolate 1D dynamic quantities to the common time base - time = item.coordinates[0] # TODO, this can fail? - if len(item) != len(time): - raise ValueError( - f"Array {item} has a different size than its time base {time}." - ) - is_integer = item.metadata.data_type is IDSDataType.INT - value = interp1d( - time.value, - item.value, - "previous" if is_integer else "linear", - copy=False, - bounds_error=False, - fill_value=(item[0], item[-1]), - assume_sorted=True, - )(timebase) - target_item.value = value.astype(numpy.int32) if is_integer else value - else: # Default copy - target_item.value = copy.copy(item.value) if deepcopy else item.value - for item in source.iter_nonempty_(): - # Special cases for non-dynamic stuff name = item.metadata.name target_item = _get_target_item(item, target, rename_map) if target_item is None: continue + # Special cases for non-dynamic stuff if name in ["ids_properties", "code"]: _copy_structure(item, target_item, deepcopy, False, version_map) elif name == "time": @@ -1039,4 +1002,32 @@ def copy_and_interpolate( timebase = numpy.unique(numpy.concatenate(time_bases)) if time_bases else [] target_item.time = timebase # Do the conversion - copy_and_interpolate(item, target_item, timebase) + _copy_structure( + item, + target_item, + deepcopy, + False, + version_map, + partial(_pulse_schedule_resample_callback, timebase), + ) + + +def _pulse_schedule_resample_callback(timebase, item: IDSBase, target_item: IDSBase): + if item.metadata.ndim == 1 and item.metadata.coordinates[0].is_time_coordinate: + # Interpolate 1D dynamic quantities to the common time base + time = item.coordinates[0] + if len(item) != len(time): + raise ValueError( + f"Array {item} has a different size than its time base {time}." + ) + is_integer = item.metadata.data_type is IDSDataType.INT + value = interp1d( + time.value, + item.value, + "previous" if is_integer else "linear", + copy=False, + bounds_error=False, + fill_value=(item[0], item[-1]), + assume_sorted=True, + )(timebase) + target_item.value = value.astype(numpy.int32) if is_integer else value From 97c9b0d99c12f6a5686cb463b5bf2d4c59ef3f3a Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 3 Apr 2025 10:22:03 +0200 Subject: [PATCH 3/7] Refactoring and suppress some log messages --- imas/ids_convert.py | 52 ++++++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/imas/ids_convert.py b/imas/ids_convert.py index 6f76f812..bce8c4ac 100644 --- a/imas/ids_convert.py +++ b/imas/ids_convert.py @@ -489,8 +489,10 @@ def convert_ids( target_tree = target_ids._parent._etree if source_is_new: version_map = _DDVersionMap(ids_name, target_tree, source_tree, target_version) + rename_map = version_map.new_to_old else: version_map = _DDVersionMap(ids_name, source_tree, target_tree, source_version) + rename_map = version_map.old_to_new # Special case for DD3to4 pulse_schedule conversion if ( @@ -499,9 +501,14 @@ def convert_ids( and source_version < Version("3.40.0") and target_version.major == 4 ): - _pulse_schedule_3to4(toplevel, target_ids, deepcopy, version_map) + try: + # Suppress "'.../time' does not exist in the target IDS." log messages. + logger.addFilter(_pulse_schedule_3to4_logfilter) + _pulse_schedule_3to4(toplevel, target_ids, deepcopy, rename_map) + finally: + logger.removeFilter(_pulse_schedule_3to4_logfilter) else: - _copy_structure(toplevel, target_ids, deepcopy, source_is_new, version_map) + _copy_structure(toplevel, target_ids, deepcopy, rename_map) logger.info("Conversion of IDS %s finished.", ids_name) if provenance_origin_uri: @@ -587,8 +594,7 @@ def _copy_structure( source: IDSStructure, target: IDSStructure, deepcopy: bool, - source_is_new: bool, - version_map: DDVersionMap, + rename_map: NBCPathMap, callback: Optional[Callable] = None, ): """Recursively copy data, following NBC renames. @@ -602,7 +608,6 @@ def _copy_structure( version_map: Version map containing NBC renames. callback: Optional callback that is called for every copied node. """ - rename_map = version_map.new_to_old if source_is_new else version_map.old_to_new for item in source.iter_nonempty_(): path = item.metadata.path_string target_item = _get_target_item(item, target, rename_map) @@ -621,18 +626,9 @@ def _copy_structure( size = len(item) target_item.resize(size) for i in range(size): - _copy_structure( - item[i], - target_item[i], - deepcopy, - source_is_new, - version_map, - callback, - ) + _copy_structure(item[i], target_item[i], deepcopy, rename_map, callback) elif isinstance(item, IDSStructure): - _copy_structure( - item, target_item, deepcopy, source_is_new, version_map, callback - ) + _copy_structure(item, target_item, deepcopy, rename_map, callback) else: target_item.value = copy.copy(item.value) if deepcopy else item.value @@ -957,7 +953,7 @@ def _pulse_schedule_3to4( source: IDSStructure, target: IDSStructure, deepcopy: bool, - version_map: DDVersionMap, + rename_map: NBCPathMap, ): """Recursively copy data, following NBC renames, and converting time bases for the pulse_schedule IDS. @@ -966,14 +962,13 @@ def _pulse_schedule_3to4( source: Source structure. target: Target structure. deepcopy: See :func:`convert_ids`. - version_map: Version map containing NBC renames. + rename_map: Map containing NBC renames. """ # All prerequisites are checked before calling this function: # - source and target are pulse_schedule IDSs # - source has DD version < 3.40.0 # - target has DD version >= 4.0.0, < 5.0 # - IDS is using heterogeneous time - rename_map = version_map.old_to_new for item in source.iter_nonempty_(): name = item.metadata.name @@ -983,14 +978,14 @@ def _pulse_schedule_3to4( # Special cases for non-dynamic stuff if name in ["ids_properties", "code"]: - _copy_structure(item, target_item, deepcopy, False, version_map) + _copy_structure(item, target_item, deepcopy, rename_map) elif name == "time": target_item.value = item.value if not deepcopy else copy.copy(item.value) elif name == "event": size = len(item) target_item.resize(size) for i in range(size): - _copy_structure(item[i], target_item[i], deepcopy, False, version_map) + _copy_structure(item[i], target_item[i], deepcopy, rename_map) else: # Find all time bases time_bases = [ @@ -1002,14 +997,13 @@ def _pulse_schedule_3to4( timebase = numpy.unique(numpy.concatenate(time_bases)) if time_bases else [] target_item.time = timebase # Do the conversion - _copy_structure( - item, - target_item, - deepcopy, - False, - version_map, - partial(_pulse_schedule_resample_callback, timebase), - ) + callback = partial(_pulse_schedule_resample_callback, timebase) + _copy_structure(item, target_item, deepcopy, rename_map, callback) + + +def _pulse_schedule_3to4_logfilter(logrecord: logging.LogRecord) -> bool: + """Suppress "'.../time' does not exist in the target IDS." log messages.""" + return not (logrecord.args and str(logrecord.args[0]).endswith("/time")) def _pulse_schedule_resample_callback(timebase, item: IDSBase, target_item: IDSBase): From 8e6584eb0311b0bcba042b3eebe408da056ad46a Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 3 Apr 2025 11:07:51 +0200 Subject: [PATCH 4/7] More refactoring --- imas/ids_convert.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/imas/ids_convert.py b/imas/ids_convert.py index bce8c4ac..47512d2f 100644 --- a/imas/ids_convert.py +++ b/imas/ids_convert.py @@ -471,12 +471,10 @@ def convert_ids( raise RuntimeError( f"There is no IDS with name {ids_name} in DD version {version}." ) - target_ids = factory.new(ids_name) - else: - target_ids = target + target = factory.new(ids_name) source_version = parse_dd_version(toplevel._version) - target_version = parse_dd_version(target_ids._version) + target_version = parse_dd_version(target._version) logger.info( "Starting conversion of IDS %s from version %s to version %s.", ids_name, @@ -484,10 +482,9 @@ def convert_ids( target_version, ) - source_is_new = source_version > target_version source_tree = toplevel._parent._etree - target_tree = target_ids._parent._etree - if source_is_new: + target_tree = target._parent._etree + if source_version > target_version: version_map = _DDVersionMap(ids_name, target_tree, source_tree, target_version) rename_map = version_map.new_to_old else: @@ -504,16 +501,16 @@ def convert_ids( try: # Suppress "'.../time' does not exist in the target IDS." log messages. logger.addFilter(_pulse_schedule_3to4_logfilter) - _pulse_schedule_3to4(toplevel, target_ids, deepcopy, rename_map) + _pulse_schedule_3to4(toplevel, target, deepcopy, rename_map) finally: logger.removeFilter(_pulse_schedule_3to4_logfilter) else: - _copy_structure(toplevel, target_ids, deepcopy, rename_map) + _copy_structure(toplevel, target, deepcopy, rename_map) logger.info("Conversion of IDS %s finished.", ids_name) if provenance_origin_uri: - _add_provenance_entry(target_ids, toplevel._version, provenance_origin_uri) - return target_ids + _add_provenance_entry(target, toplevel._version, provenance_origin_uri) + return target def _add_provenance_entry( From 2b9c3739a23b560ee016e326d31c7364c81b7300 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 3 Apr 2025 11:29:18 +0200 Subject: [PATCH 5/7] Add documentation --- docs/source/multi-dd.rst | 10 ++++++++++ imas/ids_convert.py | 1 + 2 files changed, 11 insertions(+) diff --git a/docs/source/multi-dd.rst b/docs/source/multi-dd.rst index 6ddd7cd1..6585ed2c 100644 --- a/docs/source/multi-dd.rst +++ b/docs/source/multi-dd.rst @@ -146,6 +146,7 @@ explicit conversion mechanisms. Changed definition of open/closed contours, Yes, No Changed definition of ``space/coordinates_type`` in GGD grids, Yes, No Migrate obsolescent ``ids_properties/source`` to ``ids_properties/provenance``, Yes, No + Convert the multiple time-bases in the ``pulse_schedule`` IDS [#ps3to4]_, Yes, No .. [#rename] Quantities which have been renamed between the two DD versions. For example, the ``ec/beam`` Array of Structures in the ``pulse_schedule`` IDS, @@ -175,6 +176,15 @@ explicit conversion mechanisms. .. [#ignore_type_change] These type changes are not supported. Quantities in the destination IDS will remain empty. +.. [#ps3to4] In Data Dictionary 3.39.0 and older, all dynamic quantities in the + ``pulse_schedule`` IDS had their own time array. In DD 4.0.0 this was + restructured to one time array per component (for example `ec/time + `__). + This migration constructs a common time base per subroup, and interpolates + the dynamic quantities within the group to the new time base. Resampling + uses `previous neighbour` interpolation for integer quantities, and linear + interpolation otherwise. See also: + https://github.com/iterorganization/IMAS-Python/issues/21. .. _`DD background`: diff --git a/imas/ids_convert.py b/imas/ids_convert.py index 47512d2f..a6d3d3cf 100644 --- a/imas/ids_convert.py +++ b/imas/ids_convert.py @@ -1004,6 +1004,7 @@ def _pulse_schedule_3to4_logfilter(logrecord: logging.LogRecord) -> bool: def _pulse_schedule_resample_callback(timebase, item: IDSBase, target_item: IDSBase): + """Callback from _copy_structure to resample dynamic data on the new timebase""" if item.metadata.ndim == 1 and item.metadata.coordinates[0].is_time_coordinate: # Interpolate 1D dynamic quantities to the common time base time = item.coordinates[0] From 7c557e086ba9b26b24855295ee67b6726519ef5d Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 3 Apr 2025 14:21:32 +0200 Subject: [PATCH 6/7] Fix typo --- docs/source/multi-dd.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/multi-dd.rst b/docs/source/multi-dd.rst index 6585ed2c..b63d18e4 100644 --- a/docs/source/multi-dd.rst +++ b/docs/source/multi-dd.rst @@ -180,7 +180,7 @@ explicit conversion mechanisms. ``pulse_schedule`` IDS had their own time array. In DD 4.0.0 this was restructured to one time array per component (for example `ec/time `__). - This migration constructs a common time base per subroup, and interpolates + This migration constructs a common time base per subgroup, and interpolates the dynamic quantities within the group to the new time base. Resampling uses `previous neighbour` interpolation for integer quantities, and linear interpolation otherwise. See also: From 6ef9353ca26c4ff55e4f0496c57b48acb4a35da0 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Fri, 4 Apr 2025 09:43:34 +0200 Subject: [PATCH 7/7] Extend DD version range to which the pulse schedule conversion applies --- imas/ids_convert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imas/ids_convert.py b/imas/ids_convert.py index a6d3d3cf..295a87be 100644 --- a/imas/ids_convert.py +++ b/imas/ids_convert.py @@ -496,7 +496,7 @@ def convert_ids( toplevel.metadata.name == "pulse_schedule" and toplevel.ids_properties.homogeneous_time == IDS_TIME_MODE_HETEROGENEOUS and source_version < Version("3.40.0") - and target_version.major == 4 + and target_version >= Version("3.40.0") ): try: # Suppress "'.../time' does not exist in the target IDS." log messages.