diff --git a/docs/source/multi-dd.rst b/docs/source/multi-dd.rst index 6ddd7cd1..b63d18e4 100644 --- a/docs/source/multi-dd.rst +++ b/docs/source/multi-dd.rst @@ -146,6 +146,7 @@ explicit conversion mechanisms. Changed definition of open/closed contours, Yes, No Changed definition of ``space/coordinates_type`` in GGD grids, Yes, No Migrate obsolescent ``ids_properties/source`` to ``ids_properties/provenance``, Yes, No + Convert the multiple time-bases in the ``pulse_schedule`` IDS [#ps3to4]_, Yes, No .. [#rename] Quantities which have been renamed between the two DD versions. For example, the ``ec/beam`` Array of Structures in the ``pulse_schedule`` IDS, @@ -175,6 +176,15 @@ explicit conversion mechanisms. .. [#ignore_type_change] These type changes are not supported. Quantities in the destination IDS will remain empty. +.. [#ps3to4] In Data Dictionary 3.39.0 and older, all dynamic quantities in the + ``pulse_schedule`` IDS had their own time array. In DD 4.0.0 this was + restructured to one time array per component (for example `ec/time + `__). + This migration constructs a common time base per subgroup, and interpolates + the dynamic quantities within the group to the new time base. Resampling + uses `previous neighbour` interpolation for integer quantities, and linear + interpolation otherwise. See also: + https://github.com/iterorganization/IMAS-Python/issues/21. .. _`DD background`: diff --git a/imas/ids_convert.py b/imas/ids_convert.py index e5dc0911..295a87be 100644 --- a/imas/ids_convert.py +++ b/imas/ids_convert.py @@ -5,26 +5,23 @@ import copy import datetime import logging -from functools import lru_cache +from functools import lru_cache, partial from pathlib import Path from typing import Callable, Dict, Iterator, Optional, Set, Tuple from xml.etree.ElementTree import Element, ElementTree import numpy from packaging.version import InvalidVersion, Version +from scipy.interpolate import interp1d import imas from imas.dd_zip import parse_dd_version from imas.ids_base import IDSBase from imas.ids_data_type import IDSDataType +from imas.ids_defs import IDS_TIME_MODE_HETEROGENEOUS from imas.ids_factory import IDSFactory from imas.ids_path import IDSPath -from imas.ids_primitive import ( - IDSNumeric0D, - IDSNumericArray, - IDSPrimitive, - IDSString0D, -) +from imas.ids_primitive import IDSNumeric0D, IDSNumericArray, IDSPrimitive, IDSString0D from imas.ids_struct_array import IDSStructArray from imas.ids_structure import IDSStructure from imas.ids_toplevel import IDSToplevel @@ -474,12 +471,10 @@ def convert_ids( raise RuntimeError( f"There is no IDS with name {ids_name} in DD version {version}." ) - target_ids = factory.new(ids_name) - else: - target_ids = target + target = factory.new(ids_name) source_version = parse_dd_version(toplevel._version) - target_version = parse_dd_version(target_ids._version) + target_version = parse_dd_version(target._version) logger.info( "Starting conversion of IDS %s from version %s to version %s.", ids_name, @@ -487,19 +482,35 @@ def convert_ids( target_version, ) - source_is_new = source_version > target_version source_tree = toplevel._parent._etree - target_tree = target_ids._parent._etree - if source_is_new: + target_tree = target._parent._etree + if source_version > target_version: version_map = _DDVersionMap(ids_name, target_tree, source_tree, target_version) + rename_map = version_map.new_to_old else: version_map = _DDVersionMap(ids_name, source_tree, target_tree, source_version) + rename_map = version_map.old_to_new + + # Special case for DD3to4 pulse_schedule conversion + if ( + toplevel.metadata.name == "pulse_schedule" + and toplevel.ids_properties.homogeneous_time == IDS_TIME_MODE_HETEROGENEOUS + and source_version < Version("3.40.0") + and target_version >= Version("3.40.0") + ): + try: + # Suppress "'.../time' does not exist in the target IDS." log messages. + logger.addFilter(_pulse_schedule_3to4_logfilter) + _pulse_schedule_3to4(toplevel, target, deepcopy, rename_map) + finally: + logger.removeFilter(_pulse_schedule_3to4_logfilter) + else: + _copy_structure(toplevel, target, deepcopy, rename_map) - _copy_structure(toplevel, target_ids, deepcopy, source_is_new, version_map) logger.info("Conversion of IDS %s finished.", ids_name) if provenance_origin_uri: - _add_provenance_entry(target_ids, toplevel._version, provenance_origin_uri) - return target_ids + _add_provenance_entry(target, toplevel._version, provenance_origin_uri) + return target def _add_provenance_entry( @@ -541,12 +552,47 @@ def _add_provenance_entry( node.sources.append(source_txt) # sources is a STR_1D (=list of strings) +def _get_target_item( + item: IDSBase, target: IDSStructure, rename_map: NBCPathMap +) -> Optional[IDSBase]: + """Find and return the corresponding target item if it exists. + + This method follows NBC renames (as stored in the rename map). It returns None if + there is no corresponding target item in the target structure. + """ + path = item.metadata.path_string + + # Follow NBC renames: + if path in rename_map: + if rename_map.path[path] is None: + if path not in rename_map.ignore_missing_paths: + if path in rename_map.type_change: + msg = "Element %r changed type in the target IDS." + else: + msg = "Element %r does not exist in the target IDS." + logger.warning(msg + " Data is not copied.", path) + return None + else: + return IDSPath(rename_map.path[path]).goto(target) + + # No NBC renames: + try: + return target[item.metadata.name] + except AttributeError: + # In exceptional cases the item does not exist in the target. Example: + # neutron_diagnostic IDS between DD 3.40.1 and 3.41.0. has renamed + # synthetic_signals/fusion_power -> fusion_power. The synthetic_signals + # structure no longer exists but we need to descend into it to get the + # total_neutron_flux. + return target + + def _copy_structure( source: IDSStructure, target: IDSStructure, deepcopy: bool, - source_is_new: bool, - version_map: DDVersionMap, + rename_map: NBCPathMap, + callback: Optional[Callable] = None, ): """Recursively copy data, following NBC renames. @@ -557,31 +603,14 @@ def _copy_structure( source_is_new: True iff the DD version of the source is newer than that of the target. version_map: Version map containing NBC renames. + callback: Optional callback that is called for every copied node. """ - rename_map = version_map.new_to_old if source_is_new else version_map.old_to_new for item in source.iter_nonempty_(): path = item.metadata.path_string - if path in rename_map: - if rename_map.path[path] is None: - if path not in rename_map.ignore_missing_paths: - if path in rename_map.type_change: - msg = "Element %r changed type in the target IDS." - else: - msg = "Element %r does not exist in the target IDS." - logger.warning(msg + " Data is not copied.", path) - continue - else: - target_item = IDSPath(rename_map.path[path]).goto(target) - else: - try: - target_item = target[item.metadata.name] - except AttributeError: - # In exceptional cases the item does not exist in the target. Example: - # neutron_diagnostic IDS between DD 3.40.1 and 3.41.0. has renamed - # synthetic_signals/fusion_power -> fusion_power. The synthetic_signals - # structure no longer exists but we need to descend into it to get the - # total_neutron_flux. - target_item = target + target_item = _get_target_item(item, target, rename_map) + if target_item is None: + continue + if path in rename_map.type_change: # Handle type change new_items = rename_map.type_change[path](item, target_item) @@ -594,21 +623,17 @@ def _copy_structure( size = len(item) target_item.resize(size) for i in range(size): - _copy_structure( - item[i], target_item[i], deepcopy, source_is_new, version_map - ) + _copy_structure(item[i], target_item[i], deepcopy, rename_map, callback) elif isinstance(item, IDSStructure): - _copy_structure(item, target_item, deepcopy, source_is_new, version_map) + _copy_structure(item, target_item, deepcopy, rename_map, callback) else: - if deepcopy: - # No nested types are used as data, so a shallow copy is sufficient - target_item.value = copy.copy(item.value) - else: - target_item.value = item.value + target_item.value = copy.copy(item.value) if deepcopy else item.value # Post-process the node: if path in rename_map.post_process: rename_map.post_process[path](target_item) + if callback is not None: + callback(item, target_item) ######################################################################################## @@ -919,3 +944,82 @@ def _ids_properties_source(source: IDSString0D, provenance: IDSStructure) -> Non provenance.node.resize(1) provenance.node[0].reference.resize(1) provenance.node[0].reference[0].name = source.value + + +def _pulse_schedule_3to4( + source: IDSStructure, + target: IDSStructure, + deepcopy: bool, + rename_map: NBCPathMap, +): + """Recursively copy data, following NBC renames, and converting time bases for the + pulse_schedule IDS. + + Args: + source: Source structure. + target: Target structure. + deepcopy: See :func:`convert_ids`. + rename_map: Map containing NBC renames. + """ + # All prerequisites are checked before calling this function: + # - source and target are pulse_schedule IDSs + # - source has DD version < 3.40.0 + # - target has DD version >= 4.0.0, < 5.0 + # - IDS is using heterogeneous time + + for item in source.iter_nonempty_(): + name = item.metadata.name + target_item = _get_target_item(item, target, rename_map) + if target_item is None: + continue + + # Special cases for non-dynamic stuff + if name in ["ids_properties", "code"]: + _copy_structure(item, target_item, deepcopy, rename_map) + elif name == "time": + target_item.value = item.value if not deepcopy else copy.copy(item.value) + elif name == "event": + size = len(item) + target_item.resize(size) + for i in range(size): + _copy_structure(item[i], target_item[i], deepcopy, rename_map) + else: + # Find all time bases + time_bases = [ + node.value + for node in imas.util.tree_iter(item) + if node.metadata.name == "time" + ] + # Construct the common time base + timebase = numpy.unique(numpy.concatenate(time_bases)) if time_bases else [] + target_item.time = timebase + # Do the conversion + callback = partial(_pulse_schedule_resample_callback, timebase) + _copy_structure(item, target_item, deepcopy, rename_map, callback) + + +def _pulse_schedule_3to4_logfilter(logrecord: logging.LogRecord) -> bool: + """Suppress "'.../time' does not exist in the target IDS." log messages.""" + return not (logrecord.args and str(logrecord.args[0]).endswith("/time")) + + +def _pulse_schedule_resample_callback(timebase, item: IDSBase, target_item: IDSBase): + """Callback from _copy_structure to resample dynamic data on the new timebase""" + if item.metadata.ndim == 1 and item.metadata.coordinates[0].is_time_coordinate: + # Interpolate 1D dynamic quantities to the common time base + time = item.coordinates[0] + if len(item) != len(time): + raise ValueError( + f"Array {item} has a different size than its time base {time}." + ) + is_integer = item.metadata.data_type is IDSDataType.INT + value = interp1d( + time.value, + item.value, + "previous" if is_integer else "linear", + copy=False, + bounds_error=False, + fill_value=(item[0], item[-1]), + assume_sorted=True, + )(timebase) + target_item.value = value.astype(numpy.int32) if is_integer else value diff --git a/imas/test/test_ids_convert.py b/imas/test/test_ids_convert.py index 55045bbc..f2b9b7f7 100644 --- a/imas/test/test_ids_convert.py +++ b/imas/test/test_ids_convert.py @@ -7,6 +7,7 @@ from unittest.mock import MagicMock import numpy +from numpy import array_equal import pytest from imas import identifiers @@ -27,7 +28,7 @@ from imas.ids_factory import IDSFactory from imas.ids_struct_array import IDSStructArray from imas.ids_structure import IDSStructure -from imas.test.test_helpers import compare_children, open_dbentry +from imas.test.test_helpers import compare_children, fill_consistent, open_dbentry UTC = timezone.utc @@ -287,22 +288,22 @@ def test_3to4_repeat_children_first_point_conditional(dd4factory): for i in range(2): outline_inner = wall4.description_2d[0].vessel.unit[i].annular.outline_inner if i == 0: # open outline, first point not repeated: - assert numpy.array_equal(outline_inner.r, [1.0, 2.0, 3.0]) - assert numpy.array_equal(outline_inner.z, [-1.0, -2.0, -3.0]) + assert array_equal(outline_inner.r, [1.0, 2.0, 3.0]) + assert array_equal(outline_inner.z, [-1.0, -2.0, -3.0]) else: # closed outline, first point repeated: - assert numpy.array_equal(outline_inner.r, [1.0, 2.0, 3.0, 1.0]) - assert numpy.array_equal(outline_inner.z, [-1.0, -2.0, -3.0, -1.0]) + assert array_equal(outline_inner.r, [1.0, 2.0, 3.0, 1.0]) + assert array_equal(outline_inner.z, [-1.0, -2.0, -3.0, -1.0]) # Test conversion for case 2: assert len(wall4.description_2d[0].limiter.unit) == 2 for i in range(2): unit = wall4.description_2d[0].limiter.unit[i] if i == 0: # open outline, first point not repeated: - assert numpy.array_equal(unit.outline.r, [1.0, 2.0, 3.0]) - assert numpy.array_equal(unit.outline.z, [-1.0, -2.0, -3.0]) + assert array_equal(unit.outline.r, [1.0, 2.0, 3.0]) + assert array_equal(unit.outline.z, [-1.0, -2.0, -3.0]) else: # closed outline, first point repeated: - assert numpy.array_equal(unit.outline.r, [1.0, 2.0, 3.0, 1.0]) - assert numpy.array_equal(unit.outline.z, [-1.0, -2.0, -3.0, -1.0]) + assert array_equal(unit.outline.r, [1.0, 2.0, 3.0, 1.0]) + assert array_equal(unit.outline.z, [-1.0, -2.0, -3.0, -1.0]) # Test conversion for case 3: assert len(wall4.description_2d[0].mobile.unit) == 2 @@ -310,11 +311,11 @@ def test_3to4_repeat_children_first_point_conditional(dd4factory): unit = wall4.description_2d[0].mobile.unit[i] for j in range(3): if i == 0: # open outline, first point not repeated: - assert numpy.array_equal(unit.outline[j].r, [1.0, 2.0, 3.0]) - assert numpy.array_equal(unit.outline[j].z, [-1.0, -2.0, -3.0]) + assert array_equal(unit.outline[j].r, [1.0, 2.0, 3.0]) + assert array_equal(unit.outline[j].z, [-1.0, -2.0, -3.0]) else: # closed outline, first point repeated: - assert numpy.array_equal(unit.outline[j].r, [1.0, 2.0, 3.0, 1.0]) - assert numpy.array_equal(unit.outline[j].z, [-1.0, -2.0, -3.0, -1.0]) + assert array_equal(unit.outline[j].r, [1.0, 2.0, 3.0, 1.0]) + assert array_equal(unit.outline[j].z, [-1.0, -2.0, -3.0, -1.0]) assert unit.outline[j].time == pytest.approx(j / 5) # Test conversion for case 4: @@ -322,9 +323,9 @@ def test_3to4_repeat_children_first_point_conditional(dd4factory): for i in range(2): thickness = wall4.description_2d[1].vessel.unit[i].annular.thickness if i == 0: # open outline, there was one value too many, drop the last one - assert numpy.array_equal(thickness, [1, 0.9]) + assert array_equal(thickness, [1, 0.9]) else: # closed outline, thickness values kept - assert numpy.array_equal(thickness, [1, 0.9, 0.9]) + assert array_equal(thickness, [1, 0.9, 0.9]) # Test conversion back wall3 = convert_ids(wall4, "3.39.0") @@ -340,8 +341,8 @@ def test_3to4_repeat_children_first_point(dd4factory): iron_core4 = convert_ids(iron_core, None, factory=dd4factory) geometry = iron_core4.segment[0].geometry - assert numpy.array_equal(geometry.outline.r, [1.0, 2.0, 3.0, 1.0]) - assert numpy.array_equal(geometry.outline.z, [-1.0, -2.0, -3.0, -1.0]) + assert array_equal(geometry.outline.r, [1.0, 2.0, 3.0, 1.0]) + assert array_equal(geometry.outline.z, [-1.0, -2.0, -3.0, -1.0]) iron_core3 = convert_ids(iron_core4, "3.39.0") compare_children(iron_core, iron_core3) @@ -356,11 +357,11 @@ def test_3to4_cocos_change(dd4factory): cp.profiles_1d[0].grid.psi = numpy.linspace(10, 20, 11) cp4 = convert_ids(cp, None, factory=dd4factory) - assert numpy.array_equal( + assert array_equal( cp4.profiles_1d[0].grid.rho_tor_norm, cp.profiles_1d[0].grid.rho_tor_norm, ) - assert numpy.array_equal( + assert array_equal( cp4.profiles_1d[0].grid.psi, -cp.profiles_1d[0].grid.psi, ) @@ -376,11 +377,11 @@ def test_3to4_cocos_change(dd4factory): eq.time_slice[0].profiles_1d.dpressure_dpsi = numpy.linspace(1, 2, 11) eq4 = convert_ids(eq, None, factory=dd4factory) - assert numpy.array_equal( + assert array_equal( eq4.time_slice[0].profiles_1d.psi, -eq.time_slice[0].profiles_1d.psi, ) - assert numpy.array_equal( + assert array_equal( eq4.time_slice[0].profiles_1d.dpressure_dpsi, -eq.time_slice[0].profiles_1d.dpressure_dpsi, ) @@ -400,7 +401,7 @@ def test_3to4_circuit_connections(dd4factory, caplog): ] pfa4 = convert_ids(pfa, None, factory=dd4factory) - assert numpy.array_equal( + assert array_equal( pfa4.circuit[0].connections, [[-1, 0, 1], [0, 1, -1], [1, -1, 0]] ) @@ -417,7 +418,7 @@ def test_3to4_circuit_connections(dd4factory, caplog): with caplog.at_level(logging.ERROR): pfa4 = convert_ids(pfa, None, factory=dd4factory) # Incorrect shape, data is not converted: - assert numpy.array_equal(pfa.circuit[0].connections, pfa4.circuit[0].connections) + assert array_equal(pfa.circuit[0].connections, pfa4.circuit[0].connections) # Check that a message with ERROR severity was logged assert len(caplog.record_tuples) == 1 assert caplog.record_tuples[0][1] == logging.ERROR @@ -430,7 +431,53 @@ def test_3to4_cocos_magnetics_workaround(dd4factory): mag.flux_loop[0].flux.data = [1.0, 2.0] mag4 = convert_ids(mag, None, factory=dd4factory) - assert numpy.array_equal(mag4.flux_loop[0].flux.data, [-1.0, -2.0]) + assert array_equal(mag4.flux_loop[0].flux.data, [-1.0, -2.0]) mag3 = convert_ids(mag4, "3.39.0") compare_children(mag, mag3) + + +def test_3to4_pulse_schedule(): + ps = IDSFactory("3.39.0").pulse_schedule() + ps.ids_properties.homogeneous_time = IDS_TIME_MODE_HETEROGENEOUS + + ps.ec.launcher.resize(3) + ps.ec.launcher[0].power.reference.data = [1.0, 2.0, 3.0] + ps.ec.launcher[0].power.reference.time = [1.0, 2.0, 3.0] + ps.ec.launcher[1].power.reference.data = [0.0, 2.0, 5.0] + ps.ec.launcher[1].power.reference.time = [0.0, 2.0, 5.0] + ps.ec.launcher[2].power.reference.data = [1.0, 1.5] + ps.ec.launcher[2].power.reference.time = [1.0, 1.5] + + ps.ec.mode.data = [1, 2, 5] + ps.ec.mode.time = [1.0, 2.0, 5.0] + + ps4 = convert_ids(ps, "4.0.0") + assert array_equal(ps4.ec.time, [0.0, 1.0, 1.5, 2.0, 3.0, 5.0]) + item = "power_launched/reference" + assert array_equal(ps4.ec.beam[0][item], [1.0, 1.0, 1.5, 2.0, 3.0, 3.0]) + assert array_equal(ps4.ec.beam[1][item], [0.0, 1.0, 1.5, 2.0, 3.0, 5.0]) + assert array_equal(ps4.ec.beam[2][item], [1.0, 1.0, 1.5, 1.5, 1.5, 1.5]) + assert array_equal(ps4.ec.mode, [1, 1, 1, 2, 2, 5]) + + +def test_3to4_pulse_schedule_exceptions(): + ps = IDSFactory("3.39.0").pulse_schedule() + ps.ids_properties.homogeneous_time = IDS_TIME_MODE_HETEROGENEOUS + + ps.ec.launcher.resize(3) + ps.ec.launcher[0].power.reference.data = [1.0, 2.0, 3.0] + with pytest.raises(ValueError): # missing time base + convert_ids(ps, "4.0.0") + + ps.ec.launcher[0].power.reference.time = [1.0, 2.0] + with pytest.raises(ValueError): # incorrect size of time base + convert_ids(ps, "4.0.0") + + +def test_3to4_pulse_schedule_fuzz(): + ps = IDSFactory("3.39.0").pulse_schedule() + ps.ids_properties.homogeneous_time = IDS_TIME_MODE_HETEROGENEOUS + + fill_consistent(ps) + convert_ids(ps, "4.0.0")