Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions docs/source/multi-dd.rst
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ explicit conversion mechanisms.
Changed definition of open/closed contours, Yes, No
Changed definition of ``space/coordinates_type`` in GGD grids, Yes, No
Migrate obsolescent ``ids_properties/source`` to ``ids_properties/provenance``, Yes, No
Convert the multiple time-bases in the ``pulse_schedule`` IDS [#ps3to4]_, Yes, No

.. [#rename] Quantities which have been renamed between the two DD versions. For
example, the ``ec/beam`` Array of Structures in the ``pulse_schedule`` IDS,
Expand Down Expand Up @@ -175,6 +176,15 @@ explicit conversion mechanisms.
.. [#ignore_type_change] These type changes are not supported. Quantities in the
destination IDS will remain empty.

.. [#ps3to4] In Data Dictionary 3.39.0 and older, all dynamic quantities in the
``pulse_schedule`` IDS had their own time array. In DD 4.0.0 this was
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what about 3.40 etc? was it not restructured at that point?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct, but IO requested to make the conversion logic available for DD3.x to 4.x.

what about 3.39 -> 3.41? there is a bit of a gap in the conversion logic, right?

3.39 -> 3.41 follows regular conversion logic and will not resample dynamic quantities. It would be trivial to update the logic to execute when converting from DD <=3.39.0 to DD >= 3.40.0, though. @olivhoenen any thoughts?

3.41 -> 4?

Nothing needs to be done w.r.t. the time bases between these DD versions.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's true; on the one hand we should not expect imas-python to have to fix every single non-backward-compatible change to the DD via ad-hoc conversion functions (but it's nice to have that option). But in this case, you've implemented the function already (with DD4 in mind, but the same transformation applies for DD > 3.39.0), so I think there is no harm extending the range.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Clear! The conversion now also applies when going to a DD version >= 3.40.0.

restructured to one time array per component (for example `ec/time
<https://imas-data-dictionary.readthedocs.io/en/latest/generated/ids/pulse_schedule.html#pulse_schedule-ec-time>`__).
This migration constructs a common time base per subgroup, and interpolates
the dynamic quantities within the group to the new time base. Resampling
uses `previous neighbour` interpolation for integer quantities, and linear
interpolation otherwise. See also:
https://github.com/iterorganization/IMAS-Python/issues/21.

.. _`DD background`:

Expand Down
204 changes: 154 additions & 50 deletions imas/ids_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,23 @@
import copy
import datetime
import logging
from functools import lru_cache
from functools import lru_cache, partial
from pathlib import Path
from typing import Callable, Dict, Iterator, Optional, Set, Tuple
from xml.etree.ElementTree import Element, ElementTree

import numpy
from packaging.version import InvalidVersion, Version
from scipy.interpolate import interp1d

import imas
from imas.dd_zip import parse_dd_version
from imas.ids_base import IDSBase
from imas.ids_data_type import IDSDataType
from imas.ids_defs import IDS_TIME_MODE_HETEROGENEOUS
from imas.ids_factory import IDSFactory
from imas.ids_path import IDSPath
from imas.ids_primitive import (
IDSNumeric0D,
IDSNumericArray,
IDSPrimitive,
IDSString0D,
)
from imas.ids_primitive import IDSNumeric0D, IDSNumericArray, IDSPrimitive, IDSString0D
from imas.ids_struct_array import IDSStructArray
from imas.ids_structure import IDSStructure
from imas.ids_toplevel import IDSToplevel
Expand Down Expand Up @@ -474,32 +471,46 @@ def convert_ids(
raise RuntimeError(
f"There is no IDS with name {ids_name} in DD version {version}."
)
target_ids = factory.new(ids_name)
else:
target_ids = target
target = factory.new(ids_name)

source_version = parse_dd_version(toplevel._version)
target_version = parse_dd_version(target_ids._version)
target_version = parse_dd_version(target._version)
logger.info(
"Starting conversion of IDS %s from version %s to version %s.",
ids_name,
source_version,
target_version,
)

source_is_new = source_version > target_version
source_tree = toplevel._parent._etree
target_tree = target_ids._parent._etree
if source_is_new:
target_tree = target._parent._etree
if source_version > target_version:
version_map = _DDVersionMap(ids_name, target_tree, source_tree, target_version)
rename_map = version_map.new_to_old
else:
version_map = _DDVersionMap(ids_name, source_tree, target_tree, source_version)
rename_map = version_map.old_to_new

# Special case for DD3to4 pulse_schedule conversion
if (
toplevel.metadata.name == "pulse_schedule"
and toplevel.ids_properties.homogeneous_time == IDS_TIME_MODE_HETEROGENEOUS
and source_version < Version("3.40.0")
and target_version >= Version("3.40.0")
):
try:
# Suppress "'.../time' does not exist in the target IDS." log messages.
logger.addFilter(_pulse_schedule_3to4_logfilter)
_pulse_schedule_3to4(toplevel, target, deepcopy, rename_map)
finally:
logger.removeFilter(_pulse_schedule_3to4_logfilter)
else:
_copy_structure(toplevel, target, deepcopy, rename_map)

_copy_structure(toplevel, target_ids, deepcopy, source_is_new, version_map)
logger.info("Conversion of IDS %s finished.", ids_name)
if provenance_origin_uri:
_add_provenance_entry(target_ids, toplevel._version, provenance_origin_uri)
return target_ids
_add_provenance_entry(target, toplevel._version, provenance_origin_uri)
return target


def _add_provenance_entry(
Expand Down Expand Up @@ -541,12 +552,47 @@ def _add_provenance_entry(
node.sources.append(source_txt) # sources is a STR_1D (=list of strings)


def _get_target_item(
    item: IDSBase, target: IDSStructure, rename_map: NBCPathMap
) -> Optional[IDSBase]:
    """Locate the element in ``target`` that corresponds to ``item``.

    NBC renames recorded in ``rename_map`` are followed. Returns ``None`` when
    the source element has no counterpart in the target structure (the data
    should then not be copied).
    """
    source_path = item.metadata.path_string

    if source_path in rename_map:
        mapped_path = rename_map.path[source_path]
        if mapped_path is not None:
            # Renamed element: resolve its new path inside the target IDS.
            return IDSPath(mapped_path).goto(target)
        # Element was removed, or its type change is unsupported:
        if source_path not in rename_map.ignore_missing_paths:
            if source_path in rename_map.type_change:
                reason = "Element %r changed type in the target IDS."
            else:
                reason = "Element %r does not exist in the target IDS."
            logger.warning(reason + " Data is not copied.", source_path)
        return None

    # Not affected by NBC renames, look the element up by name:
    try:
        return target[item.metadata.name]
    except AttributeError:
        # In exceptional cases the item does not exist in the target. Example:
        # neutron_diagnostic IDS between DD 3.40.1 and 3.41.0 has renamed
        # synthetic_signals/fusion_power -> fusion_power. The synthetic_signals
        # structure no longer exists but we need to descend into it to get the
        # total_neutron_flux.
        return target


def _copy_structure(
source: IDSStructure,
target: IDSStructure,
deepcopy: bool,
source_is_new: bool,
version_map: DDVersionMap,
rename_map: NBCPathMap,
callback: Optional[Callable] = None,
):
"""Recursively copy data, following NBC renames.

Expand All @@ -557,31 +603,14 @@ def _copy_structure(
source_is_new: True iff the DD version of the source is newer than that of the
target.
version_map: Version map containing NBC renames.
callback: Optional callback that is called for every copied node.
"""
rename_map = version_map.new_to_old if source_is_new else version_map.old_to_new
for item in source.iter_nonempty_():
path = item.metadata.path_string
if path in rename_map:
if rename_map.path[path] is None:
if path not in rename_map.ignore_missing_paths:
if path in rename_map.type_change:
msg = "Element %r changed type in the target IDS."
else:
msg = "Element %r does not exist in the target IDS."
logger.warning(msg + " Data is not copied.", path)
continue
else:
target_item = IDSPath(rename_map.path[path]).goto(target)
else:
try:
target_item = target[item.metadata.name]
except AttributeError:
# In exceptional cases the item does not exist in the target. Example:
# neutron_diagnostic IDS between DD 3.40.1 and 3.41.0. has renamed
# synthetic_signals/fusion_power -> fusion_power. The synthetic_signals
# structure no longer exists but we need to descend into it to get the
# total_neutron_flux.
target_item = target
target_item = _get_target_item(item, target, rename_map)
if target_item is None:
continue

if path in rename_map.type_change:
# Handle type change
new_items = rename_map.type_change[path](item, target_item)
Expand All @@ -594,21 +623,17 @@ def _copy_structure(
size = len(item)
target_item.resize(size)
for i in range(size):
_copy_structure(
item[i], target_item[i], deepcopy, source_is_new, version_map
)
_copy_structure(item[i], target_item[i], deepcopy, rename_map, callback)
elif isinstance(item, IDSStructure):
_copy_structure(item, target_item, deepcopy, source_is_new, version_map)
_copy_structure(item, target_item, deepcopy, rename_map, callback)
else:
if deepcopy:
# No nested types are used as data, so a shallow copy is sufficient
target_item.value = copy.copy(item.value)
else:
target_item.value = item.value
target_item.value = copy.copy(item.value) if deepcopy else item.value

# Post-process the node:
if path in rename_map.post_process:
rename_map.post_process[path](target_item)
if callback is not None:
callback(item, target_item)


########################################################################################
Expand Down Expand Up @@ -919,3 +944,82 @@ def _ids_properties_source(source: IDSString0D, provenance: IDSStructure) -> Non
provenance.node.resize(1)
provenance.node[0].reference.resize(1)
provenance.node[0].reference[0].name = source.value


def _pulse_schedule_3to4(
    source: IDSStructure,
    target: IDSStructure,
    deepcopy: bool,
    rename_map: NBCPathMap,
):
    """Recursively copy data, following NBC renames, and converting time bases for the
    pulse_schedule IDS.

    Args:
        source: Source structure.
        target: Target structure.
        deepcopy: See :func:`convert_ids`.
        rename_map: Map containing NBC renames.
    """
    # Preconditions, all checked by the caller (convert_ids):
    # - source and target are pulse_schedule IDSs
    # - source has DD version < 3.40.0
    # - target has DD version >= 3.40.0
    # - IDS is using heterogeneous time

    for element in source.iter_nonempty_():
        destination = _get_target_item(element, target, rename_map)
        if destination is None:
            continue

        element_name = element.metadata.name
        if element_name in ("ids_properties", "code"):
            # Non-dynamic metadata structures: plain recursive copy.
            _copy_structure(element, destination, deepcopy, rename_map)
        elif element_name == "time":
            # Root time array: copy the value directly.
            destination.value = copy.copy(element.value) if deepcopy else element.value
        elif element_name == "event":
            # Array of structures without per-group time bases: copy per item.
            num_events = len(element)
            destination.resize(num_events)
            for idx in range(num_events):
                _copy_structure(element[idx], destination[idx], deepcopy, rename_map)
        else:
            # Dynamic subgroup: collect every per-quantity time array ...
            time_arrays = [
                node.value
                for node in imas.util.tree_iter(element)
                if node.metadata.name == "time"
            ]
            # ... and merge them into one sorted, de-duplicated time base.
            if time_arrays:
                common_time = numpy.unique(numpy.concatenate(time_arrays))
            else:
                common_time = []
            destination.time = common_time
            # Copy the group, resampling dynamic quantities onto the new base:
            resample = partial(_pulse_schedule_resample_callback, common_time)
            _copy_structure(element, destination, deepcopy, rename_map, resample)


def _pulse_schedule_3to4_logfilter(logrecord: logging.LogRecord) -> bool:
"""Suppress "'.../time' does not exist in the target IDS." log messages."""
return not (logrecord.args and str(logrecord.args[0]).endswith("/time"))


def _pulse_schedule_resample_callback(timebase, item: IDSBase, target_item: IDSBase):
    """Callback from _copy_structure to resample dynamic data on the new timebase"""
    # Only 1D quantities whose first coordinate is a time base are resampled;
    # everything else was already copied verbatim by _copy_structure.
    if item.metadata.ndim != 1 or not item.metadata.coordinates[0].is_time_coordinate:
        return

    time = item.coordinates[0]
    if len(item) != len(time):
        raise ValueError(
            f"Array {item} has a different size than its time base {time}."
        )

    # Integers use previous-neighbour interpolation, everything else linear:
    integer_data = item.metadata.data_type is IDSDataType.INT
    interpolator = interp1d(
        time.value,
        item.value,
        "previous" if integer_data else "linear",
        copy=False,
        # Extrapolate with the edge values instead of raising:
        bounds_error=False,
        fill_value=(item[0], item[-1]),
        assume_sorted=True,
    )
    resampled = interpolator(timebase)
    target_item.value = resampled.astype(numpy.int32) if integer_data else resampled
Loading
Loading