-
Notifications
You must be signed in to change notification settings - Fork 18
Feature/pulse schedule 3to4 #33
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
45954a0
92f0008
97c9b0d
8e6584e
2b9c373
7c557e0
6ef9353
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,26 +5,23 @@ | |
| import copy | ||
| import datetime | ||
| import logging | ||
| from functools import lru_cache | ||
| from functools import lru_cache, partial | ||
| from pathlib import Path | ||
| from typing import Callable, Dict, Iterator, Optional, Set, Tuple | ||
| from xml.etree.ElementTree import Element, ElementTree | ||
|
|
||
| import numpy | ||
| from packaging.version import InvalidVersion, Version | ||
| from scipy.interpolate import interp1d | ||
|
|
||
| import imas | ||
| from imas.dd_zip import parse_dd_version | ||
| from imas.ids_base import IDSBase | ||
| from imas.ids_data_type import IDSDataType | ||
| from imas.ids_defs import IDS_TIME_MODE_HETEROGENEOUS | ||
| from imas.ids_factory import IDSFactory | ||
| from imas.ids_path import IDSPath | ||
| from imas.ids_primitive import ( | ||
| IDSNumeric0D, | ||
| IDSNumericArray, | ||
| IDSPrimitive, | ||
| IDSString0D, | ||
| ) | ||
| from imas.ids_primitive import IDSNumeric0D, IDSNumericArray, IDSPrimitive, IDSString0D | ||
| from imas.ids_struct_array import IDSStructArray | ||
| from imas.ids_structure import IDSStructure | ||
| from imas.ids_toplevel import IDSToplevel | ||
|
|
@@ -474,32 +471,46 @@ def convert_ids( | |
| raise RuntimeError( | ||
| f"There is no IDS with name {ids_name} in DD version {version}." | ||
| ) | ||
| target_ids = factory.new(ids_name) | ||
| else: | ||
| target_ids = target | ||
| target = factory.new(ids_name) | ||
|
|
||
| source_version = parse_dd_version(toplevel._version) | ||
| target_version = parse_dd_version(target_ids._version) | ||
| target_version = parse_dd_version(target._version) | ||
| logger.info( | ||
| "Starting conversion of IDS %s from version %s to version %s.", | ||
| ids_name, | ||
| source_version, | ||
| target_version, | ||
| ) | ||
|
|
||
| source_is_new = source_version > target_version | ||
| source_tree = toplevel._parent._etree | ||
| target_tree = target_ids._parent._etree | ||
| if source_is_new: | ||
| target_tree = target._parent._etree | ||
| if source_version > target_version: | ||
| version_map = _DDVersionMap(ids_name, target_tree, source_tree, target_version) | ||
| rename_map = version_map.new_to_old | ||
| else: | ||
| version_map = _DDVersionMap(ids_name, source_tree, target_tree, source_version) | ||
| rename_map = version_map.old_to_new | ||
|
|
||
| # Special case for DD3to4 pulse_schedule conversion | ||
| if ( | ||
| toplevel.metadata.name == "pulse_schedule" | ||
| and toplevel.ids_properties.homogeneous_time == IDS_TIME_MODE_HETEROGENEOUS | ||
| and source_version < Version("3.40.0") | ||
| and target_version >= Version("3.40.0") | ||
| ): | ||
| try: | ||
| # Suppress "'.../time' does not exist in the target IDS." log messages. | ||
| logger.addFilter(_pulse_schedule_3to4_logfilter) | ||
| _pulse_schedule_3to4(toplevel, target, deepcopy, rename_map) | ||
| finally: | ||
| logger.removeFilter(_pulse_schedule_3to4_logfilter) | ||
| else: | ||
| _copy_structure(toplevel, target, deepcopy, rename_map) | ||
|
|
||
| _copy_structure(toplevel, target_ids, deepcopy, source_is_new, version_map) | ||
| logger.info("Conversion of IDS %s finished.", ids_name) | ||
| if provenance_origin_uri: | ||
| _add_provenance_entry(target_ids, toplevel._version, provenance_origin_uri) | ||
| return target_ids | ||
| _add_provenance_entry(target, toplevel._version, provenance_origin_uri) | ||
| return target | ||
|
|
||
|
|
||
| def _add_provenance_entry( | ||
|
|
@@ -541,12 +552,47 @@ def _add_provenance_entry( | |
| node.sources.append(source_txt) # sources is a STR_1D (=list of strings) | ||
|
|
||
|
|
||
| def _get_target_item( | ||
| item: IDSBase, target: IDSStructure, rename_map: NBCPathMap | ||
| ) -> Optional[IDSBase]: | ||
| """Find and return the corresponding target item if it exists. | ||
|
|
||
| This method follows NBC renames (as stored in the rename map). It returns None if | ||
| there is no corresponding target item in the target structure. | ||
| """ | ||
| path = item.metadata.path_string | ||
|
|
||
| # Follow NBC renames: | ||
| if path in rename_map: | ||
| if rename_map.path[path] is None: | ||
| if path not in rename_map.ignore_missing_paths: | ||
| if path in rename_map.type_change: | ||
| msg = "Element %r changed type in the target IDS." | ||
| else: | ||
| msg = "Element %r does not exist in the target IDS." | ||
| logger.warning(msg + " Data is not copied.", path) | ||
| return None | ||
| else: | ||
| return IDSPath(rename_map.path[path]).goto(target) | ||
|
|
||
| # No NBC renames: | ||
| try: | ||
| return target[item.metadata.name] | ||
| except AttributeError: | ||
| # In exceptional cases the item does not exist in the target. Example: | ||
| # neutron_diagnostic IDS between DD 3.40.1 and 3.41.0. has renamed | ||
| # synthetic_signals/fusion_power -> fusion_power. The synthetic_signals | ||
| # structure no longer exists but we need to descend into it to get the | ||
| # total_neutron_flux. | ||
| return target | ||
|
|
||
|
|
||
| def _copy_structure( | ||
| source: IDSStructure, | ||
| target: IDSStructure, | ||
| deepcopy: bool, | ||
| source_is_new: bool, | ||
| version_map: DDVersionMap, | ||
| rename_map: NBCPathMap, | ||
| callback: Optional[Callable] = None, | ||
| ): | ||
| """Recursively copy data, following NBC renames. | ||
|
|
||
|
|
@@ -557,31 +603,14 @@ def _copy_structure( | |
| source_is_new: True iff the DD version of the source is newer than that of the | ||
| target. | ||
| version_map: Version map containing NBC renames. | ||
| callback: Optional callback that is called for every copied node. | ||
| """ | ||
| rename_map = version_map.new_to_old if source_is_new else version_map.old_to_new | ||
| for item in source.iter_nonempty_(): | ||
| path = item.metadata.path_string | ||
| if path in rename_map: | ||
| if rename_map.path[path] is None: | ||
| if path not in rename_map.ignore_missing_paths: | ||
| if path in rename_map.type_change: | ||
| msg = "Element %r changed type in the target IDS." | ||
| else: | ||
| msg = "Element %r does not exist in the target IDS." | ||
| logger.warning(msg + " Data is not copied.", path) | ||
| continue | ||
| else: | ||
| target_item = IDSPath(rename_map.path[path]).goto(target) | ||
| else: | ||
| try: | ||
| target_item = target[item.metadata.name] | ||
| except AttributeError: | ||
| # In exceptional cases the item does not exist in the target. Example: | ||
| # neutron_diagnostic IDS between DD 3.40.1 and 3.41.0. has renamed | ||
| # synthetic_signals/fusion_power -> fusion_power. The synthetic_signals | ||
| # structure no longer exists but we need to descend into it to get the | ||
| # total_neutron_flux. | ||
| target_item = target | ||
| target_item = _get_target_item(item, target, rename_map) | ||
maarten-ic marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| if target_item is None: | ||
| continue | ||
|
|
||
| if path in rename_map.type_change: | ||
| # Handle type change | ||
| new_items = rename_map.type_change[path](item, target_item) | ||
|
|
@@ -594,21 +623,17 @@ def _copy_structure( | |
| size = len(item) | ||
| target_item.resize(size) | ||
| for i in range(size): | ||
| _copy_structure( | ||
| item[i], target_item[i], deepcopy, source_is_new, version_map | ||
| ) | ||
| _copy_structure(item[i], target_item[i], deepcopy, rename_map, callback) | ||
| elif isinstance(item, IDSStructure): | ||
| _copy_structure(item, target_item, deepcopy, source_is_new, version_map) | ||
| _copy_structure(item, target_item, deepcopy, rename_map, callback) | ||
| else: | ||
| if deepcopy: | ||
| # No nested types are used as data, so a shallow copy is sufficient | ||
| target_item.value = copy.copy(item.value) | ||
| else: | ||
| target_item.value = item.value | ||
| target_item.value = copy.copy(item.value) if deepcopy else item.value | ||
|
|
||
| # Post-process the node: | ||
| if path in rename_map.post_process: | ||
| rename_map.post_process[path](target_item) | ||
| if callback is not None: | ||
| callback(item, target_item) | ||
|
|
||
|
|
||
| ######################################################################################## | ||
|
|
@@ -919,3 +944,82 @@ def _ids_properties_source(source: IDSString0D, provenance: IDSStructure) -> Non | |
| provenance.node.resize(1) | ||
| provenance.node[0].reference.resize(1) | ||
| provenance.node[0].reference[0].name = source.value | ||
|
|
||
|
|
||
| def _pulse_schedule_3to4( | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should these go in a separate file perhaps? Or do we do that when we add another large migration?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We could, but separating them into separate files is slightly messy with circular imports etc. At this point that is not really worth it IMO :) |
||
| source: IDSStructure, | ||
| target: IDSStructure, | ||
| deepcopy: bool, | ||
| rename_map: NBCPathMap, | ||
| ): | ||
| """Recursively copy data, following NBC renames, and converting time bases for the | ||
| pulse_schedule IDS. | ||
|
|
||
| Args: | ||
| source: Source structure. | ||
| target: Target structure. | ||
| deepcopy: See :func:`convert_ids`. | ||
| rename_map: Map containing NBC renames. | ||
| """ | ||
| # All prerequisites are checked before calling this function: | ||
| # - source and target are pulse_schedule IDSs | ||
| # - source has DD version < 3.40.0 | ||
| # - target has DD version >= 4.0.0, < 5.0 | ||
| # - IDS is using heterogeneous time | ||
|
|
||
| for item in source.iter_nonempty_(): | ||
| name = item.metadata.name | ||
| target_item = _get_target_item(item, target, rename_map) | ||
| if target_item is None: | ||
| continue | ||
|
|
||
| # Special cases for non-dynamic stuff | ||
| if name in ["ids_properties", "code"]: | ||
| _copy_structure(item, target_item, deepcopy, rename_map) | ||
| elif name == "time": | ||
| target_item.value = item.value if not deepcopy else copy.copy(item.value) | ||
| elif name == "event": | ||
| size = len(item) | ||
| target_item.resize(size) | ||
| for i in range(size): | ||
| _copy_structure(item[i], target_item[i], deepcopy, rename_map) | ||
| else: | ||
| # Find all time bases | ||
| time_bases = [ | ||
| node.value | ||
| for node in imas.util.tree_iter(item) | ||
| if node.metadata.name == "time" | ||
| ] | ||
| # Construct the common time base | ||
| timebase = numpy.unique(numpy.concatenate(time_bases)) if time_bases else [] | ||
| target_item.time = timebase | ||
| # Do the conversion | ||
| callback = partial(_pulse_schedule_resample_callback, timebase) | ||
| _copy_structure(item, target_item, deepcopy, rename_map, callback) | ||
|
|
||
|
|
||
| def _pulse_schedule_3to4_logfilter(logrecord: logging.LogRecord) -> bool: | ||
| """Suppress "'.../time' does not exist in the target IDS." log messages.""" | ||
| return not (logrecord.args and str(logrecord.args[0]).endswith("/time")) | ||
|
|
||
|
|
||
| def _pulse_schedule_resample_callback(timebase, item: IDSBase, target_item: IDSBase): | ||
| """Callback from _copy_structure to resample dynamic data on the new timebase""" | ||
| if item.metadata.ndim == 1 and item.metadata.coordinates[0].is_time_coordinate: | ||
| # Interpolate 1D dynamic quantities to the common time base | ||
| time = item.coordinates[0] | ||
| if len(item) != len(time): | ||
| raise ValueError( | ||
| f"Array {item} has a different size than its time base {time}." | ||
| ) | ||
| is_integer = item.metadata.data_type is IDSDataType.INT | ||
| value = interp1d( | ||
| time.value, | ||
| item.value, | ||
| "previous" if is_integer else "linear", | ||
| copy=False, | ||
| bounds_error=False, | ||
| fill_value=(item[0], item[-1]), | ||
| assume_sorted=True, | ||
| )(timebase) | ||
| target_item.value = value.astype(numpy.int32) if is_integer else value | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
what about 3.40 etc? was it not restructured at that point?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Correct, but IO requested to make the conversion logic available for DD3.x to 4.x.
3.39 -> 3.41 follows regular conversion logic and will not resample dynamic quantities. It would be trivial to update the logic to execute when converting from DD <=3.39.0 to DD >= 3.40.0, though. @olivhoenen any thoughts?
Nothing needs to be done w.r.t. the time bases between these DD versions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's true, on the one hand we should not expect imas-python to have to fix every single non-backward compatible changes to the DD via ad-hoc conversion functions (but it's nice to have that option). But in this case, you've implemented the function already (with DD4 in mind but the same transformation applies for DD > 3.39.0), so I think there is no harm extending the range.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Clear! The conversion now also applies when going to a DD version >= 3.40.0.