From 4228cad8db9784f72f962a83bf757b184226129b Mon Sep 17 00:00:00 2001 From: AreWeDreaming Date: Tue, 17 Mar 2026 17:30:03 +0100 Subject: [PATCH 01/13] Add wrangler - Wrangling works - Unwrangling awkward data needs work --- imas/backends/netcdf/ids_tensorizer.py | 41 +++++++- imas/test/test_wrangle.py | 135 +++++++++++++++++++++++++ imas/wrangler.py | 76 ++++++++++++++ 3 files changed, 251 insertions(+), 1 deletion(-) create mode 100644 imas/test/test_wrangle.py create mode 100644 imas/wrangler.py diff --git a/imas/backends/netcdf/ids_tensorizer.py b/imas/backends/netcdf/ids_tensorizer.py index 7e9e33ec..ef6f33c8 100644 --- a/imas/backends/netcdf/ids_tensorizer.py +++ b/imas/backends/netcdf/ids_tensorizer.py @@ -3,9 +3,10 @@ """Tensorization logic to convert IDSs to netCDF files and/or xarray Datasets.""" from collections import deque -from typing import List +from typing import List, Tuple import numpy +import awkward as ak from imas.backends.netcdf.iterators import indexed_tree_iter from imas.backends.netcdf.nc_metadata import NCMetadata @@ -203,3 +204,41 @@ def tensorize(self, path, fillvalue): tmp_var[aos_coords + tuple(map(slice, node.shape))] = node.value return tmp_var + + def recursively_convert_to_list(self, path: str, inactive_index:Tuple, + shape:Tuple, i_dim: int): + entry = [] + for index in path: + new_index = inactive_index + (index,) + if i_dim == len(shape) - 1: + entry.append(self.filled_data[path][new_index].value) + else: + entry.append(self.recursively_convert_to_list(path, new_index, + shape, i_dim + 1)) + return entry + + def awkward_tensorize(self, path:str): + """ + Tensorizes the data at the given path with the specified fill value. + + Args: + path: The path to the data in the IDS. + fillvalue: The value to fill the tensor with. Can be of any type, + including strings. + + Returns: + A tensor filled with the data from the specified path. 
+ """ + if path in self.shapes: + shape = self.shapes[path] + else: + dimensions = self.ncmeta.get_dimensions(path, self.homogeneous_time) + shape = tuple(self.dimension_size[dim] for dim in dimensions) + # Get the split between HDF5 indices and stored matrices + # i.e. equilibrium.time_slice.profiles_2d <-> psi + hdf5_dim = len(list(self.filled_data[path].keys())[0]) + if hdf5_dim == 0: + return self.filled_data[path][()].value + else: + return ak.Array(self.recursively_convert_to_list(path, tuple(), shape[:hdf5_dim], 0)) + \ No newline at end of file diff --git a/imas/test/test_wrangle.py b/imas/test/test_wrangle.py new file mode 100644 index 00000000..8d649297 --- /dev/null +++ b/imas/test/test_wrangle.py @@ -0,0 +1,135 @@ +import pytest +import numpy as np +import awkward as ak + +from imas.wrangler import wrangle, unwrangle +from imas.ids_factory import IDSFactory +from imas.util import idsdiffgen + +@pytest.fixture +def test_data(): + data = {"equilibrium": {}} + data["equilibrium"]["N_time"] = 100 + data["equilibrium"]["N_radial"] = 100 + data["equilibrium"]["N_grid"] = 1 + data["equilibrium"]["time"] = np.linspace(0.0, 5.0, data["equilibrium"]["N_time"]) + data["equilibrium"]["psi_1d"] = np.linspace(0.0, 1.0, data["equilibrium"]["N_radial"]) + data["equilibrium"]["r"] = np.linspace(1.0, 2.0, data["equilibrium"]["N_radial"]) + data["equilibrium"]["z"] = np.linspace(-1.0, 1.0, data["equilibrium"]["N_radial"]) + r_grid, z_grid = np.meshgrid(data["equilibrium"]["r"], + data["equilibrium"]["z"], indexing="ij") + data["equilibrium"]["psi_2d"] = (r_grid - 1.5) ** 2 + z_grid**2 + + data["thomson_scattering"] = {} + data["thomson_scattering"]["N_ch"] = (20,10) + data["thomson_scattering"]["N_time"] = (100, 300) + data["thomson_scattering"]["r"] = np.concatenate([np.ones(data["thomson_scattering"]["N_ch"][0])*1.6, + np.ones(data["thomson_scattering"]["N_ch"][1])*1.7]) + data["thomson_scattering"]["z"] = np.concatenate([np.linspace(-1.0, 1.0, 
data["thomson_scattering"]["N_ch"][0]), + np.linspace(-1.0, 1.0, data["thomson_scattering"]["N_ch"][1])]) + data["thomson_scattering"]["t_e"] = data["thomson_scattering"]["z"]**2 * 5.e3 + data["thomson_scattering"]["n_e"] = data["thomson_scattering"]["z"]**2 * 5.e19 + data["thomson_scattering"]["time"] = (np.linspace(0,5.0, data["thomson_scattering"]["N_time"][0]), + np.linspace(0,5.0, data["thomson_scattering"]["N_time"][1])) + return data + +@pytest.fixture +def flat(test_data): + flat = {} + # Equilibrium test data + flat["equilibrium.time"] = test_data["equilibrium"]["time"] + flat["equilibrium.time_slice.time"] = test_data["equilibrium"]["time"] + flat["equilibrium.ids_properties.homogeneous_time"] = 1 + flat["equilibrium.time_slice.profiles_1d.psi"] = np.zeros( + (test_data["equilibrium"]["N_time"], test_data["equilibrium"]["N_radial"]) + ) + flat["equilibrium.time_slice.profiles_1d.psi"][:] = test_data["equilibrium"]["psi_1d"] + flat["equilibrium.time_slice.profiles_2d.grid.dim1"] = np.zeros( + (test_data["equilibrium"]["N_time"], + test_data["equilibrium"]["N_grid"], + test_data["equilibrium"]["N_radial"]) + ) + flat["equilibrium.time_slice.profiles_2d.grid.dim1"][:] = test_data["equilibrium"]["r"][None, :] + flat["equilibrium.time_slice.profiles_2d.grid.dim2"] = np.zeros( + (test_data["equilibrium"]["N_time"], + test_data["equilibrium"]["N_grid"], + test_data["equilibrium"]["N_radial"]) + ) + flat["equilibrium.time_slice.profiles_2d.grid.dim2"][:] = test_data["equilibrium"]["z"][None, :] + flat["equilibrium.time_slice.profiles_2d.psi"] = np.zeros( + ( + test_data["equilibrium"]["N_time"], + test_data["equilibrium"]["N_grid"], + test_data["equilibrium"]["N_radial"], + test_data["equilibrium"]["N_radial"], + ) + ) + flat["equilibrium.time_slice.profiles_2d.psi"][:] = test_data["equilibrium"]["psi_2d"][None, ...] 
+ + # Thomson scattering test data (ragged) + flat["thomson_scattering.ids_properties.homogeneous_time"] = 0 + flat["thomson_scattering.channel.t_e.time"] = ak.concatenate([np.tile(test_data["thomson_scattering"]["time"][0], + (test_data["thomson_scattering"]["N_ch"][0],1)), + np.tile(test_data["thomson_scattering"]["time"][1], + (test_data["thomson_scattering"]["N_ch"][1],1))]) + flat["thomson_scattering.channel.t_e.data"] = ak.concatenate([np.tile(test_data["thomson_scattering"]["t_e"][0], + (test_data["thomson_scattering"]["N_ch"][0],1)), + np.tile(test_data["thomson_scattering"]["t_e"][1], + (test_data["thomson_scattering"]["N_ch"][1],1))]) + flat["thomson_scattering.channel.n_e.time"] = ak.concatenate([np.tile(test_data["thomson_scattering"]["time"][0], + (test_data["thomson_scattering"]["N_ch"][0],1)), + np.tile(test_data["thomson_scattering"]["time"][1], + (test_data["thomson_scattering"]["N_ch"][1],1))]) + flat["thomson_scattering.channel.n_e.data"] = ak.concatenate([np.tile(test_data["thomson_scattering"]["n_e"][0], + (test_data["thomson_scattering"]["N_ch"][0],1)), + np.tile(test_data["thomson_scattering"]["n_e"][1], + (test_data["thomson_scattering"]["N_ch"][1],1))]) + flat["thomson_scattering.channel.position.r"] = test_data["thomson_scattering"]["r"] + flat["thomson_scattering.channel.position.z"] = test_data["thomson_scattering"]["z"] + return flat + +@pytest.fixture +def test_ids_dict(test_data): + factory = IDSFactory("3.41.0") + equilibrium = factory.equilibrium() + equilibrium.time = test_data["equilibrium"]["time"] + equilibrium.time_slice.resize(test_data["equilibrium"]["N_time"]) + equilibrium.ids_properties.homogeneous_time = 1 + for i in range(test_data["equilibrium"]["N_time"]): + equilibrium.time_slice[i].time = test_data["equilibrium"]["time"][i] + equilibrium.time_slice[i].profiles_1d.psi = test_data["equilibrium"]["psi_1d"] + equilibrium.time_slice[i].profiles_2d.resize(1) + equilibrium.time_slice[i].profiles_2d[0].grid.dim1 = 
test_data["equilibrium"]["r"] + equilibrium.time_slice[i].profiles_2d[0].grid.dim2 = test_data["equilibrium"]["z"] + equilibrium.time_slice[i].profiles_2d[0].psi = test_data["equilibrium"]["psi_2d"] + + thomson_scattering = factory.thomson_scattering() + thomson_scattering.ids_properties.homogeneous_time = 0 + N = test_data["thomson_scattering"]["N_ch"][0] + test_data["thomson_scattering"]["N_ch"][1] + thomson_scattering.channel.resize(N) + index = 0 + for i in range(N): + if i == test_data["thomson_scattering"]["N_ch"][0]: + index = 1 + thomson_scattering.channel[i].t_e.time = test_data["thomson_scattering"]["time"][index] + thomson_scattering.channel[i].t_e.data = np.tile(test_data["thomson_scattering"]["t_e"][i], + test_data["thomson_scattering"]["N_time"][index]) + thomson_scattering.channel[i].n_e.time = test_data["thomson_scattering"]["time"][index] + thomson_scattering.channel[i].n_e.data = np.tile(test_data["thomson_scattering"]["t_e"][i], + test_data["thomson_scattering"]["N_time"][index]) + thomson_scattering.channel[i].position.r = test_data["thomson_scattering"]["r"][i] + thomson_scattering.channel[i].position.z = test_data["thomson_scattering"]["z"][i] + + return {"equilibrium":equilibrium, "thomson_scattering": thomson_scattering} + + +def test_wrangle(test_ids_dict, flat): + wrangled = wrangle(flat) + for key in test_ids_dict: + diff = idsdiffgen(wrangled[key],test_ids_dict[key]) + assert len(list(diff)) == 0, diff + +def test_unwrangle(test_ids_dict, flat): + result = unwrangle(list(flat.keys()), test_ids_dict) + for key in flat.keys(): + np.testing.assert_allclose(result[key], flat[key]) diff --git a/imas/wrangler.py b/imas/wrangler.py new file mode 100644 index 00000000..f74e346b --- /dev/null +++ b/imas/wrangler.py @@ -0,0 +1,76 @@ +from typing import Dict, List +import awkward as ak +import numpy as np +from . 
import IDSFactory +from .ids_toplevel import IDSToplevel +from .backends.netcdf.ids_tensorizer import IDSTensorizer + +def recursively_put(location, value, ids): + # time_slice.profiles_1d.psi + if "." in location: + position, sub_location = location.split(".", 1) + sub_ids = getattr(ids, position) + if hasattr(sub_ids, "size"): + N = len(value) + if sub_ids.size == 0: + sub_ids.resize(N) + elif sub_ids.size != N: + raise ValueError( + f"""Inconsistent size across flat entries {location}, {N} (flat) vs. ids {ids.size}! +""" + ) + # Need to iterate over indices (e.g. equilibrium.time_slice[:].) + for index in range(N): + recursively_put(sub_location, value[index], sub_ids[index]) + else: + # Need to set an attribute + # Now get the new substring, e.g. time_slice + position, sub_location = location.split(".", 1) + recursively_put(sub_location, value, sub_ids) + else: + setattr(ids, location, value) + return ids + + +def wrangle(flat: Dict, version="3.41.0") -> Dict[str, IDSToplevel]: + wrangled = {} + factory = IDSFactory(version) + for key in flat: + ids, location = key.split(".", 1) + if ids not in wrangled: + wrangled[ids] = getattr(factory, ids)() + wrangled[ids] = recursively_put(location, flat[key], wrangled[ids]) + return wrangled + +def split_location_across_ids(locations: List[str]) -> Dict[str, List[str]]: + ids_locations = {} + for location in locations: + ids, path = location.split(".",1) + if ids not in ids_locations: + ids_locations[ids] = [] + ids_locations[ids].append(path.replace(".","/") ) + return ids_locations + +def unwrangle( + locations: List[str], ids_dict: Dict[str, IDSToplevel], version="3.41.0" +) -> Dict[str, ak.Array | np.ndarray]: + flat = {} + ids_locations = split_location_across_ids(locations) + for key in ids_locations: + tensorizer = IDSTensorizer(ids_dict[key], ids_locations[key]) + tensorizer.include_coordinate_paths() + tensorizer.collect_filled_data() + tensorizer.determine_data_shapes() + # Add IDS conversion + for ids_location 
in ids_locations[key]: + location = key + "." + ids_location.replace("/", ".") + values = tensorizer.awkward_tensorize(ids_location) + if hasattr(values, "__getattr__"): + # Not a scalar, e.g. homogenous_time + try: + flat[location] = np.asarray(values) + except ValueError as e: + flat[location] = ak.Array(values) + else: + flat[location] = values + return flat From 7faa192be13b54b750c2cec956f374223791978c Mon Sep 17 00:00:00 2001 From: AreWeDreaming Date: Wed, 18 Mar 2026 10:06:39 +0100 Subject: [PATCH 02/13] Fix errors in test data and handle 2d ragged data --- imas/backends/netcdf/ids_tensorizer.py | 12 ++++++---- imas/test/test_wrangle.py | 33 +++++++++++++++----------- imas/wrangler.py | 2 +- 3 files changed, 28 insertions(+), 19 deletions(-) diff --git a/imas/backends/netcdf/ids_tensorizer.py b/imas/backends/netcdf/ids_tensorizer.py index ef6f33c8..b6b57245 100644 --- a/imas/backends/netcdf/ids_tensorizer.py +++ b/imas/backends/netcdf/ids_tensorizer.py @@ -208,7 +208,7 @@ def tensorize(self, path, fillvalue): def recursively_convert_to_list(self, path: str, inactive_index:Tuple, shape:Tuple, i_dim: int): entry = [] - for index in path: + for index in range(shape[i_dim]): new_index = inactive_index + (index,) if i_dim == len(shape) - 1: entry.append(self.filled_data[path][new_index].value) @@ -231,12 +231,16 @@ def awkward_tensorize(self, path:str): """ if path in self.shapes: shape = self.shapes[path] + if shape.ndim > 2: + raise NotImplementedError("Dimensions higher than 2 are not yet implemented.") + shape = shape.shape + hdf5_dim = 1 else: dimensions = self.ncmeta.get_dimensions(path, self.homogeneous_time) shape = tuple(self.dimension_size[dim] for dim in dimensions) - # Get the split between HDF5 indices and stored matrices - # i.e. equilibrium.time_slice.profiles_2d <-> psi - hdf5_dim = len(list(self.filled_data[path].keys())[0]) + # Get the split between HDF5 indices and stored matrices + # i.e. 
equilibrium.time_slice.profiles_2d <-> psi + hdf5_dim = len(list(self.filled_data[path].keys())[0]) if hdf5_dim == 0: return self.filled_data[path][()].value else: diff --git a/imas/test/test_wrangle.py b/imas/test/test_wrangle.py index 8d649297..94e3f0df 100644 --- a/imas/test/test_wrangle.py +++ b/imas/test/test_wrangle.py @@ -67,23 +67,28 @@ def flat(test_data): flat["equilibrium.time_slice.profiles_2d.psi"][:] = test_data["equilibrium"]["psi_2d"][None, ...] # Thomson scattering test data (ragged) + N = test_data["thomson_scattering"]["N_ch"][0] + test_data["thomson_scattering"]["N_ch"][1] flat["thomson_scattering.ids_properties.homogeneous_time"] = 0 flat["thomson_scattering.channel.t_e.time"] = ak.concatenate([np.tile(test_data["thomson_scattering"]["time"][0], - (test_data["thomson_scattering"]["N_ch"][0],1)), + (test_data["thomson_scattering"]["N_ch"][0], + 1)), np.tile(test_data["thomson_scattering"]["time"][1], - (test_data["thomson_scattering"]["N_ch"][1],1))]) - flat["thomson_scattering.channel.t_e.data"] = ak.concatenate([np.tile(test_data["thomson_scattering"]["t_e"][0], - (test_data["thomson_scattering"]["N_ch"][0],1)), - np.tile(test_data["thomson_scattering"]["t_e"][1], - (test_data["thomson_scattering"]["N_ch"][1],1))]) + (test_data["thomson_scattering"]["N_ch"][1], + 1))]) + flat["thomson_scattering.channel.t_e.data"] = ak.concatenate([np.repeat(test_data["thomson_scattering"]["t_e"][:test_data["thomson_scattering"]["N_ch"][0],None], + test_data["thomson_scattering"]["N_time"][0], axis=1), + np.repeat(test_data["thomson_scattering"]["t_e"][test_data["thomson_scattering"]["N_ch"][0]:,None], + test_data["thomson_scattering"]["N_time"][1], axis=1)]) flat["thomson_scattering.channel.n_e.time"] = ak.concatenate([np.tile(test_data["thomson_scattering"]["time"][0], - (test_data["thomson_scattering"]["N_ch"][0],1)), + (test_data["thomson_scattering"]["N_ch"][0], + 1)), np.tile(test_data["thomson_scattering"]["time"][1], - 
(test_data["thomson_scattering"]["N_ch"][1],1))]) - flat["thomson_scattering.channel.n_e.data"] = ak.concatenate([np.tile(test_data["thomson_scattering"]["n_e"][0], - (test_data["thomson_scattering"]["N_ch"][0],1)), - np.tile(test_data["thomson_scattering"]["n_e"][1], - (test_data["thomson_scattering"]["N_ch"][1],1))]) + (test_data["thomson_scattering"]["N_ch"][1], + 1))]) + flat["thomson_scattering.channel.n_e.data"] = ak.concatenate([np.repeat(test_data["thomson_scattering"]["n_e"][:test_data["thomson_scattering"]["N_ch"][0],None], + test_data["thomson_scattering"]["N_time"][0], axis=1), + np.repeat(test_data["thomson_scattering"]["n_e"][test_data["thomson_scattering"]["N_ch"][0]:,None], + test_data["thomson_scattering"]["N_time"][1], axis=1)]) flat["thomson_scattering.channel.position.r"] = test_data["thomson_scattering"]["r"] flat["thomson_scattering.channel.position.z"] = test_data["thomson_scattering"]["z"] return flat @@ -115,7 +120,7 @@ def test_ids_dict(test_data): thomson_scattering.channel[i].t_e.data = np.tile(test_data["thomson_scattering"]["t_e"][i], test_data["thomson_scattering"]["N_time"][index]) thomson_scattering.channel[i].n_e.time = test_data["thomson_scattering"]["time"][index] - thomson_scattering.channel[i].n_e.data = np.tile(test_data["thomson_scattering"]["t_e"][i], + thomson_scattering.channel[i].n_e.data = np.tile(test_data["thomson_scattering"]["n_e"][i], test_data["thomson_scattering"]["N_time"][index]) thomson_scattering.channel[i].position.r = test_data["thomson_scattering"]["r"][i] thomson_scattering.channel[i].position.z = test_data["thomson_scattering"]["z"][i] @@ -132,4 +137,4 @@ def test_wrangle(test_ids_dict, flat): def test_unwrangle(test_ids_dict, flat): result = unwrangle(list(flat.keys()), test_ids_dict) for key in flat.keys(): - np.testing.assert_allclose(result[key], flat[key]) + assert ak.almost_equal(result[key], flat[key]) \ No newline at end of file diff --git a/imas/wrangler.py b/imas/wrangler.py index 
f74e346b..6229ca06 100644 --- a/imas/wrangler.py +++ b/imas/wrangler.py @@ -16,7 +16,7 @@ def recursively_put(location, value, ids): sub_ids.resize(N) elif sub_ids.size != N: raise ValueError( - f"""Inconsistent size across flat entries {location}, {N} (flat) vs. ids {ids.size}! + f"""Inconsistent size across flat entries {location}, {N} (flat) vs. ids {sub_ids.size}! """ ) # Need to iterate over indices (e.g. equilibrium.time_slice[:].) From 17ad3b8d94c27e475a01bca4e6c0910884b00748 Mon Sep 17 00:00:00 2001 From: AreWeDreaming Date: Wed, 18 Mar 2026 13:54:23 +0100 Subject: [PATCH 03/13] Extend tests to strings --- imas/test/test_wrangle.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/imas/test/test_wrangle.py b/imas/test/test_wrangle.py index 94e3f0df..c4b1538c 100644 --- a/imas/test/test_wrangle.py +++ b/imas/test/test_wrangle.py @@ -22,6 +22,8 @@ def test_data(): data["thomson_scattering"] = {} data["thomson_scattering"]["N_ch"] = (20,10) + N = data["thomson_scattering"]["N_ch"][0] + data["thomson_scattering"]["N_ch"][1] + data["thomson_scattering"]["identifier"] = np.asarray("channel_" + np.asarray(np.linspace(1,N+1,N, dtype=int),dtype="|U2"),dtype="|U10") data["thomson_scattering"]["N_time"] = (100, 300) data["thomson_scattering"]["r"] = np.concatenate([np.ones(data["thomson_scattering"]["N_ch"][0])*1.6, np.ones(data["thomson_scattering"]["N_ch"][1])*1.7]) @@ -65,9 +67,8 @@ def flat(test_data): ) ) flat["equilibrium.time_slice.profiles_2d.psi"][:] = test_data["equilibrium"]["psi_2d"][None, ...] 
- # Thomson scattering test data (ragged) - N = test_data["thomson_scattering"]["N_ch"][0] + test_data["thomson_scattering"]["N_ch"][1] + flat["thomson_scattering.channel.identifier"] = test_data["thomson_scattering"]["identifier"] flat["thomson_scattering.ids_properties.homogeneous_time"] = 0 flat["thomson_scattering.channel.t_e.time"] = ak.concatenate([np.tile(test_data["thomson_scattering"]["time"][0], (test_data["thomson_scattering"]["N_ch"][0], @@ -116,6 +117,7 @@ def test_ids_dict(test_data): for i in range(N): if i == test_data["thomson_scattering"]["N_ch"][0]: index = 1 + thomson_scattering.channel[i].identifier = test_data["thomson_scattering"]["identifier"][i] thomson_scattering.channel[i].t_e.time = test_data["thomson_scattering"]["time"][index] thomson_scattering.channel[i].t_e.data = np.tile(test_data["thomson_scattering"]["t_e"][i], test_data["thomson_scattering"]["N_time"][index]) @@ -134,7 +136,20 @@ def test_wrangle(test_ids_dict, flat): diff = idsdiffgen(wrangled[key],test_ids_dict[key]) assert len(list(diff)) == 0, diff +def get_dtype(arr): + """Get dtype from either numpy or awkward array.""" + if isinstance(arr, ak.Array): + # This is the easiest way I found to extract the numpy dtype from an awkward array + return eval("np." 
+ arr.typestr.split("*")[-1]) + if hasattr(arr, "dtype"): + return arr.dtype + else: + return type(arr) + def test_unwrangle(test_ids_dict, flat): result = unwrangle(list(flat.keys()), test_ids_dict) for key in flat.keys(): - assert ak.almost_equal(result[key], flat[key]) \ No newline at end of file + if np.issubdtype(get_dtype(result[key]), np.floating): + assert ak.almost_equal(result[key], flat[key]) + else: + assert ak.array_equal(result[key], flat[key]) \ No newline at end of file From 066a1aeeec4baa79f1b3cf4629a31cf97131f1c1 Mon Sep 17 00:00:00 2001 From: Severin Denk <60154343+AreWeDreaming@users.noreply.github.com> Date: Wed, 18 Mar 2026 08:42:46 -0700 Subject: [PATCH 04/13] Update imas/wrangler.py Handle version propagation more cleanly Co-authored-by: Prasad --- imas/wrangler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imas/wrangler.py b/imas/wrangler.py index 6229ca06..07e36906 100644 --- a/imas/wrangler.py +++ b/imas/wrangler.py @@ -34,7 +34,7 @@ def recursively_put(location, value, ids): def wrangle(flat: Dict, version="3.41.0") -> Dict[str, IDSToplevel]: wrangled = {} - factory = IDSFactory(version) + factory = IDSFactory(version) if version is not None else IDSFactory() for key in flat: ids, location = key.split(".", 1) if ids not in wrangled: From 098418c7a1b2711b638011420404a3a81a55544e Mon Sep 17 00:00:00 2001 From: Severin Denk <60154343+AreWeDreaming@users.noreply.github.com> Date: Wed, 18 Mar 2026 08:43:13 -0700 Subject: [PATCH 05/13] Update imas/wrangler.py Remove preference for specific version Co-authored-by: Prasad --- imas/wrangler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imas/wrangler.py b/imas/wrangler.py index 07e36906..f7ce078c 100644 --- a/imas/wrangler.py +++ b/imas/wrangler.py @@ -32,7 +32,7 @@ def recursively_put(location, value, ids): return ids -def wrangle(flat: Dict, version="3.41.0") -> Dict[str, IDSToplevel]: +def wrangle(flat: Dict, version: Optional[str] = None) -> 
Dict[str, IDSToplevel]: wrangled = {} factory = IDSFactory(version) if version is not None else IDSFactory() for key in flat: From 19c0f776e5595cdc33772caf29bff8c178c38139 Mon Sep 17 00:00:00 2001 From: AreWeDreaming Date: Wed, 18 Mar 2026 16:44:05 +0100 Subject: [PATCH 06/13] Make awkward import more selective --- imas/backends/netcdf/ids_tensorizer.py | 3 +-- imas/test/test_wrangle.py | 8 +++++++- imas/wrangler.py | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/imas/backends/netcdf/ids_tensorizer.py b/imas/backends/netcdf/ids_tensorizer.py index b6b57245..19aba24b 100644 --- a/imas/backends/netcdf/ids_tensorizer.py +++ b/imas/backends/netcdf/ids_tensorizer.py @@ -6,7 +6,6 @@ from typing import List, Tuple import numpy -import awkward as ak from imas.backends.netcdf.iterators import indexed_tree_iter from imas.backends.netcdf.nc_metadata import NCMetadata @@ -244,5 +243,5 @@ def awkward_tensorize(self, path:str): if hdf5_dim == 0: return self.filled_data[path][()].value else: - return ak.Array(self.recursively_convert_to_list(path, tuple(), shape[:hdf5_dim], 0)) + return self.recursively_convert_to_list(path, tuple(), shape[:hdf5_dim], 0) \ No newline at end of file diff --git a/imas/test/test_wrangle.py b/imas/test/test_wrangle.py index c4b1538c..6b460d16 100644 --- a/imas/test/test_wrangle.py +++ b/imas/test/test_wrangle.py @@ -1,6 +1,12 @@ import pytest import numpy as np -import awkward as ak +try: + import awkward as ak +except ImportError as exc: + raise ImportError( + "awkward-array is required" + "Install it with: pip install imas-python[awkward]" + ) from exc from imas.wrangler import wrangle, unwrangle from imas.ids_factory import IDSFactory diff --git a/imas/wrangler.py b/imas/wrangler.py index 6229ca06..81e0f816 100644 --- a/imas/wrangler.py +++ b/imas/wrangler.py @@ -65,7 +65,7 @@ def unwrangle( for ids_location in ids_locations[key]: location = key + "." 
+ ids_location.replace("/", ".") values = tensorizer.awkward_tensorize(ids_location) - if hasattr(values, "__getattr__"): + if hasattr(values, "__getitem__"): # Not a scalar, e.g. homogenous_time try: flat[location] = np.asarray(values) From e990c5ff482b4bbd6a3ee6f63c6d761804f86b43 Mon Sep 17 00:00:00 2001 From: AreWeDreaming Date: Thu, 19 Mar 2026 08:54:59 +0100 Subject: [PATCH 07/13] Fix messed up indent --- imas/wrangler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/imas/wrangler.py b/imas/wrangler.py index 6f7793a9..81e0f816 100644 --- a/imas/wrangler.py +++ b/imas/wrangler.py @@ -32,9 +32,9 @@ def recursively_put(location, value, ids): return ids -def wrangle(flat: Dict, version: Optional[str] = None) -> Dict[str, IDSToplevel]: +def wrangle(flat: Dict, version="3.41.0") -> Dict[str, IDSToplevel]: wrangled = {} - factory = IDSFactory(version) if version is not None else IDSFactory() + factory = IDSFactory(version) for key in flat: ids, location = key.split(".", 1) if ids not in wrangled: From 13149728dcdc866a446ab594cc32c74e4d852bbd Mon Sep 17 00:00:00 2001 From: AreWeDreaming Date: Thu, 19 Mar 2026 13:54:59 +0100 Subject: [PATCH 08/13] Handle missing fields --- imas/test/test_wrangle.py | 3 ++- imas/wrangler.py | 9 +++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/imas/test/test_wrangle.py b/imas/test/test_wrangle.py index 6b460d16..4929433e 100644 --- a/imas/test/test_wrangle.py +++ b/imas/test/test_wrangle.py @@ -153,7 +153,8 @@ def get_dtype(arr): return type(arr) def test_unwrangle(test_ids_dict, flat): - result = unwrangle(list(flat.keys()), test_ids_dict) + result, failed = unwrangle(list(flat.keys()), test_ids_dict) + assert failed == 0, f"The following fields failed to load {failed}" for key in flat.keys(): if np.issubdtype(get_dtype(result[key]), np.floating): assert ak.almost_equal(result[key], flat[key]) diff --git a/imas/wrangler.py b/imas/wrangler.py index 81e0f816..e0baaf51 100644 --- 
a/imas/wrangler.py +++ b/imas/wrangler.py @@ -56,6 +56,7 @@ def unwrangle( ) -> Dict[str, ak.Array | np.ndarray]: flat = {} ids_locations = split_location_across_ids(locations) + failed_locations = [] for key in ids_locations: tensorizer = IDSTensorizer(ids_dict[key], ids_locations[key]) tensorizer.include_coordinate_paths() @@ -64,7 +65,11 @@ def unwrangle( # Add IDS conversion for ids_location in ids_locations[key]: location = key + "." + ids_location.replace("/", ".") - values = tensorizer.awkward_tensorize(ids_location) + try: + values = tensorizer.awkward_tensorize(ids_location) + except KeyError: + failed_locations.append(location) + continue if hasattr(values, "__getitem__"): # Not a scalar, e.g. homogenous_time try: @@ -73,4 +78,4 @@ def unwrangle( flat[location] = ak.Array(values) else: flat[location] = values - return flat + return flat, failed_locations From 31bee40deeefb6af67b7ef07f25ac6ee2965ad36 Mon Sep 17 00:00:00 2001 From: AreWeDreaming Date: Thu, 19 Mar 2026 14:26:52 +0100 Subject: [PATCH 09/13] Fix incorrect return type hint --- imas/wrangler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/imas/wrangler.py b/imas/wrangler.py index e0baaf51..824ec938 100644 --- a/imas/wrangler.py +++ b/imas/wrangler.py @@ -1,4 +1,4 @@ -from typing import Dict, List +from typing import Dict, List, Tuple import awkward as ak import numpy as np from . 
import IDSFactory @@ -53,7 +53,7 @@ def split_location_across_ids(locations: List[str]) -> Dict[str, List[str]]: def unwrangle( locations: List[str], ids_dict: Dict[str, IDSToplevel], version="3.41.0" -) -> Dict[str, ak.Array | np.ndarray]: +) -> Tuple[Dict[str, ak.Array | np.ndarray], List[str]]: flat = {} ids_locations = split_location_across_ids(locations) failed_locations = [] From b091f4f0ad71d36d53e2527eefa3c03a39b17f17 Mon Sep 17 00:00:00 2001 From: AreWeDreaming Date: Thu, 19 Mar 2026 14:27:52 +0100 Subject: [PATCH 10/13] Fix wrong check in test assertion --- imas/test/test_wrangle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imas/test/test_wrangle.py b/imas/test/test_wrangle.py index 4929433e..f593956e 100644 --- a/imas/test/test_wrangle.py +++ b/imas/test/test_wrangle.py @@ -154,7 +154,7 @@ def get_dtype(arr): def test_unwrangle(test_ids_dict, flat): result, failed = unwrangle(list(flat.keys()), test_ids_dict) - assert failed == 0, f"The following fields failed to load {failed}" + assert len(failed) == 0, f"The following fields failed to load {failed}" for key in flat.keys(): if np.issubdtype(get_dtype(result[key]), np.floating): assert ak.almost_equal(result[key], flat[key]) From dda7533de6492ec37e461a1f019b9310544a4e23 Mon Sep 17 00:00:00 2001 From: AreWeDreaming Date: Thu, 19 Mar 2026 14:37:20 +0100 Subject: [PATCH 11/13] Be verbose on what version means --- imas/wrangler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/imas/wrangler.py b/imas/wrangler.py index 824ec938..528b8238 100644 --- a/imas/wrangler.py +++ b/imas/wrangler.py @@ -32,9 +32,9 @@ def recursively_put(location, value, ids): return ids -def wrangle(flat: Dict, version="3.41.0") -> Dict[str, IDSToplevel]: +def wrangle(flat: Dict, source_version="3.41.0") -> Dict[str, IDSToplevel]: wrangled = {} - factory = IDSFactory(version) + factory = IDSFactory(source_version) for key in flat: ids, location = key.split(".", 1) if ids not in wrangled: 
@@ -52,7 +52,7 @@ def split_location_across_ids(locations: List[str]) -> Dict[str, List[str]]: return ids_locations def unwrangle( - locations: List[str], ids_dict: Dict[str, IDSToplevel], version="3.41.0" + locations: List[str], ids_dict: Dict[str, IDSToplevel], target_version="3.41.0" ) -> Tuple[Dict[str, ak.Array | np.ndarray], List[str]]: flat = {} ids_locations = split_location_across_ids(locations) From bddaa9b77013865898844b71f4470a1379394096 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Thu, 19 Mar 2026 16:32:47 +0100 Subject: [PATCH 12/13] fixed issue with 2D data and blackify --- imas/backends/netcdf/ids_tensorizer.py | 38 ++-- imas/test/test_wrangle.py | 232 ++++++++++++++++++------- imas/wrangler.py | 15 +- 3 files changed, 193 insertions(+), 92 deletions(-) diff --git a/imas/backends/netcdf/ids_tensorizer.py b/imas/backends/netcdf/ids_tensorizer.py index 19aba24b..f9074146 100644 --- a/imas/backends/netcdf/ids_tensorizer.py +++ b/imas/backends/netcdf/ids_tensorizer.py @@ -204,44 +204,44 @@ def tensorize(self, path, fillvalue): return tmp_var - def recursively_convert_to_list(self, path: str, inactive_index:Tuple, - shape:Tuple, i_dim: int): + def recursively_convert_to_list( + self, path: str, inactive_index: Tuple, shape: Tuple, i_dim: int + ): entry = [] for index in range(shape[i_dim]): new_index = inactive_index + (index,) if i_dim == len(shape) - 1: entry.append(self.filled_data[path][new_index].value) else: - entry.append(self.recursively_convert_to_list(path, new_index, - shape, i_dim + 1)) + entry.append( + self.recursively_convert_to_list(path, new_index, shape, i_dim + 1) + ) return entry - def awkward_tensorize(self, path:str): + def awkward_tensorize(self, path: str): """ Tensorizes the data at the given path with the specified fill value. Args: path: The path to the data in the IDS. - fillvalue: The value to fill the tensor with. Can be of any type, - including strings. 
Returns: A tensor filled with the data from the specified path. """ + if not self.filled_data[path]: + return [] + hdf5_dim = len(next(iter(self.filled_data[path]))) + + if hdf5_dim == 0: + return self.filled_data[path][()].value + if path in self.shapes: - shape = self.shapes[path] - if shape.ndim > 2: - raise NotImplementedError("Dimensions higher than 2 are not yet implemented.") - shape = shape.shape - hdf5_dim = 1 + shape = self.shapes[path].shape[:hdf5_dim] else: dimensions = self.ncmeta.get_dimensions(path, self.homogeneous_time) - shape = tuple(self.dimension_size[dim] for dim in dimensions) + full_shape = tuple(self.dimension_size[dim] for dim in dimensions) # Get the split between HDF5 indices and stored matrices # i.e. equilibrium.time_slice.profiles_2d <-> psi - hdf5_dim = len(list(self.filled_data[path].keys())[0]) - if hdf5_dim == 0: - return self.filled_data[path][()].value - else: - return self.recursively_convert_to_list(path, tuple(), shape[:hdf5_dim], 0) - \ No newline at end of file + shape = full_shape[:hdf5_dim] + + return self.recursively_convert_to_list(path, tuple(), shape, 0) diff --git a/imas/test/test_wrangle.py b/imas/test/test_wrangle.py index f593956e..e7152b49 100644 --- a/imas/test/test_wrangle.py +++ b/imas/test/test_wrangle.py @@ -1,17 +1,18 @@ import pytest import numpy as np + try: import awkward as ak except ImportError as exc: raise ImportError( - "awkward-array is required" - "Install it with: pip install imas-python[awkward]" + "awkward-array is required" "Install it with: pip install imas-python[awkward]" ) from exc from imas.wrangler import wrangle, unwrangle from imas.ids_factory import IDSFactory from imas.util import idsdiffgen + @pytest.fixture def test_data(): data = {"equilibrium": {}} @@ -19,28 +20,45 @@ def test_data(): data["equilibrium"]["N_radial"] = 100 data["equilibrium"]["N_grid"] = 1 data["equilibrium"]["time"] = np.linspace(0.0, 5.0, data["equilibrium"]["N_time"]) - data["equilibrium"]["psi_1d"] = 
np.linspace(0.0, 1.0, data["equilibrium"]["N_radial"]) + data["equilibrium"]["psi_1d"] = np.linspace( + 0.0, 1.0, data["equilibrium"]["N_radial"] + ) data["equilibrium"]["r"] = np.linspace(1.0, 2.0, data["equilibrium"]["N_radial"]) data["equilibrium"]["z"] = np.linspace(-1.0, 1.0, data["equilibrium"]["N_radial"]) - r_grid, z_grid = np.meshgrid(data["equilibrium"]["r"], - data["equilibrium"]["z"], indexing="ij") + r_grid, z_grid = np.meshgrid( + data["equilibrium"]["r"], data["equilibrium"]["z"], indexing="ij" + ) data["equilibrium"]["psi_2d"] = (r_grid - 1.5) ** 2 + z_grid**2 data["thomson_scattering"] = {} - data["thomson_scattering"]["N_ch"] = (20,10) + data["thomson_scattering"]["N_ch"] = (20, 10) N = data["thomson_scattering"]["N_ch"][0] + data["thomson_scattering"]["N_ch"][1] - data["thomson_scattering"]["identifier"] = np.asarray("channel_" + np.asarray(np.linspace(1,N+1,N, dtype=int),dtype="|U2"),dtype="|U10") + data["thomson_scattering"]["identifier"] = np.asarray( + "channel_" + np.asarray(np.linspace(1, N + 1, N, dtype=int), dtype="|U2"), + dtype="|U10", + ) data["thomson_scattering"]["N_time"] = (100, 300) - data["thomson_scattering"]["r"] = np.concatenate([np.ones(data["thomson_scattering"]["N_ch"][0])*1.6, - np.ones(data["thomson_scattering"]["N_ch"][1])*1.7]) - data["thomson_scattering"]["z"] = np.concatenate([np.linspace(-1.0, 1.0, data["thomson_scattering"]["N_ch"][0]), - np.linspace(-1.0, 1.0, data["thomson_scattering"]["N_ch"][1])]) - data["thomson_scattering"]["t_e"] = data["thomson_scattering"]["z"]**2 * 5.e3 - data["thomson_scattering"]["n_e"] = data["thomson_scattering"]["z"]**2 * 5.e19 - data["thomson_scattering"]["time"] = (np.linspace(0,5.0, data["thomson_scattering"]["N_time"][0]), - np.linspace(0,5.0, data["thomson_scattering"]["N_time"][1])) + data["thomson_scattering"]["r"] = np.concatenate( + [ + np.ones(data["thomson_scattering"]["N_ch"][0]) * 1.6, + np.ones(data["thomson_scattering"]["N_ch"][1]) * 1.7, + ] + ) + 
data["thomson_scattering"]["z"] = np.concatenate( + [ + np.linspace(-1.0, 1.0, data["thomson_scattering"]["N_ch"][0]), + np.linspace(-1.0, 1.0, data["thomson_scattering"]["N_ch"][1]), + ] + ) + data["thomson_scattering"]["t_e"] = data["thomson_scattering"]["z"] ** 2 * 5.0e3 + data["thomson_scattering"]["n_e"] = data["thomson_scattering"]["z"] ** 2 * 5.0e19 + data["thomson_scattering"]["time"] = ( + np.linspace(0, 5.0, data["thomson_scattering"]["N_time"][0]), + np.linspace(0, 5.0, data["thomson_scattering"]["N_time"][1]), + ) return data + @pytest.fixture def flat(test_data): flat = {} @@ -51,19 +69,29 @@ def flat(test_data): flat["equilibrium.time_slice.profiles_1d.psi"] = np.zeros( (test_data["equilibrium"]["N_time"], test_data["equilibrium"]["N_radial"]) ) - flat["equilibrium.time_slice.profiles_1d.psi"][:] = test_data["equilibrium"]["psi_1d"] + flat["equilibrium.time_slice.profiles_1d.psi"][:] = test_data["equilibrium"][ + "psi_1d" + ] flat["equilibrium.time_slice.profiles_2d.grid.dim1"] = np.zeros( - (test_data["equilibrium"]["N_time"], - test_data["equilibrium"]["N_grid"], - test_data["equilibrium"]["N_radial"]) + ( + test_data["equilibrium"]["N_time"], + test_data["equilibrium"]["N_grid"], + test_data["equilibrium"]["N_radial"], + ) ) - flat["equilibrium.time_slice.profiles_2d.grid.dim1"][:] = test_data["equilibrium"]["r"][None, :] + flat["equilibrium.time_slice.profiles_2d.grid.dim1"][:] = test_data["equilibrium"][ + "r" + ][None, :] flat["equilibrium.time_slice.profiles_2d.grid.dim2"] = np.zeros( - (test_data["equilibrium"]["N_time"], - test_data["equilibrium"]["N_grid"], - test_data["equilibrium"]["N_radial"]) + ( + test_data["equilibrium"]["N_time"], + test_data["equilibrium"]["N_grid"], + test_data["equilibrium"]["N_radial"], + ) ) - flat["equilibrium.time_slice.profiles_2d.grid.dim2"][:] = test_data["equilibrium"]["z"][None, :] + flat["equilibrium.time_slice.profiles_2d.grid.dim2"][:] = test_data["equilibrium"][ + "z" + ][None, :] 
flat["equilibrium.time_slice.profiles_2d.psi"] = np.zeros( ( test_data["equilibrium"]["N_time"], @@ -72,34 +100,79 @@ def flat(test_data): test_data["equilibrium"]["N_radial"], ) ) - flat["equilibrium.time_slice.profiles_2d.psi"][:] = test_data["equilibrium"]["psi_2d"][None, ...] + flat["equilibrium.time_slice.profiles_2d.psi"][:] = test_data["equilibrium"][ + "psi_2d" + ][None, ...] # Thomson scattering test data (ragged) - flat["thomson_scattering.channel.identifier"] = test_data["thomson_scattering"]["identifier"] + flat["thomson_scattering.channel.identifier"] = test_data["thomson_scattering"][ + "identifier" + ] flat["thomson_scattering.ids_properties.homogeneous_time"] = 0 - flat["thomson_scattering.channel.t_e.time"] = ak.concatenate([np.tile(test_data["thomson_scattering"]["time"][0], - (test_data["thomson_scattering"]["N_ch"][0], - 1)), - np.tile(test_data["thomson_scattering"]["time"][1], - (test_data["thomson_scattering"]["N_ch"][1], - 1))]) - flat["thomson_scattering.channel.t_e.data"] = ak.concatenate([np.repeat(test_data["thomson_scattering"]["t_e"][:test_data["thomson_scattering"]["N_ch"][0],None], - test_data["thomson_scattering"]["N_time"][0], axis=1), - np.repeat(test_data["thomson_scattering"]["t_e"][test_data["thomson_scattering"]["N_ch"][0]:,None], - test_data["thomson_scattering"]["N_time"][1], axis=1)]) - flat["thomson_scattering.channel.n_e.time"] = ak.concatenate([np.tile(test_data["thomson_scattering"]["time"][0], - (test_data["thomson_scattering"]["N_ch"][0], - 1)), - np.tile(test_data["thomson_scattering"]["time"][1], - (test_data["thomson_scattering"]["N_ch"][1], - 1))]) - flat["thomson_scattering.channel.n_e.data"] = ak.concatenate([np.repeat(test_data["thomson_scattering"]["n_e"][:test_data["thomson_scattering"]["N_ch"][0],None], - test_data["thomson_scattering"]["N_time"][0], axis=1), - np.repeat(test_data["thomson_scattering"]["n_e"][test_data["thomson_scattering"]["N_ch"][0]:,None], - test_data["thomson_scattering"]["N_time"][1], 
axis=1)]) + flat["thomson_scattering.channel.t_e.time"] = ak.concatenate( + [ + np.tile( + test_data["thomson_scattering"]["time"][0], + (test_data["thomson_scattering"]["N_ch"][0], 1), + ), + np.tile( + test_data["thomson_scattering"]["time"][1], + (test_data["thomson_scattering"]["N_ch"][1], 1), + ), + ] + ) + flat["thomson_scattering.channel.t_e.data"] = ak.concatenate( + [ + np.repeat( + test_data["thomson_scattering"]["t_e"][ + : test_data["thomson_scattering"]["N_ch"][0], None + ], + test_data["thomson_scattering"]["N_time"][0], + axis=1, + ), + np.repeat( + test_data["thomson_scattering"]["t_e"][ + test_data["thomson_scattering"]["N_ch"][0] :, None + ], + test_data["thomson_scattering"]["N_time"][1], + axis=1, + ), + ] + ) + flat["thomson_scattering.channel.n_e.time"] = ak.concatenate( + [ + np.tile( + test_data["thomson_scattering"]["time"][0], + (test_data["thomson_scattering"]["N_ch"][0], 1), + ), + np.tile( + test_data["thomson_scattering"]["time"][1], + (test_data["thomson_scattering"]["N_ch"][1], 1), + ), + ] + ) + flat["thomson_scattering.channel.n_e.data"] = ak.concatenate( + [ + np.repeat( + test_data["thomson_scattering"]["n_e"][ + : test_data["thomson_scattering"]["N_ch"][0], None + ], + test_data["thomson_scattering"]["N_time"][0], + axis=1, + ), + np.repeat( + test_data["thomson_scattering"]["n_e"][ + test_data["thomson_scattering"]["N_ch"][0] :, None + ], + test_data["thomson_scattering"]["N_time"][1], + axis=1, + ), + ] + ) flat["thomson_scattering.channel.position.r"] = test_data["thomson_scattering"]["r"] flat["thomson_scattering.channel.position.z"] = test_data["thomson_scattering"]["z"] return flat + @pytest.fixture def test_ids_dict(test_data): factory = IDSFactory("3.41.0") @@ -111,47 +184,72 @@ def test_ids_dict(test_data): equilibrium.time_slice[i].time = test_data["equilibrium"]["time"][i] equilibrium.time_slice[i].profiles_1d.psi = test_data["equilibrium"]["psi_1d"] equilibrium.time_slice[i].profiles_2d.resize(1) - 
equilibrium.time_slice[i].profiles_2d[0].grid.dim1 = test_data["equilibrium"]["r"] - equilibrium.time_slice[i].profiles_2d[0].grid.dim2 = test_data["equilibrium"]["z"] - equilibrium.time_slice[i].profiles_2d[0].psi = test_data["equilibrium"]["psi_2d"] + equilibrium.time_slice[i].profiles_2d[0].grid.dim1 = test_data["equilibrium"][ + "r" + ] + equilibrium.time_slice[i].profiles_2d[0].grid.dim2 = test_data["equilibrium"][ + "z" + ] + equilibrium.time_slice[i].profiles_2d[0].psi = test_data["equilibrium"][ + "psi_2d" + ] thomson_scattering = factory.thomson_scattering() thomson_scattering.ids_properties.homogeneous_time = 0 - N = test_data["thomson_scattering"]["N_ch"][0] + test_data["thomson_scattering"]["N_ch"][1] + N = ( + test_data["thomson_scattering"]["N_ch"][0] + + test_data["thomson_scattering"]["N_ch"][1] + ) thomson_scattering.channel.resize(N) index = 0 for i in range(N): if i == test_data["thomson_scattering"]["N_ch"][0]: index = 1 - thomson_scattering.channel[i].identifier = test_data["thomson_scattering"]["identifier"][i] - thomson_scattering.channel[i].t_e.time = test_data["thomson_scattering"]["time"][index] - thomson_scattering.channel[i].t_e.data = np.tile(test_data["thomson_scattering"]["t_e"][i], - test_data["thomson_scattering"]["N_time"][index]) - thomson_scattering.channel[i].n_e.time = test_data["thomson_scattering"]["time"][index] - thomson_scattering.channel[i].n_e.data = np.tile(test_data["thomson_scattering"]["n_e"][i], - test_data["thomson_scattering"]["N_time"][index]) - thomson_scattering.channel[i].position.r = test_data["thomson_scattering"]["r"][i] - thomson_scattering.channel[i].position.z = test_data["thomson_scattering"]["z"][i] - - return {"equilibrium":equilibrium, "thomson_scattering": thomson_scattering} - - -def test_wrangle(test_ids_dict, flat): + thomson_scattering.channel[i].identifier = test_data["thomson_scattering"][ + "identifier" + ][i] + thomson_scattering.channel[i].t_e.time = test_data["thomson_scattering"][ + 
"time" + ][index] + thomson_scattering.channel[i].t_e.data = np.tile( + test_data["thomson_scattering"]["t_e"][i], + test_data["thomson_scattering"]["N_time"][index], + ) + thomson_scattering.channel[i].n_e.time = test_data["thomson_scattering"][ + "time" + ][index] + thomson_scattering.channel[i].n_e.data = np.tile( + test_data["thomson_scattering"]["n_e"][i], + test_data["thomson_scattering"]["N_time"][index], + ) + thomson_scattering.channel[i].position.r = test_data["thomson_scattering"]["r"][ + i + ] + thomson_scattering.channel[i].position.z = test_data["thomson_scattering"]["z"][ + i + ] + + return {"equilibrium": equilibrium, "thomson_scattering": thomson_scattering} + + +def test_wrangle(test_ids_dict, flat): wrangled = wrangle(flat) for key in test_ids_dict: - diff = idsdiffgen(wrangled[key],test_ids_dict[key]) + diff = idsdiffgen(wrangled[key], test_ids_dict[key]) assert len(list(diff)) == 0, diff + def get_dtype(arr): """Get dtype from either numpy or awkward array.""" if isinstance(arr, ak.Array): - # This is the easiest way I found to extract the numpy dtype from an awkward array + # Easiest way to extract the numpy dtype from an awkward array return eval("np." 
+ arr.typestr.split("*")[-1]) if hasattr(arr, "dtype"): return arr.dtype else: return type(arr) + def test_unwrangle(test_ids_dict, flat): result, failed = unwrangle(list(flat.keys()), test_ids_dict) assert len(failed) == 0, f"The following fields failed to load {failed}" @@ -159,4 +257,4 @@ def test_unwrangle(test_ids_dict, flat): if np.issubdtype(get_dtype(result[key]), np.floating): assert ak.almost_equal(result[key], flat[key]) else: - assert ak.array_equal(result[key], flat[key]) \ No newline at end of file + assert ak.array_equal(result[key], flat[key]) diff --git a/imas/wrangler.py b/imas/wrangler.py index 528b8238..f01962df 100644 --- a/imas/wrangler.py +++ b/imas/wrangler.py @@ -5,6 +5,7 @@ from .ids_toplevel import IDSToplevel from .backends.netcdf.ids_tensorizer import IDSTensorizer + def recursively_put(location, value, ids): # time_slice.profiles_1d.psi if "." in location: @@ -16,8 +17,8 @@ def recursively_put(location, value, ids): sub_ids.resize(N) elif sub_ids.size != N: raise ValueError( - f"""Inconsistent size across flat entries {location}, {N} (flat) vs. ids {sub_ids.size}! -""" + f"Inconsistent size across flat entries {location}, " + f"{N} (flat) vs. ids {sub_ids.size}!" ) # Need to iterate over indices (e.g. equilibrium.time_slice[:].) 
for index in range(N): @@ -42,19 +43,21 @@ def wrangle(flat: Dict, source_version="3.41.0") -> Dict[str, IDSToplevel]: wrangled[ids] = recursively_put(location, flat[key], wrangled[ids]) return wrangled + def split_location_across_ids(locations: List[str]) -> Dict[str, List[str]]: ids_locations = {} for location in locations: - ids, path = location.split(".",1) + ids, path = location.split(".", 1) if ids not in ids_locations: ids_locations[ids] = [] - ids_locations[ids].append(path.replace(".","/") ) + ids_locations[ids].append(path.replace(".", "/")) return ids_locations + def unwrangle( locations: List[str], ids_dict: Dict[str, IDSToplevel], target_version="3.41.0" ) -> Tuple[Dict[str, ak.Array | np.ndarray], List[str]]: - flat = {} + flat = {} ids_locations = split_location_across_ids(locations) failed_locations = [] for key in ids_locations: @@ -74,7 +77,7 @@ def unwrangle( # Not a scalar, e.g. homogenous_time try: flat[location] = np.asarray(values) - except ValueError as e: + except ValueError: flat[location] = ak.Array(values) else: flat[location] = values From 5ec40eb9af22ec61190f91a6a3f896854cc1b2ab Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Thu, 19 Mar 2026 17:14:14 +0100 Subject: [PATCH 13/13] Make source_version required in wrangle and call convert_ids in unwrangle when target_version is provided --- imas/wrangler.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/imas/wrangler.py b/imas/wrangler.py index f01962df..f44563ce 100644 --- a/imas/wrangler.py +++ b/imas/wrangler.py @@ -2,6 +2,7 @@ import awkward as ak import numpy as np from . 
import IDSFactory +from .ids_convert import convert_ids from .ids_toplevel import IDSToplevel from .backends.netcdf.ids_tensorizer import IDSTensorizer @@ -33,7 +34,7 @@ def recursively_put(location, value, ids): return ids -def wrangle(flat: Dict, source_version="3.41.0") -> Dict[str, IDSToplevel]: +def wrangle(flat: Dict, source_version: str) -> Dict[str, IDSToplevel]: wrangled = {} factory = IDSFactory(source_version) for key in flat: @@ -55,13 +56,18 @@ def split_location_across_ids(locations: List[str]) -> Dict[str, List[str]]: def unwrangle( - locations: List[str], ids_dict: Dict[str, IDSToplevel], target_version="3.41.0" + locations: List[str], + ids_dict: Dict[str, IDSToplevel], + target_version: str | None = None, ) -> Tuple[Dict[str, ak.Array | np.ndarray], List[str]]: flat = {} ids_locations = split_location_across_ids(locations) failed_locations = [] for key in ids_locations: - tensorizer = IDSTensorizer(ids_dict[key], ids_locations[key]) + ids = ids_dict[key] + if target_version is not None: + ids = convert_ids(ids, target_version) + tensorizer = IDSTensorizer(ids, ids_locations[key]) tensorizer.include_coordinate_paths() tensorizer.collect_filled_data() tensorizer.determine_data_shapes()