iterorganization · olivhoenen · Mar 13, 2025 · Mar 6, 2025 · Mar 7, 2025 · Mar 7, 2025
@@ -28,7 +28,7 @@ jobs:
           python -m venv venv
           source venv/bin/activate
           pip install --upgrade pip setuptools wheel
-          pip install .[h5py,netcdf,test]
+          pip install .[test]
 
       - name: Run tests
         run: |

@@ -1,6 +1,7 @@
 import os
 
 import matplotlib
+
 # To avoid possible display issues when Matplotlib uses a non-GUI backend
 if "DISPLAY" not in os.environ:
     matplotlib.use("agg")
@@ -17,6 +18,39 @@
 entry = imas.training.get_training_db_entry()
 cp = entry.get("core_profiles")
 
+#######################################################################################
+# Steps 2, 3 and 4, using imas.util.to_xarray
+# Create an xarray Dataset containing t_i_average and its coordinates
+xrds = imas.util.to_xarray(cp, "profiles_1d/t_i_average")
+# Note that profiles_1d.grid.rho_tor_norm is a 2D coordinate: its values may be
+# different at different times.
+#
+# Since the values at different time slices differ only minutely in this example, we'll
+# rename the `profiles_1d.grid.rho_tor_norm:i` dimension to `rho_tor_norm` and set the
+# values to the values of rho_tor_norm of the first time slice:
+xrds = xrds.rename({"profiles_1d.grid.rho_tor_norm:i": "rho_tor_norm"}).assign_coords(
+    {"rho_tor_norm": xrds["profiles_1d.grid.rho_tor_norm"].isel(time=0).data}
+)
+
+# Extract temperatures as an xarray DataArray
+temperature = xrds["profiles_1d.t_i_average"]
+
+# 5a. Select subset of temperature where 0.4 <= rho_tor_norm <= 0.6:
+print(temperature.sel(rho_tor_norm=slice(0.4, 0.6)))
+
+# 5b. Interpolate temperature on a new grid: [0, 0.1, 0.2, ..., 0.9, 1.0]
+print(temperature.interp(rho_tor_norm=numpy.linspace(0, 1, 11)))
+
+# 5c. Interpolate temperature on a new time base: [10, 20]
+print(temperature.interp(time=[10, 20]))
+
+# 5d. Plot
+temperature.plot(x="time", norm=matplotlib.colors.LogNorm())
+plt.show()
+
+#######################################################################################
+# We can also build an xarray DataArray manually, as shown below:
+
 # 2. Store the temperature of the first time slice
 temperature = cp.profiles_1d[0].t_i_average
 

@@ -3,9 +3,12 @@ Create ``xarray.DataArray`` from an IDS
 
 .. info::
 
-    In this lesson you will create a ``DataArray`` manually. In a future version of
-    IMAS-Python we plan to include functionality that will automatically do this for you.
-    That should further simplify working with data inside IDSs.
+    This lesson was written before :py:func:`imas.util.to_xarray` was
+    implemented. It is retained for educational purposes, but we recommend
+    using :py:func:`imas.util.to_xarray` instead of manually creating
+    xarray ``DataArray``\ s.
+
+    See also: :ref:`Convert IMAS-Python IDSs directly to Xarray Datasets`.
 
 Let's start with an introduction of Xarray. According to `their website
 <https://docs.xarray.dev/en/stable/getting-started-guide/why-xarray.html>`_ (where you
@@ -61,6 +64,10 @@ Exercise 1: create a ``DataArray`` for ``profiles_1d/temperature``
 
     .. md-tab-item:: Solution
 
+        This exercise was created before the implementation of
+        :py:func:`imas.util.to_xarray`. The original approach is available below
+        for educational purposes.
+
         .. literalinclude:: imas_snippets/ids_to_xarray.py
 
 
@@ -96,4 +103,9 @@ the ``profiles_1d`` array of structures. When the grid is not changing in the ID
 
     .. md-tab-item:: Solution
 
+        This exercise was created before :py:func:`imas.util.to_xarray` was
+        implemented. The code sample below has been updated to show two
+        alternatives: the first is based on :py:func:`imas.util.to_xarray`,
+        the second is the original, manual approach.
+
         .. literalinclude:: imas_snippets/tensorized_ids_to_xarray.py
@@ -1,7 +1,7 @@
 .. _`IMAS netCDF files`:
 
-IMAS netCDF files
-=================
+IMAS netCDF files \& Xarray
+===========================
 
 .. toctree::
     :hidden:
@@ -69,6 +69,7 @@ features that are supported by DBEntries using ``imas_core`` respectively
       - Yes (requires ``imas_core >= 5.4.0``)
       - Not implemented
 
+.. _`Using IMAS netCDF files with 3rd-party tools`:
 
 Using IMAS netCDF files with 3rd-party tools
 --------------------------------------------
@@ -138,3 +139,47 @@ Validating an IMAS netCDF file
 IMAS netCDF files can be validated with IMAS-Python through the command line ``imas
 validate_nc <filename>``. See also :ref:`IMAS-Python Command Line tool` or type
 ``imas validate_nc --help`` in a command line.
+
+
+.. _`Convert IMAS-Python IDSs directly to Xarray Datasets`:
+
+Convert IMAS-Python IDSs directly to Xarray Datasets
+----------------------------------------------------
+
+In the section :ref:`Using IMAS netCDF files with 3rd-party tools`, we showed
+how to open an IMAS netCDF file with Xarray. However, IMAS-Python IDSs can also
+be converted directly to Xarray ``Dataset``\ s with
+:py:func:`imas.util.to_xarray`.
+
+This method can be used to convert a full IDS to an Xarray ``Dataset``, or only
+specific paths inside the IDS. The latter variant can also be combined with
+:ref:`lazy loading`. We'll show a small example below:
+
+.. code-block:: python
+    :caption: Converting a lazy loaded IDS to Xarray
+
+    import imas.training
+
+    # Open the training entry
+    with imas.training.get_training_db_entry() as training_entry:
+        # Lazy load the core_profiles IDS
+        core_profiles = training_entry.get("core_profiles", lazy=True)
+        # Load the average ion temperature and all coordinate data
+        xrds = imas.util.to_xarray(core_profiles, "profiles_1d.t_i_average")
+        # All relevant data is now loaded from the data entry into the xarray
+        # Dataset. We close the data entry by exiting the with-statement.
+
+    # Inspect what's inside the dataset
+    print(xrds.data_vars)
+    # Data variables:
+    #   profiles_1d.t_i_average
+
+    # The included coordinates depend on the Data Dictionary version in use
+    print(xrds.coords)
+    # Coordinates:  (with DD 4.0.0)
+    # * time
+    #   profiles_1d.grid.area
+    #   profiles_1d.grid.volume
+    #   profiles_1d.grid.rho_tor
+    #   profiles_1d.grid.rho_tor_norm
+    #   profiles_1d.grid.psi
@@ -0,0 +1,73 @@
+# xarray is an optional dependency, but this module won't be imported when xarray is not
+# available
+import numpy
+import xarray
+
+from imas.ids_toplevel import IDSToplevel
+from imas.backends.netcdf.ids_tensorizer import IDSTensorizer
+from imas.ids_data_type import IDSDataType
+
+fillvals = {  # fill value per data type, passed to tensorizer.tensorize() in to_xarray
+    IDSDataType.INT: -(2**31) + 1,  # == -2147483647
+    IDSDataType.STR: "",
+    IDSDataType.FLT: numpy.nan,
+    IDSDataType.CPX: numpy.nan * (1 + 1j),  # complex NaN: nan+nanj
+}
+
+
+def to_xarray(ids: IDSToplevel, *paths: str) -> xarray.Dataset:
+    """See :func:`imas.util.to_xarray`"""
+    # Guard: only a toplevel IDS is accepted, anything else raises a TypeError
+    if not isinstance(ids, IDSToplevel):
+        raise TypeError(
+            f"to_xarray needs a toplevel IDS element as first argument, but got {ids!r}"
+        )
+
+    # Valid paths may use / or . as separator, but IDSTensorizer expects /. Looking each
+    # path up in ids.metadata validates it (an unknown path raises KeyError, re-raised
+    # below as ValueError), and "path_string" ensures that / is used as separator.
+    try:
+        paths = [ids.metadata[path].path_string for path in paths]
+    except KeyError as exc:
+        raise ValueError(str(exc)) from None
+
+    # Converting lazy-loaded IDSs requires users to specify at least one path
+    if ids._lazy and not paths:
+        raise RuntimeError(
+            "This IDS is lazy loaded. Please provide at least one path to convert to"
+            " xarray."
+        )
+
+    # Use the netCDF IDSTensorizer to tensorize the data and determine the metadata
+    tensorizer = IDSTensorizer(ids, paths)
+    tensorizer.include_coordinate_paths()
+    tensorizer.collect_filled_data()
+    tensorizer.determine_data_shapes()
+
+    data_vars = {}  # variable name -> (dimensions, data, attrs)
+    coordinate_names = set()  # names listed as coordinates by any variable
+    for path in tensorizer.filled_data:
+        var_name = path.replace("/", ".")  # variable names use . instead of /
+        metadata = ids.metadata[path]
+        if metadata.data_type in (IDSDataType.STRUCTURE, IDSDataType.STRUCT_ARRAY):
+            continue  # Structures and arrays of structures are not stored in xarray
+
+        dimensions = tensorizer.ncmeta.get_dimensions(path, tensorizer.homogeneous_time)
+        data = tensorizer.tensorize(path, fillvals[metadata.data_type])
+
+        attrs = dict(documentation=metadata.documentation)
+        if metadata.units:
+            attrs["units"] = metadata.units
+        coordinates = tensorizer.filter_coordinates(path)  # a str here, dict below
+        if coordinates:
+            coordinate_names.update(coordinates.split(" "))  # space-separated names
+            attrs["coordinates"] = coordinates
+
+        data_vars[var_name] = (dimensions, data, attrs)
+
+    # Move the coordinate variables out of data_vars and into the coordinates mapping:
+    coordinates = {}
+    for coordinate_name in coordinate_names:  # pop() assumes each name is in data_vars
+        coordinates[coordinate_name] = data_vars.pop(coordinate_name)
+
+    return xarray.Dataset(data_vars, coordinates)