From 6476ff87e6a8ff8fb5c156146df261ea8a7f8bd0 Mon Sep 17 00:00:00 2001
From: Maarten Sebregts <msebregts@ignitioncomputing.com>
Date: Thu, 6 Mar 2025 16:38:07 +0100
Subject: [PATCH 1/7] Refactor iterators used in ids2nc and nc2ids

---
 imas/backends/netcdf/ids2nc.py    | 31 +---------
 imas/backends/netcdf/iterators.py | 98 +++++++++++++++++++++++++++++++
 imas/backends/netcdf/nc2ids.py    | 58 +++---------------
 3 files changed, 108 insertions(+), 79 deletions(-)
 create mode 100644 imas/backends/netcdf/iterators.py

diff --git a/imas/backends/netcdf/ids2nc.py b/imas/backends/netcdf/ids2nc.py
index 2b892838..d5a8816c 100644
--- a/imas/backends/netcdf/ids2nc.py
+++ b/imas/backends/netcdf/ids2nc.py
@@ -1,21 +1,16 @@
 # This file is part of IMAS-Python.
 # You should have received the IMAS-Python LICENSE file with this project.
-"""NetCDF IO support for IMAS-Python. Requires [netcdf] extra dependencies.
-"""
-
-from typing import Iterator, Tuple
+"""NetCDF IO support for IMAS-Python. Requires [netcdf] extra dependencies."""
 
 import netCDF4
 import numpy
 from packaging import version
 
 from imas.backends.netcdf.nc_metadata import NCMetadata
+from imas.backends.netcdf.iterators import indexed_tree_iter
 from imas.exception import InvalidNetCDFEntry
-from imas.ids_base import IDSBase
 from imas.ids_data_type import IDSDataType
 from imas.ids_defs import IDS_TIME_MODE_HOMOGENEOUS
-from imas.ids_struct_array import IDSStructArray
-from imas.ids_structure import IDSStructure
 from imas.ids_toplevel import IDSToplevel
 
 default_fillvals = {
@@ -33,26 +28,6 @@
 SHAPE_DTYPE = numpy.int32
 
 
-def nc_tree_iter(
-    node: IDSStructure, aos_index: Tuple[int, ...] = ()
-) -> Iterator[Tuple[Tuple[int, ...], IDSBase]]:
-    """Tree iterator that tracks indices of all ancestor array of structures.
-
-    Args:
-        node: IDS node to iterate over
-
-    Yields:
-        (aos_index, node) for all filled nodes.
-    """
-    for child in node.iter_nonempty_():
-        yield (aos_index, child)
-        if isinstance(child, IDSStructArray):
-            for i in range(len(child)):
-                yield from nc_tree_iter(child[i], aos_index + (i,))
-        elif isinstance(child, IDSStructure):
-            yield from nc_tree_iter(child, aos_index)
-
-
 class IDS2NC:
     """Class responsible for storing an IDS to a NetCDF file."""
 
@@ -105,7 +80,7 @@ def collect_filled_data(self) -> None:
         dimension_size = {}
         get_dimensions = self.ncmeta.get_dimensions
 
-        for aos_index, node in nc_tree_iter(self.ids):
+        for aos_index, node in indexed_tree_iter(self.ids):
             path = node.metadata.path_string
             filled_data[path][aos_index] = node
             ndim = node.metadata.ndim
diff --git a/imas/backends/netcdf/iterators.py b/imas/backends/netcdf/iterators.py
new file mode 100644
index 00000000..bff40e43
--- /dev/null
+++ b/imas/backends/netcdf/iterators.py
@@ -0,0 +1,98 @@
+from typing import Iterator, List, Optional, Tuple
+
+from imas.ids_base import IDSBase
+from imas.ids_data_type import IDSDataType
+from imas.ids_metadata import IDSMetadata
+from imas.ids_struct_array import IDSStructArray
+from imas.ids_structure import IDSStructure
+from imas.ids_toplevel import IDSToplevel
+
+
+def _split_on_aos(metadata: IDSMetadata):
+    """Split paths per IDS."""
+    paths = []
+    curpath = metadata.name
+
+    item = metadata
+    while item._parent.data_type is not None:
+        item = item._parent
+        if item.data_type is IDSDataType.STRUCT_ARRAY:
+            paths.append(curpath)
+            curpath = item.name
+        else:
+            curpath = f"{item.name}/{curpath}"
+    paths.append(curpath)
+    return paths[::-1]
+
+
+IndexedNode = Tuple[Tuple[int, ...], IDSBase]
+
+
+def indexed_tree_iter(
+    ids: IDSToplevel, metadata: Optional[IDSMetadata] = None
+) -> Iterator[IndexedNode]:
+    """Tree iterator that tracks indices of all ancestor array of structures.
+
+    Args:
+        ids: IDS top level element to iterate over
+        metadata: Iterate over all nodes inside the IDS at the metadata object.
+            If ``None``, all filled items in the IDS are iterated over.
+
+    Yields:
+        (aos_indices, node) for all filled nodes.
+
+    Example:
+        >>> ids = imas.IDSFactory().new("core_profiles")
+        >>> ids.profiles_1d.resize(2)
+        >>> ids.profiles_1d[0].time = 1.0
+        >>> ids.profiles_1d[1].t_i_average = [1.0]
+        >>> list(indexed_tree_iter(ids))
+        [
+            ((), <IDSStructArray (IDS:core_profiles, profiles_1d with 2 items)>),
+            ((0,), <IDSFloat0D (IDS:core_profiles, profiles_1d[0]/time, FLT_0D)>),
+            ((1,), <IDSNumericArray (IDS:core_profiles, profiles_1d[1]/t_i_average, FLT_1D)>)
+        ]
+        >>> list(indexed_tree_iter(ids, ids.metadata["profiles_1d/time"]))
+        [
+            ((0,), <IDSFloat0D (IDS:core_profiles, profiles_1d[0]/time, FLT_0D)>),
+            ((1,), <IDSFloat0D (IDS:core_profiles, profiles_1d[1]/time, empty FLT_0D)>)
+        ]
+    """  # noqa: E501
+    if metadata is None:
+        # Iterate over all filled nodes in the IDS
+        yield from _full_tree_iter(ids, ())
+
+    else:
+        paths = _split_on_aos(metadata)
+        if len(paths) == 1:
+            yield (), ids[paths[0]]
+        else:
+            yield from _tree_iter(ids, paths, ())
+
+
+def _tree_iter(
+    structure: IDSStructure, paths: List[str], curindex: Tuple[int, ...]
+) -> Iterator[IndexedNode]:
+    aos_path, *paths = paths
+    aos = structure[aos_path]
+
+    if len(paths) == 1:
+        path = paths[0]
+        for i, node in enumerate(aos):
+            yield curindex + (i,), node[path]
+
+    else:
+        for i, node in enumerate(aos):
+            yield from _tree_iter(node, paths, curindex + (i,))
+
+
+def _full_tree_iter(
+    node: IDSStructure, cur_index: Tuple[int, ...]
+) -> Iterator[IndexedNode]:
+    for child in node.iter_nonempty_():
+        yield (cur_index, child)
+        if isinstance(child, IDSStructArray):
+            for i in range(len(child)):
+                yield from _full_tree_iter(child[i], cur_index + (i,))
+        elif isinstance(child, IDSStructure):
+            yield from _full_tree_iter(child, cur_index)
diff --git a/imas/backends/netcdf/nc2ids.py b/imas/backends/netcdf/nc2ids.py
index 7829d257..e9b524fb 100644
--- a/imas/backends/netcdf/nc2ids.py
+++ b/imas/backends/netcdf/nc2ids.py
@@ -1,19 +1,18 @@
 import logging
 import os
-from typing import Iterator, List, Optional, Tuple
+from typing import Optional
 
 import netCDF4
 import numpy as np
 
 from imas.backends.netcdf import ids2nc
 from imas.backends.netcdf.nc_metadata import NCMetadata
+from imas.backends.netcdf.iterators import indexed_tree_iter
 from imas.exception import InvalidNetCDFEntry
-from imas.ids_base import IDSBase
 from imas.ids_convert import NBCPathMap
 from imas.ids_data_type import IDSDataType
 from imas.ids_defs import IDS_TIME_MODE_HOMOGENEOUS
 from imas.ids_metadata import IDSMetadata
-from imas.ids_structure import IDSStructure
 from imas.ids_toplevel import IDSToplevel
 
 logger = logging.getLogger(__name__)
@@ -26,49 +25,6 @@ def variable_error(var, issue, value, expected=None) -> InvalidNetCDFEntry:
     )
 
 
-def split_on_aos(metadata: IDSMetadata):
-    paths = []
-    curpath = metadata.name
-
-    item = metadata
-    while item._parent.data_type is not None:
-        item = item._parent
-        if item.data_type is IDSDataType.STRUCT_ARRAY:
-            paths.append(curpath)
-            curpath = item.name
-        else:
-            curpath = f"{item.name}/{curpath}"
-    paths.append(curpath)
-    return paths[::-1]
-
-
-IndexedNode = Tuple[Tuple[int, ...], IDSBase]
-
-
-def tree_iter(structure: IDSStructure, metadata: IDSMetadata) -> Iterator[IndexedNode]:
-    paths = split_on_aos(metadata)
-    if len(paths) == 1:
-        yield (), structure[paths[0]]
-    else:
-        yield from _tree_iter(structure, paths, ())
-
-
-def _tree_iter(
-    structure: IDSStructure, paths: List[str], curindex: Tuple[int, ...]
-) -> Iterator[IndexedNode]:
-    aos_path, *paths = paths
-    aos = structure[aos_path]
-
-    if len(paths) == 1:
-        path = paths[0]
-        for i, node in enumerate(aos):
-            yield curindex + (i,), node[path]
-
-    else:
-        for i, node in enumerate(aos):
-            yield from _tree_iter(node, paths, curindex + (i,))
-
-
 class NC2IDS:
     """Class responsible for reading an IDS from a NetCDF group."""
 
@@ -169,7 +125,7 @@ def run(self, lazy: bool) -> None:
             if metadata.data_type is IDSDataType.STRUCT_ARRAY:
                 if "sparse" in var.ncattrs():
                     shapes = self.group[var_name + ":shape"][()]
-                    for index, node in tree_iter(self.ids, target_metadata):
+                    for index, node in indexed_tree_iter(self.ids, target_metadata):
                         node.resize(shapes[index][0])
 
                 else:
@@ -178,7 +134,7 @@ def run(self, lazy: bool) -> None:
                         metadata.path_string, self.homogeneous_time
                     )[-1]
                     size = self.group.dimensions[dim].size
-                    for _, node in tree_iter(self.ids, target_metadata):
+                    for _, node in indexed_tree_iter(self.ids, target_metadata):
                         node.resize(size)
 
                 continue
@@ -190,7 +146,7 @@ def run(self, lazy: bool) -> None:
             if "sparse" in var.ncattrs():
                 if metadata.ndim:
                     shapes = self.group[var_name + ":shape"][()]
-                    for index, node in tree_iter(self.ids, target_metadata):
+                    for index, node in indexed_tree_iter(self.ids, target_metadata):
                         shape = shapes[index]
                         if shape.all():
                             # NOTE: bypassing IDSPrimitive.value.setter logic
@@ -198,7 +154,7 @@ def run(self, lazy: bool) -> None:
                                 index + tuple(map(slice, shape))
                             ]
                 else:
-                    for index, node in tree_iter(self.ids, target_metadata):
+                    for index, node in indexed_tree_iter(self.ids, target_metadata):
                         value = data[index]
                         if value != getattr(var, "_FillValue", None):
                             # NOTE: bypassing IDSPrimitive.value.setter logic
@@ -211,7 +167,7 @@ def run(self, lazy: bool) -> None:
                 self.ids[target_metadata.path].value = data
 
             else:
-                for index, node in tree_iter(self.ids, target_metadata):
+                for index, node in indexed_tree_iter(self.ids, target_metadata):
                     # NOTE: bypassing IDSPrimitive.value.setter logic
                     node._IDSPrimitive__value = data[index]
 

From 815729c436fc51b660c5b7256b8efc12a74c6dd4 Mon Sep 17 00:00:00 2001
From: Maarten Sebregts <msebregts@ignitioncomputing.com>
Date: Fri, 7 Mar 2025 11:14:18 +0100
Subject: [PATCH 2/7] Refactor ids2nc, extract common tensorization logic in
 IDSTensorizer

Allows reuse of functionality to (partially) convert IDSs to xarray Datasets.
---
 imas/backends/netcdf/ids2nc.py         | 138 +-------------------
 imas/backends/netcdf/ids_tensorizer.py | 173 +++++++++++++++++++++++++
 2 files changed, 177 insertions(+), 134 deletions(-)
 create mode 100644 imas/backends/netcdf/ids_tensorizer.py

diff --git a/imas/backends/netcdf/ids2nc.py b/imas/backends/netcdf/ids2nc.py
index d5a8816c..531c7ac2 100644
--- a/imas/backends/netcdf/ids2nc.py
+++ b/imas/backends/netcdf/ids2nc.py
@@ -3,14 +3,11 @@
 """NetCDF IO support for IMAS-Python. Requires [netcdf] extra dependencies."""
 
 import netCDF4
-import numpy
 from packaging import version
 
-from imas.backends.netcdf.nc_metadata import NCMetadata
-from imas.backends.netcdf.iterators import indexed_tree_iter
+from imas.backends.netcdf.ids_tensorizer import SHAPE_DTYPE, IDSTensorizer, dtypes
 from imas.exception import InvalidNetCDFEntry
 from imas.ids_data_type import IDSDataType
-from imas.ids_defs import IDS_TIME_MODE_HOMOGENEOUS
 from imas.ids_toplevel import IDSToplevel
 
 default_fillvals = {
@@ -19,16 +16,9 @@
     IDSDataType.FLT: netCDF4.default_fillvals["f8"],
     IDSDataType.CPX: netCDF4.default_fillvals["f8"] * (1 + 1j),
 }
-dtypes = {
-    IDSDataType.INT: numpy.dtype(numpy.int32),
-    IDSDataType.STR: str,
-    IDSDataType.FLT: numpy.dtype(numpy.float64),
-    IDSDataType.CPX: numpy.dtype(numpy.complex128),
-}
-SHAPE_DTYPE = numpy.int32
 
 
-class IDS2NC:
+class IDS2NC(IDSTensorizer):
     """Class responsible for storing an IDS to a NetCDF file."""
 
     def __init__(self, ids: IDSToplevel, group: netCDF4.Group) -> None:
@@ -38,112 +28,18 @@ def __init__(self, ids: IDSToplevel, group: netCDF4.Group) -> None:
             ids: IDSToplevel to store in the netCDF group
             group: Empty netCDF group to store the IDS in.
         """
-        self.ids = ids
-        """IDS to store."""
+        super().__init__(ids, [])  # pass empty list: tensorize full IDS
         self.group = group
         """NetCDF Group to store the IDS in."""
 
-        self.ncmeta = NCMetadata(ids.metadata)
-        """NetCDF related metadata."""
-        self.dimension_size = {}
-        """Map dimension name to its size."""
-        self.filled_data = {}
-        """Map of IDS paths to filled data nodes."""
-        self.filled_variables = set()
-        """Set of filled IDS variables"""
-        self.homogeneous_time = (
-            ids.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS
-        )
-        """True iff the IDS time mode is homogeneous."""
-        self.shapes = {}
-        """Map of IDS paths to data shape arrays."""
-
     def run(self) -> None:
         """Store the IDS in the NetCDF group."""
         self.collect_filled_data()
         self.determine_data_shapes()
         self.create_dimensions()
         self.create_variables()
-        # Synchronize variables to disk
-        # This is not strictly required (automatically done by netCDF4 when needed), but
-        # by separating it we get more meaningful profiling statistics
-        self.group.sync()
         self.store_data()
 
-    def collect_filled_data(self) -> None:
-        """Collect all filled data in the IDS and determine dimension sizes.
-
-        Results are stored in :attr:`filled_data` and :attr:`dimension_size`.
-        """
-        # Initialize dictionary with all paths that could exist in this IDS
-        filled_data = {path: {} for path in self.ncmeta.paths}
-        dimension_size = {}
-        get_dimensions = self.ncmeta.get_dimensions
-
-        for aos_index, node in indexed_tree_iter(self.ids):
-            path = node.metadata.path_string
-            filled_data[path][aos_index] = node
-            ndim = node.metadata.ndim
-            if not ndim:
-                continue
-            dimensions = get_dimensions(path, self.homogeneous_time)
-            # We're only interested in the non-tensorized dimensions: [-ndim:]
-            for dim_name, size in zip(dimensions[-ndim:], node.shape):
-                dimension_size[dim_name] = max(dimension_size.get(dim_name, 0), size)
-
-        # Remove paths without data
-        self.filled_data = {path: data for path, data in filled_data.items() if data}
-        self.filled_variables = {path.replace("/", ".") for path in self.filled_data}
-        # Store dimension sizes
-        self.dimension_size = dimension_size
-
-    def determine_data_shapes(self) -> None:
-        """Determine tensorized data shapes and sparsity, save in :attr:`shapes`."""
-        get_dimensions = self.ncmeta.get_dimensions
-
-        for path, nodes_dict in self.filled_data.items():
-            metadata = self.ids.metadata[path]
-            # Structures don't have a size
-            if metadata.data_type is IDSDataType.STRUCTURE:
-                continue
-            ndim = metadata.ndim
-            dimensions = get_dimensions(path, self.homogeneous_time)
-
-            # node shape if it is completely filled
-            full_shape = tuple(self.dimension_size[dim] for dim in dimensions[-ndim:])
-
-            if len(dimensions) == ndim:
-                # Data at this path is not tensorized
-                node = nodes_dict[()]
-                sparse = node.shape != full_shape
-                if sparse:
-                    shapes = numpy.array(node.shape, dtype=SHAPE_DTYPE)
-
-            else:
-                # Data is tensorized, determine if it is homogeneously shaped
-                aos_dims = get_dimensions(self.ncmeta.aos[path], self.homogeneous_time)
-                shapes_shape = [self.dimension_size[dim] for dim in aos_dims]
-                if ndim:
-                    shapes_shape.append(ndim)
-                shapes = numpy.zeros(shapes_shape, dtype=SHAPE_DTYPE)
-
-                if ndim:  # ND types have a shape
-                    for aos_coords, node in nodes_dict.items():
-                        shapes[aos_coords] = node.shape
-                    sparse = not numpy.array_equiv(shapes, full_shape)
-
-                else:  # 0D types don't have a shape
-                    for aos_coords in nodes_dict.keys():
-                        shapes[aos_coords] = 1
-                    sparse = not shapes.all()
-                    shapes = None
-
-            if sparse:
-                self.shapes[path] = shapes
-                if ndim:
-                    # Ensure there is a pseudo-dimension f"{ndim}D" for shapes variable
-                    self.dimension_size[f"{ndim}D"] = ndim
-
     def create_dimensions(self) -> None:
         """Create netCDF dimensions."""
         for dimension, size in self.dimension_size.items():
@@ -228,14 +124,6 @@ def create_variables(self) -> None:
                         "shape is unset (i.e. filled with _Fillvalue)."
                     )
 
-    def filter_coordinates(self, path: str) -> str:
-        """Filter the coordinates list from NCMetadata to filled variables only."""
-        return " ".join(
-            coordinate
-            for coordinate in self.ncmeta.get_coordinates(path, self.homogeneous_time)
-            if coordinate in self.filled_variables
-        )
-
     def store_data(self) -> None:
         """Store data in the netCDF variables"""
         for path, nodes_dict in self.filled_data.items():
@@ -273,22 +161,4 @@ def store_data(self) -> None:
 
             else:
                 # Data is tensorized: tensorize in-memory
-                # TODO: depending on the data, tmp_var may be HUGE, we may need a more
-                # efficient assignment algorithm for large and/or irregular data
-                tmp_var = numpy.full(var.shape, default_fillvals[metadata.data_type])
-                if metadata.data_type is IDSDataType.STR:
-                    tmp_var = numpy.asarray(tmp_var, dtype=object)
-
-                # Fill tmp_var
-                if shapes is None:
-                    # Data is not sparse, so we can assign to the aos_coords
-                    for aos_coords, node in nodes_dict.items():
-                        tmp_var[aos_coords] = node.value
-                else:
-                    # Data is sparse, so we must select a slice
-                    for aos_coords, node in nodes_dict.items():
-                        tmp_var[aos_coords + tuple(map(slice, node.shape))] = node.value
-
-                # Assign data to variable
-                var[()] = tmp_var
-                del tmp_var
+                var[()] = self.tensorize(path, default_fillvals[metadata.data_type])
diff --git a/imas/backends/netcdf/ids_tensorizer.py b/imas/backends/netcdf/ids_tensorizer.py
new file mode 100644
index 00000000..4619919b
--- /dev/null
+++ b/imas/backends/netcdf/ids_tensorizer.py
@@ -0,0 +1,173 @@
+# This file is part of IMAS-Python.
+# You should have received the IMAS-Python LICENSE file with this project.
+"""Tensorization logic to convert IDSs to netCDF files and/or xarray Datasets."""
+
+from typing import List
+
+import numpy
+
+from imas.backends.netcdf.iterators import indexed_tree_iter
+from imas.backends.netcdf.nc_metadata import NCMetadata
+from imas.ids_data_type import IDSDataType
+from imas.ids_defs import IDS_TIME_MODE_HOMOGENEOUS
+from imas.ids_toplevel import IDSToplevel
+
+dtypes = {
+    IDSDataType.INT: numpy.dtype(numpy.int32),
+    IDSDataType.STR: str,
+    IDSDataType.FLT: numpy.dtype(numpy.float64),
+    IDSDataType.CPX: numpy.dtype(numpy.complex128),
+}
+SHAPE_DTYPE = numpy.int32
+
+
+class IDSTensorizer:
+    """Common functionality for tensorizing IDSs. Used in IDS2NC and util.to_xarray."""
+
+    def __init__(self, ids: IDSToplevel, paths_to_tensorize: List[str]) -> None:
+        """Initialize IDSTensorizer.
+
+        Args:
+            ids: IDSToplevel to store in the netCDF group
+            paths_to_tensorize: Restrict tensorization to the provided paths. If an
+                empty list is provided, all filled quantities in the IDS will be
+                tensorized.
+        """
+        self.ids = ids
+        """IDS to tensorize."""
+        self.paths_to_tensorize = paths_to_tensorize
+        """List of paths to tensorize"""
+
+        self.ncmeta = NCMetadata(ids.metadata)
+        """NetCDF related metadata."""
+        self.dimension_size = {}
+        """Map dimension name to its size."""
+        self.filled_data = {}
+        """Map of IDS paths to filled data nodes."""
+        self.filled_variables = set()
+        """Set of filled IDS variables"""
+        self.homogeneous_time = (
+            ids.ids_properties.homogeneous_time == IDS_TIME_MODE_HOMOGENEOUS
+        )
+        """True iff the IDS time mode is homogeneous."""
+        self.shapes = {}
+        """Map of IDS paths to data shape arrays."""
+
+    def collect_filled_data(self) -> None:
+        """Collect all filled data in the IDS and determine dimension sizes.
+
+        Results are stored in :attr:`filled_data` and :attr:`dimension_size`.
+        """
+        # Initialize dictionary with all paths that could exist in this IDS
+        filled_data = {path: {} for path in self.ncmeta.paths}
+        dimension_size = {}
+        get_dimensions = self.ncmeta.get_dimensions
+
+        if self.paths_to_tensorize:
+            # Restrict tensorization to provided paths
+            iterator = (
+                item
+                for path in self.paths_to_tensorize
+                for item in indexed_tree_iter(self.ids, self.ids.metadata[path])
+                if item[1].has_value  # Skip nodes without value set
+            )
+        else:
+            # Tensorize all non-empty nodes
+            iterator = indexed_tree_iter(self.ids)
+
+        for aos_index, node in iterator:
+            path = node.metadata.path_string
+            filled_data[path][aos_index] = node
+            ndim = node.metadata.ndim
+            if not ndim:
+                continue
+            dimensions = get_dimensions(path, self.homogeneous_time)
+            # We're only interested in the non-tensorized dimensions: [-ndim:]
+            for dim_name, size in zip(dimensions[-ndim:], node.shape):
+                dimension_size[dim_name] = max(dimension_size.get(dim_name, 0), size)
+
+        # Remove paths without data
+        self.filled_data = {path: data for path, data in filled_data.items() if data}
+        self.filled_variables = {path.replace("/", ".") for path in self.filled_data}
+        # Store dimension sizes
+        self.dimension_size = dimension_size
+
+    def determine_data_shapes(self) -> None:
+        """Determine tensorized data shapes and sparsity, save in :attr:`shapes`."""
+        get_dimensions = self.ncmeta.get_dimensions
+
+        for path, nodes_dict in self.filled_data.items():
+            metadata = self.ids.metadata[path]
+            # Structures don't have a size
+            if metadata.data_type is IDSDataType.STRUCTURE:
+                continue
+            ndim = metadata.ndim
+            dimensions = get_dimensions(path, self.homogeneous_time)
+
+            # node shape if it is completely filled
+            full_shape = tuple(self.dimension_size[dim] for dim in dimensions[-ndim:])
+
+            if len(dimensions) == ndim:
+                # Data at this path is not tensorized
+                node = nodes_dict[()]
+                sparse = node.shape != full_shape
+                if sparse:
+                    shapes = numpy.array(node.shape, dtype=SHAPE_DTYPE)
+
+            else:
+                # Data is tensorized, determine if it is homogeneously shaped
+                aos_dims = get_dimensions(self.ncmeta.aos[path], self.homogeneous_time)
+                shapes_shape = [self.dimension_size[dim] for dim in aos_dims]
+                if ndim:
+                    shapes_shape.append(ndim)
+                shapes = numpy.zeros(shapes_shape, dtype=SHAPE_DTYPE)
+
+                if ndim:  # ND types have a shape
+                    for aos_coords, node in nodes_dict.items():
+                        shapes[aos_coords] = node.shape
+                    sparse = not numpy.array_equiv(shapes, full_shape)
+
+                else:  # 0D types don't have a shape
+                    for aos_coords in nodes_dict.keys():
+                        shapes[aos_coords] = 1
+                    sparse = not shapes.all()
+                    shapes = None
+
+            if sparse:
+                self.shapes[path] = shapes
+                if ndim:
+                    # Ensure there is a pseudo-dimension f"{ndim}D" for shapes variable
+                    self.dimension_size[f"{ndim}D"] = ndim
+
+    def filter_coordinates(self, path: str) -> str:
+        """Filter the coordinates list from NCMetadata to filled variables only."""
+        return " ".join(
+            coordinate
+            for coordinate in self.ncmeta.get_coordinates(path, self.homogeneous_time)
+            if coordinate in self.filled_variables
+        )
+
+    def tensorize(self, path, fillvalue):
+        dimensions = self.ncmeta.get_dimensions(path, self.homogeneous_time)
+        shape = tuple(self.dimension_size[dim] for dim in dimensions)
+
+        # TODO: depending on the data, tmp_var may be HUGE, we may need a more
+        # efficient assignment algorithm for large and/or irregular data
+        tmp_var = numpy.full(shape, fillvalue)
+        if isinstance(fillvalue, str):
+            tmp_var = numpy.asarray(tmp_var, dtype=object)
+
+        shapes = self.shapes.get(path)
+        nodes_dict = self.filled_data[path]
+
+        # Fill tmp_var
+        if shapes is None:
+            # Data is not sparse, so we can assign to the aos_coords
+            for aos_coords, node in nodes_dict.items():
+                tmp_var[aos_coords] = node.value
+        else:
+            # Data is sparse, so we must select a slice
+            for aos_coords, node in nodes_dict.items():
+                tmp_var[aos_coords + tuple(map(slice, node.shape))] = node.value
+
+        return tmp_var

From 76c265a817f07456501980c49548530b3c3ec61b Mon Sep 17 00:00:00 2001
From: Maarten Sebregts <msebregts@ignitioncomputing.com>
Date: Fri, 7 Mar 2025 13:42:34 +0100
Subject: [PATCH 3/7] Implement `imas.util.to_xarray`

Reuses most of the tensorization and metadata logic from the netCDF export.
---
 imas/_to_xarray.py                     | 73 ++++++++++++++++++++
 imas/backends/netcdf/ids_tensorizer.py | 21 ++++++
 imas/test/test_to_xarray.py            | 94 ++++++++++++++++++++++++++
 imas/util.py                           | 56 ++++++++++++++-
 4 files changed, 241 insertions(+), 3 deletions(-)
 create mode 100644 imas/_to_xarray.py
 create mode 100644 imas/test/test_to_xarray.py

diff --git a/imas/_to_xarray.py b/imas/_to_xarray.py
new file mode 100644
index 00000000..6caec501
--- /dev/null
+++ b/imas/_to_xarray.py
@@ -0,0 +1,73 @@
+# xarray is an optional dependency, but this module won't be imported when xarray is not
+# available
+import numpy
+import xarray
+
+from imas.ids_toplevel import IDSToplevel
+from imas.backends.netcdf.ids_tensorizer import IDSTensorizer
+from imas.ids_data_type import IDSDataType
+
+fillvals = {
+    IDSDataType.INT: -(2**31) + 1,
+    IDSDataType.STR: "",
+    IDSDataType.FLT: numpy.nan,
+    IDSDataType.CPX: numpy.nan * (1 + 1j),
+}
+
+
+def to_xarray(ids: IDSToplevel, *paths: str) -> xarray.Dataset:
+    """See :func:`imas.util.to_xarray`"""
+    # We really need an IDS toplevel element
+    if not isinstance(ids, IDSToplevel):
+        raise TypeError(
+            f"to_xarray needs a toplevel IDS element as first argument, but got {ids!r}"
+        )
+
+    # Valid path can use / or . as separator, but IDSTensorizer expects /. The following
+    # block checks if the paths are valid, and by using "metadata.path_string" we ensure
+    # that / are used as separator.
+    try:
+        paths = [ids.metadata[path].path_string for path in paths]
+    except KeyError as exc:
+        raise ValueError(str(exc)) from None
+
+    # Converting lazy-loaded IDSs requires users to specify at least one path
+    if ids._lazy and not paths:
+        raise RuntimeError(
+            "This IDS is lazy loaded. Please provide at least one path to convert to"
+            " xarray."
+        )
+
+    # Use netcdf IDS Tensorizer to tensorize the data and determine metadata
+    tensorizer = IDSTensorizer(ids, paths)
+    tensorizer.include_coordinate_paths()
+    tensorizer.collect_filled_data()
+    tensorizer.determine_data_shapes()
+
+    data_vars = {}
+    coordinate_names = set()
+    for path in tensorizer.filled_data:
+        var_name = path.replace("/", ".")
+        metadata = ids.metadata[path]
+        if metadata.data_type in (IDSDataType.STRUCTURE, IDSDataType.STRUCT_ARRAY):
+            continue  # We don't store these in xarray
+
+        dimensions = tensorizer.ncmeta.get_dimensions(path, tensorizer.homogeneous_time)
+        data = tensorizer.tensorize(path, fillvals[metadata.data_type])
+
+        attrs = dict(documentation=metadata.documentation)
+        if metadata.units:
+            attrs["units"] = metadata.units
+        coordinates = tensorizer.filter_coordinates(path)
+        if coordinates:
+            coordinate_names.update(coordinates.split(" "))
+            attrs["coordinates"] = coordinates
+
+        data_vars[var_name] = (dimensions, data, attrs)
+
+    # Remove coordinates from data_vars and put in coordinates mapping:
+    coordinates = {}
+    for coordinate_name in coordinate_names:
+        coordinates[coordinate_name] = data_vars.pop(coordinate_name)
+
+    return xarray.Dataset(data_vars, coordinates)
diff --git a/imas/backends/netcdf/ids_tensorizer.py b/imas/backends/netcdf/ids_tensorizer.py
index 4619919b..95bfba47 100644
--- a/imas/backends/netcdf/ids_tensorizer.py
+++ b/imas/backends/netcdf/ids_tensorizer.py
@@ -2,6 +2,7 @@
 # You should have received the IMAS-Python LICENSE file with this project.
 """Tensorization logic to convert IDSs to netCDF files and/or xarray Datasets."""
 
+from collections import deque
 from typing import List
 
 import numpy
@@ -53,6 +54,26 @@ def __init__(self, ids: IDSToplevel, paths_to_tensorize: List[str]) -> None:
         self.shapes = {}
         """Map of IDS paths to data shape arrays."""
 
+    def include_coordinate_paths(self) -> None:
+        """Append all paths that are coordinates of self.paths_to_tensorize"""
+        # Use a queue so we can also take coordinates of coordinates into account
+        queue = deque(self.paths_to_tensorize)
+        # Include all parent AoS as well:
+        for path in self.paths_to_tensorize:
+            while path:
+                path, _, _ = path.rpartition("/")
+                if self.ncmeta.get_dimensions(path, self.homogeneous_time):
+                    queue.append(path)
+
+        self.paths_to_tensorize = []
+        while queue:
+            path = queue.popleft()
+            if path in self.paths_to_tensorize:
+                continue  # already processed
+            self.paths_to_tensorize.append(path)
+            for coordinate in self.ncmeta.get_coordinates(path, self.homogeneous_time):
+                queue.append(coordinate.replace(".", "/"))
+
     def collect_filled_data(self) -> None:
         """Collect all filled data in the IDS and determine dimension sizes.
 
diff --git a/imas/test/test_to_xarray.py b/imas/test/test_to_xarray.py
new file mode 100644
index 00000000..1767a6d9
--- /dev/null
+++ b/imas/test/test_to_xarray.py
@@ -0,0 +1,94 @@
+import numpy as np
+import pytest
+
+import imas
+import imas.training
+from imas.util import to_xarray
+
+pytest.importorskip("xarray")
+
+
+@pytest.fixture
+def entry(requires_imas, monkeypatch):
+    monkeypatch.setenv("IMAS_VERSION", "3.39.0")  # Use fixed DD version
+    return imas.training.get_training_db_entry()
+
+
+def test_to_xarray_invalid_argtype():
+    ids = imas.IDSFactory("3.39.0").core_profiles()
+
+    with pytest.raises(TypeError):
+        to_xarray("test")
+    with pytest.raises(TypeError):
+        to_xarray(ids.time)
+    with pytest.raises(TypeError):
+        to_xarray(ids.ids_properties)
+
+
+def test_to_xarray_invalid_paths():
+    ids = imas.IDSFactory("3.39.0").core_profiles()
+
+    with pytest.raises(ValueError, match="xyz"):
+        to_xarray(ids, "xyz")
+    with pytest.raises(ValueError, match="ids_properties/xyz"):
+        to_xarray(ids, "ids_properties/xyz")
+    with pytest.raises(ValueError, match="Xtime"):
+        to_xarray(ids, "time", "Xtime")
+
+
+def validate_trainingdb_electron_temperature_dataset(ds):
+    assert ds.sizes == {"time": 3, "profiles_1d.grid.rho_tor_norm:i": 101}
+    assert ds.data_vars.keys() == {"profiles_1d.electrons.temperature"}
+    assert ds.coords.keys() == {"time", "profiles_1d.grid.rho_tor_norm"}
+
+    # Check that values are loaded as expected
+    assert np.allclose(ds["time"], [3.987222, 432.937598, 792.0])
+    assert np.allclose(
+        ds.isel(time=1)["profiles_1d.electrons.temperature"][10:13],
+        [17728.81703089, 17440.78020568, 17139.35431082],
+    )
+
+
+def test_to_xarray_lazy_loaded(entry):
+    ids = entry.get("core_profiles", lazy=True)
+
+    with pytest.raises(RuntimeError):
+        to_xarray(ids)
+
+    ds = to_xarray(ids, "profiles_1d.electrons.temperature")
+    validate_trainingdb_electron_temperature_dataset(ds)
+
+
+def test_to_xarray_from_trainingdb(entry):
+    ids = entry.get("core_profiles")
+
+    ds = to_xarray(ids)
+    validate_trainingdb_electron_temperature_dataset(
+        ds["profiles_1d.electrons.temperature"].to_dataset()
+    )
+    ds = to_xarray(ids, "profiles_1d.electrons.temperature")
+    validate_trainingdb_electron_temperature_dataset(ds)
+
+    ds = to_xarray(
+        ids, "profiles_1d.electrons.temperature", "profiles_1d/electrons/density"
+    )
+    assert ds.data_vars.keys() == {
+        "profiles_1d.electrons.temperature",
+        "profiles_1d.electrons.density",
+    }
+
+
+def test_to_xarray():
+    ids = imas.IDSFactory("3.39.0").core_profiles()
+
+    ids.profiles_1d.resize(2)
+    ids.profiles_1d[0].electrons.temperature = [1.0, 2.0]
+    ids.profiles_1d[0].grid.rho_tor_norm = [0.0, 1.0]
+    ids.profiles_1d[0].time = 0.0
+
+    # These should all be identical:
+    ds1 = to_xarray(ids)
+    ds2 = to_xarray(ids, "profiles_1d.electrons.temperature")
+    ds3 = to_xarray(ids, "profiles_1d/electrons/temperature")
+    assert ds1.equals(ds2)
+    assert ds2.equals(ds3)
diff --git a/imas/util.py b/imas/util.py
index aafad2c7..64e2b228 100644
--- a/imas/util.py
+++ b/imas/util.py
@@ -1,8 +1,6 @@
 # This file is part of IMAS-Python.
 # You should have received the IMAS-Python LICENSE file with this project.
-"""Collection of useful helper methods when working with IMAS-Python.
-"""
-
+"""Collection of useful helper methods when working with IMAS-Python."""
 
 import logging
 import re
@@ -524,3 +522,55 @@ def get_data_dictionary_version(obj: Union[IDSBase, DBEntry, IDSFactory]) -> str
     if isinstance(obj, IDSBase):
         return obj._version
     raise TypeError(f"Cannot get data dictionary version of '{type(obj)}'")
+
+
+def to_xarray(ids: IDSToplevel, *paths: str) -> Any:
+    """Convert an IDS to an xarray Dataset.
+
+    Args:
+        ids: An IDS toplevel element
+        paths: Optional list of element paths to convert to xarray. The full IDS will be
+            converted to an xarray Dataset if no paths are provided.
+
+            Paths must not contain indices, and may use a ``/`` or a ``.`` as separator.
+            For example, ``"profiles_1d(itime)/electrons/density"`` is not allowed as
+            path, use ``"profiles_1d/electrons/density"`` or
+            ``profiles_1d.electrons.density"`` instead.
+
+            Coordinates to the quantities in the requested paths will also be included
+            in the xarray Dataset.
+
+    Returns:
+        An ``xarray.Dataset`` object.
+
+    Examples:
+        .. code-block:: python
+
+            # Convert the whole IDS to an xarray Dataset
+            ds = imas.util.to_xarray(ids)
+
+            # Convert only some elements in the IDS (including their coordinates)
+            ds = imas.util.to_xarray(
+                ids,
+                "profiles_1d/electrons/density",
+                "profiles_1d/electrons/temperature",
+            )
+
+            # Paths can be provided with "/" or "." as separator
+            ds = imas.util.to_xarray(
+                ids,
+                "profiles_1d.electrons.density",
+                "profiles_1d.electrons.temperature",
+            )
+
+    See Also:
+        https://docs.xarray.dev/en/stable/generated/xarray.Dataset.html
+    """
+    try:
+        import xarray  # noqa: F401
+    except ImportError:
+        raise RuntimeError("xarray is not available, cannot convert the IDS to xarray.")
+
+    from imas._to_xarray import to_xarray
+
+    return to_xarray(ids, *paths)

From bd153735f0adaea0fddf4327281fa4ae4d6f1608 Mon Sep 17 00:00:00 2001
From: Maarten Sebregts <msebregts@ignitioncomputing.com>
Date: Fri, 7 Mar 2025 13:43:01 +0100
Subject: [PATCH 4/7] Add `xarray` as optional imas-python dependency

---
 .github/workflows/test_with_pytest.yml |  2 +-
 pyproject.toml                         | 10 +++++++---
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/test_with_pytest.yml b/.github/workflows/test_with_pytest.yml
index 4febc7a3..7e56ac38 100644
--- a/.github/workflows/test_with_pytest.yml
+++ b/.github/workflows/test_with_pytest.yml
@@ -28,7 +28,7 @@ jobs:
           python -m venv venv
           source venv/bin/activate
           pip install --upgrade pip setuptools wheel
-          pip install .[h5py,netcdf,test]
+          pip install .[test]
 
       - name: Run tests
         run: |
diff --git a/pyproject.toml b/pyproject.toml
index 1b1b86c3..56e6dc1b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -71,8 +71,7 @@ dependencies = [
 [project.optional-dependencies]
 # these self-dependencies are available since pip 21.2
 all = [
-    "imas-python[test,docs,netcdf,h5py]" 
-    # "imas-python[test,docs,imas-core,netcdf,h5py]" TODO enable when imas-core is available on pypi
+    "imas-python[test,docs]" 
 ]
 docs = [
     "sphinx>=6.0.0,<7.0.0",
@@ -90,6 +89,9 @@ netcdf = [
 h5py = [
     "h5py",
 ]
+xarray = [
+    "xarray",
+]
 test = [
     "pytest>=5.4.1",
     "pytest-cov>=0.6",
@@ -101,7 +103,9 @@ test = [
     "virtualenv",
     # Pint and xarray are used in training snippets
     "pint",
-    "xarray",
+    # Optional dependencies
+    # TODO add imas-core when it is available on pypi
+    "imas-python[netcdf,h5py,xarray]",
 ]
 
 [project.scripts]

From 334268ffa40f7602a18856bd6b1666c868d1a599 Mon Sep 17 00:00:00 2001
From: Maarten Sebregts <msebregts@ignitioncomputing.com>
Date: Mon, 10 Mar 2025 13:58:57 +0100
Subject: [PATCH 5/7] Update xarray advanced course to mention
 imas.util.to_xarray

---
 .../imas_snippets/tensorized_ids_to_xarray.py | 34 +++++++++++++++++++
 docs/source/courses/advanced/xarray.rst       | 16 +++++++--
 2 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/docs/source/courses/advanced/imas_snippets/tensorized_ids_to_xarray.py b/docs/source/courses/advanced/imas_snippets/tensorized_ids_to_xarray.py
index ff4f4e28..559b27a7 100644
--- a/docs/source/courses/advanced/imas_snippets/tensorized_ids_to_xarray.py
+++ b/docs/source/courses/advanced/imas_snippets/tensorized_ids_to_xarray.py
@@ -1,6 +1,7 @@
 import os
 
 import matplotlib
+
 # To avoid possible display issues when Matplotlib uses a non-GUI backend
 if "DISPLAY" not in os.environ:
     matplotlib.use("agg")
@@ -17,6 +18,39 @@
 entry = imas.training.get_training_db_entry()
 cp = entry.get("core_profiles")
 
+#######################################################################################
+# Steps 2, 3 and 4, using imas.util.to_xarray
+# Create an xarray Dataset containing t_i_average and its coordinates
+xrds = imas.util.to_xarray(cp, "profiles_1d/t_i_average")
+# Note that profiles_1d.grid.rho_tor_norm is a 2D coordinate: its values may be
+# different at different times.
+#
+# Since the values at different time slices differ only minutely in this example, we'll
+# rename the `profiles_1d.grid.rho_tor_norm:i` dimension to `rho_tor_norm` and set the
+# values to the values of rho_tor_norm of the first time slice:
+xrds = xrds.rename({"profiles_1d.grid.rho_tor_norm:i": "rho_tor_norm"}).assign_coords(
+    {"rho_tor_norm": xrds["profiles_1d.grid.rho_tor_norm"].isel(time=0).data}
+)
+
+# Extract temperatures as an xarray DataArray
+temperature = xrds["profiles_1d.t_i_average"]
+
+# 5a. Select subset of temperature where 0.4 <= rho_tor_norm < 0.6:
+print(temperature.sel(rho_tor_norm=slice(0.4, 0.6)))
+
+# 5b. Interpolate temperature on a new grid: [0, 0.1, 0.2, ..., 0.9, 1.0]
+print(temperature.interp(rho_tor_norm=numpy.linspace(0, 1, 11)))
+
+# 5c. Interpolate temperature on a new time base: [10, 20]
+print(temperature.interp(time=[10, 20]))
+
+# 5d. Plot
+temperature.plot(x="time", norm=matplotlib.colors.LogNorm())
+plt.show()
+
+#######################################################################################
+# We can also manually build an xarray DataArray, this is shown below:
+
 # 2. Store the temperature of the first time slice
 temperature = cp.profiles_1d[0].t_i_average
 
diff --git a/docs/source/courses/advanced/xarray.rst b/docs/source/courses/advanced/xarray.rst
index f28b452b..d1375a45 100644
--- a/docs/source/courses/advanced/xarray.rst
+++ b/docs/source/courses/advanced/xarray.rst
@@ -3,9 +3,10 @@ Create ``xarray.DataArray`` from an IDS
 
 .. info::
 
-    In this lesson you will create a ``DataArray`` manually. In a future version of
-    IMAS-Python we plan to include functionality that will automatically do this for you.
-    That should further simplify working with data inside IDSs.
+    This lesson was written before :py:func:`imas.util.to_xarray` was
+    implemented. This lesson is retained for educational purposes, however we
+    recommend to use :py:func:`imas.util.to_xarray` instead of manually creating
+    xarray ``DataArray``\ s.
 
 Let's start with an introduction of Xarray. According to `their website
 <https://docs.xarray.dev/en/stable/getting-started-guide/why-xarray.html>`_ (where you
@@ -61,6 +62,10 @@ Exercise 1: create a ``DataArray`` for ``profiles_1d/temperature``
 
     .. md-tab-item:: Solution
 
+        This exercise was created before the implementation of
+        :py:func:`imas.util.to_xarray`. The original approach is available below
+        for educational purposes.
+
         .. literalinclude:: imas_snippets/ids_to_xarray.py
 
 
@@ -96,4 +101,9 @@ the ``profiles_1d`` array of structures. When the grid is not changing in the ID
 
     .. md-tab-item:: Solution
 
+        This exercise was created before the implementation of
+        :py:func:`imas.util.to_xarray`. Below code sample is updated to provide
+        two alternatives: the first is based on :py:func:`imas.util.to_xarray`,
+        the second is the original, manual approach.
+
         .. literalinclude:: imas_snippets/tensorized_ids_to_xarray.py

From 10d30ec6f327cdcb5677c6ba4280ad87fb1e96b6 Mon Sep 17 00:00:00 2001
From: Maarten Sebregts <msebregts@ignitioncomputing.com>
Date: Mon, 10 Mar 2025 15:00:37 +0100
Subject: [PATCH 6/7] Additional documentation for `imas.util.to_xarray`

---
 docs/source/courses/advanced/xarray.rst |  2 +
 docs/source/netcdf.rst                  | 49 ++++++++++++++++++++++++-
 2 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/docs/source/courses/advanced/xarray.rst b/docs/source/courses/advanced/xarray.rst
index d1375a45..249520d0 100644
--- a/docs/source/courses/advanced/xarray.rst
+++ b/docs/source/courses/advanced/xarray.rst
@@ -8,6 +8,8 @@ Create ``xarray.DataArray`` from an IDS
     recommend to use :py:func:`imas.util.to_xarray` instead of manually creating
     xarray ``DataArray``\ s.
 
+    See also: :ref:`Convert IMAS-Python IDSs directly to Xarray Datasets`.
+
 Let's start with an introduction of Xarray. According to `their website
 <https://docs.xarray.dev/en/stable/getting-started-guide/why-xarray.html>`_ (where you
 can also find an excellent summary of why that is useful):
diff --git a/docs/source/netcdf.rst b/docs/source/netcdf.rst
index fb85ea23..868ae429 100644
--- a/docs/source/netcdf.rst
+++ b/docs/source/netcdf.rst
@@ -1,7 +1,7 @@
 .. _`IMAS netCDF files`:
 
-IMAS netCDF files
-=================
+IMAS netCDF files \& Xarray
+===========================
 
 .. toctree::
     :hidden:
@@ -69,6 +69,7 @@ features that are supported by DBEntries using ``imas_core`` respectively
       - Yes (requires ``imas_core >= 5.4.0``)
       - Not implemented
 
+.. _`Using IMAS netCDF files with 3rd-party tools`:
 
 Using IMAS netCDF files with 3rd-party tools
 --------------------------------------------
@@ -138,3 +139,47 @@ Validating an IMAS netCDF file
 IMAS netCDF files can be validated with IMAS-Python through the command line ``imas
 validate_nc <filename>``. See also :ref:`IMAS-Python Command Line tool` or type
 ``imas validate_nc --help`` in a command line.
+
+
+.. _`Convert IMAS-Python IDSs directly to Xarray Datasets`:
+
+Convert IMAS-Python IDSs directly to Xarray Datasets
+----------------------------------------------------
+
+In the section :ref:`Using IMAS netCDF files with 3rd-party tools`, we showed
+how to open an IMAS netCDF file with Xarray. However, IMAS-Python IDSs can also
+be converted directly to Xarray ``Dataset``\ s with
+:py:func:`imas.util.to_xarray`.
+
+This method can be used to convert a full IDS to an Xarray ``Dataset``, or only
+specific paths inside the IDS. The latter variant can also be combined with
+:ref:`lazy loading`. We'll show a small example below:
+
+.. code-block:: python
+    :caption: Converting a lazy loaded IDS to Xarray
+
+    import imas.training
+
+    # Open the training entry
+    with imas.training.get_training_db_entry() as training_entry:
+        # Lazy load the core_profiles IDS
+        core_profiles = training_entry.get("core_profiles", lazy=True)
+        # Load the average ion temperature and all coordinate data
+        xrds = imas.util.to_xarray(core_profiles, "profiles_1d.t_i_average")
+        # All relevant data is now loaded from the data entry into the xarray
+        # Dataset. We close the data entry by exiting the with-statement.
+
+    # Inspect what's inside the dataset
+    print(xrds.data_vars)
+    # Data variables:
+    #   profiles_1d.t_i_average
+
+    # Included coordinates depends on the used Data Dictionary version
+    print(xrds.coords)
+    # Coordinates:  (with DD 4.0.0)
+    # * time
+    #   profiles_1d.grid.area
+    #   profiles_1d.grid.volume
+    #   profiles_1d.grid.rho_tor
+    #   profiles_1d.grid.rho_tor_norm
+    #   profiles_1d.grid.psi

From f44bcbbeff763724c7923fc5a4415c6b125a456e Mon Sep 17 00:00:00 2001
From: Maarten Sebregts <msebregts@ignitioncomputing.com>
Date: Thu, 13 Mar 2025 11:33:06 +0100
Subject: [PATCH 7/7] Additional documentation and example for to_xarray

---
 imas/util.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/imas/util.py b/imas/util.py
index 64e2b228..e41d28a2 100644
--- a/imas/util.py
+++ b/imas/util.py
@@ -543,6 +543,22 @@ def to_xarray(ids: IDSToplevel, *paths: str) -> Any:
     Returns:
         An ``xarray.Dataset`` object.
 
+    Notes:
+        - Lazy loaded IDSs are not supported for full IDS conversion
+          (``imas.util.to_xarray(ids)`` will raise an exception for lazy loaded IDSs).
+          This function can work with lazy loaded IDSs when paths are explicitly
+          provided: this might take a while because it will load all data for the
+          provided paths and their coordinates.
+        - This function does not accept wildcards for the paths. However, it is possible
+          to combine this method with :py:func:`imas.util.find_paths`, see the Examples
+          below.
+        - This function may return an empty dataset in the following cases:
+
+          - The provided IDS does not contain any data.
+          - The IDS does not contain any data for the provided paths.
+          - The provided paths do not point to data nodes, but to (arrays of)
+            structures.
+
     Examples:
         .. code-block:: python
 
@@ -563,6 +579,12 @@ def to_xarray(ids: IDSToplevel, *paths: str) -> Any:
                 "profiles_1d.electrons.temperature",
             )
 
+            # Combine with imas.util.find_paths to include all paths containing
+            # "profiles_1d" in the xarray conversion:
+            profiles_1d_paths = imas.util.find_paths(ids, "profiles_1d")
+            assert len(profiles_1d_paths) > 0
+            ds = imas.util.to_xarray(ids, *profiles_1d_paths)
+
     See Also:
         https://docs.xarray.dev/en/stable/generated/xarray.Dataset.html
     """