Support numeric indexing on compound-dtype datasets.

LindiH5pyDataset._get_item_for_zarr: the `else:` branch that used to raise
TypeError ("does not support selection with ...") now reads
`zarr_array[selection]` and converts the result using `self._compound_dtype`:
  * an ndarray result is converted element by element (list/ndarray items
    become tuples) and returned as a structured array;
  * a list/tuple result is converted to a single structured value via `[()]`;
  * any other result type raises TypeError.

tests/test_lindi_h5py_file.py: imports numpy and zarr, and adds
test_compound_dtype_slicing, which exercises `[:]`, `[0]`, `[0:2]`, `[-1]`
and `['x'][:]` on a three-record compound dataset.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Hunk line counts match the hunk headers, but
the leading-whitespace widths below are reconstructed from syntax - confirm
them against the target files (or apply with whitespace-insensitive matching).

diff --git a/lindi/LindiH5pyFile/LindiH5pyDataset.py b/lindi/LindiH5pyFile/LindiH5pyDataset.py
index 1ec9da3..e824a3d 100644
--- a/lindi/LindiH5pyFile/LindiH5pyDataset.py
+++ b/lindi/LindiH5pyFile/LindiH5pyDataset.py
@@ -214,9 +214,22 @@ def _get_item_for_zarr(self, zarr_array: zarr.Array, selection: Any):
                 )
                 return ret
             else:
-                raise TypeError(
-                    f"Compound dataset {self.name} does not support selection with {selection}"
-                )
+                # Numeric slicing (e.g., [:], [0], [0:2])
+                # Get the raw data from zarr (will be object array of lists)
+                raw_data = zarr_array[selection]
+                # Convert to structured array with compound dtype
+                if isinstance(raw_data, np.ndarray):
+                    # Multiple elements - convert each list to tuple
+                    tuples = [tuple(item) if isinstance(item, (list, np.ndarray)) else item for item in raw_data]
+                    return np.array(tuples, dtype=self._compound_dtype)
+                elif isinstance(raw_data, (list, tuple)):
+                    # Single element - raw_data is a list or tuple
+                    return np.array(tuple(raw_data), dtype=self._compound_dtype)[()]
+                else:
+                    # Unexpected type - this shouldn't happen, but handle gracefully
+                    raise TypeError(
+                        f"Unexpected data type from zarr array for compound dataset {self.name}: {type(raw_data)}"
+                    )
 
         # We use zarr's slicing, except in the case of a scalar dataset
         if self.ndim == 0:
diff --git a/tests/test_lindi_h5py_file.py b/tests/test_lindi_h5py_file.py
index 032f8e6..17a13f5 100644
--- a/tests/test_lindi_h5py_file.py
+++ b/tests/test_lindi_h5py_file.py
@@ -2,6 +2,8 @@
 import os
 import pytest
 import h5py
+import numpy as np
+import zarr
 import lindi
 from .utils import assert_h5py_files_equal
 
@@ -357,6 +359,57 @@ def test_create_dataset():
             assert ds.shape == (3,)
 
 
+def test_compound_dtype_slicing():
+    """Test that compound dtype datasets support numeric slicing (e.g., [:], [0], [0:2])"""
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        dirname = f'{tmpdir}/test.zarr'
+        store = zarr.DirectoryStore(dirname)
+        zarr.group(store=store)
+
+        # Write compound dataset
+        with lindi.LindiH5pyFile.from_zarr_store(store, mode='r+') as h5f:
+            compound_dtype = np.dtype([('x', np.int32), ('y', np.float64)])
+            data = np.array([(1, 2.2), (3, 4.4), (5, 6.6)], dtype=compound_dtype)
+            h5f.create_dataset('dset_compound', data=data)
+
+        # Read and test slicing
+        with lindi.LindiH5pyFile.from_zarr_store(store, mode='r') as h5f:
+            dset = h5f['dset_compound']
+
+            # Test full slice
+            result = dset[:]
+            assert result.shape == (3,)
+            assert result.dtype == compound_dtype
+            assert result[0]['x'] == 1
+            assert result[0]['y'] == 2.2
+            assert result[1]['x'] == 3
+            assert result[1]['y'] == 4.4
+
+            # Test single element access
+            result = dset[0]
+            assert isinstance(result, np.void)
+            assert result['x'] == 1
+            assert result['y'] == 2.2
+
+            # Test partial slice
+            result = dset[0:2]
+            assert result.shape == (2,)
+            assert result.dtype == compound_dtype
+            assert result[0]['x'] == 1
+            assert result[1]['x'] == 3
+
+            # Test negative indexing
+            result = dset[-1]
+            assert result['x'] == 5
+            assert result['y'] == 6.6
+
+            # Test field selection (should still work)
+            result = dset['x'][:]
+            assert result.shape == (3,)
+            assert np.array_equal(result, [1, 3, 5])
+
+
 def create_example_h5_file(fname):
     with h5py.File(fname, 'w') as f:
         f.attrs['attr1'] = 'value1'