From d412badc5bb88013688234dbc730f62283c132d7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 3 Jan 2026 00:54:28 +0000 Subject: [PATCH 1/4] Initial plan From 5806429a97b243952ca9d921cda9ab5719006912 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 3 Jan 2026 01:00:28 +0000 Subject: [PATCH 2/4] Fix compound dtype slicing - support numeric indexing Co-authored-by: rly <310197+rly@users.noreply.github.com> --- lindi/LindiH5pyFile/LindiH5pyDataset.py | 14 +++++-- tests/test_lindi_h5py_file.py | 53 +++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 3 deletions(-) diff --git a/lindi/LindiH5pyFile/LindiH5pyDataset.py b/lindi/LindiH5pyFile/LindiH5pyDataset.py index 1ec9da3..f017d4c 100644 --- a/lindi/LindiH5pyFile/LindiH5pyDataset.py +++ b/lindi/LindiH5pyFile/LindiH5pyDataset.py @@ -214,9 +214,17 @@ def _get_item_for_zarr(self, zarr_array: zarr.Array, selection: Any): ) return ret else: - raise TypeError( - f"Compound dataset {self.name} does not support selection with {selection}" - ) + # Numeric slicing (e.g., [:], [0], [0:2]) + # Get the raw data from zarr (will be object array of lists) + raw_data = zarr_array[selection] + # Convert to structured array with compound dtype + if isinstance(raw_data, np.ndarray): + # Multiple elements - convert each list to tuple + tuples = [tuple(item) for item in raw_data] + return np.array(tuples, dtype=self._compound_dtype) + else: + # Single element - raw_data is a list + return np.array(tuple(raw_data), dtype=self._compound_dtype)[()] # We use zarr's slicing, except in the case of a scalar dataset if self.ndim == 0: diff --git a/tests/test_lindi_h5py_file.py b/tests/test_lindi_h5py_file.py index 032f8e6..b8058c0 100644 --- a/tests/test_lindi_h5py_file.py +++ b/tests/test_lindi_h5py_file.py @@ -357,6 +357,59 @@ def test_create_dataset(): assert ds.shape == (3,) +def test_compound_dtype_slicing(): + """Test that compound dtype datasets support numeric slicing (e.g., [:], [0], [0:2])""" + import numpy as np + import zarr + + with tempfile.TemporaryDirectory() as tmpdir: + dirname = f'{tmpdir}/test.zarr' + store = zarr.DirectoryStore(dirname) + zarr.group(store=store) + + # Write compound dataset + with lindi.LindiH5pyFile.from_zarr_store(store, mode='r+') as h5f: + compound_dtype = np.dtype([('x', np.int32), ('y', np.float64)]) + data = np.array([(1, 2.2), (3, 4.4), (5, 6.6)], dtype=compound_dtype) + h5f.create_dataset('dset_compound', data=data) + + # Read and test slicing + with lindi.LindiH5pyFile.from_zarr_store(store, mode='r') as h5f: + dset = h5f['dset_compound'] + + # Test full slice + result = dset[:] + assert result.shape == (3,) + assert result.dtype == compound_dtype + assert result[0]['x'] == 1 + assert result[0]['y'] == 2.2 + assert result[1]['x'] == 3 + assert result[1]['y'] == 4.4 + + # Test single element access + result = dset[0] + assert isinstance(result, np.void) + assert result['x'] == 1 + assert result['y'] == 2.2 + + # Test partial slice + result = dset[0:2] + assert result.shape == (2,) + assert result.dtype == compound_dtype + assert result[0]['x'] == 1 + assert result[1]['x'] == 3 + + # Test negative indexing + result = dset[-1] + assert result['x'] == 5 + assert result['y'] == 6.6 + + # Test field selection (should still work) + result = dset['x'][:] + assert result.shape == (3,) + assert np.array_equal(result, [1, 3, 5]) + + def create_example_h5_file(fname): with h5py.File(fname, 'w') as f: f.attrs['attr1'] = 'value1' From de96c54e18cbf7b445dad0e8117075bc59dcf7b2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 3 Jan 2026 01:02:32 +0000 Subject: [PATCH 3/4] Fix linting issues - remove trailing whitespace Co-authored-by: rly <310197+rly@users.noreply.github.com> --- tests/test_lindi_h5py_file.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test_lindi_h5py_file.py b/tests/test_lindi_h5py_file.py index b8058c0..ea4e4ba 100644 --- a/tests/test_lindi_h5py_file.py +++ b/tests/test_lindi_h5py_file.py @@ -361,22 +361,22 @@ def test_compound_dtype_slicing(): """Test that compound dtype datasets support numeric slicing (e.g., [:], [0], [0:2])""" import numpy as np import zarr - + with tempfile.TemporaryDirectory() as tmpdir: dirname = f'{tmpdir}/test.zarr' store = zarr.DirectoryStore(dirname) zarr.group(store=store) - + # Write compound dataset with lindi.LindiH5pyFile.from_zarr_store(store, mode='r+') as h5f: compound_dtype = np.dtype([('x', np.int32), ('y', np.float64)]) data = np.array([(1, 2.2), (3, 4.4), (5, 6.6)], dtype=compound_dtype) h5f.create_dataset('dset_compound', data=data) - + # Read and test slicing with lindi.LindiH5pyFile.from_zarr_store(store, mode='r') as h5f: dset = h5f['dset_compound'] - + # Test full slice result = dset[:] assert result.shape == (3,) @@ -385,25 +385,25 @@ def test_compound_dtype_slicing(): assert result[0]['y'] == 2.2 assert result[1]['x'] == 3 assert result[1]['y'] == 4.4 - + # Test single element access result = dset[0] assert isinstance(result, np.void) assert result['x'] == 1 assert result['y'] == 2.2 - + # Test partial slice result = dset[0:2] assert result.shape == (2,) assert result.dtype == compound_dtype assert result[0]['x'] == 1 assert result[1]['x'] == 3 - + # Test negative indexing result = dset[-1] assert result['x'] == 5 assert result['y'] == 6.6 - + # Test field selection (should still work) result = dset['x'][:] assert result.shape == (3,) From 1b04ba3c3e98b4b05dd90251f66b4909f3de8886 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 3 Jan 2026 01:05:12 +0000 Subject: [PATCH 4/4] Address code review feedback - improve type handling and imports Co-authored-by: rly <310197+rly@users.noreply.github.com> --- lindi/LindiH5pyFile/LindiH5pyDataset.py | 11 ++++++++--- tests/test_lindi_h5py_file.py | 4 ++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/lindi/LindiH5pyFile/LindiH5pyDataset.py b/lindi/LindiH5pyFile/LindiH5pyDataset.py index f017d4c..e824a3d 100644 --- a/lindi/LindiH5pyFile/LindiH5pyDataset.py +++ b/lindi/LindiH5pyFile/LindiH5pyDataset.py @@ -220,11 +220,16 @@ def _get_item_for_zarr(self, zarr_array: zarr.Array, selection: Any): # Convert to structured array with compound dtype if isinstance(raw_data, np.ndarray): # Multiple elements - convert each list to tuple - tuples = [tuple(item) for item in raw_data] + tuples = [tuple(item) if isinstance(item, (list, np.ndarray)) else item for item in raw_data] return np.array(tuples, dtype=self._compound_dtype) - else: - # Single element - raw_data is a list + elif isinstance(raw_data, (list, tuple)): + # Single element - raw_data is a list or tuple return np.array(tuple(raw_data), dtype=self._compound_dtype)[()] + else: + # Unexpected type - this shouldn't happen, but handle gracefully + raise TypeError( + f"Unexpected data type from zarr array for compound dataset {self.name}: {type(raw_data)}" + ) # We use zarr's slicing, except in the case of a scalar dataset if self.ndim == 0: diff --git a/tests/test_lindi_h5py_file.py b/tests/test_lindi_h5py_file.py index ea4e4ba..17a13f5 100644 --- a/tests/test_lindi_h5py_file.py +++ b/tests/test_lindi_h5py_file.py @@ -2,6 +2,8 @@ import os import pytest import h5py +import numpy as np +import zarr import lindi from .utils import assert_h5py_files_equal @@ -359,8 +361,6 @@ def test_create_dataset(): def test_compound_dtype_slicing(): """Test that compound dtype datasets support numeric slicing (e.g., [:], [0], [0:2])""" - import numpy as np - import zarr with tempfile.TemporaryDirectory() as tmpdir: dirname = f'{tmpdir}/test.zarr'