Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions lindi/LindiH5pyFile/LindiH5pyDataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,9 +214,22 @@ def _get_item_for_zarr(self, zarr_array: zarr.Array, selection: Any):
)
return ret
else:
raise TypeError(
f"Compound dataset {self.name} does not support selection with {selection}"
)
# Numeric slicing (e.g., [:], [0], [0:2])
# Get the raw data from zarr (will be object array of lists)
raw_data = zarr_array[selection]
# Convert to structured array with compound dtype
if isinstance(raw_data, np.ndarray):
# Multiple elements - convert each list to tuple
tuples = [tuple(item) if isinstance(item, (list, np.ndarray)) else item for item in raw_data]
return np.array(tuples, dtype=self._compound_dtype)
elif isinstance(raw_data, (list, tuple)):
# Single element - raw_data is a list or tuple
return np.array(tuple(raw_data), dtype=self._compound_dtype)[()]
else:
# Unexpected type - this shouldn't happen, but handle gracefully
raise TypeError(
f"Unexpected data type from zarr array for compound dataset {self.name}: {type(raw_data)}"
)

# We use zarr's slicing, except in the case of a scalar dataset
if self.ndim == 0:
Expand Down
53 changes: 53 additions & 0 deletions tests/test_lindi_h5py_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import os
import pytest
import h5py
import numpy as np
import zarr
import lindi
from .utils import assert_h5py_files_equal

Expand Down Expand Up @@ -357,6 +359,57 @@ def test_create_dataset():
assert ds.shape == (3,)


def test_compound_dtype_slicing():
    """Verify numeric indexing on a compound-dtype dataset read back via lindi.

    A three-record structured array (int32 field ``x``, float64 field ``y``)
    is written with ``create_dataset`` to a ``LindiH5pyFile`` backed by a zarr
    ``DirectoryStore``. The store is then reopened read-only and the dataset
    is indexed with ``[:]``, ``[0]``, ``[0:2]``, ``[-1]`` and by field name.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        zarr_path = f'{scratch_dir}/test.zarr'
        zarr_store = zarr.DirectoryStore(zarr_path)
        # The root group must exist before the store is opened through lindi.
        zarr.group(store=zarr_store)

        record_dtype = np.dtype([('x', np.int32), ('y', np.float64)])

        # --- write phase ---------------------------------------------------
        with lindi.LindiH5pyFile.from_zarr_store(zarr_store, mode='r+') as writer:
            records = np.array(
                [(1, 2.2), (3, 4.4), (5, 6.6)],
                dtype=record_dtype,
            )
            writer.create_dataset('dset_compound', data=records)

        # --- read phase ----------------------------------------------------
        with lindi.LindiH5pyFile.from_zarr_store(zarr_store, mode='r') as reader:
            compound_ds = reader['dset_compound']

            # Full slice: a structured array holding every record.
            everything = compound_ds[:]
            assert everything.shape == (3,)
            assert everything.dtype == record_dtype
            for idx, (want_x, want_y) in enumerate(((1, 2.2), (3, 4.4))):
                assert everything[idx]['x'] == want_x
                assert everything[idx]['y'] == want_y

            # Integer index: a single record comes back as a numpy void scalar.
            first = compound_ds[0]
            assert isinstance(first, np.void)
            assert first['x'] == 1
            assert first['y'] == 2.2

            # Partial slice: only the first two records.
            head = compound_ds[0:2]
            assert head.shape == (2,)
            assert head.dtype == record_dtype
            for idx, want_x in enumerate((1, 3)):
                assert head[idx]['x'] == want_x

            # Negative index: counts from the end, so -1 is the last record.
            last = compound_ds[-1]
            assert last['x'] == 5
            assert last['y'] == 6.6

            # Field selection by name must keep working alongside slicing.
            x_values = compound_ds['x'][:]
            assert x_values.shape == (3,)
            assert np.array_equal(x_values, [1, 3, 5])


def create_example_h5_file(fname):
with h5py.File(fname, 'w') as f:
f.attrs['attr1'] = 'value1'
Expand Down