From 399a2416bd50be0311ac7e83134371688793e23c Mon Sep 17 00:00:00 2001 From: Tim Monko Date: Fri, 3 Apr 2026 22:26:47 -0500 Subject: [PATCH 01/10] WIP: memory checks, chunking, and new setting --- src/ndevio/ndev_settings.yaml | 13 ++- src/ndevio/nimage.py | 53 ++++++++-- src/ndevio/utils/_layer_utils.py | 116 ++++++++++++++++---- tests/test_nimage.py | 152 +++++++++++++++++++++++++++ tests/test_utils/test_layer_utils.py | 105 +++++++++++++++--- 5 files changed, 397 insertions(+), 42 deletions(-) diff --git a/src/ndevio/ndev_settings.yaml b/src/ndevio/ndev_settings.yaml index b4c567f..ae77bfc 100644 --- a/src/ndevio/ndev_settings.yaml +++ b/src/ndevio/ndev_settings.yaml @@ -1,7 +1,7 @@ ndevio_reader: suggest_reader_plugins: default: true - tooltip: Whether to suggest plugins to install when no reader can be found + tooltip: Whether to suggest plugins to install when no reader can be found. value: true preferred_reader: default: null @@ -16,12 +16,19 @@ ndevio_reader: - View All Scenes - View First Scene Only default: Open Scene Widget - tooltip: How to handle files with multiple scenes + tooltip: How to handle files with multiple scenes. value: Open Scene Widget clear_layers_on_new_scene: default: false - tooltip: Whether to clear the viewer when selecting a new scene + tooltip: Whether to clear the viewer when selecting a new scene. value: false + max_in_mem_gb: + default: 8.0 + min: 0.5 + max: 128.0 + step: 0.5 + tooltip: Maximum uncompressed image size in GB to load eagerly. Larger images use dask. + value: 8.0 ndevio_export: canvas_scale: diff --git a/src/ndevio/nimage.py b/src/ndevio/nimage.py index 38616d3..ab576bb 100644 --- a/src/ndevio/nimage.py +++ b/src/ndevio/nimage.py @@ -140,23 +140,45 @@ def __init__( if isinstance(image, str): image = image.rstrip('/') + # Default to per-plane chunks so each Z/T slice is a separate dask + # task (~one TIFF page) rather than the entire ZYX volume. 
+ # bioio's DEFAULT_CHUNK_DIMS includes Z, meaning each (T,C) pair is + # one giant task and every Z-slice navigation decompresses the full + # volume. ["Y", "X"] gives O(Z) tasks with ~1 page per compute. + kwargs.setdefault('chunk_dims', ['Y', 'X']) + # Pre-compute kwargs without chunk_dims for readers that reject it + # (e.g. bioio-ome-zarr, bioio-czi's internal renderer). + _kw_no_chunks = {k: v for k, v in kwargs.items() if k != 'chunk_dims'} + resolved_reader = _resolve_reader(image, reader) + def _init_base(rdr, kw): + """Call BioImage.__init__, retrying without chunk_dims on TypeError.""" + try: + BioImage.__init__(self, image=image, reader=rdr, **kw) + except TypeError as exc: + if 'chunk_dims' in str(exc): + BioImage.__init__( + self, image=image, reader=rdr, **_kw_no_chunks + ) + else: + raise + # Try preferred/explicit reader first, fall back to bioio default if resolved_reader is not None: try: - super().__init__(image=image, reader=resolved_reader, **kwargs) + _init_base(resolved_reader, kwargs) except UnsupportedFileFormatError: # Preferred reader failed, fall back to bioio's default try: - super().__init__(image=image, reader=None, **kwargs) + _init_base(None, kwargs) except UnsupportedFileFormatError: if isinstance(image, str | Path): raise_unsupported_with_suggestions(image) raise else: try: - super().__init__(image=image, reader=None, **kwargs) + _init_base(None, kwargs) except UnsupportedFileFormatError: if isinstance(image, str | Path): raise_unsupported_with_suggestions(image) @@ -216,7 +238,12 @@ def reference_xarray(self) -> xr.DataArray: # Ensure we're at the highest-res level for metadata consistency current_res = self.current_resolution_level self.set_resolution_level(0) - if self._is_remote or not determine_in_memory(self.path): + import math + + uncompressed_bytes = math.prod(self.shape) * self.dtype.itemsize + if self._is_remote or not determine_in_memory( + self.path, uncompressed_bytes=uncompressed_bytes + ): self._reference_xarray = 
self.xarray_dask_data.squeeze() else: self._reference_xarray = self.xarray_data.squeeze() @@ -258,13 +285,19 @@ def _build_layer_data(self) -> list: ref = self.reference_xarray keep_dims = set(ref.dims) + # ref.nbytes is the squeezed level-0 array size — what napari loads. + # Using this instead of math.prod(self.shape)*dtype.itemsize avoids + # the level-confusion risk and correctly reflects multichannel size. + uncompressed_bytes = ref.nbytes arrays: list = [] for level in levels: self.set_resolution_level(level) if ( multiscale or self._is_remote - or not determine_in_memory(self.path) + or not determine_in_memory( + self.path, uncompressed_bytes=uncompressed_bytes + ) ): xr_data = self.xarray_dask_data else: @@ -562,7 +595,10 @@ def get_layer_data_tuples( if channel_dim not in ref.dims: channel_name = self.channel_names[0] effective_type = resolve_layer_type( - channel_name or '', layer_type, channel_types + channel_name or '', + layer_type, + channel_types, + path_stem=self.path_stem, ) extra_kwargs = ( channel_kwargs.get(channel_name) @@ -591,7 +627,10 @@ def get_layer_data_tuples( for i in range(total_channels): channel_name = channel_names[i] effective_type = resolve_layer_type( - channel_name, layer_type, channel_types + channel_name, + layer_type, + channel_types, + path_stem=self.path_stem, ) # Slice along channel axis for each resolution level diff --git a/src/ndevio/utils/_layer_utils.py b/src/ndevio/utils/_layer_utils.py index d309e0e..01190a4 100644 --- a/src/ndevio/utils/_layer_utils.py +++ b/src/ndevio/utils/_layer_utils.py @@ -12,10 +12,31 @@ logger = logging.getLogger(__name__) # Keywords that indicate a channel contains labels/segmentation data -LABEL_KEYWORDS = frozenset({'label', 'mask', 'segmentation', 'seg', 'roi'}) +CHANNEL_LABEL_KEYWORDS = frozenset( + { + 'label', + 'mask', + 'seg', + 'segmentation', + 'annotation', + 'roi', + 'region', + 'instance', + 'objects', + } +) +FILE_LABEL_KEYWORDS = frozenset( + { + 'label', + 'mask', + 
'segmentation', + 'instance', + 'objects', + } +) -def infer_layer_type(channel_name: str) -> str: +def infer_channel_layer_type(channel_name: str) -> str: """Infer layer type from channel name keywords. Parameters @@ -30,15 +51,45 @@ def infer_layer_type(channel_name: str) -> str: Examples -------- - >>> infer_layer_type('nuclei_mask') + >>> infer_channel_layer_type('nuclei_mask') 'labels' - >>> infer_layer_type('DAPI') + >>> infer_channel_layer_type('DAPI') 'image' """ name_lower = channel_name.lower() return ( - 'labels' if any(kw in name_lower for kw in LABEL_KEYWORDS) else 'image' + 'labels' + if any(kw in name_lower for kw in CHANNEL_LABEL_KEYWORDS) + else 'image' + ) + + +def infer_file_label_type(path_stem: str) -> str: + """Infer layer type from filename stem keywords. + + Parameters + ---------- + path_stem : str + The filename stem (no extension) to check. + + Returns + ------- + str + 'labels' if path_stem contains a label keyword, else 'image'. + Examples + -------- + >>> infer_file_label_type('cells_segmentation') + 'labels' + >>> infer_file_label_type('experiment1') + 'image' + + """ + name_lower = path_stem.lower() + return ( + 'labels' + if any(kw in name_lower for kw in FILE_LABEL_KEYWORDS) + else 'image' ) @@ -46,9 +97,14 @@ def resolve_layer_type( channel_name: str, global_override: str | None, channel_types: dict[str, str] | None, + path_stem: str | None = None, ) -> str: """Resolve layer type: global override > per-channel > auto-detect. + Auto-detection checks the channel name first, then falls back to the + filename stem so that files named e.g. ``cells_mask.tif`` are detected + as ``'labels'`` even when the channel name is a generic ``'0'``. + Parameters ---------- channel_name : str @@ -57,6 +113,9 @@ def resolve_layer_type( If set, this layer type is used for all channels. channel_types : dict[str, str] | None Per-channel layer type mapping. 
+ path_stem : str | None + Filename stem (no extension) used as a fallback when the channel + name does not contain label keywords. Returns ------- @@ -68,12 +127,17 @@ def resolve_layer_type( return global_override if channel_types and channel_name in channel_types: return channel_types[channel_name] - return infer_layer_type(channel_name) + if infer_channel_layer_type(channel_name) == 'labels': + return 'labels' + if path_stem is not None: + return infer_file_label_type(path_stem) + return 'image' def determine_in_memory( path: str | None, - max_in_mem_bytes: float = 4e9, + uncompressed_bytes: int | None = None, + max_in_mem_bytes: float | None = None, max_in_mem_percent: float = 0.3, ) -> bool: """Determine whether to load image data in memory or as dask array. @@ -82,11 +146,17 @@ def determine_in_memory( ---------- path : str | None Path to the image file as a string. If None (array data), returns True. - max_in_mem_bytes : float - Maximum file size in bytes for in-memory loading. - Default is 4 GB (4e9 bytes). + uncompressed_bytes : int | None + Expected in-memory size in bytes (``shape.prod() * dtype.itemsize``). + When provided this is used instead of the on-disk file size, which + can be far smaller for compressed formats (e.g. LZW-compressed int32 + TIFF). When None the on-disk size reported by the filesystem is used. + max_in_mem_bytes : float | None + Maximum size in bytes for in-memory loading. + If None (default), reads from the ``ndevio_reader.max_in_mem_gb`` + setting, falling back to 8 GB (8e9 bytes). max_in_mem_percent : float - Maximum percentage of available memory for in-memory loading. + Maximum fraction of available memory for in-memory loading. Default is 30%. Returns @@ -95,20 +165,30 @@ def determine_in_memory( True if image should be loaded in memory, False for dask array. 
""" - from bioio_base.io import pathlike_to_fs - from psutil import virtual_memory - # No file path means array data - always in memory if path is None: return True - fs, path_str = pathlike_to_fs(path) - filesize: int = fs.size(path_str) # type: ignore[assignment] + if max_in_mem_bytes is None: + from ndev_settings import get_settings + + max_in_mem_bytes = get_settings().ndevio_reader.max_in_mem_gb * 1e9 + + from psutil import virtual_memory + available_mem = virtual_memory().available + if uncompressed_bytes is not None: + check_bytes = uncompressed_bytes + else: + from bioio_base.io import pathlike_to_fs + + fs, path_str = pathlike_to_fs(path) + check_bytes = fs.size(path_str) # type: ignore[assignment] + return ( - filesize <= max_in_mem_bytes - and filesize < max_in_mem_percent * available_mem + check_bytes <= max_in_mem_bytes + and check_bytes < max_in_mem_percent * available_mem ) diff --git a/tests/test_nimage.py b/tests/test_nimage.py index e2ea657..08a4e21 100644 --- a/tests/test_nimage.py +++ b/tests/test_nimage.py @@ -798,3 +798,155 @@ def test_explicit_reader_fails_falls_back(self, resources_dir: Path): assert img is not None # Should have fallen back to bioio's default assert img.reader.name == 'bioio_ome_tiff' + + +# ============================================================================= +# Regression tests: compressed files and filename-based label detection +# ============================================================================= + + +def test_compressed_int32_tiff_uses_dask(tmp_path: Path): + """Regression: a compressed int32 TIFF must be loaded as dask even when + its on-disk size is well below the in-memory threshold. + + An 18.9 MB LZW-compressed int32 file expands to ~3 GB in RAM. + The old code compared the compressed *filesystem* size against the + threshold; a 19 MB file would always pass and be loaded eagerly. + The fix computes uncompressed_bytes = prod(shape) * dtype.itemsize and + uses that instead. 
+ """ + import math + + import numpy as np + import tifffile + + # All-zeros data compresses to near-nothing with LZW: small, quick write. + # Shape gives ~288 MB uncompressed. We mock available RAM to 500 MB so + # that 30% = 150 MB < 288 MB, which forces dask regardless of threshold. + # Without the uncompressed_bytes fix, disk_size (~KB) would be used and + # the tiny file would be loaded eagerly. + shape = (200, 600, 600) + + path = tmp_path / 'big_uncompressed.tif' + tifffile.imwrite( + str(path), np.zeros(shape, dtype=np.int32), compression='lzw' + ) + + disk_size = path.stat().st_size + uncompressed = math.prod(shape) * np.dtype(np.int32).itemsize + assert disk_size < uncompressed // 100, ( + 'test precondition: compressed file must be tiny vs uncompressed' + ) + + import dask.array as da + + # Mock RAM so the memory-fraction check forces dask (288 MB > 30% of 500 MB). + # This isolates the test from machine memory and makes it deterministic. + with mock.patch( + 'psutil.virtual_memory', return_value=mock.Mock(available=int(500e6)) + ): + img = nImage(path) + + assert isinstance(img.reference_xarray.data, da.Array), ( + f'Expected dask array, got {type(img.reference_xarray.data)}' + ) + + tuples = img.get_layer_data_tuples() + assert len(tuples) == 1 + data_out, _, _ = tuples[0] + assert isinstance(data_out, list) + assert isinstance(data_out[0], da.Array), ( + f'Expected dask array in layer tuple, got {type(data_out[0])}' + ) + + +def test_labels_detected_from_filename(tmp_path: Path): + """Regression: a TIFF file whose channel name is a generic '0' but whose + filename contains a label keyword (e.g. 'cells_mask.tif') should be + returned as a 'labels' layer, not 'image'. + + Previously only the channel name was checked; now the filename stem is + used as a fallback when the channel name provides no signal. 
+ """ + import numpy as np + import tifffile + + # Single-channel int32 TIFF — channel name will be '0' (no label keyword) + data = np.random.randint(0, 10, (10, 10), dtype=np.int32) + path = tmp_path / 'cells_mask.tif' + tifffile.imwrite(str(path), data) + + img = nImage(path) + # Verify the channel name is generic (no label keyword) + channel_name = img.channel_names[0] + from ndevio.utils._layer_utils import CHANNEL_LABEL_KEYWORDS + + assert not any( + kw in channel_name.lower() for kw in CHANNEL_LABEL_KEYWORDS + ), f'Channel name {channel_name!r} unexpectedly contains a label keyword' + + tuples = img.get_layer_data_tuples() + assert len(tuples) == 1 + _, _, layer_type = tuples[0] + assert layer_type == 'labels', ( + f"Expected 'labels' from filename 'cells_mask.tif', got {layer_type!r}" + ) + + +def test_non_label_filename_stays_image(tmp_path: Path): + """Counter-test: a TIFF named 'raw_image.tif' with generic channel name + should remain an 'image' layer, not be promoted to 'labels'. + """ + import numpy as np + import tifffile + + data = np.zeros((10, 10), dtype=np.uint16) + path = tmp_path / 'raw_image.tif' + tifffile.imwrite(str(path), data) + + img = nImage(path) + tuples = img.get_layer_data_tuples() + assert len(tuples) == 1 + _, _, layer_type = tuples[0] + assert layer_type == 'image' + + +def test_dask_chunks_are_per_plane(tmp_path: Path): + """Verify that dask-loaded TIFFs have per-Z-plane chunks (not the whole volume). + + bioio-base's DEFAULT_CHUNK_DIMS = ["Z","Y","X"] creates one dask task per + (T,C) pair — every Z-slice decompresses the full ZYX volume. nImage + overrides this with chunk_dims=["Y","X"] so each task is a single page. + + For a (Z=8, Y=64, X=64) file the resulting dask array should have chunks + (1, 64, 64), not (8, 64, 64). + """ + import dask.array as da + import numpy as np + import tifffile + + # 100 planes × 64 × 64 × uint16 = 800 KB uncompressed. + # With 1 MB available, 30 % = 300 KB < 800 KB → forced to dask. 
+ shape = (100, 64, 64) # Z, Y, X + path = tmp_path / 'zyx_chunk_test.tiff' + tifffile.imwrite(str(path), np.zeros(shape, dtype=np.uint16)) + + # Force dask: mock available RAM so the memory-fraction check triggers. + with mock.patch( + 'psutil.virtual_memory', return_value=mock.Mock(available=int(1e6)) + ): + img = nImage(path) + tuples = img.get_layer_data_tuples() + + data_out, _, _ = tuples[0] + # layer_data is always a list (multiscale-compatible); [0] is level 0. + arr = data_out[0] + assert isinstance(arr, da.Array), f'Expected dask array, got {type(arr)}' + + z_chunk, y_chunk, x_chunk = arr.chunksize + assert z_chunk == 1, ( + f'Expected Z-chunk=1 (per-plane), got {z_chunk}. ' + 'chunk_dims override to ["Y","X"] may not be working.' + ) + assert y_chunk == 64 + assert x_chunk == 64 diff --git a/tests/test_utils/test_layer_utils.py b/tests/test_utils/test_layer_utils.py index 86710e3..e6ffc7a 100644 --- a/tests/test_utils/test_layer_utils.py +++ b/tests/test_utils/test_layer_utils.py @@ -10,29 +10,29 @@ class TestInferLayerType: def test_label_keyword_returns_labels(self): """Test that label keywords are detected.""" - from ndevio.utils._layer_utils import infer_layer_type + from ndevio.utils._layer_utils import infer_channel_layer_type - assert infer_layer_type('nuclei_mask') == 'labels' - assert infer_layer_type('cell_labels') == 'labels' - assert infer_layer_type('segmentation') == 'labels' - assert infer_layer_type('SEG_channel') == 'labels' - assert infer_layer_type('roi_data') == 'labels' + assert infer_channel_layer_type('nuclei_mask') == 'labels' + assert infer_channel_layer_type('cell_labels') == 'labels' + assert infer_channel_layer_type('segmentation') == 'labels' + assert infer_channel_layer_type('SEG_channel') == 'labels' + assert infer_channel_layer_type('roi_data') == 'labels' def test_non_label_returns_image(self): """Test that non-label names return image.""" - from ndevio.utils._layer_utils import infer_layer_type + from 
ndevio.utils._layer_utils import infer_channel_layer_type - assert infer_layer_type('DAPI') == 'image' - assert infer_layer_type('GFP') == 'image' - assert infer_layer_type('membrane') == 'image' + assert infer_channel_layer_type('DAPI') == 'image' + assert infer_channel_layer_type('GFP') == 'image' + assert infer_channel_layer_type('membrane') == 'image' def test_case_insensitive(self): """Test that detection is case-insensitive.""" - from ndevio.utils._layer_utils import infer_layer_type + from ndevio.utils._layer_utils import infer_channel_layer_type - assert infer_layer_type('MASK') == 'labels' - assert infer_layer_type('Label') == 'labels' - assert infer_layer_type('SEGMENTATION') == 'labels' + assert infer_channel_layer_type('MASK') == 'labels' + assert infer_channel_layer_type('Label') == 'labels' + assert infer_channel_layer_type('SEGMENTATION') == 'labels' class TestResolveLayerType: @@ -69,6 +69,54 @@ def test_auto_detect_when_no_overrides(self): ) # Auto-detect assert resolve_layer_type('DAPI', None, None) == 'image' # Auto-detect + def test_path_stem_fallback_detects_labels(self): + """Regression: file named 'cells_mask.tif' with generic channel name + '0' should be detected as 'labels' via the path_stem fallback. 
+ """ + from ndevio.utils._layer_utils import resolve_layer_type + + assert ( + resolve_layer_type('0', None, None, path_stem='cells_mask') + == 'labels' + ) + assert ( + resolve_layer_type( + 'Channel 0', None, None, path_stem='nuclei_labels' + ) + == 'labels' + ) + assert ( + resolve_layer_type('', None, None, path_stem='segmentation_output') + == 'labels' + ) + assert resolve_layer_type('', None, None, path_stem='raw') == 'image' + + def test_path_stem_not_checked_when_channel_triggers_detection(self): + """Channel-name detection is unaffected by a non-label path_stem.""" + from ndevio.utils._layer_utils import resolve_layer_type + + assert ( + resolve_layer_type('nuclei_mask', None, None, path_stem='raw') + == 'labels' + ) + + def test_path_stem_nonlabel_image_result(self): + """Neither channel nor path_stem contains label keyword → 'image'.""" + from ndevio.utils._layer_utils import resolve_layer_type + + assert ( + resolve_layer_type('DAPI', None, None, path_stem='raw_image') + == 'image' + ) + + def test_path_stem_none_channel_nonlabel_returns_image(self): + """path_stem=None with non-label channel should still return 'image'.""" + from ndevio.utils._layer_utils import resolve_layer_type + + assert ( + resolve_layer_type('DAPI', None, None, path_stem=None) == 'image' + ) + class TestDetermineInMemory: """Tests for determine_in_memory function.""" @@ -109,6 +157,35 @@ def test_large_file_returns_false(self, tmp_path): ): assert determine_in_memory(large_file) is False + def test_uncompressed_bytes_large_overrides_small_disk_size( + self, tmp_path + ): + """Regression: compressed files (e.g. int32 TIFF) that are small on + disk but large when decompressed must trigger dask loading. + + uncompressed_bytes takes precedence over filesystem size so that + a 19 MB compressed file whose data expands to 3 GB in memory is + not eagerly loaded. 
+ """ + from ndevio.utils._layer_utils import determine_in_memory + + small_file = tmp_path / 'labels.tif' + small_file.write_bytes(b'\x00' * 100) # tiny on disk + + with mock.patch( + 'psutil.virtual_memory', return_value=mock.Mock(available=1e10) + ): + # uncompressed_bytes above threshold → dask + assert ( + determine_in_memory(small_file, uncompressed_bytes=int(5e9)) + is False + ) + # uncompressed_bytes well below threshold → in-memory + assert ( + determine_in_memory(small_file, uncompressed_bytes=1000) + is True + ) + class TestBuildLayerTuple: """Tests for build_layer_tuple function.""" From cc8e9a6e50397f283b60adeb3a0165edf965fb8f Mon Sep 17 00:00:00 2001 From: Tim Monko Date: Fri, 3 Apr 2026 23:41:55 -0500 Subject: [PATCH 02/10] bump ndev-settings --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 404a15e..a415afb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ requires-python = ">=3.11" # See best practices: https://napari.org/stable/plugins/building_a_plugin/best_practices.html dependencies = [ "napari", - "ndev-settings>=0.4.1", + "ndev-settings>=0.4.2", "nbatch>=0.0.4", "natsort", "magicgui", From 1bd97e4e8d99dc18fb7856af45909d34bf53a629 Mon Sep 17 00:00:00 2001 From: Tim Monko Date: Fri, 3 Apr 2026 23:42:17 -0500 Subject: [PATCH 03/10] drop dead code paths --- src/ndevio/utils/_layer_utils.py | 123 ++----------------------------- 1 file changed, 7 insertions(+), 116 deletions(-) diff --git a/src/ndevio/utils/_layer_utils.py b/src/ndevio/utils/_layer_utils.py index 01190a4..94f722f 100644 --- a/src/ndevio/utils/_layer_utils.py +++ b/src/ndevio/utils/_layer_utils.py @@ -36,61 +36,10 @@ ) -def infer_channel_layer_type(channel_name: str) -> str: - """Infer layer type from channel name keywords. - - Parameters - ---------- - channel_name : str - The channel name to check. - - Returns - ------- - str - 'labels' if channel_name contains a label keyword, else 'image'. 
- - Examples - -------- - >>> infer_channel_layer_type('nuclei_mask') - 'labels' - >>> infer_channel_layer_type('DAPI') - 'image' - - """ - name_lower = channel_name.lower() - return ( - 'labels' - if any(kw in name_lower for kw in CHANNEL_LABEL_KEYWORDS) - else 'image' - ) - - -def infer_file_label_type(path_stem: str) -> str: - """Infer layer type from filename stem keywords. - - Parameters - ---------- - path_stem : str - The filename stem (no extension) to check. - - Returns - ------- - str - 'labels' if path_stem contains a label keyword, else 'image'. - Examples - -------- - >>> infer_file_label_type('cells_segmentation') - 'labels' - >>> infer_file_label_type('experiment1') - 'image' - - """ - name_lower = path_stem.lower() - return ( - 'labels' - if any(kw in name_lower for kw in FILE_LABEL_KEYWORDS) - else 'image' - ) +def _contains_label_keyword(value: str, keywords: frozenset[str]) -> bool: + """Return whether a string contains any keyword in a keyword set.""" + value_lower = value.lower() + return any(keyword in value_lower for keyword in keywords) def resolve_layer_type( @@ -127,71 +76,13 @@ def resolve_layer_type( return global_override if channel_types and channel_name in channel_types: return channel_types[channel_name] - if infer_channel_layer_type(channel_name) == 'labels': + if _contains_label_keyword(channel_name, CHANNEL_LABEL_KEYWORDS): + return 'labels' + if path_stem and _contains_label_keyword(path_stem, FILE_LABEL_KEYWORDS): return 'labels' - if path_stem is not None: - return infer_file_label_type(path_stem) return 'image' -def determine_in_memory( - path: str | None, - uncompressed_bytes: int | None = None, - max_in_mem_bytes: float | None = None, - max_in_mem_percent: float = 0.3, -) -> bool: - """Determine whether to load image data in memory or as dask array. - - Parameters - ---------- - path : str | None - Path to the image file as a string. If None (array data), returns True. 
- uncompressed_bytes : int | None - Expected in-memory size in bytes (``shape.prod() * dtype.itemsize``). - When provided this is used instead of the on-disk file size, which - can be far smaller for compressed formats (e.g. LZW-compressed int32 - TIFF). When None the on-disk size reported by the filesystem is used. - max_in_mem_bytes : float | None - Maximum size in bytes for in-memory loading. - If None (default), reads from the ``ndevio_reader.max_in_mem_gb`` - setting, falling back to 8 GB (8e9 bytes). - max_in_mem_percent : float - Maximum fraction of available memory for in-memory loading. - Default is 30%. - - Returns - ------- - bool - True if image should be loaded in memory, False for dask array. - - """ - # No file path means array data - always in memory - if path is None: - return True - - if max_in_mem_bytes is None: - from ndev_settings import get_settings - - max_in_mem_bytes = get_settings().ndevio_reader.max_in_mem_gb * 1e9 - - from psutil import virtual_memory - - available_mem = virtual_memory().available - - if uncompressed_bytes is not None: - check_bytes = uncompressed_bytes - else: - from bioio_base.io import pathlike_to_fs - - fs, path_str = pathlike_to_fs(path) - check_bytes = fs.size(path_str) # type: ignore[assignment] - - return ( - check_bytes <= max_in_mem_bytes - and check_bytes < max_in_mem_percent * available_mem - ) - - def build_layer_tuple( data: ArrayLike | list[ArrayLike], *, From 086c74c6409cc7cb0c921e1bec357640c062b636 Mon Sep 17 00:00:00 2001 From: Tim Monko Date: Fri, 3 Apr 2026 23:42:48 -0500 Subject: [PATCH 04/10] refactor memory logic of nimage --- src/ndevio/nimage.py | 320 +++++++++++++++++++++++++++++++------------ 1 file changed, 232 insertions(+), 88 deletions(-) diff --git a/src/ndevio/nimage.py b/src/ndevio/nimage.py index ab576bb..3f86c69 100644 --- a/src/ndevio/nimage.py +++ b/src/ndevio/nimage.py @@ -3,21 +3,21 @@ from __future__ import annotations import logging +import math +from collections.abc import 
Sequence +from inspect import Parameter, signature from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any from bioio import BioImage from .bioio_plugins._manager import raise_unsupported_with_suggestions from .utils._layer_utils import ( build_layer_tuple, - determine_in_memory, resolve_layer_type, ) if TYPE_CHECKING: - from collections.abc import Sequence - import xarray as xr from bioio_base.reader import Reader from bioio_base.types import ImageLike @@ -26,6 +26,106 @@ logger = logging.getLogger(__name__) +def _prepare_bioimage_init_kwargs( + kwargs: dict[str, Any], +) -> tuple[dict[str, Any], dict[str, Any]]: + """Build BioImage init kwargs and a fallback without chunk_dims.""" + init_kwargs = dict(kwargs) + init_kwargs.setdefault('chunk_dims', ['Y', 'X']) + fallback_kwargs = { + key: value for key, value in init_kwargs.items() if key != 'chunk_dims' + } + return init_kwargs, fallback_kwargs + + +def _reader_supports_chunk_dims( + reader: type[Reader] | Sequence[type[Reader]] | None, +) -> bool | None: + """Return whether a reader explicitly supports the chunk_dims kwarg. + + Returns None when support cannot be known ahead of time, for example when + BioImage will determine the reader internally or a reader sequence is mixed. 
+ """ + + def _supports_single(candidate: type[Reader]) -> bool: + params = signature(candidate.__init__).parameters.values() + return any( + parameter.name == 'chunk_dims' + or parameter.kind == Parameter.VAR_KEYWORD + for parameter in params + ) + + if reader is None: + return None + if isinstance(reader, Sequence): + support = {_supports_single(candidate) for candidate in reader} + return support.pop() if len(support) == 1 else None + return _supports_single(reader) + + +def _init_bioimage_once( + instance: BioImage, + *, + image: ImageLike, + reader: type[Reader] | Sequence[type[Reader]] | None, + init_kwargs: dict[str, Any], + fallback_kwargs: dict[str, Any], +) -> None: + """Initialize BioImage with the best available chunk_dims strategy.""" + chunk_support = _reader_supports_chunk_dims(reader) + if chunk_support is False: + BioImage.__init__( + instance, image=image, reader=reader, **fallback_kwargs + ) + return + + if chunk_support is True: + BioImage.__init__(instance, image=image, reader=reader, **init_kwargs) + return + + try: + BioImage.__init__(instance, image=image, reader=reader, **init_kwargs) + except TypeError as exc: + if 'chunk_dims' not in str(exc): + raise + BioImage.__init__( + instance, image=image, reader=reader, **fallback_kwargs + ) + + +def _initialize_bioimage( + instance: BioImage, + *, + image: ImageLike, + resolved_reader: type[Reader] | Sequence[type[Reader]] | None, + init_kwargs: dict[str, Any], + fallback_kwargs: dict[str, Any], +) -> None: + """Initialize BioImage with preferred-reader fallback to default.""" + from bioio_base.exceptions import UnsupportedFileFormatError + + if resolved_reader is not None: + try: + _init_bioimage_once( + instance, + image=image, + reader=resolved_reader, + init_kwargs=init_kwargs, + fallback_kwargs=fallback_kwargs, + ) + return + except UnsupportedFileFormatError: + pass + + _init_bioimage_once( + instance, + image=image, + reader=None, + init_kwargs=init_kwargs, + 
fallback_kwargs=fallback_kwargs, + ) + + def _resolve_reader( image: ImageLike, explicit_reader: type[Reader] | Sequence[type[Reader]] | None, @@ -125,6 +225,8 @@ class nImage(BioImage): _is_remote: bool _reference_xarray: xr.DataArray | None _layer_data: list | None + _level0_uncompressed_bytes: int | None + _should_load_in_memory_cache: bool | None def __init__( self, @@ -145,66 +247,28 @@ def __init__( # bioio's DEFAULT_CHUNK_DIMS includes Z, meaning each (T,C) pair is # one giant task and every Z-slice navigation decompresses the full # volume. ["Y", "X"] gives O(Z) tasks with ~1 page per compute. - kwargs.setdefault('chunk_dims', ['Y', 'X']) - # Pre-compute kwargs without chunk_dims for readers that reject it - # (e.g. bioio-ome-zarr, bioio-czi's internal renderer). - _kw_no_chunks = {k: v for k, v in kwargs.items() if k != 'chunk_dims'} - + init_kwargs, fallback_kwargs = _prepare_bioimage_init_kwargs(kwargs) resolved_reader = _resolve_reader(image, reader) - def _init_base(rdr, kw): - """Call BioImage.__init__, retrying without chunk_dims on TypeError.""" - try: - BioImage.__init__(self, image=image, reader=rdr, **kw) - except TypeError as exc: - if 'chunk_dims' in str(exc): - BioImage.__init__( - self, image=image, reader=rdr, **_kw_no_chunks - ) - else: - raise - - # Try preferred/explicit reader first, fall back to bioio default - if resolved_reader is not None: - try: - _init_base(resolved_reader, kwargs) - except UnsupportedFileFormatError: - # Preferred reader failed, fall back to bioio's default - try: - _init_base(None, kwargs) - except UnsupportedFileFormatError: - if isinstance(image, str | Path): - raise_unsupported_with_suggestions(image) - raise - else: - try: - _init_base(None, kwargs) - except UnsupportedFileFormatError: - if isinstance(image, str | Path): - raise_unsupported_with_suggestions(image) - raise + try: + _initialize_bioimage( + self, + image=image, + resolved_reader=resolved_reader, + init_kwargs=init_kwargs, + 
fallback_kwargs=fallback_kwargs, + ) + except UnsupportedFileFormatError: + if isinstance(image, str | Path): + raise_unsupported_with_suggestions(image) + raise # Instance state self._reference_xarray = None self._layer_data = None - if isinstance(image, str | Path): - import fsspec - from fsspec.implementations.local import LocalFileSystem - - s = str(image) - fs, resolved = fsspec.url_to_fs(s) - if isinstance(fs, LocalFileSystem): - # Normalise file:// URIs and any platform variations to an - # OS-native path string so Path(self.path) always round-trips. - self.path = str(Path(resolved)) - self._is_remote = False - else: - # Remote URI (s3://, https://, gc://, …) — keep verbatim. - self.path = s - self._is_remote = True - else: - self.path = None - self._is_remote = False + self._level0_uncompressed_bytes = None + self._should_load_in_memory_cache = None + self._initialize_source_state(image) # Any compatibility warnings for old formats should be emitted at this point # Cheaply check without imports by looking at the reader's module name @@ -215,6 +279,100 @@ def _init_base(rdr, kw): apply_ome_zarr_compat_patches(self.reader) + def _initialize_source_state(self, image: ImageLike) -> None: + """Populate local path/remote state from the original image input.""" + if not isinstance(image, str | Path): + self.path = None + self._is_remote = False + return + + import fsspec + from fsspec.implementations.local import LocalFileSystem + + source = str(image) + fs, resolved = fsspec.url_to_fs(source) + if isinstance(fs, LocalFileSystem): + self.path = str(Path(resolved)) + self._is_remote = False + return + + self.path = source + self._is_remote = True + + def _max_in_memory_bytes(self) -> float: + """Return the configured eager-loading threshold in bytes.""" + from ndev_settings import get_settings + + settings = get_settings() + reader_settings = settings.ndevio_reader # type: ignore[attr-defined] + max_in_mem_gb = getattr(reader_settings, 'max_in_mem_gb', 8.0) + 
return float(max_in_mem_gb) * 1e9 + + def _fits_in_memory( + self, + *, + uncompressed_bytes: int | None = None, + max_in_mem_percent: float = 0.3, + ) -> bool: + """Return whether this image should be loaded eagerly.""" + if self.path is None: + return True + + from psutil import virtual_memory + + available_mem = int(virtual_memory().available) + + if uncompressed_bytes is not None: + check_bytes = int(uncompressed_bytes) + else: + from bioio_base.io import pathlike_to_fs + + fs, path_str = pathlike_to_fs(self.path) + file_size = fs.size(path_str) + assert file_size is not None + check_bytes = int(file_size) + + max_in_mem_bytes = self._max_in_memory_bytes() + return ( + check_bytes <= max_in_mem_bytes + and check_bytes < max_in_mem_percent * available_mem + ) + + def _get_level0_uncompressed_bytes(self) -> int: + """Return the highest-resolution uncompressed byte size.""" + if self._level0_uncompressed_bytes is None: + current_res = self.current_resolution_level + self.set_resolution_level(0) + try: + self._level0_uncompressed_bytes = ( + math.prod(self.shape) * self.dtype.itemsize + ) + finally: + self.set_resolution_level(current_res) + return self._level0_uncompressed_bytes + + def _should_load_in_memory(self) -> bool: + """Return the cached single-resolution load decision.""" + if self._should_load_in_memory_cache is None: + self._should_load_in_memory_cache = self._fits_in_memory( + uncompressed_bytes=self._get_level0_uncompressed_bytes(), + ) + return self._should_load_in_memory_cache + + def _use_dask(self, *, multiscale: bool = False) -> bool: + """Return whether the current access path should stay dask-backed.""" + return ( + multiscale or self._is_remote or not self._should_load_in_memory() + ) + + def _get_xarray_for_current_level( + self, *, multiscale: bool = False + ) -> xr.DataArray: + """Return the xarray backend to use at the current resolution level.""" + if self._use_dask(multiscale=multiscale): + return self.xarray_dask_data + return 
self.xarray_data + @property def reference_xarray(self) -> xr.DataArray: """Image data as xarray DataArray for metadata determination. @@ -238,16 +396,12 @@ def reference_xarray(self) -> xr.DataArray: # Ensure we're at the highest-res level for metadata consistency current_res = self.current_resolution_level self.set_resolution_level(0) - import math - - uncompressed_bytes = math.prod(self.shape) * self.dtype.itemsize - if self._is_remote or not determine_in_memory( - self.path, uncompressed_bytes=uncompressed_bytes - ): - self._reference_xarray = self.xarray_dask_data.squeeze() - else: - self._reference_xarray = self.xarray_data.squeeze() - self.set_resolution_level(current_res) + try: + self._reference_xarray = self._get_xarray_for_current_level( + multiscale=len(self.resolution_levels) > 1 + ).squeeze() + finally: + self.set_resolution_level(current_res) return self._reference_xarray @property @@ -277,6 +431,7 @@ def _build_layer_data(self) -> list: current_res = self.current_resolution_level levels = self.resolution_levels multiscale = len(levels) > 1 + use_dask = self._use_dask(multiscale=multiscale) # Determine which dims to keep from level 0's squeezed metadata. # Using isel instead of squeeze ensures all levels have @@ -285,28 +440,17 @@ def _build_layer_data(self) -> list: ref = self.reference_xarray keep_dims = set(ref.dims) - # ref.nbytes is the squeezed level-0 array size — what napari loads. - # Using this instead of math.prod(self.shape)*dtype.itemsize avoids - # the level-confusion risk and correctly reflects multichannel size. 
- uncompressed_bytes = ref.nbytes arrays: list = [] - for level in levels: - self.set_resolution_level(level) - if ( - multiscale - or self._is_remote - or not determine_in_memory( - self.path, uncompressed_bytes=uncompressed_bytes + try: + for level in levels: + self.set_resolution_level(level) + xr_data = ( + self.xarray_dask_data if use_dask else self.xarray_data ) - ): - xr_data = self.xarray_dask_data - else: - xr_data = self.xarray_data - - indexer = {d: 0 for d in xr_data.dims if d not in keep_dims} - arrays.append(xr_data.isel(indexer).data) - - self.set_resolution_level(current_res) + indexer = {d: 0 for d in xr_data.dims if d not in keep_dims} + arrays.append(xr_data.isel(indexer).data) + finally: + self.set_resolution_level(current_res) return arrays @property From e4c2d6ee6cb49dc114786ff428e1a87c40191383 Mon Sep 17 00:00:00 2001 From: Tim Monko Date: Fri, 3 Apr 2026 23:43:29 -0500 Subject: [PATCH 05/10] test new logic paths --- tests/test_nimage.py | 85 +++++++++++++++++++++ tests/test_utils/test_layer_utils.py | 109 ++------------------------- 2 files changed, 93 insertions(+), 101 deletions(-) diff --git a/tests/test_nimage.py b/tests/test_nimage.py index 08a4e21..fee81cc 100644 --- a/tests/test_nimage.py +++ b/tests/test_nimage.py @@ -800,6 +800,91 @@ def test_explicit_reader_fails_falls_back(self, resources_dir: Path): assert img.reader.name == 'bioio_ome_tiff' +class TestDetermineInMemory: + """Tests for nImage memory-loading policy.""" + + @staticmethod + def _make_image(path): + img = object.__new__(nImage) + img.path = None if path is None else str(path) + img._is_remote = False + return img + + def test_none_path_returns_true(self): + """Array-backed inputs should stay in memory.""" + assert self._make_image(None)._fits_in_memory() is True + + def test_small_file_returns_true(self, tmp_path): + """Small files should be loaded eagerly.""" + small_file = tmp_path / 'small.txt' + small_file.write_text('x' * 100) + + with mock.patch( + 
'psutil.virtual_memory', return_value=mock.Mock(available=1e10) + ): + assert self._make_image(small_file)._fits_in_memory() is True + + def test_large_file_returns_false(self, tmp_path): + """Large files should stay dask-backed.""" + large_file = tmp_path / 'large.txt' + large_file.write_text('x') + + with ( + mock.patch( + 'psutil.virtual_memory', return_value=mock.Mock(available=1e9) + ), + mock.patch( + 'bioio_base.io.pathlike_to_fs', + return_value=(mock.Mock(size=lambda x: 5e9), ''), + ), + ): + assert self._make_image(large_file)._fits_in_memory() is False + + def test_uncompressed_bytes_large_overrides_small_disk_size( + self, tmp_path + ): + """Compressed files should be judged by RAM footprint when known.""" + small_file = tmp_path / 'labels.tif' + small_file.write_bytes(b'\x00' * 100) + + with mock.patch( + 'psutil.virtual_memory', return_value=mock.Mock(available=1e10) + ): + assert ( + self._make_image(small_file)._fits_in_memory( + uncompressed_bytes=int(5e9) + ) + is False + ) + assert ( + self._make_image(small_file)._fits_in_memory( + uncompressed_bytes=1000 + ) + is True + ) + + def test_missing_max_in_mem_setting_falls_back_to_default(self, tmp_path): + """Older persisted settings may not yet contain max_in_mem_gb.""" + from types import SimpleNamespace + + small_file = tmp_path / 'small.txt' + small_file.write_text('x' * 100) + + with ( + mock.patch( + 'ndev_settings.get_settings', + return_value=SimpleNamespace( + ndevio_reader=SimpleNamespace(), + ), + ), + mock.patch( + 'psutil.virtual_memory', + return_value=mock.Mock(available=1e10), + ), + ): + assert self._make_image(small_file)._fits_in_memory() is True + + # ============================================================================= # Regression tests: compressed files and filename-based label detection # ============================================================================= diff --git a/tests/test_utils/test_layer_utils.py b/tests/test_utils/test_layer_utils.py index 
e6ffc7a..b54b96a 100644 --- a/tests/test_utils/test_layer_utils.py +++ b/tests/test_utils/test_layer_utils.py @@ -2,38 +2,6 @@ from __future__ import annotations -from unittest import mock - - -class TestInferLayerType: - """Tests for infer_layer_type function.""" - - def test_label_keyword_returns_labels(self): - """Test that label keywords are detected.""" - from ndevio.utils._layer_utils import infer_channel_layer_type - - assert infer_channel_layer_type('nuclei_mask') == 'labels' - assert infer_channel_layer_type('cell_labels') == 'labels' - assert infer_channel_layer_type('segmentation') == 'labels' - assert infer_channel_layer_type('SEG_channel') == 'labels' - assert infer_channel_layer_type('roi_data') == 'labels' - - def test_non_label_returns_image(self): - """Test that non-label names return image.""" - from ndevio.utils._layer_utils import infer_channel_layer_type - - assert infer_channel_layer_type('DAPI') == 'image' - assert infer_channel_layer_type('GFP') == 'image' - assert infer_channel_layer_type('membrane') == 'image' - - def test_case_insensitive(self): - """Test that detection is case-insensitive.""" - from ndevio.utils._layer_utils import infer_channel_layer_type - - assert infer_channel_layer_type('MASK') == 'labels' - assert infer_channel_layer_type('Label') == 'labels' - assert infer_channel_layer_type('SEGMENTATION') == 'labels' - class TestResolveLayerType: """Tests for resolve_layer_type function.""" @@ -69,6 +37,14 @@ def test_auto_detect_when_no_overrides(self): ) # Auto-detect assert resolve_layer_type('DAPI', None, None) == 'image' # Auto-detect + def test_auto_detect_is_case_insensitive(self): + """Channel-name keyword matching should ignore case.""" + from ndevio.utils._layer_utils import resolve_layer_type + + assert resolve_layer_type('MASK', None, None) == 'labels' + assert resolve_layer_type('Label', None, None) == 'labels' + assert resolve_layer_type('SEGMENTATION', None, None) == 'labels' + def 
test_path_stem_fallback_detects_labels(self): """Regression: file named 'cells_mask.tif' with generic channel name '0' should be detected as 'labels' via the path_stem fallback. @@ -118,75 +94,6 @@ def test_path_stem_none_channel_nonlabel_returns_image(self): ) -class TestDetermineInMemory: - """Tests for determine_in_memory function.""" - - def test_none_path_returns_true(self): - """Test that None path (array data) returns True.""" - from ndevio.utils._layer_utils import determine_in_memory - - assert determine_in_memory(None) is True - - def test_small_file_returns_true(self, tmp_path): - """Test that small files are loaded in memory.""" - from ndevio.utils._layer_utils import determine_in_memory - - small_file = tmp_path / 'small.txt' - small_file.write_text('x' * 100) - - with mock.patch( - 'psutil.virtual_memory', return_value=mock.Mock(available=1e10) - ): - assert determine_in_memory(small_file) is True - - def test_large_file_returns_false(self, tmp_path): - """Test that large files are loaded as dask.""" - from ndevio.utils._layer_utils import determine_in_memory - - large_file = tmp_path / 'large.txt' - large_file.write_text('x') - - with ( - mock.patch( - 'psutil.virtual_memory', return_value=mock.Mock(available=1e9) - ), - mock.patch( - 'bioio_base.io.pathlike_to_fs', - return_value=(mock.Mock(size=lambda x: 5e9), ''), - ), - ): - assert determine_in_memory(large_file) is False - - def test_uncompressed_bytes_large_overrides_small_disk_size( - self, tmp_path - ): - """Regression: compressed files (e.g. int32 TIFF) that are small on - disk but large when decompressed must trigger dask loading. - - uncompressed_bytes takes precedence over filesystem size so that - a 19 MB compressed file whose data expands to 3 GB in memory is - not eagerly loaded. 
- """ - from ndevio.utils._layer_utils import determine_in_memory - - small_file = tmp_path / 'labels.tif' - small_file.write_bytes(b'\x00' * 100) # tiny on disk - - with mock.patch( - 'psutil.virtual_memory', return_value=mock.Mock(available=1e10) - ): - # uncompressed_bytes above threshold → dask - assert ( - determine_in_memory(small_file, uncompressed_bytes=int(5e9)) - is False - ) - # uncompressed_bytes well below threshold → in-memory - assert ( - determine_in_memory(small_file, uncompressed_bytes=1000) - is True - ) - - class TestBuildLayerTuple: """Tests for build_layer_tuple function.""" From 3562dbc42cbb9c87976a6ef426b9c777bc2c268b Mon Sep 17 00:00:00 2001 From: Tim Monko Date: Sat, 4 Apr 2026 15:47:32 -0500 Subject: [PATCH 06/10] clean up nImage init and memory checks --- src/ndevio/nimage.py | 219 ++++++++++++++----------------------------- tests/test_nimage.py | 102 +++++++++----------- 2 files changed, 110 insertions(+), 211 deletions(-) diff --git a/src/ndevio/nimage.py b/src/ndevio/nimage.py index 3f86c69..8009fb4 100644 --- a/src/ndevio/nimage.py +++ b/src/ndevio/nimage.py @@ -3,9 +3,7 @@ from __future__ import annotations import logging -import math from collections.abc import Sequence -from inspect import Parameter, signature from pathlib import Path from typing import TYPE_CHECKING, Any @@ -30,69 +28,21 @@ def _prepare_bioimage_init_kwargs( kwargs: dict[str, Any], ) -> tuple[dict[str, Any], dict[str, Any]]: """Build BioImage init kwargs and a fallback without chunk_dims.""" - init_kwargs = dict(kwargs) - init_kwargs.setdefault('chunk_dims', ['Y', 'X']) + # Default to per-plane chunks so each Z/T slice is a separate dask + # task (~one TIFF page) rather than the entire ZYX volume. + # bioio's DEFAULT_CHUNK_DIMS includes Z, meaning each (T,C) pair is + # one giant task and every Z-slice navigation decompresses the full + # volume. ["Y", "X"] gives O(Z) tasks with ~1 page per compute. 
+ init_kwargs = { + **kwargs, + 'chunk_dims': kwargs.get('chunk_dims', ['Y', 'X']), + } fallback_kwargs = { - key: value for key, value in init_kwargs.items() if key != 'chunk_dims' + k: v for k, v in init_kwargs.items() if k != 'chunk_dims' } return init_kwargs, fallback_kwargs -def _reader_supports_chunk_dims( - reader: type[Reader] | Sequence[type[Reader]] | None, -) -> bool | None: - """Return whether a reader explicitly supports the chunk_dims kwarg. - - Returns None when support cannot be known ahead of time, for example when - BioImage will determine the reader internally or a reader sequence is mixed. - """ - - def _supports_single(candidate: type[Reader]) -> bool: - params = signature(candidate.__init__).parameters.values() - return any( - parameter.name == 'chunk_dims' - or parameter.kind == Parameter.VAR_KEYWORD - for parameter in params - ) - - if reader is None: - return None - if isinstance(reader, Sequence): - support = {_supports_single(candidate) for candidate in reader} - return support.pop() if len(support) == 1 else None - return _supports_single(reader) - - -def _init_bioimage_once( - instance: BioImage, - *, - image: ImageLike, - reader: type[Reader] | Sequence[type[Reader]] | None, - init_kwargs: dict[str, Any], - fallback_kwargs: dict[str, Any], -) -> None: - """Initialize BioImage with the best available chunk_dims strategy.""" - chunk_support = _reader_supports_chunk_dims(reader) - if chunk_support is False: - BioImage.__init__( - instance, image=image, reader=reader, **fallback_kwargs - ) - return - - if chunk_support is True: - BioImage.__init__(instance, image=image, reader=reader, **init_kwargs) - return - - try: - BioImage.__init__(instance, image=image, reader=reader, **init_kwargs) - except TypeError as exc: - if 'chunk_dims' not in str(exc): - raise - BioImage.__init__( - instance, image=image, reader=reader, **fallback_kwargs - ) - - def _initialize_bioimage( instance: BioImage, *, @@ -101,29 +51,36 @@ def _initialize_bioimage( 
init_kwargs: dict[str, Any], fallback_kwargs: dict[str, Any], ) -> None: - """Initialize BioImage with preferred-reader fallback to default.""" + """Initialize BioImage with preferred-reader fallback to default. + + Tries ``chunk_dims=['Y','X']`` for per-plane chunking, falling back to + the reader's default chunking if ``chunk_dims`` is not supported. + If a preferred reader is given but cannot read the file, falls back to + BioImage's automatic reader selection. + """ from bioio_base.exceptions import UnsupportedFileFormatError - if resolved_reader is not None: + def _init(reader: type[Reader] | Sequence[type[Reader]] | None) -> None: + """Initialize with chunk_dims, silently falling back without it.""" try: - _init_bioimage_once( - instance, - image=image, - reader=resolved_reader, - init_kwargs=init_kwargs, - fallback_kwargs=fallback_kwargs, + BioImage.__init__( + instance, image=image, reader=reader, **init_kwargs + ) + except TypeError as exc: + if 'chunk_dims' not in str(exc): + raise + BioImage.__init__( + instance, image=image, reader=reader, **fallback_kwargs ) + + if resolved_reader is not None: + try: + _init(resolved_reader) return except UnsupportedFileFormatError: pass - _init_bioimage_once( - instance, - image=image, - reader=None, - init_kwargs=init_kwargs, - fallback_kwargs=fallback_kwargs, - ) + _init(None) def _resolve_reader( @@ -225,8 +182,7 @@ class nImage(BioImage): _is_remote: bool _reference_xarray: xr.DataArray | None _layer_data: list | None - _level0_uncompressed_bytes: int | None - _should_load_in_memory_cache: bool | None + _use_dask_cache: bool | None def __init__( self, @@ -242,11 +198,6 @@ def __init__( if isinstance(image, str): image = image.rstrip('/') - # Default to per-plane chunks so each Z/T slice is a separate dask - # task (~one TIFF page) rather than the entire ZYX volume. - # bioio's DEFAULT_CHUNK_DIMS includes Z, meaning each (T,C) pair is - # one giant task and every Z-slice navigation decompresses the full - # volume. 
["Y", "X"] gives O(Z) tasks with ~1 page per compute. init_kwargs, fallback_kwargs = _prepare_bioimage_init_kwargs(kwargs) resolved_reader = _resolve_reader(image, reader) @@ -266,8 +217,7 @@ def __init__( # Instance state self._reference_xarray = None self._layer_data = None - self._level0_uncompressed_bytes = None - self._should_load_in_memory_cache = None + self._use_dask_cache = None self._initialize_source_state(image) # Any compatibility warnings for old formats should be emitted at this point @@ -299,79 +249,45 @@ def _initialize_source_state(self, image: ImageLike) -> None: self.path = source self._is_remote = True - def _max_in_memory_bytes(self) -> float: - """Return the configured eager-loading threshold in bytes.""" - from ndev_settings import get_settings - - settings = get_settings() - reader_settings = settings.ndevio_reader # type: ignore[attr-defined] - max_in_mem_gb = getattr(reader_settings, 'max_in_mem_gb', 8.0) - return float(max_in_mem_gb) * 1e9 + def _fits_in_memory(self) -> bool: + """Return True if the full uncompressed image fits comfortably in RAM. - def _fits_in_memory( - self, - *, - uncompressed_bytes: int | None = None, - max_in_mem_percent: float = 0.3, - ) -> bool: - """Return whether this image should be loaded eagerly.""" + Uses the reader's dask-backed metadata rather + than the compressed on-disk size, so heavily-compressed files (e.g. an + 18 MB LZW int32 label TIFF that expands to several GB) are correctly + flagged as too large to load eagerly. 
+ """ if self.path is None: return True + from ndev_settings import get_settings from psutil import virtual_memory - available_mem = int(virtual_memory().available) - - if uncompressed_bytes is not None: - check_bytes = int(uncompressed_bytes) - else: - from bioio_base.io import pathlike_to_fs - - fs, path_str = pathlike_to_fs(self.path) - file_size = fs.size(path_str) - assert file_size is not None - check_bytes = int(file_size) - - max_in_mem_bytes = self._max_in_memory_bytes() - return ( - check_bytes <= max_in_mem_bytes - and check_bytes < max_in_mem_percent * available_mem + max_bytes = ( + float(getattr(get_settings().ndevio_reader, 'max_in_mem_gb', 8.0)) # type: ignore[attr-defined] + * 1e9 ) + available = int(virtual_memory().available) + # xr.DataArray.nbytes = shape × dtype.itemsize — no IO, dask-safe + uncompressed = self.xarray_dask_data.nbytes + return uncompressed <= max_bytes and uncompressed < 0.3 * available - def _get_level0_uncompressed_bytes(self) -> int: - """Return the highest-resolution uncompressed byte size.""" - if self._level0_uncompressed_bytes is None: - current_res = self.current_resolution_level - self.set_resolution_level(0) - try: - self._level0_uncompressed_bytes = ( - math.prod(self.shape) * self.dtype.itemsize - ) - finally: - self.set_resolution_level(current_res) - return self._level0_uncompressed_bytes + @property + def _use_dask(self) -> bool: + """True when all data access for this image should be dask-backed. - def _should_load_in_memory(self) -> bool: - """Return the cached single-resolution load decision.""" - if self._should_load_in_memory_cache is None: - self._should_load_in_memory_cache = self._fits_in_memory( - uncompressed_bytes=self._get_level0_uncompressed_bytes(), + Multiscale images always use dask for memory efficiency. Single- + resolution remote images always use dask. Single-resolution local + images use dask when their uncompressed footprint would not fit + comfortably in RAM. 
+ """ + if self._use_dask_cache is None: + self._use_dask_cache = ( + len(self.resolution_levels) > 1 + or self._is_remote + or not self._fits_in_memory() ) - return self._should_load_in_memory_cache - - def _use_dask(self, *, multiscale: bool = False) -> bool: - """Return whether the current access path should stay dask-backed.""" - return ( - multiscale or self._is_remote or not self._should_load_in_memory() - ) - - def _get_xarray_for_current_level( - self, *, multiscale: bool = False - ) -> xr.DataArray: - """Return the xarray backend to use at the current resolution level.""" - if self._use_dask(multiscale=multiscale): - return self.xarray_dask_data - return self.xarray_data + return self._use_dask_cache @property def reference_xarray(self) -> xr.DataArray: @@ -397,8 +313,10 @@ def reference_xarray(self) -> xr.DataArray: current_res = self.current_resolution_level self.set_resolution_level(0) try: - self._reference_xarray = self._get_xarray_for_current_level( - multiscale=len(self.resolution_levels) > 1 + self._reference_xarray = ( + self.xarray_dask_data + if self._use_dask + else self.xarray_data ).squeeze() finally: self.set_resolution_level(current_res) @@ -430,8 +348,7 @@ def _build_layer_data(self) -> list: """Build the list of arrays for all resolution levels.""" current_res = self.current_resolution_level levels = self.resolution_levels - multiscale = len(levels) > 1 - use_dask = self._use_dask(multiscale=multiscale) + use_dask = self._use_dask # Determine which dims to keep from level 0's squeezed metadata. 
# Using isel instead of squeeze ensures all levels have diff --git a/tests/test_nimage.py b/tests/test_nimage.py index fee81cc..8cd74a7 100644 --- a/tests/test_nimage.py +++ b/tests/test_nimage.py @@ -800,89 +800,71 @@ def test_explicit_reader_fails_falls_back(self, resources_dir: Path): assert img.reader.name == 'bioio_ome_tiff' -class TestDetermineInMemory: - """Tests for nImage memory-loading policy.""" +class TestFitsInMemory: + """Tests for nImage._fits_in_memory memory-loading policy.""" - @staticmethod - def _make_image(path): - img = object.__new__(nImage) - img.path = None if path is None else str(path) - img._is_remote = False - return img + def test_array_backed_always_fits(self): + """Array-backed nImage (path=None) should always fit in memory.""" + import numpy as np + + img = nImage(np.zeros((10, 10), dtype=np.uint8)) + assert img._fits_in_memory() is True - def test_none_path_returns_true(self): - """Array-backed inputs should stay in memory.""" - assert self._make_image(None)._fits_in_memory() is True + def test_small_image_fits(self, tmp_path): + """A small image whose uncompressed size is within limits fits eagerly.""" + import numpy as np + import tifffile - def test_small_file_returns_true(self, tmp_path): - """Small files should be loaded eagerly.""" - small_file = tmp_path / 'small.txt' - small_file.write_text('x' * 100) + path = tmp_path / 'small.tif' + tifffile.imwrite(str(path), np.zeros((5, 5), dtype=np.uint8)) with mock.patch( - 'psutil.virtual_memory', return_value=mock.Mock(available=1e10) + 'psutil.virtual_memory', + return_value=mock.Mock(available=int(1e10)), ): - assert self._make_image(small_file)._fits_in_memory() is True + img = nImage(path) + assert img._fits_in_memory() is True - def test_large_file_returns_false(self, tmp_path): - """Large files should stay dask-backed.""" - large_file = tmp_path / 'large.txt' - large_file.write_text('x') - - with ( - mock.patch( - 'psutil.virtual_memory', return_value=mock.Mock(available=1e9) 
- ), - mock.patch( - 'bioio_base.io.pathlike_to_fs', - return_value=(mock.Mock(size=lambda x: 5e9), ''), - ), - ): - assert self._make_image(large_file)._fits_in_memory() is False + def test_exceeds_memory_percentage_returns_false(self, tmp_path): + """Image whose uncompressed size exceeds 30% of available RAM → dask.""" + import numpy as np + import tifffile - def test_uncompressed_bytes_large_overrides_small_disk_size( - self, tmp_path - ): - """Compressed files should be judged by RAM footprint when known.""" - small_file = tmp_path / 'labels.tif' - small_file.write_bytes(b'\x00' * 100) + # 50×50×50×uint32 = 500 KB uncompressed + path = tmp_path / 'medium.tif' + tifffile.imwrite(str(path), np.zeros((50, 50, 50), dtype=np.uint32)) + # 30% of 1 MB = 300 KB < 500 KB → should not fit with mock.patch( - 'psutil.virtual_memory', return_value=mock.Mock(available=1e10) + 'psutil.virtual_memory', return_value=mock.Mock(available=int(1e6)) ): - assert ( - self._make_image(small_file)._fits_in_memory( - uncompressed_bytes=int(5e9) - ) - is False - ) - assert ( - self._make_image(small_file)._fits_in_memory( - uncompressed_bytes=1000 - ) - is True - ) + img = nImage(path) + assert img._fits_in_memory() is False def test_missing_max_in_mem_setting_falls_back_to_default(self, tmp_path): - """Older persisted settings may not yet contain max_in_mem_gb.""" + """Older persisted settings missing max_in_mem_gb should use 8 GB.""" from types import SimpleNamespace - small_file = tmp_path / 'small.txt' - small_file.write_text('x' * 100) + import numpy as np + import tifffile + + path = tmp_path / 'small.tif' + tifffile.imwrite(str(path), np.zeros((5, 5), dtype=np.uint8)) with ( mock.patch( 'ndev_settings.get_settings', return_value=SimpleNamespace( - ndevio_reader=SimpleNamespace(), + ndevio_reader=SimpleNamespace(preferred_reader=None), ), ), mock.patch( 'psutil.virtual_memory', - return_value=mock.Mock(available=1e10), + return_value=mock.Mock(available=int(1e10)), ), ): - assert 
self._make_image(small_file)._fits_in_memory() is True + img = nImage(path) + assert img._fits_in_memory() is True # ============================================================================= @@ -894,11 +876,11 @@ def test_compressed_int32_tiff_uses_dask(tmp_path: Path): """Regression: a compressed int32 TIFF must be loaded as dask even when its on-disk size is well below the in-memory threshold. - An 18.9 MB LZW-compressed int32 file expands to ~3 GB in RAM. + An 18.9 MB LZW-compressed int32 file expands to ~288 MB in RAM. The old code compared the compressed *filesystem* size against the threshold; a 19 MB file would always pass and be loaded eagerly. - The fix computes uncompressed_bytes = prod(shape) * dtype.itemsize and - uses that instead. + The fix uses ``xarray_dask_data.nbytes`` (= shape × dtype.itemsize) + so the uncompressed footprint is used for the decision. """ import math From 0047b3f288903f4faef99007564f9ee7f71d5adc Mon Sep 17 00:00:00 2001 From: Tim Monko Date: Sat, 4 Apr 2026 15:56:52 -0500 Subject: [PATCH 07/10] stepdown style for nimage.py --- src/ndevio/nimage.py | 216 +++++++++++++++++++++---------------------- 1 file changed, 108 insertions(+), 108 deletions(-) diff --git a/src/ndevio/nimage.py b/src/ndevio/nimage.py index 8009fb4..f64c314 100644 --- a/src/ndevio/nimage.py +++ b/src/ndevio/nimage.py @@ -24,114 +24,6 @@ logger = logging.getLogger(__name__) -def _prepare_bioimage_init_kwargs( - kwargs: dict[str, Any], -) -> tuple[dict[str, Any], dict[str, Any]]: - """Build BioImage init kwargs and a fallback without chunk_dims.""" - # Default to per-plane chunks so each Z/T slice is a separate dask - # task (~one TIFF page) rather than the entire ZYX volume. - # bioio's DEFAULT_CHUNK_DIMS includes Z, meaning each (T,C) pair is - # one giant task and every Z-slice navigation decompresses the full - # volume. ["Y", "X"] gives O(Z) tasks with ~1 page per compute. 
- init_kwargs = { - **kwargs, - 'chunk_dims': kwargs.get('chunk_dims', ['Y', 'X']), - } - fallback_kwargs = { - k: v for k, v in init_kwargs.items() if k != 'chunk_dims' - } - return init_kwargs, fallback_kwargs - - -def _initialize_bioimage( - instance: BioImage, - *, - image: ImageLike, - resolved_reader: type[Reader] | Sequence[type[Reader]] | None, - init_kwargs: dict[str, Any], - fallback_kwargs: dict[str, Any], -) -> None: - """Initialize BioImage with preferred-reader fallback to default. - - Tries ``chunk_dims=['Y','X']`` for per-plane chunking, falling back to - the reader's default chunking if ``chunk_dims`` is not supported. - If a preferred reader is given but cannot read the file, falls back to - BioImage's automatic reader selection. - """ - from bioio_base.exceptions import UnsupportedFileFormatError - - def _init(reader: type[Reader] | Sequence[type[Reader]] | None) -> None: - """Initialize with chunk_dims, silently falling back without it.""" - try: - BioImage.__init__( - instance, image=image, reader=reader, **init_kwargs - ) - except TypeError as exc: - if 'chunk_dims' not in str(exc): - raise - BioImage.__init__( - instance, image=image, reader=reader, **fallback_kwargs - ) - - if resolved_reader is not None: - try: - _init(resolved_reader) - return - except UnsupportedFileFormatError: - pass - - _init(None) - - -def _resolve_reader( - image: ImageLike, - explicit_reader: type[Reader] | Sequence[type[Reader]] | None, -) -> type[Reader] | Sequence[type[Reader]] | None: - """Resolve the reader to use for an image. - - Priority: - 1. Explicit reader (passed to __init__) - 2. Preferred reader from settings (if file path and installed) - 3. None (let bioio determine) - - Parameters - ---------- - image : ImageLike - The image to resolve a reader for. - explicit_reader : type[Reader] | Sequence[type[Reader]] | None - Explicit reader class(es) passed by user. 
- - Returns - ------- - type[Reader] | Sequence[type[Reader]] | None - The reader to use, or None to let bioio choose. - - """ - if explicit_reader is not None: - return explicit_reader - - # Only check preferred reader for file paths - if not isinstance(image, str | Path): - return None - - # Get preferred reader from settings - from ndev_settings import get_settings - - from .bioio_plugins._utils import get_installed_plugins, get_reader_by_name - - settings = get_settings() - preferred = settings.ndevio_reader.preferred_reader # type: ignore - - if not preferred: - return None - - if preferred not in get_installed_plugins(): - logger.debug('Preferred reader %s not installed', preferred) - return None - - return get_reader_by_name(preferred) - - class nImage(BioImage): """ An nImage is a BioImage with additional functionality for napari. @@ -719,3 +611,111 @@ def get_layer_data_tuples( ) return tuples + + +def _prepare_bioimage_init_kwargs( + kwargs: dict[str, Any], +) -> tuple[dict[str, Any], dict[str, Any]]: + """Build BioImage init kwargs and a fallback without chunk_dims.""" + # Default to per-plane chunks so each Z/T slice is a separate dask + # task (~one TIFF page) rather than the entire ZYX volume. + # bioio's DEFAULT_CHUNK_DIMS includes Z, meaning each (T,C) pair is + # one giant task and every Z-slice navigation decompresses the full + # volume. ["Y", "X"] gives O(Z) tasks with ~1 page per compute. + init_kwargs = { + **kwargs, + 'chunk_dims': kwargs.get('chunk_dims', ['Y', 'X']), + } + fallback_kwargs = { + k: v for k, v in init_kwargs.items() if k != 'chunk_dims' + } + return init_kwargs, fallback_kwargs + + +def _initialize_bioimage( + instance: BioImage, + *, + image: ImageLike, + resolved_reader: type[Reader] | Sequence[type[Reader]] | None, + init_kwargs: dict[str, Any], + fallback_kwargs: dict[str, Any], +) -> None: + """Initialize BioImage with preferred-reader fallback to default. 
def _resolve_reader(
    image: ImageLike,
    explicit_reader: type[Reader] | Sequence[type[Reader]] | None,
) -> type[Reader] | Sequence[type[Reader]] | None:
    """Pick the reader to use for *image*.

    Resolution priority:

    1. An explicitly supplied reader always wins.
    2. The preferred reader from settings, when *image* is a file path
       and that plugin is actually installed.
    3. ``None`` — let bioio determine the reader itself.

    Parameters
    ----------
    image : ImageLike
        The image to resolve a reader for.
    explicit_reader : type[Reader] | Sequence[type[Reader]] | None
        Explicit reader class(es) passed by the user.

    Returns
    -------
    type[Reader] | Sequence[type[Reader]] | None
        The reader to use, or None to let bioio choose.

    """
    if explicit_reader is not None:
        return explicit_reader

    # The preferred-reader setting only makes sense for path-like sources.
    if not isinstance(image, str | Path):
        return None

    # Imported lazily to keep module import light.
    from ndev_settings import get_settings

    from .bioio_plugins._utils import get_installed_plugins, get_reader_by_name

    preferred = get_settings().ndevio_reader.preferred_reader  # type: ignore
    if not preferred:
        return None

    if preferred in get_installed_plugins():
        return get_reader_by_name(preferred)

    logger.debug('Preferred reader %s not installed', preferred)
    return None
- """ + """Return True if the uncompressed image fits comfortably in RAM.""" if self.path is None: return True From 2679d47712912a712d80753624260c1bb1400ebc Mon Sep 17 00:00:00 2001 From: Tim Monko Date: Sat, 4 Apr 2026 16:01:32 -0500 Subject: [PATCH 09/10] add some local / remote comments back --- src/ndevio/nimage.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/ndevio/nimage.py b/src/ndevio/nimage.py index 308c3db..f56cee9 100644 --- a/src/ndevio/nimage.py +++ b/src/ndevio/nimage.py @@ -134,10 +134,12 @@ def _initialize_source_state(self, image: ImageLike) -> None: source = str(image) fs, resolved = fsspec.url_to_fs(source) if isinstance(fs, LocalFileSystem): + # Normalise file:// URIs and any platform variations to an + # OS-native path string so Path(self.path) always round-trips. self.path = str(Path(resolved)) self._is_remote = False return - + # Remote URI (s3://, https://, gc://, …) — keep verbatim. self.path = source self._is_remote = True From dac8e911a5aaafc3d96f3d02fa24fa9c27f81bcc Mon Sep 17 00:00:00 2001 From: Tim Monko Date: Sat, 4 Apr 2026 16:33:21 -0500 Subject: [PATCH 10/10] force named args for `resolve_layer_type` and reorder args --- src/ndevio/nimage.py | 12 +++++------ src/ndevio/utils/_layer_utils.py | 22 ++++++++++++------- tests/test_utils/test_layer_utils.py | 32 ++++++++++++++++------------ 3 files changed, 38 insertions(+), 28 deletions(-) diff --git a/src/ndevio/nimage.py b/src/ndevio/nimage.py index f56cee9..ca57037 100644 --- a/src/ndevio/nimage.py +++ b/src/ndevio/nimage.py @@ -544,9 +544,9 @@ def get_layer_data_tuples( if channel_dim not in ref.dims: channel_name = self.channel_names[0] effective_type = resolve_layer_type( - channel_name or '', - layer_type, - channel_types, + global_override=layer_type, + channel_types=channel_types, + channel_name=channel_name or '', path_stem=self.path_stem, ) extra_kwargs = ( @@ -576,9 +576,9 @@ def get_layer_data_tuples( for i in range(total_channels): channel_name 
= channel_names[i] effective_type = resolve_layer_type( - channel_name, - layer_type, - channel_types, + global_override=layer_type, + channel_types=channel_types, + channel_name=channel_name, path_stem=self.path_stem, ) diff --git a/src/ndevio/utils/_layer_utils.py b/src/ndevio/utils/_layer_utils.py index 94f722f..26d44ec 100644 --- a/src/ndevio/utils/_layer_utils.py +++ b/src/ndevio/utils/_layer_utils.py @@ -43,25 +43,31 @@ def _contains_label_keyword(value: str, keywords: frozenset[str]) -> bool: def resolve_layer_type( - channel_name: str, - global_override: str | None, - channel_types: dict[str, str] | None, + *, + global_override: str | None = None, + channel_types: dict[str, str] | None = None, + channel_name: str = '', path_stem: str | None = None, ) -> str: """Resolve layer type: global override > per-channel > auto-detect. - Auto-detection checks the channel name first, then falls back to the - filename stem so that files named e.g. ``cells_mask.tif`` are detected - as ``'labels'`` even when the channel name is a generic ``'0'``. + Resolution priority, from most general to most specific: + + 1. ``global_override`` — applies the same type to every channel. + 2. ``channel_types`` — per-channel lookup by name. + 3. ``channel_name`` keyword detection — checks for label-like keywords. + 4. ``path_stem`` fallback — filename stem used when the channel name + gives no signal (e.g. generic ``'0'`` from a file named + ``cells_mask.tif``). Parameters ---------- - channel_name : str - Name of the channel. global_override : str | None If set, this layer type is used for all channels. channel_types : dict[str, str] | None Per-channel layer type mapping. + channel_name : str + Name of the channel. path_stem : str | None Filename stem (no extension) used as a fallback when the channel name does not contain label keywords. 
diff --git a/tests/test_utils/test_layer_utils.py b/tests/test_utils/test_layer_utils.py index b54b96a..1bba334 100644 --- a/tests/test_utils/test_layer_utils.py +++ b/tests/test_utils/test_layer_utils.py @@ -11,9 +11,9 @@ def test_global_override_takes_precedence(self): from ndevio.utils._layer_utils import resolve_layer_type result = resolve_layer_type( - 'nuclei_mask', # Would auto-detect to labels global_override='surface', channel_types={'nuclei_mask': 'image'}, + channel_name='nuclei_mask', # Would auto-detect to labels ) assert result == 'surface' @@ -22,9 +22,9 @@ def test_channel_types_used_when_no_global(self): from ndevio.utils._layer_utils import resolve_layer_type result = resolve_layer_type( - 'nuclei_mask', global_override=None, channel_types={'nuclei_mask': 'points'}, + channel_name='nuclei_mask', ) assert result == 'points' @@ -33,17 +33,19 @@ def test_auto_detect_when_no_overrides(self): from ndevio.utils._layer_utils import resolve_layer_type assert ( - resolve_layer_type('nuclei_mask', None, None) == 'labels' + resolve_layer_type(channel_name='nuclei_mask') == 'labels' + ) # Auto-detect + assert ( + resolve_layer_type(channel_name='DAPI') == 'image' ) # Auto-detect - assert resolve_layer_type('DAPI', None, None) == 'image' # Auto-detect def test_auto_detect_is_case_insensitive(self): """Channel-name keyword matching should ignore case.""" from ndevio.utils._layer_utils import resolve_layer_type - assert resolve_layer_type('MASK', None, None) == 'labels' - assert resolve_layer_type('Label', None, None) == 'labels' - assert resolve_layer_type('SEGMENTATION', None, None) == 'labels' + assert resolve_layer_type(channel_name='MASK') == 'labels' + assert resolve_layer_type(channel_name='Label') == 'labels' + assert resolve_layer_type(channel_name='SEGMENTATION') == 'labels' def test_path_stem_fallback_detects_labels(self): """Regression: file named 'cells_mask.tif' with generic channel name @@ -52,27 +54,29 @@ def 
test_path_stem_fallback_detects_labels(self): from ndevio.utils._layer_utils import resolve_layer_type assert ( - resolve_layer_type('0', None, None, path_stem='cells_mask') + resolve_layer_type(channel_name='0', path_stem='cells_mask') == 'labels' ) assert ( resolve_layer_type( - 'Channel 0', None, None, path_stem='nuclei_labels' + channel_name='Channel 0', path_stem='nuclei_labels' ) == 'labels' ) assert ( - resolve_layer_type('', None, None, path_stem='segmentation_output') + resolve_layer_type( + channel_name='', path_stem='segmentation_output' + ) == 'labels' ) - assert resolve_layer_type('', None, None, path_stem='raw') == 'image' + assert resolve_layer_type(channel_name='', path_stem='raw') == 'image' def test_path_stem_not_checked_when_channel_triggers_detection(self): """Channel-name detection is unaffected by a non-label path_stem.""" from ndevio.utils._layer_utils import resolve_layer_type assert ( - resolve_layer_type('nuclei_mask', None, None, path_stem='raw') + resolve_layer_type(channel_name='nuclei_mask', path_stem='raw') == 'labels' ) @@ -81,7 +85,7 @@ def test_path_stem_nonlabel_image_result(self): from ndevio.utils._layer_utils import resolve_layer_type assert ( - resolve_layer_type('DAPI', None, None, path_stem='raw_image') + resolve_layer_type(channel_name='DAPI', path_stem='raw_image') == 'image' ) @@ -90,7 +94,7 @@ def test_path_stem_none_channel_nonlabel_returns_image(self): from ndevio.utils._layer_utils import resolve_layer_type assert ( - resolve_layer_type('DAPI', None, None, path_stem=None) == 'image' + resolve_layer_type(channel_name='DAPI', path_stem=None) == 'image' )