diff --git a/pyproject.toml b/pyproject.toml index 404a15e..a415afb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ requires-python = ">=3.11" # See best practices: https://napari.org/stable/plugins/building_a_plugin/best_practices.html dependencies = [ "napari", - "ndev-settings>=0.4.1", + "ndev-settings>=0.4.2", "nbatch>=0.0.4", "natsort", "magicgui", diff --git a/src/ndevio/ndev_settings.yaml b/src/ndevio/ndev_settings.yaml index b4c567f..ae77bfc 100644 --- a/src/ndevio/ndev_settings.yaml +++ b/src/ndevio/ndev_settings.yaml @@ -1,7 +1,7 @@ ndevio_reader: suggest_reader_plugins: default: true - tooltip: Whether to suggest plugins to install when no reader can be found + tooltip: Whether to suggest plugins to install when no reader can be found. value: true preferred_reader: default: null @@ -16,12 +16,19 @@ ndevio_reader: - View All Scenes - View First Scene Only default: Open Scene Widget - tooltip: How to handle files with multiple scenes + tooltip: How to handle files with multiple scenes. value: Open Scene Widget clear_layers_on_new_scene: default: false - tooltip: Whether to clear the viewer when selecting a new scene + tooltip: Whether to clear the viewer when selecting a new scene. value: false + max_in_mem_gb: + default: 8.0 + min: 0.5 + max: 128.0 + step: 0.5 + tooltip: Maximum uncompressed image size in GB to load eagerly. Larger images use dask. + value: 8.0 ndevio_export: canvas_scale: diff --git a/src/ndevio/nimage.py b/src/ndevio/nimage.py index 38616d3..ca57037 100644 --- a/src/ndevio/nimage.py +++ b/src/ndevio/nimage.py @@ -3,21 +3,19 @@ from __future__ import annotations import logging +from collections.abc import Sequence from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any from bioio import BioImage from .bioio_plugins._manager import raise_unsupported_with_suggestions from .utils._layer_utils import ( build_layer_tuple, - determine_in_memory, resolve_layer_type, ) if TYPE_CHECKING: - from collections.abc import Sequence - import xarray as xr from bioio_base.reader import Reader from bioio_base.types import ImageLike @@ -26,55 +24,6 @@ logger = logging.getLogger(__name__) -def _resolve_reader( - image: ImageLike, - explicit_reader: type[Reader] | Sequence[type[Reader]] | None, -) -> type[Reader] | Sequence[type[Reader]] | None: - """Resolve the reader to use for an image. - - Priority: - 1. Explicit reader (passed to __init__) - 2. Preferred reader from settings (if file path and installed) - 3. None (let bioio determine) - - Parameters - ---------- - image : ImageLike - The image to resolve a reader for. - explicit_reader : type[Reader] | Sequence[type[Reader]] | None - Explicit reader class(es) passed by user. - - Returns - ------- - type[Reader] | Sequence[type[Reader]] | None - The reader to use, or None to let bioio choose. - - """ - if explicit_reader is not None: - return explicit_reader - - # Only check preferred reader for file paths - if not isinstance(image, str | Path): - return None - - # Get preferred reader from settings - from ndev_settings import get_settings - - from .bioio_plugins._utils import get_installed_plugins, get_reader_by_name - - settings = get_settings() - preferred = settings.ndevio_reader.preferred_reader # type: ignore - - if not preferred: - return None - - if preferred not in get_installed_plugins(): - logger.debug('Preferred reader %s not installed', preferred) - return None - - return get_reader_by_name(preferred) - - class nImage(BioImage): """ An nImage is a BioImage with additional functionality for napari. @@ -125,6 +74,7 @@ class nImage(BioImage): _is_remote: bool _reference_xarray: xr.DataArray | None _layer_data: list | None + _use_dask_cache: bool | None def __init__( self, @@ -140,49 +90,27 @@ def __init__( if isinstance(image, str): image = image.rstrip('/') + init_kwargs, fallback_kwargs = _prepare_bioimage_init_kwargs(kwargs) resolved_reader = _resolve_reader(image, reader) - # Try preferred/explicit reader first, fall back to bioio default - if resolved_reader is not None: - try: - super().__init__(image=image, reader=resolved_reader, **kwargs) - except UnsupportedFileFormatError: - # Preferred reader failed, fall back to bioio's default - try: - super().__init__(image=image, reader=None, **kwargs) - except UnsupportedFileFormatError: - if isinstance(image, str | Path): - raise_unsupported_with_suggestions(image) - raise - else: - try: - super().__init__(image=image, reader=None, **kwargs) - except UnsupportedFileFormatError: - if isinstance(image, str | Path): - raise_unsupported_with_suggestions(image) - raise + try: + _initialize_bioimage( + self, + image=image, + resolved_reader=resolved_reader, + init_kwargs=init_kwargs, + fallback_kwargs=fallback_kwargs, + ) + except UnsupportedFileFormatError: + if isinstance(image, str | Path): + raise_unsupported_with_suggestions(image) + raise # Instance state self._reference_xarray = None self._layer_data = None - if isinstance(image, str | Path): - import fsspec - from fsspec.implementations.local import LocalFileSystem - - s = str(image) - fs, resolved = fsspec.url_to_fs(s) - if isinstance(fs, LocalFileSystem): - # Normalise file:// URIs and any platform variations to an - # OS-native path string so Path(self.path) always round-trips. - self.path = str(Path(resolved)) - self._is_remote = False - else: - # Remote URI (s3://, https://, gc://, …) — keep verbatim. - self.path = s - self._is_remote = True - else: - self.path = None - self._is_remote = False + self._use_dask_cache = None + self._initialize_source_state(image) # Any compatibility warnings for old formats should be emitted at this point # Cheaply check without imports by looking at the reader's module name @@ -193,6 +121,62 @@ def __init__( apply_ome_zarr_compat_patches(self.reader) + def _initialize_source_state(self, image: ImageLike) -> None: + """Populate local path/remote state from the original image input.""" + if not isinstance(image, str | Path): + self.path = None + self._is_remote = False + return + + import fsspec + from fsspec.implementations.local import LocalFileSystem + + source = str(image) + fs, resolved = fsspec.url_to_fs(source) + if isinstance(fs, LocalFileSystem): + # Normalise file:// URIs and any platform variations to an + # OS-native path string so Path(self.path) always round-trips. + self.path = str(Path(resolved)) + self._is_remote = False + return + # Remote URI (s3://, https://, gc://, …) — keep verbatim. + self.path = source + self._is_remote = True + + def _fits_in_memory(self) -> bool: + """Return True if the uncompressed image fits comfortably in RAM.""" + if self.path is None: + return True + + from ndev_settings import get_settings + from psutil import virtual_memory + + max_bytes = ( + float(getattr(get_settings().ndevio_reader, 'max_in_mem_gb', 8.0)) # type: ignore[attr-defined] + * 1e9 + ) + available = int(virtual_memory().available) + # xr.DataArray.nbytes = shape × dtype.itemsize — no IO, dask-safe + uncompressed = self.xarray_dask_data.nbytes + return uncompressed <= max_bytes and uncompressed < 0.3 * available + + @property + def _use_dask(self) -> bool: + """True when all data access for this image should be dask-backed. + + Multiscale images always use dask for memory efficiency. Single- + resolution remote images always use dask. Single-resolution local + images use dask when their uncompressed footprint would not fit + comfortably in RAM. + """ + if self._use_dask_cache is None: + self._use_dask_cache = ( + len(self.resolution_levels) > 1 + or self._is_remote + or not self._fits_in_memory() + ) + return self._use_dask_cache + @property def reference_xarray(self) -> xr.DataArray: """Image data as xarray DataArray for metadata determination. @@ -216,11 +200,14 @@ def reference_xarray(self) -> xr.DataArray: # Ensure we're at the highest-res level for metadata consistency current_res = self.current_resolution_level self.set_resolution_level(0) - if self._is_remote or not determine_in_memory(self.path): - self._reference_xarray = self.xarray_dask_data.squeeze() - else: - self._reference_xarray = self.xarray_data.squeeze() - self.set_resolution_level(current_res) + try: + self._reference_xarray = ( + self.xarray_dask_data + if self._use_dask + else self.xarray_data + ).squeeze() + finally: + self.set_resolution_level(current_res) return self._reference_xarray @property @@ -249,7 +236,7 @@ def _build_layer_data(self) -> list: """Build the list of arrays for all resolution levels.""" current_res = self.current_resolution_level levels = self.resolution_levels - multiscale = len(levels) > 1 + use_dask = self._use_dask # Determine which dims to keep from level 0's squeezed metadata. # Using isel instead of squeeze ensures all levels have @@ -259,21 +246,16 @@ def _build_layer_data(self) -> list: keep_dims = set(ref.dims) arrays: list = [] - for level in levels: - self.set_resolution_level(level) - if ( - multiscale - or self._is_remote - or not determine_in_memory(self.path) - ): - xr_data = self.xarray_dask_data - else: - xr_data = self.xarray_data - - indexer = {d: 0 for d in xr_data.dims if d not in keep_dims} - arrays.append(xr_data.isel(indexer).data) - - self.set_resolution_level(current_res) + try: + for level in levels: + self.set_resolution_level(level) + xr_data = ( + self.xarray_dask_data if use_dask else self.xarray_data + ) + indexer = {d: 0 for d in xr_data.dims if d not in keep_dims} + arrays.append(xr_data.isel(indexer).data) + finally: + self.set_resolution_level(current_res) return arrays @property @@ -562,7 +544,10 @@ def get_layer_data_tuples( if channel_dim not in ref.dims: channel_name = self.channel_names[0] effective_type = resolve_layer_type( - channel_name or '', layer_type, channel_types + global_override=layer_type, + channel_types=channel_types, + channel_name=channel_name or '', + path_stem=self.path_stem, ) extra_kwargs = ( channel_kwargs.get(channel_name) @@ -591,7 +576,10 @@ def get_layer_data_tuples( for i in range(total_channels): channel_name = channel_names[i] effective_type = resolve_layer_type( - channel_name, layer_type, channel_types + global_override=layer_type, + channel_types=channel_types, + channel_name=channel_name, + path_stem=self.path_stem, ) # Slice along channel axis for each resolution level @@ -619,3 +607,111 @@ def get_layer_data_tuples( ) return tuples + + +def _prepare_bioimage_init_kwargs( + kwargs: dict[str, Any], +) -> tuple[dict[str, Any], dict[str, Any]]: + """Build BioImage init kwargs and a fallback without chunk_dims.""" + # Default to per-plane chunks so each Z/T slice is a separate dask + # task (~one TIFF page) rather than the entire ZYX volume. + # bioio's DEFAULT_CHUNK_DIMS includes Z, meaning each (T,C) pair is + # one giant task and every Z-slice navigation decompresses the full + # volume. ["Y", "X"] gives O(Z) tasks with ~1 page per compute. + init_kwargs = { + **kwargs, + 'chunk_dims': kwargs.get('chunk_dims', ['Y', 'X']), + } + fallback_kwargs = { + k: v for k, v in init_kwargs.items() if k != 'chunk_dims' + } + return init_kwargs, fallback_kwargs + + +def _initialize_bioimage( + instance: BioImage, + *, + image: ImageLike, + resolved_reader: type[Reader] | Sequence[type[Reader]] | None, + init_kwargs: dict[str, Any], + fallback_kwargs: dict[str, Any], +) -> None: + """Initialize BioImage with preferred-reader fallback to default. + + Tries ``chunk_dims=['Y','X']`` for per-plane chunking, falling back to + the reader's default chunking if ``chunk_dims`` is not supported. + If a preferred reader is given but cannot read the file, falls back to + BioImage's automatic reader selection. + """ + from bioio_base.exceptions import UnsupportedFileFormatError + + def _init(reader: type[Reader] | Sequence[type[Reader]] | None) -> None: + """Initialize with chunk_dims, silently falling back without it.""" + try: + BioImage.__init__( + instance, image=image, reader=reader, **init_kwargs + ) + except TypeError as exc: + if 'chunk_dims' not in str(exc): + raise + BioImage.__init__( + instance, image=image, reader=reader, **fallback_kwargs + ) + + if resolved_reader is not None: + try: + _init(resolved_reader) + return + except UnsupportedFileFormatError: + pass + + _init(None) + + +def _resolve_reader( + image: ImageLike, + explicit_reader: type[Reader] | Sequence[type[Reader]] | None, +) -> type[Reader] | Sequence[type[Reader]] | None: + """Resolve the reader to use for an image. + + Priority: + 1. Explicit reader (passed to __init__) + 2. Preferred reader from settings (if file path and installed) + 3. None (let bioio determine) + + Parameters + ---------- + image : ImageLike + The image to resolve a reader for. + explicit_reader : type[Reader] | Sequence[type[Reader]] | None + Explicit reader class(es) passed by user. + + Returns + ------- + type[Reader] | Sequence[type[Reader]] | None + The reader to use, or None to let bioio choose. + + """ + if explicit_reader is not None: + return explicit_reader + + # Only check preferred reader for file paths + if not isinstance(image, str | Path): + return None + + # Get preferred reader from settings + from ndev_settings import get_settings + + from .bioio_plugins._utils import get_installed_plugins, get_reader_by_name + + settings = get_settings() + preferred = settings.ndevio_reader.preferred_reader # type: ignore + + if not preferred: + return None + + if preferred not in get_installed_plugins(): + logger.debug('Preferred reader %s not installed', preferred) + return None + + return get_reader_by_name(preferred) diff --git a/src/ndevio/utils/_layer_utils.py b/src/ndevio/utils/_layer_utils.py index d309e0e..26d44ec 100644 --- a/src/ndevio/utils/_layer_utils.py +++ b/src/ndevio/utils/_layer_utils.py @@ -12,51 +12,65 @@ logger = logging.getLogger(__name__) # Keywords that indicate a channel contains labels/segmentation data -LABEL_KEYWORDS = frozenset({'label', 'mask', 'segmentation', 'seg', 'roi'}) - - -def infer_layer_type(channel_name: str) -> str: - """Infer layer type from channel name keywords. - - Parameters - ---------- - channel_name : str - The channel name to check. - - Returns - ------- - str - 'labels' if channel_name contains a label keyword, else 'image'. +CHANNEL_LABEL_KEYWORDS = frozenset( + { + 'label', + 'mask', + 'seg', + 'segmentation', + 'annotation', + 'roi', + 'region', + 'instance', + 'objects', + } +) +FILE_LABEL_KEYWORDS = frozenset( + { + 'label', + 'mask', + 'segmentation', + 'instance', + 'objects', + } +) - Examples - -------- - >>> infer_layer_type('nuclei_mask') - 'labels' - >>> infer_layer_type('DAPI') - 'image' - """ - name_lower = channel_name.lower() - return ( - 'labels' if any(kw in name_lower for kw in LABEL_KEYWORDS) else 'image' - ) +def _contains_label_keyword(value: str, keywords: frozenset[str]) -> bool: + """Return whether a string contains any keyword in a keyword set.""" + value_lower = value.lower() + return any(keyword in value_lower for keyword in keywords) def resolve_layer_type( - channel_name: str, - global_override: str | None, - channel_types: dict[str, str] | None, + *, + global_override: str | None = None, + channel_types: dict[str, str] | None = None, + channel_name: str = '', + path_stem: str | None = None, ) -> str: """Resolve layer type: global override > per-channel > auto-detect. + Resolution priority, from most general to most specific: + + 1. ``global_override`` — applies the same type to every channel. + 2. ``channel_types`` — per-channel lookup by name. + 3. ``channel_name`` keyword detection — checks for label-like keywords. + 4. ``path_stem`` fallback — filename stem used when the channel name + gives no signal (e.g. generic ``'0'`` from a file named + ``cells_mask.tif``). + Parameters ---------- - channel_name : str - Name of the channel. global_override : str | None If set, this layer type is used for all channels. channel_types : dict[str, str] | None Per-channel layer type mapping. + channel_name : str + Name of the channel. + path_stem : str | None + Filename stem (no extension) used as a fallback when the channel + name does not contain label keywords. Returns ------- @@ -68,48 +82,11 @@ def resolve_layer_type( return global_override if channel_types and channel_name in channel_types: return channel_types[channel_name] - return infer_layer_type(channel_name) - - -def determine_in_memory( - path: str | None, - max_in_mem_bytes: float = 4e9, - max_in_mem_percent: float = 0.3, -) -> bool: - """Determine whether to load image data in memory or as dask array. - - Parameters - ---------- - path : str | None - Path to the image file as a string. If None (array data), returns True. - max_in_mem_bytes : float - Maximum file size in bytes for in-memory loading. - Default is 4 GB (4e9 bytes). - max_in_mem_percent : float - Maximum percentage of available memory for in-memory loading. - Default is 30%. - - Returns - ------- - bool - True if image should be loaded in memory, False for dask array. - - """ - from bioio_base.io import pathlike_to_fs - from psutil import virtual_memory - - # No file path means array data - always in memory - if path is None: - return True - - fs, path_str = pathlike_to_fs(path) - filesize: int = fs.size(path_str) # type: ignore[assignment] - available_mem = virtual_memory().available - - return ( - filesize <= max_in_mem_bytes - and filesize < max_in_mem_percent * available_mem - ) + if _contains_label_keyword(channel_name, CHANNEL_LABEL_KEYWORDS): + return 'labels' + if path_stem and _contains_label_keyword(path_stem, FILE_LABEL_KEYWORDS): + return 'labels' + return 'image' def build_layer_tuple( diff --git a/tests/test_nimage.py b/tests/test_nimage.py index e2ea657..8cd74a7 100644 --- a/tests/test_nimage.py +++ b/tests/test_nimage.py @@ -798,3 +798,222 @@ def test_explicit_reader_fails_falls_back(self, resources_dir: Path): assert img is not None # Should have fallen back to bioio's default assert img.reader.name == 'bioio_ome_tiff' + + +class TestFitsInMemory: + """Tests for nImage._fits_in_memory memory-loading policy.""" + + def test_array_backed_always_fits(self): + """Array-backed nImage (path=None) should always fit in memory.""" + import numpy as np + + img = nImage(np.zeros((10, 10), dtype=np.uint8)) + assert img._fits_in_memory() is True + + def test_small_image_fits(self, tmp_path): + """A small image whose uncompressed size is within limits fits eagerly.""" + import numpy as np + import tifffile + + path = tmp_path / 'small.tif' + tifffile.imwrite(str(path), np.zeros((5, 5), dtype=np.uint8)) + + with mock.patch( + 'psutil.virtual_memory', + return_value=mock.Mock(available=int(1e10)), + ): + img = nImage(path) + assert img._fits_in_memory() is True + + def test_exceeds_memory_percentage_returns_false(self, tmp_path): + """Image whose uncompressed size exceeds 30% of available RAM → dask.""" + import numpy as np + import tifffile + + # 50×50×50×uint32 = 500 KB uncompressed + path = tmp_path / 'medium.tif' + tifffile.imwrite(str(path), np.zeros((50, 50, 50), dtype=np.uint32)) + + # 30% of 1 MB = 300 KB < 500 KB → should not fit + with mock.patch( + 'psutil.virtual_memory', return_value=mock.Mock(available=int(1e6)) + ): + img = nImage(path) + assert img._fits_in_memory() is False + + def test_missing_max_in_mem_setting_falls_back_to_default(self, tmp_path): + """Older persisted settings missing max_in_mem_gb should use 8 GB.""" + from types import SimpleNamespace + + import numpy as np + import tifffile + + path = tmp_path / 'small.tif' + tifffile.imwrite(str(path), np.zeros((5, 5), dtype=np.uint8)) + + with ( + mock.patch( + 'ndev_settings.get_settings', + return_value=SimpleNamespace( + ndevio_reader=SimpleNamespace(preferred_reader=None), + ), + ), + mock.patch( + 'psutil.virtual_memory', + return_value=mock.Mock(available=int(1e10)), + ), + ): + img = nImage(path) + assert img._fits_in_memory() is True + + +# ============================================================================= +# Regression tests: compressed files and filename-based label detection +# ============================================================================= + + +def test_compressed_int32_tiff_uses_dask(tmp_path: Path): + """Regression: a compressed int32 TIFF must be loaded as dask even when + its on-disk size is well below the in-memory threshold. + + An 18.9 MB LZW-compressed int32 file expands to ~288 MB in RAM. + The old code compared the compressed *filesystem* size against the + threshold; a 19 MB file would always pass and be loaded eagerly. + The fix uses ``xarray_dask_data.nbytes`` (= shape × dtype.itemsize) + so the uncompressed footprint is used for the decision. + """ + import math + + import numpy as np + import tifffile + + # All-zeros data compresses to near-nothing with LZW: small, quick write. + # Shape gives ~288 MB uncompressed. We mock available RAM to 500 MB so + # that 30% = 150 MB < 288 MB, which forces dask regardless of threshold. + # Without the uncompressed_bytes fix, disk_size (~KB) would be used and + # the tiny file would be loaded eagerly. + shape = (200, 600, 600) + + path = tmp_path / 'big_uncompressed.tif' + tifffile.imwrite( + str(path), np.zeros(shape, dtype=np.int32), compression='lzw' + ) + + disk_size = path.stat().st_size + uncompressed = math.prod(shape) * np.dtype(np.int32).itemsize + assert disk_size < uncompressed // 100, ( + 'test precondition: compressed file must be tiny vs uncompressed' + ) + + import dask.array as da + + # Mock RAM so the memory-fraction check forces dask (288 MB > 30% of 500 MB). + # This isolates the test from machine memory and makes it deterministic. + with mock.patch( + 'psutil.virtual_memory', return_value=mock.Mock(available=int(500e6)) + ): + img = nImage(path) + + assert isinstance(img.reference_xarray.data, da.Array), ( + f'Expected dask array, got {type(img.reference_xarray.data)}' + ) + + tuples = img.get_layer_data_tuples() + assert len(tuples) == 1 + data_out, _, _ = tuples[0] + assert isinstance(data_out, list) + assert isinstance(data_out[0], da.Array), ( + f'Expected dask array in layer tuple, got {type(data_out[0])}' + ) + + +def test_labels_detected_from_filename(tmp_path: Path): + """Regression: a TIFF file whose channel name is a generic '0' but whose + filename contains a label keyword (e.g. 'cells_mask.tif') should be + returned as a 'labels' layer, not 'image'. + + Previously only the channel name was checked; now the filename stem is + used as a fallback when the channel name provides no signal. + """ + import numpy as np + import tifffile + + # Single-channel int32 TIFF — channel name will be '0' (no label keyword) + data = np.random.randint(0, 10, (10, 10), dtype=np.int32) + path = tmp_path / 'cells_mask.tif' + tifffile.imwrite(str(path), data) + + img = nImage(path) + # Verify the channel name is generic (no label keyword) + channel_name = img.channel_names[0] + from ndevio.utils._layer_utils import CHANNEL_LABEL_KEYWORDS + + assert not any( + kw in channel_name.lower() for kw in CHANNEL_LABEL_KEYWORDS + ), f'Channel name {channel_name!r} unexpectedly contains a label keyword' + + tuples = img.get_layer_data_tuples() + assert len(tuples) == 1 + _, _, layer_type = tuples[0] + assert layer_type == 'labels', ( + f"Expected 'labels' from filename 'cells_mask.tif', got {layer_type!r}" + ) + + +def test_non_label_filename_stays_image(tmp_path: Path): + """Counter-test: a TIFF named 'raw_image.tif' with generic channel name + should remain an 'image' layer, not be promoted to 'labels'. + """ + import numpy as np + import tifffile + + data = np.zeros((10, 10), dtype=np.uint16) + path = tmp_path / 'raw_image.tif' + tifffile.imwrite(str(path), data) + + img = nImage(path) + tuples = img.get_layer_data_tuples() + assert len(tuples) == 1 + _, _, layer_type = tuples[0] + assert layer_type == 'image' + + +def test_dask_chunks_are_per_plane(tmp_path: Path): + """Verify that dask-loaded TIFFs have per-Z-plane chunks (not the whole volume). + + bioio-base's DEFAULT_CHUNK_DIMS = ["Z","Y","X"] creates one dask task per + (T,C) pair — every Z-slice decompresses the full ZYX volume. nImage + overrides this with chunk_dims=["Y","X"] so each task is a single page. + + For a (Z=8, Y=64, X=64) file the resulting dask array should have chunks + (1, 64, 64), not (8, 64, 64). + """ + import dask.array as da + import numpy as np + import tifffile + + # 100 planes × 64 × 64 × uint16 = 800 KB uncompressed. + # With 1 MB available, 30 % = 300 KB < 800 KB → forced to dask. + shape = (100, 64, 64) # Z, Y, X + path = tmp_path / 'zyx_chunk_test.tiff' + tifffile.imwrite(str(path), np.zeros(shape, dtype=np.uint16)) + + # Force dask: mock available RAM so the memory-fraction check triggers. + with mock.patch( + 'psutil.virtual_memory', return_value=mock.Mock(available=int(1e6)) + ): + img = nImage(path) + tuples = img.get_layer_data_tuples() + + data_out, _, _ = tuples[0] + # layer_data is always a list (multiscale-compatible); [0] is level 0. + arr = data_out[0] + assert isinstance(arr, da.Array), f'Expected dask array, got {type(arr)}' + + z_chunk, y_chunk, x_chunk = arr.chunksize + assert z_chunk == 1, ( + f'Expected Z-chunk=1 (per-plane), got {z_chunk}. ' + 'chunk_dims override to ["Y","X"] may not be working.' + ) + assert y_chunk == 64 + assert x_chunk == 64 diff --git a/tests/test_utils/test_layer_utils.py b/tests/test_utils/test_layer_utils.py index 86710e3..1bba334 100644 --- a/tests/test_utils/test_layer_utils.py +++ b/tests/test_utils/test_layer_utils.py @@ -2,38 +2,6 @@ from __future__ import annotations -from unittest import mock - - -class TestInferLayerType: - """Tests for infer_layer_type function.""" - - def test_label_keyword_returns_labels(self): - """Test that label keywords are detected.""" - from ndevio.utils._layer_utils import infer_layer_type - - assert infer_layer_type('nuclei_mask') == 'labels' - assert infer_layer_type('cell_labels') == 'labels' - assert infer_layer_type('segmentation') == 'labels' - assert infer_layer_type('SEG_channel') == 'labels' - assert infer_layer_type('roi_data') == 'labels' - - def test_non_label_returns_image(self): - """Test that non-label names return image.""" - from ndevio.utils._layer_utils import infer_layer_type - - assert infer_layer_type('DAPI') == 'image' - assert infer_layer_type('GFP') == 'image' - assert infer_layer_type('membrane') == 'image' - - def test_case_insensitive(self): - """Test that detection is case-insensitive.""" - from ndevio.utils._layer_utils import infer_layer_type - - assert infer_layer_type('MASK') == 'labels' - assert infer_layer_type('Label') == 'labels' - assert infer_layer_type('SEGMENTATION') == 'labels' - class TestResolveLayerType: """Tests for resolve_layer_type function.""" @@ -43,9 +11,9 @@ def test_global_override_takes_precedence(self): from ndevio.utils._layer_utils import resolve_layer_type result = resolve_layer_type( - 'nuclei_mask', # Would auto-detect to labels global_override='surface', channel_types={'nuclei_mask': 'image'}, + channel_name='nuclei_mask', # Would auto-detect to labels ) assert result == 'surface' @@ -54,9 +22,9 @@ def test_channel_types_used_when_no_global(self): from ndevio.utils._layer_utils import resolve_layer_type result = resolve_layer_type( - 'nuclei_mask', global_override=None, channel_types={'nuclei_mask': 'points'}, + channel_name='nuclei_mask', ) assert result == 'points' @@ -65,49 +33,69 @@ def test_auto_detect_when_no_overrides(self): from ndevio.utils._layer_utils import resolve_layer_type assert ( - resolve_layer_type('nuclei_mask', None, None) == 'labels' + resolve_layer_type(channel_name='nuclei_mask') == 'labels' + ) # Auto-detect + assert ( + resolve_layer_type(channel_name='DAPI') == 'image' ) # Auto-detect - assert resolve_layer_type('DAPI', None, None) == 'image' # Auto-detect + def test_auto_detect_is_case_insensitive(self): + """Channel-name keyword matching should ignore case.""" + from ndevio.utils._layer_utils import resolve_layer_type -class TestDetermineInMemory: - """Tests for determine_in_memory function.""" + assert resolve_layer_type(channel_name='MASK') == 'labels' + assert resolve_layer_type(channel_name='Label') == 'labels' + assert resolve_layer_type(channel_name='SEGMENTATION') == 'labels' - def test_none_path_returns_true(self): - """Test that None path (array data) returns True.""" - from ndevio.utils._layer_utils import determine_in_memory + def test_path_stem_fallback_detects_labels(self): + """Regression: file named 'cells_mask.tif' with generic channel name + '0' should be detected as 'labels' via the path_stem fallback. + """ + from ndevio.utils._layer_utils import resolve_layer_type - assert determine_in_memory(None) is True + assert ( + resolve_layer_type(channel_name='0', path_stem='cells_mask') + == 'labels' + ) + assert ( + resolve_layer_type( + channel_name='Channel 0', path_stem='nuclei_labels' + ) + == 'labels' + ) + assert ( + resolve_layer_type( + channel_name='', path_stem='segmentation_output' + ) + == 'labels' + ) + assert resolve_layer_type(channel_name='', path_stem='raw') == 'image' - def test_small_file_returns_true(self, tmp_path): - """Test that small files are loaded in memory.""" - from ndevio.utils._layer_utils import determine_in_memory + def test_path_stem_not_checked_when_channel_triggers_detection(self): + """Channel-name detection is unaffected by a non-label path_stem.""" + from ndevio.utils._layer_utils import resolve_layer_type - small_file = tmp_path / 'small.txt' - small_file.write_text('x' * 100) + assert ( + resolve_layer_type(channel_name='nuclei_mask', path_stem='raw') + == 'labels' + ) - with mock.patch( - 'psutil.virtual_memory', return_value=mock.Mock(available=1e10) - ): - assert determine_in_memory(small_file) is True + def test_path_stem_nonlabel_image_result(self): + """Neither channel nor path_stem contains label keyword → 'image'.""" + from ndevio.utils._layer_utils import resolve_layer_type - def test_large_file_returns_false(self, tmp_path): - """Test that large files are loaded as dask.""" - from ndevio.utils._layer_utils import determine_in_memory + assert ( + resolve_layer_type(channel_name='DAPI', path_stem='raw_image') + == 'image' + ) - large_file = tmp_path / 'large.txt' - large_file.write_text('x') + def test_path_stem_none_channel_nonlabel_returns_image(self): + """path_stem=None with non-label channel should still return 'image'.""" + from ndevio.utils._layer_utils import resolve_layer_type - with ( - mock.patch( - 'psutil.virtual_memory', return_value=mock.Mock(available=1e9) - ), - mock.patch( - 'bioio_base.io.pathlike_to_fs', - return_value=(mock.Mock(size=lambda x: 5e9), ''), - ), - ): - assert determine_in_memory(large_file) is False + assert ( + resolve_layer_type(channel_name='DAPI', path_stem=None) == 'image' + ) class TestBuildLayerTuple: