diff --git a/mobie/image_data.py b/mobie/image_data.py index 105ab48..ee7f168 100644 --- a/mobie/image_data.py +++ b/mobie/image_data.py @@ -8,14 +8,18 @@ import mobie.utils as utils import numpy as np import pybdv.metadata as bdv_metadata +import tifffile from elf.io import open_file from mobie.import_data import import_image_data from pybdv.util import absolute_to_relative_scale_factors, get_key, get_scale_factors -def _get_default_contrast_limits(input_path, input_key, int_to_uint): - with open_file(input_path, "r") as f: - dtype = f[input_key].dtype +def _get_default_contrast_limits(input_path, input_key, int_to_uint, use_memmap=False): + if use_memmap: + dtype = tifffile.memmap(input_path).dtype + else: + with open_file(input_path, "r") as f: + dtype = f[input_key].dtype if np.issubdtype(dtype, np.integer): if int_to_uint: @@ -187,8 +191,9 @@ def add_image(input_path, input_key, move_only=False, int_to_uint=False, channel=None, - skip_add_to_dataset=False): - """ Add an image source to a MoBIE dataset. + skip_add_to_dataset=False, + use_memmap=False): + """Add an image source to a MoBIE dataset. Will create the dataset if it does not exist. @@ -222,8 +227,11 @@ def add_image(input_path, input_key, Currently only supported for the ome.zarr format (default: None) skip_add_to_dataset [bool] - Skip adding the source to the dataset after converting the image data. This should be used when calling `add_image` in parallel in order to avoid - writing to dataset.json in parallel, which can cause issues. In this case the source needs to be added later - , which can be done by calling this function again. (default: False) + writing to dataset.json in parallel, which can cause issues. + In this case the source needs to be added later, e.g. by calling this function again. (default: False) + use_memmap [bool] - Whether to use memmap for loading the input data. + This option is only supported for inputs in tif file format that can be loaded via `tifffile.memmap`. 
+ This does not work for images that are compressed or have an otherwise non-standard format. (default: False) """ # TODO add 'setup_id' to the json schema for bdv formats to also support it there if channel is not None and file_format != "ome.zarr": @@ -236,7 +244,7 @@ def add_image(input_path, input_key, # set default contrast_limits if we don't have a view # or if the passed view doesn't hav contrast limits if view is None or "contrastLimits" not in view.get("sourceDisplays", [{}])[0].get("imageDisplay", {}): - contrast_limits = _get_default_contrast_limits(input_path, input_key, int_to_uint) + contrast_limits = _get_default_contrast_limits(input_path, input_key, int_to_uint, use_memmap=use_memmap) else: contrast_limits = None view = utils.require_dataset_and_view(root, dataset_name, file_format, @@ -266,7 +274,8 @@ def add_image(input_path, input_key, source_name=image_name, file_format=file_format, int_to_uint=int_to_uint, - channel=channel) + channel=channel, + use_memmap=use_memmap) if transformation is not None: utils.update_transformation_parameter(image_metadata_path, transformation, file_format) diff --git a/mobie/import_data/image.py b/mobie/import_data/image.py index 4357606..7122369 100644 --- a/mobie/import_data/image.py +++ b/mobie/import_data/image.py @@ -7,7 +7,8 @@ def import_image_data(in_path, in_key, out_path, tmp_folder=None, target="local", max_jobs=mp.cpu_count(), block_shape=None, unit="micrometer", source_name=None, file_format="ome.zarr", - int_to_uint=False, channel=None): + int_to_uint=False, channel=None, + use_memmap=False): """ Import image data to mobie format. Arguments: @@ -28,6 +29,7 @@ def import_image_data(in_path, in_key, out_path, int_to_uint [bool] - whether to convert signed to unsigned integer (default: False) channel [int] - the channel to load from the data. Currently only supported for the ome.zarr format (default: None) + use_memmap [bool] - Whether the input is a tif file that can be memory-mapped. 
(default: False) """ # we allow 2d data for ome.zarr file format if file_format != "ome.zarr": @@ -37,4 +39,4 @@ def import_image_data(in_path, in_key, out_path, tmp_folder, target, max_jobs, block_shape, library="skimage", unit=unit, source_name=source_name, metadata_format=file_format, int_to_uint=int_to_uint, - channel=channel) + channel=channel, use_memmap=use_memmap) diff --git a/mobie/import_data/utils.py b/mobie/import_data/utils.py index 5d34709..4547577 100644 --- a/mobie/import_data/utils.py +++ b/mobie/import_data/utils.py @@ -3,6 +3,7 @@ import luigi import nifty.distributed as ndist +import tifffile from cluster_tools.statistics import DataStatisticsWorkflow from cluster_tools.downscaling import DownscalingWorkflow @@ -47,12 +48,15 @@ def compute_node_labels(seg_path, seg_key, return data -def check_input_data(in_path, in_key, resolution, require3d, channel): +def check_input_data(in_path, in_key, resolution, require3d, channel, use_memmap=False): # TODO to support data with channel, we need to support downscaling with channels if channel is not None: raise NotImplementedError - with open_file(in_path, "r") as f: - ndim = f[in_key].ndim + if use_memmap: + ndim = tifffile.memmap(in_path).ndim + else: + with open_file(in_path, "r") as f: + ndim = f[in_key].ndim if require3d and ndim != 3: raise ValueError(f"Expect 3d data, got ndim={ndim}") if len(resolution) != ndim: @@ -66,14 +70,15 @@ def downscale(in_path, in_key, out_path, metadata_format="ome.zarr", out_key="", unit="micrometer", source_name=None, roi_begin=None, roi_end=None, - int_to_uint=False, channel=None): + int_to_uint=False, channel=None, + use_memmap=False): task = DownscalingWorkflow block_shape = chunks if block_shape is None else block_shape config_dir = os.path.join(tmp_folder, "configs") # ome.zarr can also be written in 2d, all other formats require 3d require3d = metadata_format != "ome.zarr" - check_input_data(in_path, in_key, resolution, require3d, channel) + 
check_input_data(in_path, in_key, resolution, require3d, channel, use_memmap) write_global_config(config_dir, block_shape=block_shape, require3d=require3d, roi_begin=roi_begin, roi_end=roi_end) @@ -99,7 +104,7 @@ def downscale(in_path, in_key, out_path, scale_factors=scale_factors, halos=halos, metadata_format=metadata_format, metadata_dict=metadata_dict, output_path=out_path, output_key_prefix=out_key, - int_to_uint=int_to_uint) + int_to_uint=int_to_uint, use_memmap=use_memmap) ret = luigi.build([t], local_scheduler=True) if not ret: raise RuntimeError("Downscaling failed") diff --git a/test/test_image_data.py b/test/test_image_data.py index 3d7fe8d..02e20d1 100644 --- a/test/test_image_data.py +++ b/test/test_image_data.py @@ -6,7 +6,7 @@ from shutil import rmtree from sys import platform -import imageio +import imageio.v3 as imageio import mobie import numpy as np import h5py @@ -233,7 +233,7 @@ def test_cli(self): dataset_folder = os.path.join(self.root, self.dataset_name) self.check_data(dataset_folder, im_name) - # 2D + # 2D @unittest.skipIf(platform == "win32", "CLI does not work on windows") def test_cli_2D(self): @@ -267,7 +267,6 @@ def test_cli_2D(self): exp_data = imageio.imread(in_path) - dataset_folder = os.path.join(self.root, dataset_name) self.check_data(dataset_folder, im_name, exp_data=exp_data) @@ -351,6 +350,20 @@ def test_skip_metadata(self): self.check_data(os.path.join(self.root, self.dataset_name), im_name) + # + # test for tif input data that is read via memmap + # + def test_memmap(self): + tif_path = os.path.join(self.test_folder, "tif-image.tif") + imageio.imwrite(tif_path, self.data) + im_name = "test-memmap" + scales = [[2, 2, 2]] + mobie.add_image(tif_path, None, self.root, self.dataset_name, im_name, + resolution=(1, 1, 1), scale_factors=scales, + chunks=(64, 64, 64), tmp_folder=self.tmp_folder, + target="local", max_jobs=self.max_jobs, + use_memmap=True) + self.check_data(os.path.join(self.root, self.dataset_name), im_name) # # data 
validation