Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 18 additions & 9 deletions mobie/image_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,18 @@
import mobie.utils as utils
import numpy as np
import pybdv.metadata as bdv_metadata
import tifffile
from elf.io import open_file
from mobie.import_data import import_image_data
from pybdv.util import absolute_to_relative_scale_factors, get_key, get_scale_factors


def _get_default_contrast_limits(input_path, input_key, int_to_uint):
with open_file(input_path, "r") as f:
dtype = f[input_key].dtype
def _get_default_contrast_limits(input_path, input_key, int_to_uint, use_memmap=False):
if use_memmap:
dtype = tifffile.memmap(input_path).dtype
else:
with open_file(input_path, "r") as f:
dtype = f[input_key].dtype

if np.issubdtype(dtype, np.integer):
if int_to_uint:
Expand Down Expand Up @@ -187,8 +191,9 @@ def add_image(input_path, input_key,
move_only=False,
int_to_uint=False,
channel=None,
skip_add_to_dataset=False):
""" Add an image source to a MoBIE dataset.
skip_add_to_dataset=False,
use_memmap=False):
"""Add an image source to a MoBIE dataset.

Will create the dataset if it does not exist.

Expand Down Expand Up @@ -222,8 +227,11 @@ def add_image(input_path, input_key,
Currently only supported for the ome.zarr format (default: None)
skip_add_to_dataset [bool] - Skip adding the source to the dataset after converting the image data.
This should be used when calling `add_image` in parallel in order to avoid
writing to dataset.json in parallel, which can cause issues. In this case the source needs to be added later
, which can be done by calling this function again. (default: False)
writing to dataset.json in parallel, which can cause issues.
In this case the source needs to be added later, e.g. by calling this function again. (default: False)
use_memmap [bool] - Whether to use memmap for loading the input data.
This option is only supported for inputs in tif file format that can be loaded via `tifffile.memmap`.
This does not work for images that are compressed or have an otherwise non-standard format. (default: False)
"""
# TODO add 'setup_id' to the json schema for bdv formats to also support it there
if channel is not None and file_format != "ome.zarr":
Expand All @@ -236,7 +244,7 @@ def add_image(input_path, input_key,
# set default contrast_limits if we don't have a view
# or if the passed view doesn't have contrast limits
if view is None or "contrastLimits" not in view.get("sourceDisplays", [{}])[0].get("imageDisplay", {}):
contrast_limits = _get_default_contrast_limits(input_path, input_key, int_to_uint)
contrast_limits = _get_default_contrast_limits(input_path, input_key, int_to_uint, use_memmap=use_memmap)
else:
contrast_limits = None
view = utils.require_dataset_and_view(root, dataset_name, file_format,
Expand Down Expand Up @@ -266,7 +274,8 @@ def add_image(input_path, input_key,
source_name=image_name,
file_format=file_format,
int_to_uint=int_to_uint,
channel=channel)
channel=channel,
use_memmap=use_memmap)

if transformation is not None:
utils.update_transformation_parameter(image_metadata_path, transformation, file_format)
Expand Down
6 changes: 4 additions & 2 deletions mobie/import_data/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ def import_image_data(in_path, in_key, out_path,
tmp_folder=None, target="local", max_jobs=mp.cpu_count(),
block_shape=None, unit="micrometer",
source_name=None, file_format="ome.zarr",
int_to_uint=False, channel=None):
int_to_uint=False, channel=None,
use_memmap=False):
""" Import image data to mobie format.

Arguments:
Expand All @@ -28,6 +29,7 @@ def import_image_data(in_path, in_key, out_path,
int_to_uint [bool] - whether to convert signed to unsigned integer (default: False)
channel [int] - the channel to load from the data.
Currently only supported for the ome.zarr format (default: None)
use_memmap [bool] - Whether the input is a tif file that can be memmapped. (default: False)
"""
# we allow 2d data for ome.zarr file format
if file_format != "ome.zarr":
Expand All @@ -37,4 +39,4 @@ def import_image_data(in_path, in_key, out_path,
tmp_folder, target, max_jobs, block_shape,
library="skimage", unit=unit, source_name=source_name,
metadata_format=file_format, int_to_uint=int_to_uint,
channel=channel)
channel=channel, use_memmap=use_memmap)
17 changes: 11 additions & 6 deletions mobie/import_data/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import luigi
import nifty.distributed as ndist
import tifffile

from cluster_tools.statistics import DataStatisticsWorkflow
from cluster_tools.downscaling import DownscalingWorkflow
Expand Down Expand Up @@ -47,12 +48,15 @@ def compute_node_labels(seg_path, seg_key,
return data


def check_input_data(in_path, in_key, resolution, require3d, channel):
def check_input_data(in_path, in_key, resolution, require3d, channel, use_memmap=False):
# TODO to support data with channel, we need to support downscaling with channels
if channel is not None:
raise NotImplementedError
with open_file(in_path, "r") as f:
ndim = f[in_key].ndim
if use_memmap:
ndim = tifffile.memmap(in_path).ndim
else:
with open_file(in_path, "r") as f:
ndim = f[in_key].ndim
if require3d and ndim != 3:
raise ValueError(f"Expect 3d data, got ndim={ndim}")
if len(resolution) != ndim:
Expand All @@ -66,14 +70,15 @@ def downscale(in_path, in_key, out_path,
metadata_format="ome.zarr", out_key="",
unit="micrometer", source_name=None,
roi_begin=None, roi_end=None,
int_to_uint=False, channel=None):
int_to_uint=False, channel=None,
use_memmap=False):
task = DownscalingWorkflow

block_shape = chunks if block_shape is None else block_shape
config_dir = os.path.join(tmp_folder, "configs")
# ome.zarr can also be written in 2d, all other formats require 3d
require3d = metadata_format != "ome.zarr"
check_input_data(in_path, in_key, resolution, require3d, channel)
check_input_data(in_path, in_key, resolution, require3d, channel, use_memmap)
write_global_config(config_dir, block_shape=block_shape, require3d=require3d,
roi_begin=roi_begin, roi_end=roi_end)

Expand All @@ -99,7 +104,7 @@ def downscale(in_path, in_key, out_path,
scale_factors=scale_factors, halos=halos,
metadata_format=metadata_format, metadata_dict=metadata_dict,
output_path=out_path, output_key_prefix=out_key,
int_to_uint=int_to_uint)
int_to_uint=int_to_uint, use_memmap=use_memmap)
ret = luigi.build([t], local_scheduler=True)
if not ret:
raise RuntimeError("Downscaling failed")
Expand Down
19 changes: 16 additions & 3 deletions test/test_image_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from shutil import rmtree
from sys import platform

import imageio
import imageio.v3 as imageio
import mobie
import numpy as np
import h5py
Expand Down Expand Up @@ -233,7 +233,7 @@ def test_cli(self):
dataset_folder = os.path.join(self.root, self.dataset_name)
self.check_data(dataset_folder, im_name)

# 2D
# 2D
@unittest.skipIf(platform == "win32", "CLI does not work on windows")
def test_cli_2D(self):

Expand Down Expand Up @@ -267,7 +267,6 @@ def test_cli_2D(self):

exp_data = imageio.imread(in_path)


dataset_folder = os.path.join(self.root, dataset_name)
self.check_data(dataset_folder, im_name, exp_data=exp_data)

Expand Down Expand Up @@ -351,6 +350,20 @@ def test_skip_metadata(self):

self.check_data(os.path.join(self.root, self.dataset_name), im_name)

#
# test for tif input data that is read via memmap
#
def test_memmap(self):
    """Check that a tif input can be added to a dataset when loaded via memmap."""
    source_name = "test-memmap"
    input_tif = os.path.join(self.test_folder, "tif-image.tif")
    # write the test volume to tif so it can be opened with tifffile.memmap
    imageio.imwrite(input_tif, self.data)
    mobie.add_image(
        input_tif, None, self.root, self.dataset_name, source_name,
        resolution=(1, 1, 1), scale_factors=[[2, 2, 2]],
        chunks=(64, 64, 64), tmp_folder=self.tmp_folder,
        target="local", max_jobs=self.max_jobs,
        use_memmap=True,
    )
    self.check_data(os.path.join(self.root, self.dataset_name), source_name)

#
# data validation
Expand Down
Loading