From f8f5ce16e49477450a7d4a3284d4cba9b51a085d Mon Sep 17 00:00:00 2001 From: Martin Schorb Date: Thu, 8 Feb 2024 16:44:26 +0100 Subject: [PATCH 1/8] enable slicing functionality through ROI --- .gitignore | 1 + mobie/import_data/image.py | 26 ++++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index d4f21a5..6a628b5 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ tmp*/ *.n5 *.h5 .idea/ +build/ \ No newline at end of file diff --git a/mobie/import_data/image.py b/mobie/import_data/image.py index c77b2e0..6b53e8f 100644 --- a/mobie/import_data/image.py +++ b/mobie/import_data/image.py @@ -7,7 +7,9 @@ def import_image_data(in_path, in_key, out_path, tmp_folder=None, target="local", max_jobs=mp.cpu_count(), block_shape=None, unit="micrometer", source_name=None, file_format="bdv.n5", - int_to_uint=False, channel=None): + int_to_uint=False, channel=None, + selected_input_channel=None, + roi_begin=None, roi_end=None): """ Import image data to mobie format. Arguments: @@ -28,13 +30,33 @@ def import_image_data(in_path, in_key, out_path, int_to_uint [bool] - whether to convert signed to unsigned integer (default: False) channel [int] - the channel to load from the data. Currently only supported for the ome.zarr format (default: None) + selected_input_channel [list[int]] - A single channel (idx) to be added. If channel is not axis 0: [dim, idx] + roi_begin [list[int]] - Start of ROI to be extracted + roi_end [list[int]] - End of ROI to be extracted """ + + + if len(selected_input_channel) < 2: + # if only one element, we assume relevant image stack dimension is 0 (like channel for multi-channel tifs). 
+ selected_input_channel = [0, selected_input_channel[0]] + elif len(selected_input_channel) > 2: + raise ValueError("Only single channel selection possible.") + # + # if type(self.input_key) in [tuple, list]: + # newshape = list(shape) + # _unused_ = newshape.pop(self.input_key[1]) + # shape = tuple(newshape) + + # we allow 2d data for ome.zarr file format if file_format != "ome.zarr": in_path, in_key = ensure_volume(in_path, in_key, tmp_folder, chunks) + downscale(in_path, in_key, out_path, resolution, scale_factors, chunks, tmp_folder, target, max_jobs, block_shape, library="skimage", unit=unit, source_name=source_name, - metadata_format=file_format, int_to_uint=int_to_uint, + metadata_format=file_format, + roi_begin=roi_begin, roi_end=roi_end, + int_to_uint=int_to_uint, channel=channel) From 04a01cd7e3c93aabbd48e6a56b0567500dca67ff Mon Sep 17 00:00:00 2001 From: Martin Schorb Date: Thu, 8 Feb 2024 17:35:33 +0100 Subject: [PATCH 2/8] implement slicing functionality through ROI --- mobie/import_data/image.py | 41 ++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/mobie/import_data/image.py b/mobie/import_data/image.py index 6b53e8f..6cb7026 100644 --- a/mobie/import_data/image.py +++ b/mobie/import_data/image.py @@ -1,4 +1,5 @@ import multiprocessing as mp +from elf.io import open_file from .utils import downscale, ensure_volume @@ -35,22 +36,36 @@ def import_image_data(in_path, in_key, out_path, roi_end [list[int]] - End of ROI to be extracted """ - - if len(selected_input_channel) < 2: - # if only one element, we assume relevant image stack dimension is 0 (like channel for multi-channel tifs). 
- selected_input_channel = [0, selected_input_channel[0]] - elif len(selected_input_channel) > 2: - raise ValueError("Only single channel selection possible.") - # - # if type(self.input_key) in [tuple, list]: - # newshape = list(shape) - # _unused_ = newshape.pop(self.input_key[1]) - # shape = tuple(newshape) - - # we allow 2d data for ome.zarr file format if file_format != "ome.zarr": in_path, in_key = ensure_volume(in_path, in_key, tmp_folder, chunks) + if not all((selected_input_channel is None, roi_begin is None, roi_end is None)): + raise NotImplementedError("Selection of sub-arrays only possible with OME-Zarr output.") + + if selected_input_channel: + if len(selected_input_channel) < 2: + # if only one element, we assume relevant image stack dimension is 0 (like channel for multi-channel tifs). + selected_input_channel = [0, selected_input_channel[0]] + elif len(selected_input_channel) > 2: + raise ValueError("Only single channel selection possible.") + + with open_file(in_path, mode="r") as f: + shape = f[in_key].shape + newshape = list(shape) + _unused_ = newshape.pop(selected_input_channel[1]) + + roi_begin = [0] * len(shape) + roi_end = list(shape) + + if selected_input_channel[0] > len(shape) - 1: + raise ValueError("Wrong channel dimension.") + + if selected_input_channel[1] > shape[selected_input_channel[0]] - 1: + raise ValueError("Channel index exceeds axis length.") + + roi_begin[selected_input_channel[0]] = selected_input_channel[1] + roi_end[selected_input_channel[0]] = selected_input_channel[1] + 1 + downscale(in_path, in_key, out_path, resolution, scale_factors, chunks, From 9b22618806693b63b71c9b0b399cae4909ece745 Mon Sep 17 00:00:00 2001 From: Martin Schorb Date: Fri, 9 Feb 2024 14:41:33 +0100 Subject: [PATCH 3/8] forward input channel and roi to `add_image` --- mobie/image_data.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/mobie/image_data.py b/mobie/image_data.py index 6eaba1c..95b7a64 100644 --- 
a/mobie/image_data.py +++ b/mobie/image_data.py @@ -168,7 +168,9 @@ def add_image(input_path, input_key, move_only=False, int_to_uint=False, channel=None, - skip_add_to_dataset=False): + skip_add_to_dataset=False, + selected_input_channel=None, + roi_begin=None, roi_end=None): """ Add an image source to a MoBIE dataset. Will create the dataset if it does not exist. @@ -205,6 +207,9 @@ def add_image(input_path, input_key, This should be used when calling `add_image` in parallel in order to avoid writing to dataset.json in parallel, which can cause issues. In this case the source needs to be added later , which can be done by calling this function again. (default: False) + selected_input_channel [list[int]] - A single channel (idx) to be added. If channel is not axis 0: [dim, idx] + roi_begin [list[int]] - Start of ROI to be extracted + roi_end [list[int]] - End of ROI to be extracted """ # TODO add 'setup_id' to the json schema for bdv formats to also support it there if channel is not None and file_format != "ome.zarr": @@ -247,7 +252,10 @@ def add_image(input_path, input_key, source_name=image_name, file_format=file_format, int_to_uint=int_to_uint, - channel=channel) + channel=channel, + selected_input_channel=selected_input_channel, + roi_begin=roi_begin, roi_end=roi_end + ) if transformation is not None: utils.update_transformation_parameter(image_metadata_path, transformation, file_format) From abba28e64bff85beacd466f817a1dc3550aa7159 Mon Sep 17 00:00:00 2001 From: Martin Schorb Date: Fri, 9 Feb 2024 15:47:50 +0100 Subject: [PATCH 4/8] implement test up to luigi task --- mobie/import_data/image.py | 6 ++++-- mobie/import_data/utils.py | 10 ++++++++-- test/test_image_data.py | 14 ++++++++++++++ 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/mobie/import_data/image.py b/mobie/import_data/image.py index 6cb7026..a4f60a1 100644 --- a/mobie/import_data/image.py +++ b/mobie/import_data/image.py @@ -43,7 +43,9 @@ def import_image_data(in_path, in_key, 
out_path, raise NotImplementedError("Selection of sub-arrays only possible with OME-Zarr output.") if selected_input_channel: - if len(selected_input_channel) < 2: + if type(selected_input_channel) is int: + selected_input_channel = [0, selected_input_channel] + elif len(selected_input_channel) < 2: # if only one element, we assume relevant image stack dimension is 0 (like channel for multi-channel tifs). selected_input_channel = [0, selected_input_channel[0]] elif len(selected_input_channel) > 2: @@ -52,7 +54,7 @@ def import_image_data(in_path, in_key, out_path, with open_file(in_path, mode="r") as f: shape = f[in_key].shape newshape = list(shape) - _unused_ = newshape.pop(selected_input_channel[1]) + _unused_ = newshape.pop(selected_input_channel[0]) roi_begin = [0] * len(shape) roi_end = list(shape) diff --git a/mobie/import_data/utils.py b/mobie/import_data/utils.py index 8c12180..28ab0f3 100644 --- a/mobie/import_data/utils.py +++ b/mobie/import_data/utils.py @@ -1,5 +1,6 @@ import json import os +import numpy as np import luigi import nifty.distributed as ndist @@ -47,12 +48,17 @@ def compute_node_labels(seg_path, seg_key, return data -def check_input_data(in_path, in_key, resolution, require3d, channel): +def check_input_data(in_path, in_key, resolution, require3d, channel, roi_begin=None, roi_end=None): # TODO to support data with channel, we need to support downscaling with channels if channel is not None: raise NotImplementedError with open_file(in_path, "r") as f: ndim = f[in_key].ndim + if any((roi_begin, roi_end)): + # reduce singleton dimensons + if any(np.array(roi_end) - np.array(roi_begin) == 1): + ndim = ndim - np.sum(np.array(roi_end) - np.array(roi_begin) == 1) + if require3d and ndim != 3: raise ValueError(f"Expect 3d data, got ndim={ndim}") if len(resolution) != ndim: @@ -73,7 +79,7 @@ def downscale(in_path, in_key, out_path, config_dir = os.path.join(tmp_folder, "configs") # ome.zarr can also be written in 2d, all other formats require 3d 
require3d = metadata_format != "ome.zarr" - check_input_data(in_path, in_key, resolution, require3d, channel) + check_input_data(in_path, in_key, resolution, require3d, channel, roi_begin=roi_begin, roi_end=roi_end) write_global_config(config_dir, block_shape=block_shape, require3d=require3d, roi_begin=roi_begin, roi_end=roi_end) diff --git a/test/test_image_data.py b/test/test_image_data.py index 7904c23..c12889e 100644 --- a/test/test_image_data.py +++ b/test/test_image_data.py @@ -314,6 +314,20 @@ def test_skip_metadata(self): self.check_data(os.path.join(self.root, self.dataset_name), im_name) + def test_input_channel(self): + path1 = os.path.join(self.test_folder, '3ch.h5') + key = 'data' + self.make_hdf5_data(path1, key, shape=(3,128,128)) + + mobie.add_image(path1, key, self.root, self.dataset_name, '3ch_test', + resolution=(1, 1), scale_factors=[[2,2]], + chunks=(64, 64), tmp_folder=self.tmp_folder, + file_format='ome.zarr', + target="local", max_jobs=self.max_jobs, selected_input_channel=1) + + + pass + # # data validation # From 7187a64d08f0e7a6dbc807e05766fe2acd172989 Mon Sep 17 00:00:00 2001 From: Martin Schorb Date: Wed, 14 Feb 2024 14:26:51 +0100 Subject: [PATCH 5/8] remove singleton dimension squeezing --- mobie/import_data/image.py | 2 -- mobie/import_data/utils.py | 4 ---- 2 files changed, 6 deletions(-) diff --git a/mobie/import_data/image.py b/mobie/import_data/image.py index a4f60a1..45e9eaf 100644 --- a/mobie/import_data/image.py +++ b/mobie/import_data/image.py @@ -53,8 +53,6 @@ def import_image_data(in_path, in_key, out_path, with open_file(in_path, mode="r") as f: shape = f[in_key].shape - newshape = list(shape) - _unused_ = newshape.pop(selected_input_channel[0]) roi_begin = [0] * len(shape) roi_end = list(shape) diff --git a/mobie/import_data/utils.py b/mobie/import_data/utils.py index 28ab0f3..f780b5e 100644 --- a/mobie/import_data/utils.py +++ b/mobie/import_data/utils.py @@ -54,10 +54,6 @@ def check_input_data(in_path, in_key, 
resolution, require3d, channel, roi_begin= raise NotImplementedError with open_file(in_path, "r") as f: ndim = f[in_key].ndim - if any((roi_begin, roi_end)): - # reduce singleton dimensons - if any(np.array(roi_end) - np.array(roi_begin) == 1): - ndim = ndim - np.sum(np.array(roi_end) - np.array(roi_begin) == 1) if require3d and ndim != 3: raise ValueError(f"Expect 3d data, got ndim={ndim}") From 3e099e4d1a84bab9988168face2bf907fd72b9d3 Mon Sep 17 00:00:00 2001 From: Martin Schorb Date: Wed, 14 Feb 2024 14:27:07 +0100 Subject: [PATCH 6/8] remove singleton dimension squeezing --- test/test_image_data.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test/test_image_data.py b/test/test_image_data.py index c12889e..892cd46 100644 --- a/test/test_image_data.py +++ b/test/test_image_data.py @@ -319,13 +319,16 @@ def test_input_channel(self): key = 'data' self.make_hdf5_data(path1, key, shape=(3,128,128)) + # check integer channel mobie.add_image(path1, key, self.root, self.dataset_name, '3ch_test', - resolution=(1, 1), scale_factors=[[2,2]], - chunks=(64, 64), tmp_folder=self.tmp_folder, + resolution=(1, 1, 1), scale_factors=[[2, 2, 2]], + chunks=(1, 64, 64), tmp_folder=self.tmp_folder, file_format='ome.zarr', target="local", max_jobs=self.max_jobs, selected_input_channel=1) + + pass # From 8848c64af5c717af31b396d75b501256ee3d768a Mon Sep 17 00:00:00 2001 From: Martin Schorb Date: Wed, 14 Feb 2024 17:47:32 +0100 Subject: [PATCH 7/8] implement fit_to_roi --- mobie/import_data/image.py | 6 +++++- mobie/import_data/utils.py | 4 ++-- mobie/utils.py | 4 ++++ test/test_image_data.py | 15 ++++++++++----- 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/mobie/import_data/image.py b/mobie/import_data/image.py index 2746d4b..5240ede 100644 --- a/mobie/import_data/image.py +++ b/mobie/import_data/image.py @@ -42,6 +42,8 @@ def import_image_data(in_path, in_key, out_path, if not all((selected_input_channel is None, roi_begin is None, roi_end 
is None)): raise NotImplementedError("Selection of sub-arrays only possible with OME-Zarr output.") + fit_to_roi = False + if selected_input_channel: if type(selected_input_channel) is int: selected_input_channel = [0, selected_input_channel] @@ -66,6 +68,8 @@ def import_image_data(in_path, in_key, out_path, roi_begin[selected_input_channel[0]] = selected_input_channel[1] roi_end[selected_input_channel[0]] = selected_input_channel[1] + 1 + if any((roi_begin is not None, roi_end is not None)): + fit_to_roi = True downscale(in_path, in_key, out_path, resolution, scale_factors, chunks, @@ -73,5 +77,5 @@ def import_image_data(in_path, in_key, out_path, library="skimage", unit=unit, source_name=source_name, metadata_format=file_format, roi_begin=roi_begin, roi_end=roi_end, - int_to_uint=int_to_uint, + int_to_uint=int_to_uint, fit_to_roi=fit_to_roi, channel=channel) diff --git a/mobie/import_data/utils.py b/mobie/import_data/utils.py index ac6d6d0..3faeab5 100644 --- a/mobie/import_data/utils.py +++ b/mobie/import_data/utils.py @@ -67,7 +67,7 @@ def downscale(in_path, in_key, out_path, library="vigra", library_kwargs=None, metadata_format="ome.zarr", out_key="", unit="micrometer", source_name=None, - roi_begin=None, roi_end=None, + roi_begin=None, roi_end=None, fit_to_roi=False, int_to_uint=False, channel=None): task = DownscalingWorkflow @@ -77,7 +77,7 @@ def downscale(in_path, in_key, out_path, require3d = metadata_format != "ome.zarr" check_input_data(in_path, in_key, resolution, require3d, channel, roi_begin=roi_begin, roi_end=roi_end) write_global_config(config_dir, block_shape=block_shape, require3d=require3d, - roi_begin=roi_begin, roi_end=roi_end) + roi_begin=roi_begin, roi_end=roi_end, fit_to_roi=fit_to_roi) configs = DownscalingWorkflow.get_config() conf = configs["copy_volume"] diff --git a/mobie/utils.py b/mobie/utils.py index 8fe4598..2084352 100644 --- a/mobie/utils.py +++ b/mobie/utils.py @@ -220,6 +220,7 @@ def write_global_config(config_folder, 
block_shape=None, roi_begin=None, roi_end=None, + fit_to_roi=False, qos=None, require3d=True): os.makedirs(config_folder, exist_ok=True) @@ -248,6 +249,9 @@ def write_global_config(config_folder, raise ValueError(f"Invalid roi_end given: {roi_end}") global_config["roi_end"] = roi_end + if fit_to_roi: + global_config["fit_to_roi"] = True + if qos is not None: global_config["qos"] = qos diff --git a/test/test_image_data.py b/test/test_image_data.py index 5b61fac..4c0c33a 100644 --- a/test/test_image_data.py +++ b/test/test_image_data.py @@ -234,6 +234,7 @@ def test_cli(self): self.check_data(dataset_folder, im_name) # 2D + @unittest.skipIf(platform == "win32", "CLI does not work on windows") def test_cli_2D(self): @@ -267,7 +268,6 @@ def test_cli_2D(self): exp_data = imageio.imread(in_path) - dataset_folder = os.path.join(self.root, dataset_name) self.check_data(dataset_folder, im_name, exp_data=exp_data) @@ -351,23 +351,28 @@ def test_skip_metadata(self): self.check_data(os.path.join(self.root, self.dataset_name), im_name) - def test_input_channel(self): path1 = os.path.join(self.test_folder, '3ch.h5') key = 'data' - self.make_hdf5_data(path1, key, shape=(3,128,128)) + self.make_hdf5_data(path1, key, shape=(3, 128, 128)) + + with open_file(path1, mode="r") as f: + im = f[key][:] + + im_name = '3ch_test_int1' # check integer channel - mobie.add_image(path1, key, self.root, self.dataset_name, '3ch_test', + mobie.add_image(path1, key, self.root, self.dataset_name, im_name, resolution=(1, 1, 1), scale_factors=[[2, 2, 2]], chunks=(1, 64, 64), tmp_folder=self.tmp_folder, file_format='ome.zarr', target="local", max_jobs=self.max_jobs, selected_input_channel=1) + test_data = im[1,:,:] + self.check_data(os.path.join(self.root, self.dataset_name), im_name, exp_data=test_data) - pass # # data validation From 983b4ebe940324a8823446f3606f8dc8ce0faba6 Mon Sep 17 00:00:00 2001 From: Martin Schorb Date: Wed, 14 Feb 2024 18:29:42 +0100 Subject: [PATCH 8/8] add more tests --- 
test/test_image_data.py | 69 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 3 deletions(-) diff --git a/test/test_image_data.py b/test/test_image_data.py index 4c0c33a..5f35582 100644 --- a/test/test_image_data.py +++ b/test/test_image_data.py @@ -359,19 +359,82 @@ def test_input_channel(self): with open_file(path1, mode="r") as f: im = f[key][:] - im_name = '3ch_test_int1' + # test wrong channel input + im_name = 'channel_error' + for in_channel in ([1, 2, 3], [4, 0], [0, 4]): + with self.assertRaises(ValueError): + mobie.add_image(path1, key, self.root, self.dataset_name, im_name, + resolution=(1, 1, 1), scale_factors=[[2, 2, 2]], + chunks=(1, 64, 64), tmp_folder=self.tmp_folder, + file_format='ome.zarr', + target="local", max_jobs=self.max_jobs, selected_input_channel=in_channel) # check integer channel + for chidx, in_channel in enumerate([1,[1]]): + im_name = '3ch_test_int_' + str(chidx) + mobie.add_image(path1, key, self.root, self.dataset_name, im_name, + resolution=(1, 1, 1), scale_factors=[[2, 2, 2]], + chunks=(1, 64, 64), tmp_folder=self.tmp_folder, + file_format='ome.zarr', + target="local", max_jobs=self.max_jobs, selected_input_channel=in_channel) + test_data = im[1, :, :] + + self.check_data(os.path.join(self.root, self.dataset_name), im_name, exp_data=test_data) + + # check channel as list + im_name = '3ch_test_list' + mobie.add_image(path1, key, self.root, self.dataset_name, im_name, + resolution=(1, 1, 1), scale_factors=[[2, 2, 2]], + chunks=(1, 1, 64), tmp_folder=self.tmp_folder, + file_format='ome.zarr', + target="local", max_jobs=self.max_jobs, selected_input_channel=[1, 14]) + test_data = im[:, 14, :] + + self.check_data(os.path.join(self.root, self.dataset_name), im_name, exp_data=test_data) + + def test_input_roi(self): + path1 = os.path.join(self.test_folder, '3ch.h5') + key = 'data' + inshape=(123, 124, 125) + + self.make_hdf5_data(path1, key, shape=inshape) + + roi_vals = 
np.floor(np.random.random((2,3))*(np.array(inshape)-1)).astype(int) + + roi_begin = np.min(roi_vals, axis=0) + roi_end = np.max(roi_vals, axis=0) + + for idx in range(3): + if roi_begin[idx] == roi_end[idx]: + roi_end[idx] += 1 + + with open_file(path1, mode="r") as f: + im = f[key][:] + + # check integer channel + im_name = 'roi_test' mobie.add_image(path1, key, self.root, self.dataset_name, im_name, resolution=(1, 1, 1), scale_factors=[[2, 2, 2]], chunks=(1, 64, 64), tmp_folder=self.tmp_folder, file_format='ome.zarr', - target="local", max_jobs=self.max_jobs, selected_input_channel=1) - test_data = im[1,:,:] + roi_begin=roi_begin, roi_end=roi_end, + target="local", max_jobs=self.max_jobs, + ) + test_data = im[roi_begin[0]:roi_end[0], roi_begin[1]:roi_end[1], roi_begin[2]:roi_end[2]] + self.check_data(os.path.join(self.root, self.dataset_name), im_name, exp_data=test_data) + # check channel as list + im_name = '3ch_test_list' + mobie.add_image(path1, key, self.root, self.dataset_name, im_name, + resolution=(1, 1, 1), scale_factors=[[2, 2, 2]], + chunks=(1, 1, 64), tmp_folder=self.tmp_folder, + file_format='ome.zarr', + target="local", max_jobs=self.max_jobs, selected_input_channel=[1, 14]) + test_data = im[:, 14, :] + self.check_data(os.path.join(self.root, self.dataset_name), im_name, exp_data=test_data) #