From 6b306b014a3b8a1f9daaa63464723ad8e808c55b Mon Sep 17 00:00:00 2001 From: Kevin Takasaki Date: Mon, 20 Oct 2025 11:04:44 -0700 Subject: [PATCH 1/7] deleting old commented code --- .../convert_to_n5/tiff_to_ngff.py | 43 +------------------ 1 file changed, 1 insertion(+), 42 deletions(-) diff --git a/acpreprocessing/stitching_modules/convert_to_n5/tiff_to_ngff.py b/acpreprocessing/stitching_modules/convert_to_n5/tiff_to_ngff.py index f91cecf..1f63bd4 100644 --- a/acpreprocessing/stitching_modules/convert_to_n5/tiff_to_ngff.py +++ b/acpreprocessing/stitching_modules/convert_to_n5/tiff_to_ngff.py @@ -701,48 +701,8 @@ def write_mimgfns_to_zarr( workers = concurrency - #zstore = zarr.DirectoryStore(output_n5, dimension_separator='/') f = zarr.group(output_n5) - # with zarr.open(zstore, mode='a') as f: - # mip_ds = {} - # # create groups with attributes according to omezarr spec - # if len(group_names) == 1: - # group_name = group_names[0] - # try: - # g = f.create_group(f"{group_name}") - # except KeyError: - # g = f[f"{group_name}"] - # try: - # attributes = group_attributes[0] - # except IndexError: - # print('attributes error') - - # if "pixelResolution" in attributes: - # if deskew_options: - # attributes["pixelResolution"]["dimensions"][2] /= deskew_options["deskew_stride"] - # attributes = omezarr_attrs( - # group_name, attributes["position"], attributes["pixelResolution"]["dimensions"], max_mip) - # if attributes: - # for k, v in attributes.items(): - # g.attrs[k] = v - # else: - # raise TiffToNGFFValueError("only one group name expected") - # scales = [] - - # # shuffle=Blosc.BITSHUFFLE) - # compression = Blosc(cname='zstd', clevel=1) - # for mip_lvl in range(max_mip + 1): - # mip_3dshape = mip_level_shape(mip_lvl, joined_shapes) - # ds_lvl = g.create_dataset( - # f"{mip_lvl}", - # chunks=chunk_size, - # shape=(1, 1, mip_3dshape[0], mip_3dshape[1], mip_3dshape[2]), - # compression=compression, - # dtype=dtype - # ) - # dsfactors = [int(i)**mip_lvl for i in mip_dsfactor] - # mip_ds[mip_lvl] = ds_lvl - # scales.append(dsfactors) + if len(group_names) == 1: group_name = group_names[0] if group_name in f: @@ -796,7 +756,6 @@ def write_mimgfns_to_zarr( ds_lvl = g[f"{mip_lvl}"] dsfactors = [int(i)**mip_lvl for i in mip_dsfactor] - #mip_ds[mip_lvl] = ds_lvl scales.append(dsfactors) mip_ds = {} From 1d320f3117455a2bb2f6dd835f0ec46b5a4e144d Mon Sep 17 00:00:00 2001 From: Kevin Takasaki Date: Mon, 20 Oct 2025 11:10:20 -0700 Subject: [PATCH 2/7] added shard size to schema --- .../stitching_modules/convert_to_n5/tiff_to_ngff.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/acpreprocessing/stitching_modules/convert_to_n5/tiff_to_ngff.py b/acpreprocessing/stitching_modules/convert_to_n5/tiff_to_ngff.py index 1f63bd4..aba84f7 100644 --- a/acpreprocessing/stitching_modules/convert_to_n5/tiff_to_ngff.py +++ b/acpreprocessing/stitching_modules/convert_to_n5/tiff_to_ngff.py @@ -636,7 +636,7 @@ class TiffToNGFFValueError(TiffToNGFFException, ValueError): def write_mimgfns_to_zarr( mimgfns, output_n5, group_names, group_attributes=None, max_mip=0, - mip_dsfactor=(2, 2, 2), chunk_size=(1, 1, 64, 64, 64), + mip_dsfactor=(2, 2, 2), chunk_size=(1, 1, 64, 64, 64), shard_size=(1,1,512,512,512), concurrency=10, compression="raw", dtype="uint16", lvl_to_mip_kwargs=None, interleaved_channels=1, channel=0, deskew_options=None, **kwargs): @@ -747,7 +747,7 @@ def write_mimgfns_to_zarr( ds_lvl = g.create_array( name=f"{mip_lvl}", chunks=chunk_size, - shards=(1,1,512,512,512), + shards=shard_size, shape=(1, 1, mip_3dshape[0], mip_3dshape[1], mip_3dshape[2]), compressors=compressors, dtype=dtype @@ -855,6 +855,12 @@ class TiffDirToZarrInputParameters(argschema.ArgSchema, argschema.fields.Int(), argschema.fields.Int(), argschema.fields.Int()), required=False, default=(1, 1, 64, 64, 64)) + shard_size = argschema.fields.Tuple(( + argschema.fields.Int(), + argschema.fields.Int(), + argschema.fields.Int(), + argschema.fields.Int(), + argschema.fields.Int()), required=False, default=(1, 1, 512, 512, 512)) class TiffDirToN5LegacyParameters(argschema.ArgSchema, @@ -878,6 +884,7 @@ def run(self): self.args["max_mip"], self.args["mip_dsfactor"], self.args["chunk_size"], + self.args["shard_size"], concurrency=self.args["concurrency"], compression=self.args["compression"], lvl_to_mip_kwargs=self.args["lvl_to_mip_kwargs"], From 4527ddc80864d919c8c89bd70f2595eb3cefdcba Mon Sep 17 00:00:00 2001 From: Kevin Takasaki Date: Mon, 20 Oct 2025 11:14:10 -0700 Subject: [PATCH 3/7] updating acquisition for shards and axes --- .../convert_to_n5/acquisition_dir_to_ngff.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/acpreprocessing/stitching_modules/convert_to_n5/acquisition_dir_to_ngff.py b/acpreprocessing/stitching_modules/convert_to_n5/acquisition_dir_to_ngff.py index d4e0dd8..10948c8 100644 --- a/acpreprocessing/stitching_modules/convert_to_n5/acquisition_dir_to_ngff.py +++ b/acpreprocessing/stitching_modules/convert_to_n5/acquisition_dir_to_ngff.py @@ -96,8 +96,7 @@ def acquisition_to_ngff(acquisition_dir, output, out_dir, position_concurrency=5 axesStr = acq_parameters["stage_axes"] if axesStr=="yxz": axes = (1,0,2) - ori = (-1,1,1) - + ori = (-1,1,-1) try: setup_group_attributes = [{ "pixelResolution": { @@ -171,7 +170,7 @@ class AcquisitionDirToNGFF(argschema.ArgSchemaParser): def _get_ngff_kwargs(self): ngff_keys = { "max_mip", "concurrency", "compression", - "lvl_to_mip_kwargs", "chunk_size", "mip_dsfactor", + "lvl_to_mip_kwargs", "chunk_size", "shard_size", "mip_dsfactor", "deskew_options"} return {k: self.args[k] for k in (ngff_keys & self.args.keys())} From ddb455609912ff9db671acbffd399d9018a017e0 Mon Sep 17 00:00:00 2001 From: Kevin Takasaki Date: Mon, 20 Oct 2025 12:13:55 -0700 Subject: [PATCH 4/7] more complicated setting of shard size for mip pyramid levels . fixed schema . GODDAMN TUPLE INPUT TO ARGSCHEMA WTF --- .../convert_to_n5/tiff_to_ngff.py | 45 ++++++------------- 1 file changed, 13 insertions(+), 32 deletions(-) diff --git a/acpreprocessing/stitching_modules/convert_to_n5/tiff_to_ngff.py b/acpreprocessing/stitching_modules/convert_to_n5/tiff_to_ngff.py index aba84f7..5c29e7f 100644 --- a/acpreprocessing/stitching_modules/convert_to_n5/tiff_to_ngff.py +++ b/acpreprocessing/stitching_modules/convert_to_n5/tiff_to_ngff.py @@ -6,15 +6,12 @@ import math import pathlib -#import imageio.v2 as imageio from tifffile import TiffFile from natsort import natsorted import numpy import skimage -#import z5py import zarr -from numcodecs import Blosc import argschema import acpreprocessing.utils.convert @@ -747,7 +744,7 @@ def write_mimgfns_to_zarr( ds_lvl = g.create_array( name=f"{mip_lvl}", chunks=chunk_size, - shards=shard_size, + shards=(1, 1, max(chunk_size[2],min(shard_size[2],mip_3dshape[0])), max(chunk_size[3],min(shard_size[3],mip_3dshape[1])), max(chunk_size[4],min(shard_size[4],mip_3dshape[2]))), shape=(1, 1, mip_3dshape[0], mip_3dshape[1], mip_3dshape[2]), compressors=compressors, dtype=dtype @@ -831,24 +828,6 @@ class NGFFGenerationParameters(argschema.schemas.DefaultSchema): argschema.fields.Int()), required=False, default=(2, 2, 2)) deskew_options = argschema.fields.Nested( DeskewOptions, required=False) - - -class NGFFGroupGenerationParameters(NGFFGenerationParameters): - group_names = argschema.fields.List( - argschema.fields.Str, required=True) - group_attributes = argschema.fields.List( - argschema.fields.Dict(required=False, default={}), default=[], - required=False) - - -class TiffDirToNGFFParameters(NGFFGroupGenerationParameters): - input_dir = argschema.fields.InputDir(required=True) - interleaved_channels = argschema.fields.Int(required=False, default=1) - channel = argschema.fields.Int(required=False, default=0) - - -class TiffDirToZarrInputParameters(argschema.ArgSchema, - TiffDirToNGFFParameters): chunk_size = argschema.fields.Tuple(( argschema.fields.Int(), argschema.fields.Int(), @@ -863,12 +842,18 @@ class TiffDirToZarrInputParameters(argschema.ArgSchema, argschema.fields.Int()), required=False, default=(1, 1, 512, 512, 512)) -class TiffDirToN5LegacyParameters(argschema.ArgSchema, - TiffDirToNGFFParameters): - chunk_size = argschema.fields.Tuple(( - argschema.fields.Int(), - argschema.fields.Int(), - argschema.fields.Int()), required=False, default=(64, 64, 64)) +class NGFFGroupGenerationParameters(NGFFGenerationParameters): + group_names = argschema.fields.List( + argschema.fields.Str, required=True) + group_attributes = argschema.fields.List( + argschema.fields.Dict(required=False, default={}), default=[], + required=False) + + +class TiffDirToZarrInputParameters(NGFFGroupGenerationParameters): + input_dir = argschema.fields.InputDir(required=True) + interleaved_channels = argschema.fields.Int(required=False, default=1) + channel = argschema.fields.Int(required=False, default=0) class TiffDirToZarr(argschema.ArgSchemaParser): @@ -891,10 +876,6 @@ def run(self): deskew_options=deskew_options) -class TiffDirToN5(TiffDirToZarr): - default_schema = TiffDirToN5LegacyParameters - - if __name__ == "__main__": mod = TiffDirToZarr() mod.run() From a0879c0560264b75cb21d11c8c1c03866fdc50be Mon Sep 17 00:00:00 2001 From: Kevin Takasaki <64923500+kttakasaki@users.noreply.github.com> Date: Thu, 30 Oct 2025 13:09:50 -0700 Subject: [PATCH 5/7] testing file locking issue . --- acpreprocessing/stitching_modules/convert_to_n5/ts_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/acpreprocessing/stitching_modules/convert_to_n5/ts_utils.py b/acpreprocessing/stitching_modules/convert_to_n5/ts_utils.py index 672348a..c464d4c 100644 --- a/acpreprocessing/stitching_modules/convert_to_n5/ts_utils.py +++ b/acpreprocessing/stitching_modules/convert_to_n5/ts_utils.py @@ -76,7 +76,7 @@ def create_kvstore(fpath, store, AWS_param=None): Returns: dict: The kvstore configuration. """ - kvstore = {"driver": store, "path": fpath} + kvstore = {"driver": store, "path": fpath, "file_io_locking": {"mode":"non_atomic"}} if store == 's3': # Parse the S3 URL into bucket and path From 3234db3ac45129b4f9492590a85664a4ae88dda4 Mon Sep 17 00:00:00 2001 From: Kevin Takasaki <64923500+kttakasaki@users.noreply.github.com> Date: Thu, 30 Oct 2025 14:17:07 -0700 Subject: [PATCH 6/7] fixing bug in mip dataset size calculation . . . . --- .../stitching_modules/convert_to_n5/tiff_to_ngff.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/acpreprocessing/stitching_modules/convert_to_n5/tiff_to_ngff.py b/acpreprocessing/stitching_modules/convert_to_n5/tiff_to_ngff.py index 5c29e7f..95be32a 100644 --- a/acpreprocessing/stitching_modules/convert_to_n5/tiff_to_ngff.py +++ b/acpreprocessing/stitching_modules/convert_to_n5/tiff_to_ngff.py @@ -737,6 +737,8 @@ def write_mimgfns_to_zarr( for mip_lvl in range(max_mip + 1): mip_3dshape = mip_level_shape(mip_lvl, joined_shapes) + #mip_3dshape = tuple([max(a,b) for a,b in zip(shard_size[2:],mip_3dshape)]) + mip_shard_size = shard_size #(1, 1, min(shard_size[2],mip_3dshape[0]), min(shard_size[3],mip_3dshape[1]), min(shard_size[4],mip_3dshape[2])) if f"{mip_lvl}" in g: ds_lvl = g[f"{mip_lvl}"] else: @@ -744,7 +746,7 @@ def write_mimgfns_to_zarr( ds_lvl = g.create_array( name=f"{mip_lvl}", chunks=chunk_size, - shards=(1, 1, max(chunk_size[2],min(shard_size[2],mip_3dshape[0])), max(chunk_size[3],min(shard_size[3],mip_3dshape[1])), max(chunk_size[4],min(shard_size[4],mip_3dshape[2]))), + shards=mip_shard_size, shape=(1, 1, mip_3dshape[0], mip_3dshape[1], mip_3dshape[2]), compressors=compressors, dtype=dtype @@ -839,7 +841,7 @@ class NGFFGenerationParameters(argschema.schemas.DefaultSchema): argschema.fields.Int(), argschema.fields.Int(), argschema.fields.Int(), - argschema.fields.Int()), required=False, default=(1, 1, 512, 512, 512)) + argschema.fields.Int()), required=False, default=(1, 1, 1024, 512, 512)) class NGFFGroupGenerationParameters(NGFFGenerationParameters): From fc89f8aa5e8e41f9c18fa79db1b5a00c8c5612e7 Mon Sep 17 00:00:00 2001 From: Kevin Takasaki Date: Thu, 19 Mar 2026 11:48:37 -0700 Subject: [PATCH 7/7] fixing tiff_to_ngff conversion for non-deskew --- .../convert_to_n5/psdeskew.py | 4 +--- .../convert_to_n5/tiff_to_ngff.py | 19 ++++++++++++++----- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/acpreprocessing/stitching_modules/convert_to_n5/psdeskew.py b/acpreprocessing/stitching_modules/convert_to_n5/psdeskew.py index e9b71fc..13b09e9 100644 --- a/acpreprocessing/stitching_modules/convert_to_n5/psdeskew.py +++ b/acpreprocessing/stitching_modules/convert_to_n5/psdeskew.py @@ -194,9 +194,7 @@ def reshape_joined_shapes(joined_shapes, stride, blockdims, **kwargs): deskewed_shape : tuple of int shape of deskewed 3D array represented by joined_shapes """ - # if not transpose is None: - # axes = transpose - # else: + # assume axis 0 is length of scan i.e. z axis dimension from raw data stack axes = (0,1,2) # deskewed_shape = (int(np.ceil(joined_shapes[axes[0]]/(blockdims[axes[0]]/stride))*blockdims[axes[0]]), # joined_shapes[axes[1]], diff --git a/acpreprocessing/stitching_modules/convert_to_n5/tiff_to_ngff.py b/acpreprocessing/stitching_modules/convert_to_n5/tiff_to_ngff.py index 95be32a..14144cc 100644 --- a/acpreprocessing/stitching_modules/convert_to_n5/tiff_to_ngff.py +++ b/acpreprocessing/stitching_modules/convert_to_n5/tiff_to_ngff.py @@ -96,7 +96,7 @@ def iterate_2d_arrays_from_mimgfns(mimgfns, interleaved_channels=1, channel=0): def iterate_numpy_chunks_from_dataset( - dataset, slice_length=None, pad=True, *args, **kwargs): + dataset, slice_length=None, n_pad=0, *args, **kwargs): """iterate over a contiguous hdf5 daataset as chunks of numpy arrays Parameters @@ -113,19 +113,26 @@ def iterate_numpy_chunks_from_dataset( arr : numpy.ndarray 3D numpy array representing a consecutive chunk of 2D arrays """ - #array_gen = iterate_2d_arrays_from_dataset(mimgfns, *args, **kwargs) + for chunk in iterate_chunks(dataset, slice_length):#,*args,**kwargs): arr = numpy.asarray(chunk) - if pad: + if n_pad>-1: if arr.shape[0] != slice_length: newarr = numpy.zeros((slice_length, *arr.shape[1:]), dtype=arr.dtype) newarr[:arr.shape[0], :, :] = arr[:, :, :] + print(f"incomplete chunk of size {arr.shape[0]} padded") yield newarr else: yield arr else: yield arr + if n_pad>0: + print(f"chunk padding, n_pad = {n_pad}") + for i_n in range(n_pad): + newarr = numpy.zeros((slice_length, *arr.shape[1:]), + dtype=arr.dtype) + yield newarr def length_to_interleaved_length(length, interleaved_channels): @@ -450,6 +457,8 @@ def iterate_mip_levels_from_dataset( lvl_to_mip_kwargs = ({} if lvl_to_mip_kwargs is None else lvl_to_mip_kwargs) mip_kwargs = lvl_to_mip_kwargs.get(lvl, {}) + #TODO: deskew chunk fixing parameter + n_pad = 19 if deskew_kwargs else 0 start_index = 0 chunk_index = 0 if lvl > 0: @@ -509,7 +518,7 @@ def iterate_mip_levels_from_dataset( # get level 0 chunks # block_size is the number of slices to read from tiffs for chunk in iterate_numpy_chunks_from_dataset( - dataset, slice_length, pad=False, + dataset, slice_length, n_pad=n_pad, interleaved_channels=interleaved_channels, channel=channel): # deskew level 0 chunk @@ -835,7 +844,7 @@ class NGFFGenerationParameters(argschema.schemas.DefaultSchema): argschema.fields.Int(), argschema.fields.Int(), argschema.fields.Int(), - argschema.fields.Int()), required=False, default=(1, 1, 64, 64, 64)) + argschema.fields.Int()), required=False, default=(1, 1, 128, 128, 128)) shard_size = argschema.fields.Tuple(( argschema.fields.Int(), argschema.fields.Int(),