diff --git a/.gitmodules b/.gitmodules index 1a921ba..88d9a73 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,3 +10,6 @@ [submodule "vendor/sam2"] path = vendor/sam2 url = https://github.com/facebookresearch/sam2.git +[submodule "vendor/rawnind_jddc"] + path = vendor/rawnind_jddc + url = https://github.com/trougnouf/rawnind_jddc diff --git a/README.md b/README.md index e9f4853..9cdb925 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ Currently targets the ONNX backend. The pipeline is designed to support addition | [`mask-object-sam21-small`](models/mask-object-sam21-small/README.md) | mask | SAM 2.1 Hiera Small for interactive masking | | [`mask-object-sam21-tiny`](models/mask-object-sam21-tiny/README.md) | mask | SAM 2.1 Hiera Tiny for interactive masking | | [`mask-object-segnext-b2hq`](models/mask-object-segnext-b2hq/README.md) | mask | SegNext ViT-B SAx2 HQ for semantic masking | +| [`rawdenoise-nind`](models/rawdenoise-nind/README.md) | rawdenoise | UtNet2 raw denoiser trained on RawNIND (Bayer + linear Rec.2020 variants) | | [`upscale-bsrgan`](models/upscale-bsrgan/README.md) | upscale | BSRGAN 2x and 4x blind super-resolution | ## Repository structure diff --git a/darktable_ai/demo.py b/darktable_ai/demo.py index 1ee3182..c31aa18 100644 --- a/darktable_ai/demo.py +++ b/darktable_ai/demo.py @@ -8,6 +8,27 @@ from darktable_ai.config import ModelConfig from darktable_ai.convert import _import_script +_PROCESSED_IMAGE_EXTS = {".jpg", ".jpeg", ".png"} +_RAW_IMAGE_EXTS = { + ".cr2", ".cr3", ".crw", # Canon + ".nef", ".nrw", # Nikon + ".arw", ".sr2", ".srf", # Sony + ".raf", # Fuji + ".rw2", # Panasonic + ".pef", ".ptx", # Pentax + ".orf", # Olympus + ".rwl", # Leica + ".srw", # Samsung + ".dng", # Adobe generic +} +_SAMPLE_EXTS = _PROCESSED_IMAGE_EXTS | _RAW_IMAGE_EXTS + +# Task → output file extension. Raw-domain tasks can't round-trip through PNG +# because they produce linear HDR or >8-bit data. 
+_OUTPUT_EXT_BY_TASK = { + "rawdenoise": ".tif", +} + def run_demo(config: ModelConfig) -> None: """Run the model's demo.py on all sample images for its task.""" @@ -26,16 +47,19 @@ def run_demo(config: ModelConfig) -> None: module = _import_script(demo_script) model_kwargs = _model_type_kwargs(config) + out_ext = _OUTPUT_EXT_BY_TASK.get(config.task, ".png") - for img in sorted(images_dir.iterdir()): - if img.suffix.lower() not in (".jpg", ".jpeg", ".png"): - continue + samples = sorted(p for p in images_dir.rglob("*") + if p.is_file() and p.suffix.lower() in _SAMPLE_EXTS) + + for img in samples: if img.stem.startswith("expected"): continue - name = img.stem - output_path = demo_output_dir / f"{name}.png" - extra_kwargs = _image_kwargs(config, name) + rel = img.relative_to(images_dir).with_suffix("") + name = str(rel).replace("/", "_").replace("\\", "_") + output_path = demo_output_dir / f"{name}{out_ext}" + extra_kwargs = _image_kwargs(config, img, rel) print(f" {name}") module.demo( @@ -60,15 +84,18 @@ def _model_type_kwargs(config: ModelConfig) -> dict: return {"model": str(output_dir / "model.onnx")} -def _image_kwargs(config: ModelConfig, image_name: str) -> dict: +def _image_kwargs(config: ModelConfig, img: Path, rel: Path) -> dict: """Get extra demo kwargs for a specific image. Reads from a JSON sidecar file next to the sample image first (e.g. ``samples/mask-object/example_01.json``), then falls back - to ``demo.image_args`` in model.yaml. + to ``demo.image_args`` in model.yaml, keyed by either the flattened + relative path or the bare filename stem. 
""" - sidecar = config.root_dir / "samples" / config.task / f"{image_name}.json" + sidecar = img.with_suffix(".json") if sidecar.is_file(): with open(sidecar) as f: return json.load(f) - return config.demo.image_args.get(image_name, {}) + flat = str(rel).replace("/", "_").replace("\\", "_") + return (config.demo.image_args.get(flat) + or config.demo.image_args.get(img.stem, {})) diff --git a/models/denoise-nind/model.yaml b/models/denoise-nind/model.yaml index 787cd00..26339e3 100644 --- a/models/denoise-nind/model.yaml +++ b/models/denoise-nind/model.yaml @@ -17,7 +17,7 @@ checkpoints: model_card: long_description: "Image denoiser trained on the Natural Image Noise Dataset (NIND) from Wikimedia Commons" scope: "single-image denoising" - author: "Benoit Brummer (University of Louvain)" + author: "Benoit Brummer (UCLouvain)" source: "https://github.com/trougnouf/nind-denoise" paper: "https://arxiv.org/abs/1906.00270" license: "GPL-3.0" diff --git a/models/rawdenoise-nind/README.md b/models/rawdenoise-nind/README.md new file mode 100644 index 0000000..be15201 --- /dev/null +++ b/models/rawdenoise-nind/README.md @@ -0,0 +1,88 @@ +# RawNIND UtNet2 (Bayer + Linear Rec.2020 variants) + +Two UtNet2 raw denoisers trained on the Raw Natural Image Noise Dataset +(RawNIND). Bundled into a single `type: multi` package with sensor-based +auto-dispatch. 
+ +| Variant | Input | Output | Use for | +|----------------|------------------------------|------------------------------------------|----------------------------------| +| `model_bayer` | 4ch packed Bayer [R,G1,G2,B] | 3ch camRGB at 2× spatial, arbitrary gain | Bayer sensors (pre-demosaic) | +| `model_linear` | 3ch linear Rec.2020 | 3ch linear Rec.2020, arbitrary gain | X-Trans, Foveon, post-demosaic | + +Both models perform the same denoising task; the Bayer variant additionally +does the demosaic (via a PixelShuffle output head that 2× upsamples) and +emits its output in the camera's native RGB space — the ColorMatrix is not +baked into the graph, so consumers must apply it after inference to reach +linear Rec.2020. The linear variant is a pure 3→3 denoiser, in and out of +linear Rec.2020. Both variants output at an arbitrary learned gain and +require a scalar gain-match against the input mean before use. + +## Source + +- Repository: https://github.com/trougnouf/rawnind_jddc +- Paper: [Learning Joint Denoising, Demosaicing, and Compression from the Raw Natural Image Noise Dataset](https://arxiv.org/abs/2501.08924) (Brummer & De Vleeschouwer, 2025) +- License: GPL-3.0 + +## Architecture + +UtNet2 — 4-pool U-Net encoder-decoder (input H,W must be divisible by 16): + +- `funit=32`, activation `LeakyReLU` (package default for both variants) +- Bayer output head: `Conv2d(32 → 12, 1×1) → PixelShuffle(2)` (4 → 3 ch at 2× spatial) +- Linear output head: `Conv2d(32 → 3, 1×1)` (3 → 3 ch, same spatial) + +## Checkpoints + +- Bayer: `DenoiserTrainingBayerToProfiledRGB_4ch_2024-02-21-bayer_ms-ssim_mgout_notrans_valeither_-4` (iter 4350000) +- Linear: `DenoiserTrainingProfiledRGBToProfiledRGB_3ch_2024-10-09-prgb_ms-ssim_mgout_notrans_valeither_-1` (iter 3900000) + +Both are the canonical base variants from the `graph_denoise_models_definitions.yaml` +config map (`in_channels: 4` and `in_channels: 3`, no other options set). 
Training +used `match_gain: output` — the raw network outputs are at an arbitrary learned +scale; the demo rescales against the input mean at inference. + +## ONNX Models + +| File | Input | Output | +|-------------------|----------------------------------|----------------------------------| +| `model_bayer.onnx` | `input` — float32 [1, 4, H, W] | `output` — float32 [1, 3, 2H, 2W] | +| `model_linear.onnx` | `input` — float32 [1, 3, H, W] | `output` — float32 [1, 3, H, W] | + +H and W must be divisible by 16. + +## Demo pipeline + +`demo.py` auto-dispatches based on `rawpy.imread(image).raw_pattern.shape`: + +- `(2, 2)` → Bayer variant: + 1. Normalise per-channel black level → white level, clip to [0, 1] + 2. Pack to [R, G1, G2, B] half-resolution tensor + 3. Crop to mod-16 + 4. Inference → camRGB (arbitrary scale, 2× input spatial size) + 5. Gain-match to input mean + 6. camRGB → linear Rec.2020 via `inv(rgb_xyz_matrix[:3,:]) → XYZ → Rec.2020` +- anything else (X-Trans 6×6, Foveon, …) → Linear variant: + 1. `rawpy.postprocess` with linear Rec.2020 output, camera WB, no gamma + 2. Crop to mod-16 + 3. Inference → lin-Rec.2020 (arbitrary scale) + 4. Gain-match to input mean + +Output is a 16-bit linear Rec.2020 TIFF (or `.exr` if the output path has that +suffix). Linear Rec.2020 looks very dark in typical image viewers — open in +darktable / rawtherapee / a PQ-aware viewer. 
+
+## Selection Criteria
+
+| Property | Value |
+|--------------------------|-----------------------------------------------------------------------------------------------------------|
+| Model license | GPL-3.0 |
+| OSAID v1.0 | Open Source AI |
+| MOF | Class I (Open Science) |
+| Training data license | CC BY 4.0 / CC0 (per-image, Wikimedia Commons) |
+| Training data provenance | [RawNIND](https://dataverse.uclouvain.be/dataverse/rawnind) – real-world raw noise/clean pairs captured by authors |
+| Training code | [GPL-3.0](https://github.com/trougnouf/rawnind_jddc) |
+| Known limitations | Authors flag the code as academic state; Bayer-only 2x output upsample baked into the Bayer variant |
+| Published research | [arXiv:2501.08924](https://arxiv.org/abs/2501.08924) |
+| Inference | Local only, no cloud dependencies |
+| Scope | Raw and linear-RGB image denoising |
+| Reproducibility | Full pipeline (setup, convert, clean, demo) |
diff --git a/models/rawdenoise-nind/convert.py b/models/rawdenoise-nind/convert.py
new file mode 100644
index 0000000..4ed2565
--- /dev/null
+++ b/models/rawdenoise-nind/convert.py
@@ -0,0 +1,183 @@
+"""Export RawNIND UtNet2 raw denoiser to ONNX.
+
+Uses UtNet2 from the cloned rawnind_jddc repository:
+https://github.com/trougnouf/rawnind_jddc
+
+The bayer2prgb variant takes a 4-channel packed Bayer tensor and produces a
+3-channel camera-RGB image at twice the packed input's spatial resolution on
+each axis (i.e. full sensor size; colour conversion happens downstream).
+"""
+
+import argparse
+import importlib.util
+import os
+import sys
+import types
+
+import torch
+
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+DTAI_ROOT = os.environ.get("DTAI_ROOT", os.path.join(SCRIPT_DIR, "../.."))
+_RAWNIND_SRC = os.path.join(DTAI_ROOT, "vendor", "rawnind_jddc", "src")
+
+
+def _load_utnet2():
+    """Load UtNet2 from raw_denoiser.py without triggering rawnind's package __init__.
+ + The upstream package's __init__.py chains in tools/libs/models, which pulls + psutil, configargparse and a long tail of research-pipeline deps we don't + need for ONNX export. UtNet2 itself only depends on torch; the only sibling + import it makes (rawnind.libs.rawproc) is used exclusively by the + Passthrough class, so we stub it out. + """ + # Stub the parent packages + the one real sibling module UtNet2's file imports. + for name in ("rawnind", "rawnind.libs", "rawnind.libs.rawproc"): + if name not in sys.modules: + sys.modules[name] = types.ModuleType(name) + + path = os.path.join(_RAWNIND_SRC, "rawnind", "models", "raw_denoiser.py") + spec = importlib.util.spec_from_file_location("_rawnind_raw_denoiser", path) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module.UtNet2 + + +UtNet2 = _load_utnet2() + +try: + import onnxconverter_common + HAS_ONNX_CONVERTER = True +except ImportError: + HAS_ONNX_CONVERTER = False + + +def load_model(checkpoint_path, in_channels=4, funit=32, activation="PReLU", + preupsample=False): + model = UtNet2( + in_channels=in_channels, + funit=funit, + activation=activation, + preupsample=preupsample, + ) + # weights_only=False: the rawnind_jddc checkpoints are plain pickle dumps + # from torch.save on the nn.Module / training state, not pure tensor dicts. + loaded = torch.load(checkpoint_path, map_location="cpu", weights_only=False) + + # Full-model pickle (author saved the nn.Module itself) + if isinstance(loaded, torch.nn.Module): + state_dict = loaded.state_dict() + elif isinstance(loaded, dict): + state_dict = loaded + for key in ("state_dict", "model_state_dict", "params", "params_ema", "model", "generator"): + if key in state_dict: + state_dict = state_dict[key] + if isinstance(state_dict, torch.nn.Module): + state_dict = state_dict.state_dict() + break + else: + raise TypeError(f"Unexpected checkpoint type: {type(loaded)}") + + # Strip "module." 
prefix if present (DataParallel) + cleaned = { + (k[len("module."):] if k.startswith("module.") else k): v + for k, v in state_dict.items() + } + model.load_state_dict(cleaned, strict=True) + model.eval() + return model + + +def export_to_onnx(model, output_path, in_channels=4, + input_height=256, input_width=256, + dynamic_shapes=True, opset_version=17, fp16=False): + dummy_input = torch.randn(1, in_channels, input_height, input_width) + + dynamic_axes = None + if dynamic_shapes: + dynamic_axes = { + "input": {0: "batch_size", 2: "height", 3: "width"}, + "output": {0: "batch_size", 2: "height", 3: "width"}, + } + + torch.onnx.export( + model, + dummy_input, + output_path, + export_params=True, + opset_version=opset_version, + do_constant_folding=True, + input_names=["input"], + output_names=["output"], + dynamic_axes=dynamic_axes, + dynamo=False, + ) + print(f"Model exported to {output_path}") + + import onnx + onnx_model = onnx.load(output_path) + onnx.checker.check_model(onnx_model) + print("ONNX model verification passed!") + + try: + import onnxsim + print("Simplifying model...") + onnx_model, ok = onnxsim.simplify(onnx_model) + if ok: + onnx.save(onnx_model, output_path) + print("Model simplified successfully") + else: + print("Warning: simplification failed, using unsimplified model") + except ImportError: + print("onnx-simplifier not installed, skipping.") + + if fp16: + if not HAS_ONNX_CONVERTER: + print("Warning: onnxconverter-common not installed. 
Skipping FP16 conversion.") + return + print("Converting to FP16...") + from onnxconverter_common import float16 + fp16_model = float16.convert_float_to_float16(onnx_model) + onnx.save(fp16_model, output_path) + print(f"FP16 model saved to {output_path}") + + +def convert(checkpoint, output="model.onnx", in_channels=4, funit=32, + activation="PReLU", preupsample=False, + height=256, width=256, dynamic_shapes=True, opset=17, fp16=False): + """Entry point for programmatic conversion.""" + os.makedirs(os.path.dirname(output) or ".", exist_ok=True) + + print("Loading RawNIND UtNet2 model...") + model = load_model(checkpoint, in_channels=in_channels, funit=funit, + activation=activation, preupsample=preupsample) + + print("Exporting to ONNX...") + export_to_onnx(model, output, in_channels=in_channels, + input_height=height, input_width=width, + dynamic_shapes=dynamic_shapes, + opset_version=opset, fp16=fp16) + + +def main(): + parser = argparse.ArgumentParser(description="Export RawNIND UtNet2 to ONNX") + parser.add_argument("--checkpoint", type=str, required=True) + parser.add_argument("--output", type=str, default="model.onnx") + parser.add_argument("--in-channels", type=int, default=4) + parser.add_argument("--funit", type=int, default=32) + parser.add_argument("--activation", type=str, default="PReLU") + parser.add_argument("--preupsample", action="store_true") + parser.add_argument("--height", type=int, default=256) + parser.add_argument("--width", type=int, default=256) + parser.add_argument("--opset", type=int, default=17) + parser.add_argument("--fp16", action="store_true") + args = parser.parse_args() + + convert(args.checkpoint, args.output, + in_channels=args.in_channels, funit=args.funit, + activation=args.activation, preupsample=args.preupsample, + height=args.height, width=args.width, + opset=args.opset, fp16=args.fp16) + + +if __name__ == "__main__": + main() diff --git a/models/rawdenoise-nind/demo.py b/models/rawdenoise-nind/demo.py new file mode 100644 
index 0000000..5bb6406 --- /dev/null +++ b/models/rawdenoise-nind/demo.py @@ -0,0 +1,364 @@ +"""Demo: run the RawNIND UtNet2 raw-denoise ONNX models. + +This package ships two variants: + * model_bayer.onnx – 4ch packed-Bayer input, joint denoise + demosaic + * model_linear.onnx – 3ch linear Rec.2020 input, denoise only + (X-Trans, Foveon, or anywhere demosaic already ran) + +Input: a raw file readable by rawpy (CR2, NEF, ARW, DNG, RAF, ...). +Output: a linear Rec.2020 TIFF (16-bit) or EXR. + +Sensor dispatch: + * `raw.raw_pattern.shape == (2, 2)` → Bayer variant (native raw path) + * otherwise → linear variant, with rawpy demosaicing + to linear Rec.2020 first +""" + +import argparse +import os +import time + +import numpy as np +import onnxruntime as ort + + +# --------------------------------------------------------------------------- +# Bayer path – pack raw mosaic into 4ch [R, G1, G2, B] +# --------------------------------------------------------------------------- + +def pack_rggb(bayer: np.ndarray, raw_pattern, color_desc: str) -> np.ndarray: + """Pack a 2D Bayer mosaic into a 4-channel half-resolution [R, G1, G2, B] tensor.""" + h, w = bayer.shape + assert h % 2 == 0 and w % 2 == 0, "Bayer dimensions must be even" + + r_plane = g1_plane = g2_plane = b_plane = None + for i in range(2): + for j in range(2): + color = color_desc[int(raw_pattern[i, j])] + plane = bayer[i::2, j::2] + if color == "R": + r_plane = plane + elif color == "B": + b_plane = plane + elif color == "G": + if g1_plane is None: + g1_plane = plane + else: + g2_plane = plane + else: + raise ValueError(f"Unsupported Bayer colour: {color!r}") + + if any(p is None for p in (r_plane, g1_plane, g2_plane, b_plane)): + raise ValueError( + f"Incomplete RGGB pattern from desc={color_desc!r} " + f"raw_pattern={raw_pattern.tolist()}" + ) + return np.stack([r_plane, g1_plane, g2_plane, b_plane], axis=0) + + +def load_raw_as_packed_bayer(image_path: str): + """Load a 2×2 Bayer raw, black-level + 
white-level normalise per channel, + and pack into a (4, H/2, W/2) tensor. Returns (packed, rgb_xyz_matrix).""" + import rawpy + + raw = rawpy.imread(image_path) + assert raw.raw_pattern.shape == (2, 2), "load_raw_as_packed_bayer called on non-Bayer" + + bayer = raw.raw_image_visible.astype(np.float32) + white = float(raw.white_level) + color_desc = raw.color_desc.decode("ascii") + + black_per_ch = np.asarray(raw.black_level_per_channel, dtype=np.float32) + for i in range(2): + for j in range(2): + ch_idx = int(raw.raw_pattern[i, j]) + bl = black_per_ch[ch_idx] + vrange = max(white - bl, 1.0) + bayer[i::2, j::2] = np.clip((bayer[i::2, j::2] - bl) / vrange, 0.0, 1.0) + + packed = pack_rggb(bayer, raw.raw_pattern, color_desc) + return packed, np.asarray(raw.rgb_xyz_matrix, dtype=np.float32) + + +# --------------------------------------------------------------------------- +# Linear path – rawpy demosaic → lin-Rec.2020 +# --------------------------------------------------------------------------- + +def load_raw_as_lin_rec2020(image_path: str) -> np.ndarray: + """Demosaic a raw file to a (3, H, W) linear Rec.2020 tensor in [0, 1]. + + Uses rawpy's postprocess with a neutral pipeline: linear output, no auto- + bright, no gamma, no user flip. Output colour space is set to Rec.2020 so + rawpy applies the camera-matrix + white-balance conversion internally. 
+ """ + import rawpy + + raw = rawpy.imread(image_path) + rgb = raw.postprocess( + output_color=rawpy.ColorSpace.Rec2020, + output_bps=16, + no_auto_bright=True, + use_camera_wb=True, + gamma=(1.0, 1.0), + user_flip=0, + ) + rgb = rgb.astype(np.float32) / 65535.0 + return np.transpose(rgb, (2, 0, 1)) # (3, H, W) + + +# --------------------------------------------------------------------------- +# camRGB → linear Rec.2020 (Bayer path only; matrix from rawnind_jddc/rawproc.py) +# --------------------------------------------------------------------------- + +_XYZ_TO_LIN_REC2020 = np.array( + [ + [1.71666343, -0.35567332, -0.25336809], + [-0.66667384, 1.61645574, 0.0157683], + [0.01764248, -0.04277698, 0.94224328], + ], + dtype=np.float32, +) + + +def cam_rgb_to_lin_rec2020(cam_rgb: np.ndarray, rgb_xyz_matrix: np.ndarray) -> np.ndarray: + cam_to_xyzd65 = np.linalg.inv(rgb_xyz_matrix[:3, :]) + m = _XYZ_TO_LIN_REC2020 @ cam_to_xyzd65 + h, w, _ = cam_rgb.shape + out = (m @ cam_rgb.reshape(-1, 3).T).T.reshape(h, w, 3) + return out.astype(np.float32) + + +# --------------------------------------------------------------------------- +# Shared helpers +# --------------------------------------------------------------------------- + +def _match_gain(other: np.ndarray, anchor: np.ndarray) -> np.ndarray: + """Scale `other` so its mean matches `anchor`'s mean (rawproc.match_gain). + + Used at inference to substitute for the training-time match_gain=output + step, with the input image as the anchor instead of the (unavailable) GT. + Sign-preserving: if the network output mean is negative, gain is negative + too, flipping the output back into the anchor's sign convention. 
+ """ + anchor_mean = float(anchor.mean()) + other_mean = float(other.mean()) + if abs(other_mean) < 1e-12: + return other # degenerate — nothing sensible to rescale to + return other * (anchor_mean / other_mean) + + +def _run_tiled(session, input_name, input_is_fp16: bool, + arr: np.ndarray, + tile_size: int = 256, overlap: int = 32, + scale: int = 1) -> np.ndarray: + """Tiled inference with mirror-padded edges and overlap-trimmed stitching. + + `arr` is (1, C_in, H, W) float32; H and W need NOT be multiples of 16 + (each tile is T × T and T is constrained to be a multiple of 16). + Returns (1, C_out, H*scale, W*scale) float32. + + Matches the darktable C code: step = T - 2·overlap, each tile reads a + T × T window with `overlap` border on each side (mirror-padded at the + image boundary), and only the core (step × step) region of each tile + is written to the output — which keeps tile seams seamless. + + Picked T=256 (mod 16) and overlap=32 by default: on a 24MP Bayer raw + the peak ORT working set stays well under 1 GB (vs. > 10 GB for a + full-image pass), so the demo runs on a GitHub 7 GB runner. 
+ """ + _, _, H, W = arr.shape + T = tile_size + O = overlap + S = scale + step = T - 2 * O + assert step > 0, "tile_size must exceed 2 * overlap" + assert T % 16 == 0, "tile_size must be a multiple of 16" + + n_y = (H + step - 1) // step + n_x = (W + step - 1) // step + + # mirror-pad so every tile read stays inside `padded` regardless of + # where the last tile ends up (pad_after is at least O; can be more + # when H/W aren't divisible by step) + pad_before = O + pad_after_y = max(O, (n_y - 1) * step + T - H - O) + pad_after_x = max(O, (n_x - 1) * step + T - W - O) + padded = np.pad( + arr, + ((0, 0), (0, 0), (pad_before, pad_after_y), (pad_before, pad_after_x)), + mode="reflect", + ) + + out = None # shape known only after the first tile (C_out from the model) + for ty in range(n_y): + core_y = ty * step + core_h = min(step, H - core_y) + for tx in range(n_x): + core_x = tx * step + core_w = min(step, W - core_x) + tile = padded[:, :, core_y:core_y + T, core_x:core_x + T] + tile = np.ascontiguousarray(tile) + if input_is_fp16: + tile = tile.astype(np.float16) + [tile_out] = session.run(None, {input_name: tile}) + if out is None: + c_out = tile_out.shape[1] + out = np.zeros((1, c_out, H * S, W * S), dtype=np.float32) + # strip the overlap border and blit the core region + out[:, :, + core_y * S:(core_y + core_h) * S, + core_x * S:(core_x + core_w) * S] = \ + tile_out[:, :, + O * S:(O + core_h) * S, + O * S:(O + core_w) * S].astype(np.float32) + return out + + +def _load_session(model_path: str): + print(f"Loading model: {model_path}") + session = ort.InferenceSession(model_path, providers=["CPUExecutionProvider"]) + model_input = session.get_inputs()[0] + return session, model_input.name, model_input.type == "tensor(float16)" + + +def _save_tiff16(path, rgb: np.ndarray): + import tifffile + tifffile.imwrite(path, (rgb * 65535.0).astype(np.uint16)) + + +def _save_exr(path, rgb: np.ndarray): + import OpenEXR + import Imath + h, w, _ = rgb.shape + header = 
OpenEXR.Header(w, h) + half = Imath.Channel(Imath.PixelType(Imath.PixelType.HALF)) + header["channels"] = {"R": half, "G": half, "B": half} + exr = OpenEXR.OutputFile(path, header) + r, g, b = (rgb[..., i].astype(np.float16).tobytes() for i in range(3)) + exr.writePixels({"R": r, "G": g, "B": b}) + exr.close() + + +def _save(output_path: str, rgb_hwc: np.ndarray): + rgb_hwc = np.clip(rgb_hwc, 0.0, 1.0) + if os.path.splitext(output_path)[1].lower() == ".exr": + _save_exr(output_path, rgb_hwc) + else: + _save_tiff16(output_path, rgb_hwc) + + +# --------------------------------------------------------------------------- +# Bayer inference pipeline +# --------------------------------------------------------------------------- + +def run_bayer(model_path: str, image_path: str, output_path: str, + tile_size: int = 256, overlap: int = 32) -> None: + t0 = time.perf_counter() + session, input_name, input_is_fp16 = _load_session(model_path) + + print(f"Loading raw (Bayer): {image_path}") + packed, rgb_xyz_matrix = load_raw_as_packed_bayer(image_path) + print(f" Packed shape: {packed.shape} (C, H, W)") + + arr = packed[np.newaxis].astype(np.float32) + + print(f"Running tiled inference (Bayer, T={tile_size}, overlap={overlap})...") + output = _run_tiled(session, input_name, input_is_fp16, arr, + tile_size=tile_size, overlap=overlap, scale=2) + + # Bayer model outputs camRGB at an arbitrary learned scale (training used + # match_gain=output). Gain-match against the input mosaic, then convert + # camRGB → lin-Rec.2020. 
+ cam_rgb = output[0].astype(np.float32).transpose(1, 2, 0) + cam_rgb = _match_gain(cam_rgb, anchor=arr) + rec2020 = cam_rgb_to_lin_rec2020(cam_rgb, rgb_xyz_matrix) + print(f" Output (linear Rec.2020): " + f"range=[{rec2020.min():.3f}, {rec2020.max():.3f}] mean={rec2020.mean():.3f}") + + _save(output_path, rec2020) + print(f"Saved: {output_path} (total {time.perf_counter() - t0:.2f}s)") + + +# --------------------------------------------------------------------------- +# Linear (prgb2prgb) inference pipeline +# --------------------------------------------------------------------------- + +def run_linear(model_path: str, image_path: str, output_path: str, + tile_size: int = 256, overlap: int = 32) -> None: + t0 = time.perf_counter() + session, input_name, input_is_fp16 = _load_session(model_path) + + print(f"Loading raw (linear, via rawpy demosaic): {image_path}") + rec2020_in = load_raw_as_lin_rec2020(image_path) + print(f" Demosaicked: {rec2020_in.shape} (C, H, W)") + + arr = rec2020_in[np.newaxis].astype(np.float32) + + print(f"Running tiled inference (linear, T={tile_size}, overlap={overlap})...") + output = _run_tiled(session, input_name, input_is_fp16, arr, + tile_size=tile_size, overlap=overlap, scale=1) + + # Like the Bayer variant, the network output is at an arbitrary learned + # scale (training also used match_gain=output). Gain-match against the + # input. No colour conversion needed — input and output both live in + # linear Rec.2020. 
+ rec2020 = output[0].astype(np.float32).transpose(1, 2, 0) + rec2020 = _match_gain(rec2020, anchor=arr) + print(f" Output (linear Rec.2020): " + f"range=[{rec2020.min():.3f}, {rec2020.max():.3f}] mean={rec2020.mean():.3f}") + + _save(output_path, rec2020) + print(f"Saved: {output_path} (total {time.perf_counter() - t0:.2f}s)") + + +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- + +def _dispatch_variant(image_path: str) -> str: + """Return 'bayer' or 'linear' based on the raw file's sensor pattern.""" + import rawpy + with rawpy.imread(image_path) as raw: + return "bayer" if raw.raw_pattern.shape == (2, 2) else "linear" + + +def demo(model_dir, image, output, variant=None, + tile_size: int = 256, overlap: int = 32, **kwargs): + """Entry point invoked by the framework for type=multi models. + + If `variant` is not given, auto-dispatch based on sensor pattern. + `tile_size` and `overlap` are in packed-space pixels for the Bayer + variant and in sensor-space pixels for the linear variant (both + equivalently: the model's own input spatial units). 
+ """ + os.makedirs(os.path.dirname(output) or ".", exist_ok=True) + if variant is None: + variant = _dispatch_variant(image) + + if variant == "bayer": + run_bayer(os.path.join(model_dir, "model_bayer.onnx"), image, output, + tile_size=tile_size, overlap=overlap) + elif variant == "linear": + run_linear(os.path.join(model_dir, "model_linear.onnx"), image, output, + tile_size=tile_size, overlap=overlap) + else: + raise ValueError(f"Unknown variant: {variant!r} (expected 'bayer' or 'linear')") + + +def main(): + parser = argparse.ArgumentParser(description="RawNIND UtNet2 raw-denoise demo.") + parser.add_argument("--model-dir", type=str, required=True, + help="Directory containing model_bayer.onnx and model_linear.onnx") + parser.add_argument("--image", type=str, required=True, + help="Raw file (CR2, NEF, ARW, DNG, RAF, ...)") + parser.add_argument("--output", type=str, required=True, + help="Output .tif (16-bit linear Rec.2020) or .exr") + parser.add_argument("--variant", choices=["bayer", "linear"], default=None, + help="Force a variant; default: auto-dispatch by sensor") + args = parser.parse_args() + + demo(args.model_dir, args.image, args.output, variant=args.variant) + + +if __name__ == "__main__": + main() diff --git a/models/rawdenoise-nind/model.yaml b/models/rawdenoise-nind/model.yaml new file mode 100644 index 0000000..b1c407f --- /dev/null +++ b/models/rawdenoise-nind/model.yaml @@ -0,0 +1,64 @@ +id: rawdenoise-nind +name: "raw denoise nind" +description: "UtNet2 raw denoiser trained on RawNIND (Bayer + linear Rec.2020 variants)" +task: rawdenoise +version: "1.0" +arch: utnet2 +tiling: true +type: multi +dep_group: rawnind + +attributes: + coreml_cpu_only: true + input_sizes: [2048, 1536, 1024, 768, 512, 384, 256] + variants: + bayer: + onnx: model_bayer.onnx + input_kind: packed_bayer # 4ch R/G1/G2/B, half-resolution, pre-demosaic + linear: + onnx: model_linear.onnx + input_kind: lin_rec2020 # already demosaicked (X-Trans, Foveon, etc.) 
+ +repo: + submodule: vendor/rawnind_jddc + +checkpoints: + # DenoiserTrainingBayerToProfiledRGB_4ch_2024-02-21-bayer_ms-ssim_mgout_notrans_valeither_-4 + # canonical "Bayer" base variant (in_channels=4, funit=32, linear Rec.2020 target) + - url: "https://drive.google.com/file/d/1dFTLeljWi9wwojcZUsam8bE31JdYy3oM/view?usp=drive_link" + path: "temp/rawdenoise-nind/denoiser_bayer2prgb_utnet2.pt" + # DenoiserTrainingProfiledRGBToProfiledRGB_3ch_2024-10-09-prgb_ms-ssim_mgout_notrans_valeither_-1 + # canonical "Linear RGB" base variant (in_channels=3, funit=32, lin-Rec.2020 in/out) + - url: "https://drive.google.com/file/d/1kH8tK4RN_edak3r_VIAhCzPa6CNsuTYC/view?usp=drive_link" + path: "temp/rawdenoise-nind/denoiser_prgb2prgb_utnet2.pt" + +model_card: + long_description: "UtNet2 raw denoisers trained on the Raw Natural Image Noise Dataset (RawNIND). Bundles two variants: Bayer and Linear (for X-Trans, Foveon, or any already-demosaicked pipeline)" + scope: "raw and linear-RGB image denoising" + author: "Benoit Brummer (UCLouvain)" + source: "https://github.com/trougnouf/rawnind_jddc" + paper: "https://arxiv.org/abs/2501.08924" + license: "GPL-3.0" + training_data: "RawNIND – real-world raw noise/clean pairs photographed by authors, published on Wikimedia Commons and UCLouvain Dataverse" + training_data_license: "CC BY 4.0 / CC0 (per-image, Wikimedia Commons)" + notes: "all components publicly available under open licenses" + +convert: + - script: convert.py + args: + checkpoint: "{temp}/denoiser_bayer2prgb_utnet2.pt" + output: "{output}/model_bayer.onnx" + in_channels: 4 + funit: 32 + activation: "LeakyReLU" + opset: 17 + fp16: false + - script: convert.py + args: + checkpoint: "{temp}/denoiser_prgb2prgb_utnet2.pt" + output: "{output}/model_linear.onnx" + in_channels: 3 + funit: 32 + activation: "LeakyReLU" + opset: 17 + fp16: false diff --git a/pyproject.toml b/pyproject.toml index bfcdef6..1907b88 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,16 @@ nind = 
[ {include-group = "core"}, "onnxconverter-common", ] +rawnind = [ + {include-group = "core"}, + "onnxconverter-common", + "rawpy", + "tifffile", + "configargparse", + "pytorch-msssim", + "colour-science", + "ptflops", +] segnext = [ {include-group = "core"}, "mmcv>=2.2.0", @@ -64,6 +74,7 @@ openclip = [ all-models = [ {include-group = "nafnet"}, {include-group = "nind"}, + {include-group = "rawnind"}, {include-group = "segnext"}, {include-group = "bsrgan"}, {include-group = "sam21"}, diff --git a/samples/rawdenoise/bayer/1D3_6400.CR2 b/samples/rawdenoise/bayer/1D3_6400.CR2 new file mode 100644 index 0000000..ebaec17 Binary files /dev/null and b/samples/rawdenoise/bayer/1D3_6400.CR2 differ diff --git a/samples/rawdenoise/bayer/DSC04028.ARW b/samples/rawdenoise/bayer/DSC04028.ARW new file mode 100644 index 0000000..e04e9e9 Binary files /dev/null and b/samples/rawdenoise/bayer/DSC04028.ARW differ diff --git a/samples/rawdenoise/bayer/monkey_25600.CR3 b/samples/rawdenoise/bayer/monkey_25600.CR3 new file mode 100644 index 0000000..e134a4e Binary files /dev/null and b/samples/rawdenoise/bayer/monkey_25600.CR3 differ diff --git a/samples/rawdenoise/xtrans/20241017_0191.RAF b/samples/rawdenoise/xtrans/20241017_0191.RAF new file mode 100644 index 0000000..d58a91a Binary files /dev/null and b/samples/rawdenoise/xtrans/20241017_0191.RAF differ diff --git a/samples/rawdenoise/xtrans/DSCF3323.RAF b/samples/rawdenoise/xtrans/DSCF3323.RAF new file mode 100644 index 0000000..b5fbb38 Binary files /dev/null and b/samples/rawdenoise/xtrans/DSCF3323.RAF differ diff --git a/samples/rawdenoise/xtrans/DSCF9838.RAF b/samples/rawdenoise/xtrans/DSCF9838.RAF new file mode 100644 index 0000000..6b82071 Binary files /dev/null and b/samples/rawdenoise/xtrans/DSCF9838.RAF differ diff --git a/vendor/rawnind_jddc b/vendor/rawnind_jddc new file mode 160000 index 0000000..4d455aa --- /dev/null +++ b/vendor/rawnind_jddc @@ -0,0 +1 @@ +Subproject commit 4d455aa8ada69214eafa6a91ac0b2e011cf9dcb7