diff --git a/.gitmodules b/.gitmodules index 1a921ba..88d9a73 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,3 +10,6 @@ [submodule "vendor/sam2"] path = vendor/sam2 url = https://github.com/facebookresearch/sam2.git +[submodule "vendor/rawnind_jddc"] + path = vendor/rawnind_jddc + url = https://github.com/trougnouf/rawnind_jddc diff --git a/README.md b/README.md index e9f4853..9cdb925 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ Currently targets the ONNX backend. The pipeline is designed to support addition | [`mask-object-sam21-small`](models/mask-object-sam21-small/README.md) | mask | SAM 2.1 Hiera Small for interactive masking | | [`mask-object-sam21-tiny`](models/mask-object-sam21-tiny/README.md) | mask | SAM 2.1 Hiera Tiny for interactive masking | | [`mask-object-segnext-b2hq`](models/mask-object-segnext-b2hq/README.md) | mask | SegNext ViT-B SAx2 HQ for semantic masking | +| [`rawdenoise-nind`](models/rawdenoise-nind/README.md) | rawdenoise | UtNet2 raw denoiser trained on RawNIND (Bayer + linear Rec.2020 variants) | | [`upscale-bsrgan`](models/upscale-bsrgan/README.md) | upscale | BSRGAN 2x and 4x blind super-resolution | ## Repository structure diff --git a/darktable_ai/demo.py b/darktable_ai/demo.py index 1ee3182..c31aa18 100644 --- a/darktable_ai/demo.py +++ b/darktable_ai/demo.py @@ -8,6 +8,27 @@ from darktable_ai.config import ModelConfig from darktable_ai.convert import _import_script +_PROCESSED_IMAGE_EXTS = {".jpg", ".jpeg", ".png"} +_RAW_IMAGE_EXTS = { + ".cr2", ".cr3", ".crw", # Canon + ".nef", ".nrw", # Nikon + ".arw", ".sr2", ".srf", # Sony + ".raf", # Fuji + ".rw2", # Panasonic + ".pef", ".ptx", # Pentax + ".orf", # Olympus + ".rwl", # Leica + ".srw", # Samsung + ".dng", # Adobe generic +} +_SAMPLE_EXTS = _PROCESSED_IMAGE_EXTS | _RAW_IMAGE_EXTS + +# Task → output file extension. Raw-domain tasks can't round-trip through PNG +# because they produce linear HDR or >8-bit data. 
+_OUTPUT_EXT_BY_TASK = { + "rawdenoise": ".tif", +} + def run_demo(config: ModelConfig) -> None: """Run the model's demo.py on all sample images for its task.""" @@ -26,16 +47,19 @@ def run_demo(config: ModelConfig) -> None: module = _import_script(demo_script) model_kwargs = _model_type_kwargs(config) + out_ext = _OUTPUT_EXT_BY_TASK.get(config.task, ".png") - for img in sorted(images_dir.iterdir()): - if img.suffix.lower() not in (".jpg", ".jpeg", ".png"): - continue + samples = sorted(p for p in images_dir.rglob("*") + if p.is_file() and p.suffix.lower() in _SAMPLE_EXTS) + + for img in samples: if img.stem.startswith("expected"): continue - name = img.stem - output_path = demo_output_dir / f"{name}.png" - extra_kwargs = _image_kwargs(config, name) + rel = img.relative_to(images_dir).with_suffix("") + name = str(rel).replace("/", "_").replace("\\", "_") + output_path = demo_output_dir / f"{name}{out_ext}" + extra_kwargs = _image_kwargs(config, img, rel) print(f" {name}") module.demo( @@ -60,15 +84,18 @@ def _model_type_kwargs(config: ModelConfig) -> dict: return {"model": str(output_dir / "model.onnx")} -def _image_kwargs(config: ModelConfig, image_name: str) -> dict: +def _image_kwargs(config: ModelConfig, img: Path, rel: Path) -> dict: """Get extra demo kwargs for a specific image. Reads from a JSON sidecar file next to the sample image first (e.g. ``samples/mask-object/example_01.json``), then falls back - to ``demo.image_args`` in model.yaml. + to ``demo.image_args`` in model.yaml, keyed by either the flattened + relative path or the bare filename stem. 
""" - sidecar = config.root_dir / "samples" / config.task / f"{image_name}.json" + sidecar = img.with_suffix(".json") if sidecar.is_file(): with open(sidecar) as f: return json.load(f) - return config.demo.image_args.get(image_name, {}) + flat = str(rel).replace("/", "_").replace("\\", "_") + return (config.demo.image_args.get(flat) + or config.demo.image_args.get(img.stem, {})) diff --git a/models/denoise-nind/model.yaml b/models/denoise-nind/model.yaml index 787cd00..26339e3 100644 --- a/models/denoise-nind/model.yaml +++ b/models/denoise-nind/model.yaml @@ -17,7 +17,7 @@ checkpoints: model_card: long_description: "Image denoiser trained on the Natural Image Noise Dataset (NIND) from Wikimedia Commons" scope: "single-image denoising" - author: "Benoit Brummer (University of Louvain)" + author: "Benoit Brummer (UCLouvain)" source: "https://github.com/trougnouf/nind-denoise" paper: "https://arxiv.org/abs/1906.00270" license: "GPL-3.0" diff --git a/models/rawdenoise-nind/README.md b/models/rawdenoise-nind/README.md new file mode 100644 index 0000000..be15201 --- /dev/null +++ b/models/rawdenoise-nind/README.md @@ -0,0 +1,88 @@ +# RawNIND UtNet2 (Bayer + Linear Rec.2020 variants) + +Two UtNet2 raw denoisers trained on the Raw Natural Image Noise Dataset +(RawNIND). Bundled into a single `type: multi` package with sensor-based +auto-dispatch. 
+ +| Variant | Input | Output | Use for | +|----------------|------------------------------|------------------------------------------|----------------------------------| +| `model_bayer` | 4ch packed Bayer [R,G1,G2,B] | 3ch camRGB at 2× spatial, arbitrary gain | Bayer sensors (pre-demosaic) | +| `model_linear` | 3ch linear Rec.2020 | 3ch linear Rec.2020, arbitrary gain | X-Trans, Foveon, post-demosaic | + +Both models perform the same denoising task; the Bayer variant additionally +does the demosaic (via a PixelShuffle output head that 2× upsamples) and +emits its output in the camera's native RGB space — the ColorMatrix is not +baked into the graph, so consumers must apply it after inference to reach +linear Rec.2020. The linear variant is a pure 3→3 denoiser, in and out of +linear Rec.2020. Both variants output at an arbitrary learned gain and +require a scalar gain-match against the input mean before use. + +## Source + +- Repository: https://github.com/trougnouf/rawnind_jddc +- Paper: [Learning Joint Denoising, Demosaicing, and Compression from the Raw Natural Image Noise Dataset](https://arxiv.org/abs/2501.08924) (Brummer & De Vleeschouwer, 2025) +- License: GPL-3.0 + +## Architecture + +UtNet2 — 4-pool U-Net encoder-decoder (input H,W must be divisible by 16): + +- `funit=32`, activation `LeakyReLU` (package default for both variants) +- Bayer output head: `Conv2d(32 → 12, 1×1) → PixelShuffle(2)` (4 → 3 ch at 2× spatial) +- Linear output head: `Conv2d(32 → 3, 1×1)` (3 → 3 ch, same spatial) + +## Checkpoints + +- Bayer: `DenoiserTrainingBayerToProfiledRGB_4ch_2024-02-21-bayer_ms-ssim_mgout_notrans_valeither_-4` (iter 4350000) +- Linear: `DenoiserTrainingProfiledRGBToProfiledRGB_3ch_2024-10-09-prgb_ms-ssim_mgout_notrans_valeither_-1` (iter 3900000) + +Both are the canonical base variants from the `graph_denoise_models_definitions.yaml` +config map (`in_channels: 4` and `in_channels: 3`, no other options set). 
Training +used `match_gain: output` — the raw network outputs are at an arbitrary learned +scale; the demo rescales against the input mean at inference. + +## ONNX Models + +| File | Input | Output | +|-------------------|----------------------------------|----------------------------------| +| `model_bayer.onnx` | `input` — float32 [1, 4, H, W] | `output` — float32 [1, 3, 2H, 2W] | +| `model_linear.onnx` | `input` — float32 [1, 3, H, W] | `output` — float32 [1, 3, H, W] | + +H and W must be divisible by 16. + +## Demo pipeline + +`demo.py` auto-dispatches based on `rawpy.imread(image).raw_pattern.shape`: + +- `(2, 2)` → Bayer variant: + 1. Normalise per-channel black level → white level, clip to [0, 1] + 2. Pack to [R, G1, G2, B] half-resolution tensor + 3. Crop to mod-16 + 4. Inference → camRGB (arbitrary scale, 2× input spatial size) + 5. Gain-match to input mean + 6. camRGB → linear Rec.2020 via `inv(rgb_xyz_matrix[:3,:]) → XYZ → Rec.2020` +- anything else (X-Trans 6×6, Foveon, …) → Linear variant: + 1. `rawpy.postprocess` with linear Rec.2020 output, camera WB, no gamma + 2. Crop to mod-16 + 3. Inference → lin-Rec.2020 (arbitrary scale) + 4. Gain-match to input mean + +Output is a 16-bit linear Rec.2020 TIFF (or `.exr` if the output path has that +suffix). Linear Rec.2020 looks very dark in typical image viewers — open in +darktable / rawtherapee / a PQ-aware viewer. 
+
+## Selection Criteria
+
+| Property | Value |
+|--------------------------|-----------------------------------------------------------------------------------------------------------|
+| Model license | GPL-3.0 |
+| OSAID v1.0 | Open Source AI |
+| MOF | Class I (Open Science) |
+| Training data license | CC BY 4.0 / CC0 (per-image, Wikimedia Commons) |
+| Training data provenance | [RawNIND](https://dataverse.uclouvain.be/dataverse/rawnind) – real-world raw noise/clean pairs captured by authors |
+| Training code | [GPL-3.0](https://github.com/trougnouf/rawnind_jddc) |
+| Known limitations | Authors flag the code as academic state; Bayer-only 2x output upsample baked into the Bayer variant |
+| Published research | [arXiv:2501.08924](https://arxiv.org/abs/2501.08924) |
+| Inference | Local only, no cloud dependencies |
+| Scope | Raw and linear-RGB image denoising |
+| Reproducibility | Full pipeline (setup, convert, clean, demo) |
diff --git a/models/rawdenoise-nind/convert.py b/models/rawdenoise-nind/convert.py
new file mode 100644
index 0000000..4ed2565
--- /dev/null
+++ b/models/rawdenoise-nind/convert.py
@@ -0,0 +1,183 @@
+"""Export RawNIND UtNet2 raw denoiser to ONNX.
+
+Uses UtNet2 from the cloned rawnind_jddc repository:
+https://github.com/trougnouf/rawnind_jddc
+
+The bayer2prgb variant takes a 4-channel packed Bayer tensor and produces a
+3-channel camera-RGB image at twice the packed input's spatial resolution on
+each axis (i.e. full sensor size; colour conversion happens downstream).
+"""
+
+import argparse
+import importlib.util
+import os
+import sys
+import types
+
+import torch
+
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+DTAI_ROOT = os.environ.get("DTAI_ROOT", os.path.join(SCRIPT_DIR, "../.."))
+_RAWNIND_SRC = os.path.join(DTAI_ROOT, "vendor", "rawnind_jddc", "src")
+
+
+def _load_utnet2():
+    """Load UtNet2 from raw_denoiser.py without triggering rawnind's package __init__.
+ + The upstream package's __init__.py chains in tools/libs/models, which pulls + psutil, configargparse and a long tail of research-pipeline deps we don't + need for ONNX export. UtNet2 itself only depends on torch; the only sibling + import it makes (rawnind.libs.rawproc) is used exclusively by the + Passthrough class, so we stub it out. + """ + # Stub the parent packages + the one real sibling module UtNet2's file imports. + for name in ("rawnind", "rawnind.libs", "rawnind.libs.rawproc"): + if name not in sys.modules: + sys.modules[name] = types.ModuleType(name) + + path = os.path.join(_RAWNIND_SRC, "rawnind", "models", "raw_denoiser.py") + spec = importlib.util.spec_from_file_location("_rawnind_raw_denoiser", path) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module.UtNet2 + + +UtNet2 = _load_utnet2() + +try: + import onnxconverter_common + HAS_ONNX_CONVERTER = True +except ImportError: + HAS_ONNX_CONVERTER = False + + +def load_model(checkpoint_path, in_channels=4, funit=32, activation="PReLU", + preupsample=False): + model = UtNet2( + in_channels=in_channels, + funit=funit, + activation=activation, + preupsample=preupsample, + ) + # weights_only=False: the rawnind_jddc checkpoints are plain pickle dumps + # from torch.save on the nn.Module / training state, not pure tensor dicts. + loaded = torch.load(checkpoint_path, map_location="cpu", weights_only=False) + + # Full-model pickle (author saved the nn.Module itself) + if isinstance(loaded, torch.nn.Module): + state_dict = loaded.state_dict() + elif isinstance(loaded, dict): + state_dict = loaded + for key in ("state_dict", "model_state_dict", "params", "params_ema", "model", "generator"): + if key in state_dict: + state_dict = state_dict[key] + if isinstance(state_dict, torch.nn.Module): + state_dict = state_dict.state_dict() + break + else: + raise TypeError(f"Unexpected checkpoint type: {type(loaded)}") + + # Strip "module." 
prefix if present (DataParallel) + cleaned = { + (k[len("module."):] if k.startswith("module.") else k): v + for k, v in state_dict.items() + } + model.load_state_dict(cleaned, strict=True) + model.eval() + return model + + +def export_to_onnx(model, output_path, in_channels=4, + input_height=256, input_width=256, + dynamic_shapes=True, opset_version=17, fp16=False): + dummy_input = torch.randn(1, in_channels, input_height, input_width) + + dynamic_axes = None + if dynamic_shapes: + dynamic_axes = { + "input": {0: "batch_size", 2: "height", 3: "width"}, + "output": {0: "batch_size", 2: "height", 3: "width"}, + } + + torch.onnx.export( + model, + dummy_input, + output_path, + export_params=True, + opset_version=opset_version, + do_constant_folding=True, + input_names=["input"], + output_names=["output"], + dynamic_axes=dynamic_axes, + dynamo=False, + ) + print(f"Model exported to {output_path}") + + import onnx + onnx_model = onnx.load(output_path) + onnx.checker.check_model(onnx_model) + print("ONNX model verification passed!") + + try: + import onnxsim + print("Simplifying model...") + onnx_model, ok = onnxsim.simplify(onnx_model) + if ok: + onnx.save(onnx_model, output_path) + print("Model simplified successfully") + else: + print("Warning: simplification failed, using unsimplified model") + except ImportError: + print("onnx-simplifier not installed, skipping.") + + if fp16: + if not HAS_ONNX_CONVERTER: + print("Warning: onnxconverter-common not installed. 
Skipping FP16 conversion.") + return + print("Converting to FP16...") + from onnxconverter_common import float16 + fp16_model = float16.convert_float_to_float16(onnx_model) + onnx.save(fp16_model, output_path) + print(f"FP16 model saved to {output_path}") + + +def convert(checkpoint, output="model.onnx", in_channels=4, funit=32, + activation="PReLU", preupsample=False, + height=256, width=256, dynamic_shapes=True, opset=17, fp16=False): + """Entry point for programmatic conversion.""" + os.makedirs(os.path.dirname(output) or ".", exist_ok=True) + + print("Loading RawNIND UtNet2 model...") + model = load_model(checkpoint, in_channels=in_channels, funit=funit, + activation=activation, preupsample=preupsample) + + print("Exporting to ONNX...") + export_to_onnx(model, output, in_channels=in_channels, + input_height=height, input_width=width, + dynamic_shapes=dynamic_shapes, + opset_version=opset, fp16=fp16) + + +def main(): + parser = argparse.ArgumentParser(description="Export RawNIND UtNet2 to ONNX") + parser.add_argument("--checkpoint", type=str, required=True) + parser.add_argument("--output", type=str, default="model.onnx") + parser.add_argument("--in-channels", type=int, default=4) + parser.add_argument("--funit", type=int, default=32) + parser.add_argument("--activation", type=str, default="PReLU") + parser.add_argument("--preupsample", action="store_true") + parser.add_argument("--height", type=int, default=256) + parser.add_argument("--width", type=int, default=256) + parser.add_argument("--opset", type=int, default=17) + parser.add_argument("--fp16", action="store_true") + args = parser.parse_args() + + convert(args.checkpoint, args.output, + in_channels=args.in_channels, funit=args.funit, + activation=args.activation, preupsample=args.preupsample, + height=args.height, width=args.width, + opset=args.opset, fp16=args.fp16) + + +if __name__ == "__main__": + main() diff --git a/models/rawdenoise-nind/demo.py b/models/rawdenoise-nind/demo.py new file mode 100644 
index 0000000..5bb6406 --- /dev/null +++ b/models/rawdenoise-nind/demo.py @@ -0,0 +1,364 @@ +"""Demo: run the RawNIND UtNet2 raw-denoise ONNX models. + +This package ships two variants: + * model_bayer.onnx – 4ch packed-Bayer input, joint denoise + demosaic + * model_linear.onnx – 3ch linear Rec.2020 input, denoise only + (X-Trans, Foveon, or anywhere demosaic already ran) + +Input: a raw file readable by rawpy (CR2, NEF, ARW, DNG, RAF, ...). +Output: a linear Rec.2020 TIFF (16-bit) or EXR. + +Sensor dispatch: + * `raw.raw_pattern.shape == (2, 2)` → Bayer variant (native raw path) + * otherwise → linear variant, with rawpy demosaicing + to linear Rec.2020 first +""" + +import argparse +import os +import time + +import numpy as np +import onnxruntime as ort + + +# --------------------------------------------------------------------------- +# Bayer path – pack raw mosaic into 4ch [R, G1, G2, B] +# --------------------------------------------------------------------------- + +def pack_rggb(bayer: np.ndarray, raw_pattern, color_desc: str) -> np.ndarray: + """Pack a 2D Bayer mosaic into a 4-channel half-resolution [R, G1, G2, B] tensor.""" + h, w = bayer.shape + assert h % 2 == 0 and w % 2 == 0, "Bayer dimensions must be even" + + r_plane = g1_plane = g2_plane = b_plane = None + for i in range(2): + for j in range(2): + color = color_desc[int(raw_pattern[i, j])] + plane = bayer[i::2, j::2] + if color == "R": + r_plane = plane + elif color == "B": + b_plane = plane + elif color == "G": + if g1_plane is None: + g1_plane = plane + else: + g2_plane = plane + else: + raise ValueError(f"Unsupported Bayer colour: {color!r}") + + if any(p is None for p in (r_plane, g1_plane, g2_plane, b_plane)): + raise ValueError( + f"Incomplete RGGB pattern from desc={color_desc!r} " + f"raw_pattern={raw_pattern.tolist()}" + ) + return np.stack([r_plane, g1_plane, g2_plane, b_plane], axis=0) + + +def load_raw_as_packed_bayer(image_path: str): + """Load a 2×2 Bayer raw, black-level + 
white-level normalise per channel, + and pack into a (4, H/2, W/2) tensor. Returns (packed, rgb_xyz_matrix).""" + import rawpy + + raw = rawpy.imread(image_path) + assert raw.raw_pattern.shape == (2, 2), "load_raw_as_packed_bayer called on non-Bayer" + + bayer = raw.raw_image_visible.astype(np.float32) + white = float(raw.white_level) + color_desc = raw.color_desc.decode("ascii") + + black_per_ch = np.asarray(raw.black_level_per_channel, dtype=np.float32) + for i in range(2): + for j in range(2): + ch_idx = int(raw.raw_pattern[i, j]) + bl = black_per_ch[ch_idx] + vrange = max(white - bl, 1.0) + bayer[i::2, j::2] = np.clip((bayer[i::2, j::2] - bl) / vrange, 0.0, 1.0) + + packed = pack_rggb(bayer, raw.raw_pattern, color_desc) + return packed, np.asarray(raw.rgb_xyz_matrix, dtype=np.float32) + + +# --------------------------------------------------------------------------- +# Linear path – rawpy demosaic → lin-Rec.2020 +# --------------------------------------------------------------------------- + +def load_raw_as_lin_rec2020(image_path: str) -> np.ndarray: + """Demosaic a raw file to a (3, H, W) linear Rec.2020 tensor in [0, 1]. + + Uses rawpy's postprocess with a neutral pipeline: linear output, no auto- + bright, no gamma, no user flip. Output colour space is set to Rec.2020 so + rawpy applies the camera-matrix + white-balance conversion internally. 
+ """ + import rawpy + + raw = rawpy.imread(image_path) + rgb = raw.postprocess( + output_color=rawpy.ColorSpace.Rec2020, + output_bps=16, + no_auto_bright=True, + use_camera_wb=True, + gamma=(1.0, 1.0), + user_flip=0, + ) + rgb = rgb.astype(np.float32) / 65535.0 + return np.transpose(rgb, (2, 0, 1)) # (3, H, W) + + +# --------------------------------------------------------------------------- +# camRGB → linear Rec.2020 (Bayer path only; matrix from rawnind_jddc/rawproc.py) +# --------------------------------------------------------------------------- + +_XYZ_TO_LIN_REC2020 = np.array( + [ + [1.71666343, -0.35567332, -0.25336809], + [-0.66667384, 1.61645574, 0.0157683], + [0.01764248, -0.04277698, 0.94224328], + ], + dtype=np.float32, +) + + +def cam_rgb_to_lin_rec2020(cam_rgb: np.ndarray, rgb_xyz_matrix: np.ndarray) -> np.ndarray: + cam_to_xyzd65 = np.linalg.inv(rgb_xyz_matrix[:3, :]) + m = _XYZ_TO_LIN_REC2020 @ cam_to_xyzd65 + h, w, _ = cam_rgb.shape + out = (m @ cam_rgb.reshape(-1, 3).T).T.reshape(h, w, 3) + return out.astype(np.float32) + + +# --------------------------------------------------------------------------- +# Shared helpers +# --------------------------------------------------------------------------- + +def _match_gain(other: np.ndarray, anchor: np.ndarray) -> np.ndarray: + """Scale `other` so its mean matches `anchor`'s mean (rawproc.match_gain). + + Used at inference to substitute for the training-time match_gain=output + step, with the input image as the anchor instead of the (unavailable) GT. + Sign-preserving: if the network output mean is negative, gain is negative + too, flipping the output back into the anchor's sign convention. 
+ """ + anchor_mean = float(anchor.mean()) + other_mean = float(other.mean()) + if abs(other_mean) < 1e-12: + return other # degenerate — nothing sensible to rescale to + return other * (anchor_mean / other_mean) + + +def _run_tiled(session, input_name, input_is_fp16: bool, + arr: np.ndarray, + tile_size: int = 256, overlap: int = 32, + scale: int = 1) -> np.ndarray: + """Tiled inference with mirror-padded edges and overlap-trimmed stitching. + + `arr` is (1, C_in, H, W) float32; H and W need NOT be multiples of 16 + (each tile is T × T and T is constrained to be a multiple of 16). + Returns (1, C_out, H*scale, W*scale) float32. + + Matches the darktable C code: step = T - 2·overlap, each tile reads a + T × T window with `overlap` border on each side (mirror-padded at the + image boundary), and only the core (step × step) region of each tile + is written to the output — which keeps tile seams seamless. + + Picked T=256 (mod 16) and overlap=32 by default: on a 24MP Bayer raw + the peak ORT working set stays well under 1 GB (vs. > 10 GB for a + full-image pass), so the demo runs on a GitHub 7 GB runner. 
+ """ + _, _, H, W = arr.shape + T = tile_size + O = overlap + S = scale + step = T - 2 * O + assert step > 0, "tile_size must exceed 2 * overlap" + assert T % 16 == 0, "tile_size must be a multiple of 16" + + n_y = (H + step - 1) // step + n_x = (W + step - 1) // step + + # mirror-pad so every tile read stays inside `padded` regardless of + # where the last tile ends up (pad_after is at least O; can be more + # when H/W aren't divisible by step) + pad_before = O + pad_after_y = max(O, (n_y - 1) * step + T - H - O) + pad_after_x = max(O, (n_x - 1) * step + T - W - O) + padded = np.pad( + arr, + ((0, 0), (0, 0), (pad_before, pad_after_y), (pad_before, pad_after_x)), + mode="reflect", + ) + + out = None # shape known only after the first tile (C_out from the model) + for ty in range(n_y): + core_y = ty * step + core_h = min(step, H - core_y) + for tx in range(n_x): + core_x = tx * step + core_w = min(step, W - core_x) + tile = padded[:, :, core_y:core_y + T, core_x:core_x + T] + tile = np.ascontiguousarray(tile) + if input_is_fp16: + tile = tile.astype(np.float16) + [tile_out] = session.run(None, {input_name: tile}) + if out is None: + c_out = tile_out.shape[1] + out = np.zeros((1, c_out, H * S, W * S), dtype=np.float32) + # strip the overlap border and blit the core region + out[:, :, + core_y * S:(core_y + core_h) * S, + core_x * S:(core_x + core_w) * S] = \ + tile_out[:, :, + O * S:(O + core_h) * S, + O * S:(O + core_w) * S].astype(np.float32) + return out + + +def _load_session(model_path: str): + print(f"Loading model: {model_path}") + session = ort.InferenceSession(model_path, providers=["CPUExecutionProvider"]) + model_input = session.get_inputs()[0] + return session, model_input.name, model_input.type == "tensor(float16)" + + +def _save_tiff16(path, rgb: np.ndarray): + import tifffile + tifffile.imwrite(path, (rgb * 65535.0).astype(np.uint16)) + + +def _save_exr(path, rgb: np.ndarray): + import OpenEXR + import Imath + h, w, _ = rgb.shape + header = 
OpenEXR.Header(w, h) + half = Imath.Channel(Imath.PixelType(Imath.PixelType.HALF)) + header["channels"] = {"R": half, "G": half, "B": half} + exr = OpenEXR.OutputFile(path, header) + r, g, b = (rgb[..., i].astype(np.float16).tobytes() for i in range(3)) + exr.writePixels({"R": r, "G": g, "B": b}) + exr.close() + + +def _save(output_path: str, rgb_hwc: np.ndarray): + rgb_hwc = np.clip(rgb_hwc, 0.0, 1.0) + if os.path.splitext(output_path)[1].lower() == ".exr": + _save_exr(output_path, rgb_hwc) + else: + _save_tiff16(output_path, rgb_hwc) + + +# --------------------------------------------------------------------------- +# Bayer inference pipeline +# --------------------------------------------------------------------------- + +def run_bayer(model_path: str, image_path: str, output_path: str, + tile_size: int = 256, overlap: int = 32) -> None: + t0 = time.perf_counter() + session, input_name, input_is_fp16 = _load_session(model_path) + + print(f"Loading raw (Bayer): {image_path}") + packed, rgb_xyz_matrix = load_raw_as_packed_bayer(image_path) + print(f" Packed shape: {packed.shape} (C, H, W)") + + arr = packed[np.newaxis].astype(np.float32) + + print(f"Running tiled inference (Bayer, T={tile_size}, overlap={overlap})...") + output = _run_tiled(session, input_name, input_is_fp16, arr, + tile_size=tile_size, overlap=overlap, scale=2) + + # Bayer model outputs camRGB at an arbitrary learned scale (training used + # match_gain=output). Gain-match against the input mosaic, then convert + # camRGB → lin-Rec.2020. 
+ cam_rgb = output[0].astype(np.float32).transpose(1, 2, 0) + cam_rgb = _match_gain(cam_rgb, anchor=arr) + rec2020 = cam_rgb_to_lin_rec2020(cam_rgb, rgb_xyz_matrix) + print(f" Output (linear Rec.2020): " + f"range=[{rec2020.min():.3f}, {rec2020.max():.3f}] mean={rec2020.mean():.3f}") + + _save(output_path, rec2020) + print(f"Saved: {output_path} (total {time.perf_counter() - t0:.2f}s)") + + +# --------------------------------------------------------------------------- +# Linear (prgb2prgb) inference pipeline +# --------------------------------------------------------------------------- + +def run_linear(model_path: str, image_path: str, output_path: str, + tile_size: int = 256, overlap: int = 32) -> None: + t0 = time.perf_counter() + session, input_name, input_is_fp16 = _load_session(model_path) + + print(f"Loading raw (linear, via rawpy demosaic): {image_path}") + rec2020_in = load_raw_as_lin_rec2020(image_path) + print(f" Demosaicked: {rec2020_in.shape} (C, H, W)") + + arr = rec2020_in[np.newaxis].astype(np.float32) + + print(f"Running tiled inference (linear, T={tile_size}, overlap={overlap})...") + output = _run_tiled(session, input_name, input_is_fp16, arr, + tile_size=tile_size, overlap=overlap, scale=1) + + # Like the Bayer variant, the network output is at an arbitrary learned + # scale (training also used match_gain=output). Gain-match against the + # input. No colour conversion needed — input and output both live in + # linear Rec.2020. 
+ rec2020 = output[0].astype(np.float32).transpose(1, 2, 0) + rec2020 = _match_gain(rec2020, anchor=arr) + print(f" Output (linear Rec.2020): " + f"range=[{rec2020.min():.3f}, {rec2020.max():.3f}] mean={rec2020.mean():.3f}") + + _save(output_path, rec2020) + print(f"Saved: {output_path} (total {time.perf_counter() - t0:.2f}s)") + + +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- + +def _dispatch_variant(image_path: str) -> str: + """Return 'bayer' or 'linear' based on the raw file's sensor pattern.""" + import rawpy + with rawpy.imread(image_path) as raw: + return "bayer" if raw.raw_pattern.shape == (2, 2) else "linear" + + +def demo(model_dir, image, output, variant=None, + tile_size: int = 256, overlap: int = 32, **kwargs): + """Entry point invoked by the framework for type=multi models. + + If `variant` is not given, auto-dispatch based on sensor pattern. + `tile_size` and `overlap` are in packed-space pixels for the Bayer + variant and in sensor-space pixels for the linear variant (both + equivalently: the model's own input spatial units). 
+ """ + os.makedirs(os.path.dirname(output) or ".", exist_ok=True) + if variant is None: + variant = _dispatch_variant(image) + + if variant == "bayer": + run_bayer(os.path.join(model_dir, "model_bayer.onnx"), image, output, + tile_size=tile_size, overlap=overlap) + elif variant == "linear": + run_linear(os.path.join(model_dir, "model_linear.onnx"), image, output, + tile_size=tile_size, overlap=overlap) + else: + raise ValueError(f"Unknown variant: {variant!r} (expected 'bayer' or 'linear')") + + +def main(): + parser = argparse.ArgumentParser(description="RawNIND UtNet2 raw-denoise demo.") + parser.add_argument("--model-dir", type=str, required=True, + help="Directory containing model_bayer.onnx and model_linear.onnx") + parser.add_argument("--image", type=str, required=True, + help="Raw file (CR2, NEF, ARW, DNG, RAF, ...)") + parser.add_argument("--output", type=str, required=True, + help="Output .tif (16-bit linear Rec.2020) or .exr") + parser.add_argument("--variant", choices=["bayer", "linear"], default=None, + help="Force a variant; default: auto-dispatch by sensor") + args = parser.parse_args() + + demo(args.model_dir, args.image, args.output, variant=args.variant) + + +if __name__ == "__main__": + main() diff --git a/models/rawdenoise-nind/model.yaml b/models/rawdenoise-nind/model.yaml new file mode 100644 index 0000000..b1c407f --- /dev/null +++ b/models/rawdenoise-nind/model.yaml @@ -0,0 +1,64 @@ +id: rawdenoise-nind +name: "raw denoise nind" +description: "UtNet2 raw denoiser trained on RawNIND (Bayer + linear Rec.2020 variants)" +task: rawdenoise +version: "1.0" +arch: utnet2 +tiling: true +type: multi +dep_group: rawnind + +attributes: + coreml_cpu_only: true + input_sizes: [2048, 1536, 1024, 768, 512, 384, 256] + variants: + bayer: + onnx: model_bayer.onnx + input_kind: packed_bayer # 4ch R/G1/G2/B, half-resolution, pre-demosaic + linear: + onnx: model_linear.onnx + input_kind: lin_rec2020 # already demosaicked (X-Trans, Foveon, etc.) 
+ +repo: + submodule: vendor/rawnind_jddc + +checkpoints: + # DenoiserTrainingBayerToProfiledRGB_4ch_2024-02-21-bayer_ms-ssim_mgout_notrans_valeither_-4 + # canonical "Bayer" base variant (in_channels=4, funit=32, linear Rec.2020 target) + - url: "https://drive.google.com/file/d/1dFTLeljWi9wwojcZUsam8bE31JdYy3oM/view?usp=drive_link" + path: "temp/rawdenoise-nind/denoiser_bayer2prgb_utnet2.pt" + # DenoiserTrainingProfiledRGBToProfiledRGB_3ch_2024-10-09-prgb_ms-ssim_mgout_notrans_valeither_-1 + # canonical "Linear RGB" base variant (in_channels=3, funit=32, lin-Rec.2020 in/out) + - url: "https://drive.google.com/file/d/1kH8tK4RN_edak3r_VIAhCzPa6CNsuTYC/view?usp=drive_link" + path: "temp/rawdenoise-nind/denoiser_prgb2prgb_utnet2.pt" + +model_card: + long_description: "UtNet2 raw denoisers trained on the Raw Natural Image Noise Dataset (RawNIND). Bundles two variants: Bayer and Linear (for X-Trans, Foveon, or any already-demosaicked pipeline)" + scope: "raw and linear-RGB image denoising" + author: "Benoit Brummer (UCLouvain)" + source: "https://github.com/trougnouf/rawnind_jddc" + paper: "https://arxiv.org/abs/2501.08924" + license: "GPL-3.0" + training_data: "RawNIND – real-world raw noise/clean pairs photographed by authors, published on Wikimedia Commons and UCLouvain Dataverse" + training_data_license: "CC BY 4.0 / CC0 (per-image, Wikimedia Commons)" + notes: "all components publicly available under open licenses" + +convert: + - script: convert.py + args: + checkpoint: "{temp}/denoiser_bayer2prgb_utnet2.pt" + output: "{output}/model_bayer.onnx" + in_channels: 4 + funit: 32 + activation: "LeakyReLU" + opset: 17 + fp16: false + - script: convert.py + args: + checkpoint: "{temp}/denoiser_prgb2prgb_utnet2.pt" + output: "{output}/model_linear.onnx" + in_channels: 3 + funit: 32 + activation: "LeakyReLU" + opset: 17 + fp16: false diff --git a/pyproject.toml b/pyproject.toml index bfcdef6..1907b88 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,16 @@ nind = 
[ {include-group = "core"}, "onnxconverter-common", ] +rawnind = [ + {include-group = "core"}, + "onnxconverter-common", + "rawpy", + "tifffile", + "configargparse", + "pytorch-msssim", + "colour-science", + "ptflops", +] segnext = [ {include-group = "core"}, "mmcv>=2.2.0", @@ -64,6 +74,7 @@ openclip = [ all-models = [ {include-group = "nafnet"}, {include-group = "nind"}, + {include-group = "rawnind"}, {include-group = "segnext"}, {include-group = "bsrgan"}, {include-group = "sam21"}, diff --git a/samples/rawdenoise/bayer/1D3_6400.CR2 b/samples/rawdenoise/bayer/1D3_6400.CR2 new file mode 100644 index 0000000..ebaec17 Binary files /dev/null and b/samples/rawdenoise/bayer/1D3_6400.CR2 differ diff --git a/samples/rawdenoise/bayer/DSC04028.ARW b/samples/rawdenoise/bayer/DSC04028.ARW new file mode 100644 index 0000000..e04e9e9 Binary files /dev/null and b/samples/rawdenoise/bayer/DSC04028.ARW differ diff --git a/samples/rawdenoise/bayer/monkey_25600.CR3 b/samples/rawdenoise/bayer/monkey_25600.CR3 new file mode 100644 index 0000000..e134a4e Binary files /dev/null and b/samples/rawdenoise/bayer/monkey_25600.CR3 differ diff --git a/samples/rawdenoise/xtrans/20241017_0191.RAF b/samples/rawdenoise/xtrans/20241017_0191.RAF new file mode 100644 index 0000000..d58a91a Binary files /dev/null and b/samples/rawdenoise/xtrans/20241017_0191.RAF differ diff --git a/samples/rawdenoise/xtrans/DSCF3323.RAF b/samples/rawdenoise/xtrans/DSCF3323.RAF new file mode 100644 index 0000000..b5fbb38 Binary files /dev/null and b/samples/rawdenoise/xtrans/DSCF3323.RAF differ diff --git a/samples/rawdenoise/xtrans/DSCF9838.RAF b/samples/rawdenoise/xtrans/DSCF9838.RAF new file mode 100644 index 0000000..6b82071 Binary files /dev/null and b/samples/rawdenoise/xtrans/DSCF9838.RAF differ diff --git a/vendor/rawnind_jddc b/vendor/rawnind_jddc new file mode 160000 index 0000000..4d455aa --- /dev/null +++ b/vendor/rawnind_jddc @@ -0,0 +1 @@ +Subproject commit 4d455aa8ada69214eafa6a91ac0b2e011cf9dcb7