From 896accf7fcef92be3e364e9d43f6525d883b1014 Mon Sep 17 00:00:00 2001 From: Andrii Ryzhkov Date: Thu, 23 Apr 2026 13:33:44 +0200 Subject: [PATCH 1/9] Add AI raw denoise (RawNIND, Bayer + X-Trans) --- data/ai_models.json | 8 + data/darktableconfig.xml.in | 4 +- dev-doc/AI.md | 16 +- dev-doc/AI_Tasks.md | 4 +- src/CMakeLists.txt | 4 + src/ai/backend.h | 11 +- src/ai/backend_common.c | 56 +- src/ai/backend_onnx.c | 50 +- src/common/ai/restore.c | 1479 ++++++++-------- src/common/ai/restore.h | 560 ++++--- src/common/ai/restore_common.h | 236 +++ src/common/ai/restore_raw_bayer.c | 803 +++++++++ src/common/ai/restore_raw_bayer.h | 134 ++ src/common/ai/restore_raw_linear.c | 1032 ++++++++++++ src/common/ai/restore_raw_linear.h | 161 ++ src/common/ai/restore_rgb.c | 832 +++++++++ src/common/ai/restore_rgb.h | 180 ++ src/common/ai/segmentation.c | 4 +- src/common/dng_writer.c | 373 +++++ src/common/dng_writer.h | 105 ++ src/libs/neural_restore.c | 1949 +++++++++++++++++++--- src/tests/unittests/ai/test_ai_backend.c | 4 +- 22 files changed, 6689 insertions(+), 1316 deletions(-) create mode 100644 src/common/ai/restore_common.h create mode 100644 src/common/ai/restore_raw_bayer.c create mode 100644 src/common/ai/restore_raw_bayer.h create mode 100644 src/common/ai/restore_raw_linear.c create mode 100644 src/common/ai/restore_raw_linear.h create mode 100644 src/common/ai/restore_rgb.c create mode 100644 src/common/ai/restore_rgb.h create mode 100644 src/common/dng_writer.c create mode 100644 src/common/dng_writer.h diff --git a/data/ai_models.json b/data/ai_models.json index 12d46c1cb66d..08fa95e6acad 100644 --- a/data/ai_models.json +++ b/data/ai_models.json @@ -35,6 +35,14 @@ "github_asset": "denoise-nafnet.dtmodel", "default": false }, + { + "id": "rawdenoise-nind", + "name": "raw denoise nind", + "description": "UtNet2 raw denoiser trained on RawNIND dataset", + "task": "rawdenoise", + "github_asset": "rawdenoise-nind.dtmodel", + "default": true + }, { "id": 
"upscale-bsrgan", "name": "upscale bsrgan", diff --git a/data/darktableconfig.xml.in b/data/darktableconfig.xml.in index d4bf3bb7cc87..e14605fe16fc 100644 --- a/data/darktableconfig.xml.in +++ b/data/darktableconfig.xml.in @@ -3772,9 +3772,9 @@ plugins/lighttable/neural_restore/detail_recovery_bands string - 0.5,0.3,0.1,0.05,0.02 + 0.25,0.15,0.05,0.02,0.01 detail recovery wavelet band thresholds - comma-separated sigma multipliers for wavelet detail recovery bands (finest to coarsest). controls how much noise vs texture is recovered by the detail recovery slider + comma-separated sigma multipliers for wavelet detail recovery bands (finest to coarsest). controls how much noise vs texture passes through the DWT filter when strength is below 100 plugins/lighttable/neural_restore/preview_height diff --git a/dev-doc/AI.md b/dev-doc/AI.md index 18270ff25382..4c5c2454924c 100644 --- a/dev-doc/AI.md +++ b/dev-doc/AI.md @@ -17,7 +17,12 @@ src/ai/ ONNX Runtime backend (darktable_ai static lib) src/common/ai/ higher-level AI modules (compiled in lib_darktable) segmentation.c/.h SAM/SegNext interactive masking - restore.c/.h denoise/upscale tiled inference + restore.c/.h generic env/ctx lifecycle + model loaders + restore_common.h private struct defs shared by restore_* + restore_rgb.c/.h RGB-path denoise + upscale (tiled inference, + shadow boost, DWT detail recovery) + restore_raw_bayer.c/.h RawNIND Bayer denoise (batch + piped preview) + restore_raw_linear.c/.h RawNIND linear/X-Trans denoise src/common/ai_models.c/.h model registry, download, preferences integration src/gui/preferences_ai.c AI preferences tab @@ -402,6 +407,9 @@ FILE(GLOB SOURCE_FILES_AI "common/ai_models.c" "common/ai/segmentation.c" "common/ai/restore.c" + "common/ai/restore_rgb.c" + "common/ai/restore_raw_bayer.c" + "common/ai/restore_raw_linear.c" "common/ai/your_task.c" # add here ... 
) @@ -455,8 +463,10 @@ dt_your_task_free(ctx); | Task | Key | API | Consumer | |------|-----|-----|----------| | Object Mask | `"mask"` | `src/common/ai/segmentation.h` | `src/develop/masks/object.c` | -| Denoise | `"denoise"` | `src/common/ai/restore.h` | `src/libs/neural_restore.c` | -| Upscale | `"upscale"` | `src/common/ai/restore.h` | `src/libs/neural_restore.c` | +| Denoise | `"denoise"` | `src/common/ai/restore_rgb.h` | `src/libs/neural_restore.c` | +| Upscale | `"upscale"` | `src/common/ai/restore_rgb.h` | `src/libs/neural_restore.c` | +| Raw Denoise (Bayer) | `"rawdenoise"` | `src/common/ai/restore_raw_bayer.h` | `src/libs/neural_restore.c` | +| Raw Denoise (Linear) | `"rawdenoise"` | `src/common/ai/restore_raw_linear.h` | `src/libs/neural_restore.c` | For model requirements, I/O specifications, tiling strategies, color space conventions, ONNX export instructions, and config.json examples diff --git a/dev-doc/AI_Tasks.md b/dev-doc/AI_Tasks.md index 8720f9d9b2ef..9a034a70be3c 100644 --- a/dev-doc/AI_Tasks.md +++ b/dev-doc/AI_Tasks.md @@ -131,7 +131,7 @@ repository. Requirements for the decoder export: Removes noise from developed images using neural network inference. **Task key**: `"denoise"` -**API**: `src/common/ai/restore.h` (`dt_restore_load_denoise`) +**API**: `src/common/ai/restore.h` (loader: `dt_restore_load_denoise`), `src/common/ai/restore_rgb.h` (processing: `dt_restore_process_tiled`) **Consumer**: `src/libs/neural_restore.c` ### How It Works @@ -222,7 +222,7 @@ torch.onnx.export(model, dummy_input, "model.onnx", Super-resolution upscaling of developed images (2x or 4x). 
**Task key**: `"upscale"` -**API**: `src/common/ai/restore.h` (`dt_restore_load_upscale_x2`, `dt_restore_load_upscale_x4`) +**API**: `src/common/ai/restore.h` (loaders: `dt_restore_load_upscale_x2`, `dt_restore_load_upscale_x4`), `src/common/ai/restore_rgb.h` (processing: `dt_restore_process_tiled`) **Consumer**: `src/libs/neural_restore.c` ### How It Works diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e2d15b5b16cc..035104ed95d4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -462,6 +462,10 @@ if(USE_AI) "common/ai_models.c" "common/ai/segmentation.c" "common/ai/restore.c" + "common/ai/restore_rgb.c" + "common/ai/restore_raw_bayer.c" + "common/ai/restore_raw_linear.c" + "common/dng_writer.c" "develop/masks/object.c" "gui/preferences_ai.c" ) diff --git a/src/ai/backend.h b/src/ai/backend.h index 776bf8a4c523..93ca4fde67fe 100644 --- a/src/ai/backend.h +++ b/src/ai/backend.h @@ -166,6 +166,14 @@ double dt_ai_model_attribute_double(const dt_ai_model_info_t *info, char *dt_ai_model_attribute_string(const dt_ai_model_info_t *info, const char *key); +/** Return a newly-allocated int array from the JSON-array attribute + * named `key`. *out_count is set to the array length; NULL is returned + * (and *out_count = 0) when the key is absent or not a JSON array. + * Caller frees the returned array with g_free(). 
*/ +int *dt_ai_model_attribute_int_array(const dt_ai_model_info_t *info, + const char *key, + int *out_count); + /* --- Discovery --- */ /** @@ -267,7 +275,8 @@ dt_ai_context_t *dt_ai_load_model_ext(dt_ai_environment_t *env, dt_ai_provider_t provider, dt_ai_opt_level_t opt_level, const dt_ai_dim_override_t *dim_overrides, - int n_overrides); + int n_overrides, + uint32_t ep_flags); /** * @brief Tensor Data Types diff --git a/src/ai/backend_common.c b/src/ai/backend_common.c index 1e9ae773e924..f7a594f90d49 100644 --- a/src/ai/backend_common.c +++ b/src/ai/backend_common.c @@ -357,7 +357,8 @@ dt_ai_provider_t dt_ai_env_get_provider(dt_ai_environment_t *env) extern dt_ai_context_t * dt_ai_onnx_load_ext(const char *model_dir, const char *model_file, dt_ai_provider_t provider, dt_ai_opt_level_t opt_level, - const dt_ai_dim_override_t *dim_overrides, int n_overrides); + const dt_ai_dim_override_t *dim_overrides, int n_overrides, + uint32_t ep_flags); // model loading with backend dispatch @@ -367,7 +368,7 @@ dt_ai_context_t *dt_ai_load_model(dt_ai_environment_t *env, dt_ai_provider_t provider) { return dt_ai_load_model_ext(env, model_id, model_file, provider, - DT_AI_OPT_ALL, NULL, 0); + DT_AI_OPT_ALL, NULL, 0, 0); } dt_ai_context_t *dt_ai_load_model_ext(dt_ai_environment_t *env, @@ -376,7 +377,8 @@ dt_ai_context_t *dt_ai_load_model_ext(dt_ai_environment_t *env, dt_ai_provider_t provider, dt_ai_opt_level_t opt_level, const dt_ai_dim_override_t *dim_overrides, - int n_overrides) + int n_overrides, + uint32_t ep_flags) { if(!env || !model_id) return NULL; @@ -428,7 +430,7 @@ dt_ai_context_t *dt_ai_load_model_ext(dt_ai_environment_t *env, if(strcmp(backend_copy, "onnx") == 0) { ctx = dt_ai_onnx_load_ext(model_dir, model_file, provider, opt_level, - dim_overrides, n_overrides); + dim_overrides, n_overrides, ep_flags); } else { @@ -448,6 +450,10 @@ dt_ai_context_t *dt_ai_load_model_ext(dt_ai_environment_t *env, // _attribute_node returns the parsed JsonParser plus a borrowed 
JsonNode* // for the named key; caller must g_object_unref the returned parser; // returns NULL parser if the attribute set is absent or the key is missing +// +// the key accepts a dotted path ("variants.bayer.onnx"): each segment +// except the last must resolve to a JSON object; the final segment is +// the leaf lookup and may hold any JSON value type static JsonParser *_attribute_node(const dt_ai_model_info_t *info, const char *key, JsonNode **out_node) @@ -467,12 +473,26 @@ static JsonParser *_attribute_node(const dt_ai_model_info_t *info, return NULL; } JsonObject *obj = json_node_get_object(root); - if(!json_object_has_member(obj, key)) + gchar **segments = g_strsplit(key, ".", -1); + const int n = g_strv_length(segments); + JsonNode *node = NULL; + for(int i = 0; i < n; i++) + { + if(!json_object_has_member(obj, segments[i])) goto out; + node = json_object_get_member(obj, segments[i]); + if(i == n - 1) break; + // intermediate segments must be objects to descend further + if(!node || !JSON_NODE_HOLDS_OBJECT(node)) { node = NULL; goto out; } + obj = json_node_get_object(node); + } +out: + g_strfreev(segments); + if(!node) { g_object_unref(parser); return NULL; } - *out_node = json_object_get_member(obj, key); + *out_node = node; return parser; } @@ -529,6 +549,30 @@ char *dt_ai_model_attribute_string(const dt_ai_model_info_t *info, return result; } +int *dt_ai_model_attribute_int_array(const dt_ai_model_info_t *info, + const char *key, + int *out_count) +{ + if(out_count) *out_count = 0; + JsonNode *v = NULL; + JsonParser *p = _attribute_node(info, key, &v); + int *result = NULL; + if(v && JSON_NODE_HOLDS_ARRAY(v)) + { + JsonArray *arr = json_node_get_array(v); + const guint n = json_array_get_length(arr); + if(n > 0) + { + result = g_new(int, n); + for(guint i = 0; i < n; i++) + result[i] = (int)json_array_get_int_element(arr, i); + if(out_count) *out_count = (int)n; + } + } + if(p) g_object_unref(p); + return result; +} + // provider string conversion 
const char *dt_ai_provider_to_string(dt_ai_provider_t provider) diff --git a/src/ai/backend_onnx.c b/src/ai/backend_onnx.c index a6ce05ca2236..80cf93072f67 100644 --- a/src/ai/backend_onnx.c +++ b/src/ai/backend_onnx.c @@ -801,7 +801,8 @@ static float _half_to_float(uint16_t h) static gboolean _try_provider(OrtSessionOptions *session_opts, const char *symbol_name, const char *provider_name, - const char *device_type) + const char *device_type, + uint32_t flags) { OrtStatus *status = NULL; gboolean ok = FALSE; @@ -851,7 +852,7 @@ static gboolean _try_provider(OrtSessionOptions *session_opts, // integer-argument providers (CUDA, CoreML, DML, MIGraphX, ROCm) typedef OrtStatus *(*ProviderAppenderInt)(OrtSessionOptions *, uint32_t); ProviderAppenderInt appender = (ProviderAppenderInt)func_ptr; - status = appender(session_opts, 0); + status = appender(session_opts, flags); } if(!status) { @@ -880,7 +881,9 @@ static gboolean _try_provider(OrtSessionOptions *session_opts, } static void -_enable_acceleration(OrtSessionOptions *session_opts, dt_ai_provider_t provider) +_enable_acceleration(OrtSessionOptions *session_opts, + dt_ai_provider_t provider, + uint32_t coreml_flags) { switch(provider) { @@ -894,14 +897,14 @@ _enable_acceleration(OrtSessionOptions *session_opts, dt_ai_provider_t provider) _try_provider( session_opts, "OrtSessionOptionsAppendExecutionProvider_CoreML", - "Apple CoreML", NULL); + "Apple CoreML", NULL, coreml_flags); #else dt_print(DT_DEBUG_AI, "[darktable_ai] apple CoreML not available on this platform"); #endif break; case DT_AI_PROVIDER_CUDA: - _try_provider(session_opts, "OrtSessionOptionsAppendExecutionProvider_CUDA", "NVIDIA CUDA", NULL); + _try_provider(session_opts, "OrtSessionOptionsAppendExecutionProvider_CUDA", "NVIDIA CUDA", NULL, 0); break; case DT_AI_PROVIDER_MIGRAPHX: @@ -909,13 +912,13 @@ _enable_acceleration(OrtSessionOptions *session_opts, dt_ai_provider_t provider) // load time, so they must be set before CreateEnv() — see // 
_setup_amd_caches() above. OpenVINO (below) takes options // per-session, so its cache path is passed inline here - if(!_try_provider(session_opts, "OrtSessionOptionsAppendExecutionProvider_MIGraphX", "AMD MIGraphX", NULL)) - _try_provider(session_opts, "OrtSessionOptionsAppendExecutionProvider_ROCM", "AMD ROCm (legacy)", NULL); + if(!_try_provider(session_opts, "OrtSessionOptionsAppendExecutionProvider_MIGraphX", "AMD MIGraphX", NULL, 0)) + _try_provider(session_opts, "OrtSessionOptionsAppendExecutionProvider_ROCM", "AMD ROCm (legacy)", NULL, 0); break; case DT_AI_PROVIDER_OPENVINO: if(!_try_openvino_with_cache(session_opts)) - _try_provider(session_opts, "OrtSessionOptionsAppendExecutionProvider_OpenVINO", "Intel OpenVINO", "AUTO"); + _try_provider(session_opts, "OrtSessionOptionsAppendExecutionProvider_OpenVINO", "Intel OpenVINO", "AUTO", 0); break; case DT_AI_PROVIDER_DIRECTML: @@ -923,7 +926,7 @@ _enable_acceleration(OrtSessionOptions *session_opts, dt_ai_provider_t provider) _try_provider( session_opts, "OrtSessionOptionsAppendExecutionProvider_DML", - "Windows DirectML", NULL); + "Windows DirectML", NULL, 0); #else dt_print(DT_DEBUG_AI, "[darktable_ai] windows DirectML not available on this platform"); #endif @@ -936,27 +939,27 @@ _enable_acceleration(OrtSessionOptions *session_opts, dt_ai_provider_t provider) _try_provider( session_opts, "OrtSessionOptionsAppendExecutionProvider_CoreML", - "Apple CoreML", NULL); + "Apple CoreML", NULL, coreml_flags); #elif defined(_WIN32) _try_provider( session_opts, "OrtSessionOptionsAppendExecutionProvider_DML", - "Windows DirectML", NULL); + "Windows DirectML", NULL, 0); #elif defined(__linux__) // try CUDA first, then MIGraphX (cache configured at env init) if(!_try_provider( session_opts, "OrtSessionOptionsAppendExecutionProvider_CUDA", - "NVIDIA CUDA", NULL)) + "NVIDIA CUDA", NULL, 0)) { if(!_try_provider( session_opts, "OrtSessionOptionsAppendExecutionProvider_MIGraphX", - "AMD MIGraphX", NULL)) + "AMD MIGraphX", 
NULL, 0)) _try_provider( session_opts, "OrtSessionOptionsAppendExecutionProvider_ROCM", - "AMD ROCm (legacy)", NULL); + "AMD ROCm (legacy)", NULL, 0); } #endif break; @@ -996,20 +999,20 @@ int dt_ai_probe_provider(dt_ai_provider_t provider) switch(provider) { case DT_AI_PROVIDER_COREML: - ok = _try_provider(opts, "OrtSessionOptionsAppendExecutionProvider_CoreML", "Apple CoreML", NULL); + ok = _try_provider(opts, "OrtSessionOptionsAppendExecutionProvider_CoreML", "Apple CoreML", NULL, 0); break; case DT_AI_PROVIDER_CUDA: - ok = _try_provider(opts, "OrtSessionOptionsAppendExecutionProvider_CUDA", "NVIDIA CUDA", NULL); + ok = _try_provider(opts, "OrtSessionOptionsAppendExecutionProvider_CUDA", "NVIDIA CUDA", NULL, 0); break; case DT_AI_PROVIDER_MIGRAPHX: - ok = _try_provider(opts, "OrtSessionOptionsAppendExecutionProvider_MIGraphX", "AMD MIGraphX", NULL) - || _try_provider(opts, "OrtSessionOptionsAppendExecutionProvider_ROCM", "AMD ROCm (legacy)", NULL); + ok = _try_provider(opts, "OrtSessionOptionsAppendExecutionProvider_MIGraphX", "AMD MIGraphX", NULL, 0) + || _try_provider(opts, "OrtSessionOptionsAppendExecutionProvider_ROCM", "AMD ROCm (legacy)", NULL, 0); break; case DT_AI_PROVIDER_OPENVINO: - ok = _try_provider(opts, "OrtSessionOptionsAppendExecutionProvider_OpenVINO", "Intel OpenVINO", "AUTO"); + ok = _try_provider(opts, "OrtSessionOptionsAppendExecutionProvider_OpenVINO", "Intel OpenVINO", "AUTO", 0); break; case DT_AI_PROVIDER_DIRECTML: - ok = _try_provider(opts, "OrtSessionOptionsAppendExecutionProvider_DML", "Windows DirectML", NULL); + ok = _try_provider(opts, "OrtSessionOptionsAppendExecutionProvider_DML", "Windows DirectML", NULL, 0); break; default: break; @@ -1026,7 +1029,8 @@ int dt_ai_probe_provider(dt_ai_provider_t provider) dt_ai_context_t * dt_ai_onnx_load_ext(const char *model_dir, const char *model_file, dt_ai_provider_t provider, dt_ai_opt_level_t opt_level, - const dt_ai_dim_override_t *dim_overrides, int n_overrides) + const 
dt_ai_dim_override_t *dim_overrides, int n_overrides, + uint32_t ep_flags) { if(!model_dir) return NULL; @@ -1111,7 +1115,7 @@ dt_ai_onnx_load_ext(const char *model_dir, const char *model_file, } // optimize: enable hardware acceleration (AMD caches set at env init) - _enable_acceleration(session_opts, provider); + _enable_acceleration(session_opts, provider, ep_flags); #ifdef _WIN32 // on windows, CreateSession expects a wide character string @@ -1176,7 +1180,7 @@ dt_ai_onnx_load_ext(const char *model_dir, const char *model_file, if(s) g_ort->ReleaseStatus(s); } if(fallbacks[fb].prov != DT_AI_PROVIDER_CPU) - _enable_acceleration(session_opts, fallbacks[fb].prov); + _enable_acceleration(session_opts, fallbacks[fb].prov, ep_flags); #ifdef _WIN32 status = g_ort->CreateSession(g_env, onnx_path_wide, session_opts, &ctx->session); #else diff --git a/src/common/ai/restore.c b/src/common/ai/restore.c index 53ceab0b4863..7be560413db9 100644 --- a/src/common/ai/restore.c +++ b/src/common/ai/restore.c @@ -17,120 +17,34 @@ */ #include "common/ai/restore.h" +#include "common/ai/restore_common.h" #include "ai/backend.h" #include "common/darktable.h" #include "common/ai_models.h" #include "common/colorspaces.h" -#include "common/colorspaces_inline_conversions.h" -#include "common/imagebuf.h" -#include "common/math.h" -#include "common/matrices.h" +#include "common/iop_order.h" +#include "control/control.h" #include "control/jobs.h" +#include "develop/develop.h" +#include "develop/imageop.h" +#include "develop/pixelpipe_hb.h" +#include "imageio/imageio_common.h" -// forward-declare to avoid pulling in dwt.h (which -// includes OpenCL types when HAVE_OPENCL is defined) -extern void dwt_denoise(float *buf, int width, int height, - int bands, const float *noise); #include #include #define OVERLAP_DENOISE 64 #define OVERLAP_UPSCALE 16 -#define MAX_MODEL_INPUTS 4 -#define DWT_DETAIL_BANDS 5 // candidate tile sizes from largest to smallest, used by both the // startup memory-budget 
selector and the runtime OOM-retry fallback. // the memory-budget check gates which entry is chosen at startup; // the tile size cache persists the result so JIT-compiling EPs -// (MIGraphX, CoreML, TensorRT) only pay the compile cost once. +// (MIGraphX, CoreML, TensorRT) only pay the compile cost once #define DT_RESTORE_TILE_LADDER_1X {2048, 1536, 1024, 768, 512, 384, 256} #define DT_RESTORE_TILE_LADDER_SR {768, 512, 384, 256, 192} -/* --- opaque struct definitions --- */ - -struct dt_restore_env_t -{ - dt_ai_environment_t *ai_env; -}; - -struct dt_restore_context_t -{ - dt_ai_context_t *ai_ctx; - dt_restore_env_t *env; - char *model_id; - char *model_file; - char *task; - int tile_size; // tile size used to create the current session - char *dim_h; // symbolic height dim name used for session overrides - char *dim_w; // symbolic width dim name used for session overrides - // color management: convert from working profile to sRGB before - // inference (model was trained on sRGB primaries) and back after. - // if has_profile is FALSE, fall back to gamma-only conversion - // (treats working-profile numbers as if they were sRGB) - gboolean has_profile; - float wp_to_srgb[9]; // working profile RGB -> sRGB linear (row-major) - float srgb_to_wp[9]; // sRGB linear -> working profile RGB (row-major) - // when TRUE (default), out-of-sRGB-gamut pixels pass through unchanged - // during denoise. 
when FALSE, every pixel uses the model output and - // wide-gamut colors get clipped to sRGB but everything is denoised - gboolean preserve_wide_gamut; - // shadow_boost_capable: TRUE when the model declares the - // "shadow_boost" attribute in its config.json; set once at load - gboolean shadow_boost_capable; - // shadow_boost: the effective flag used at inference; recomputed - // per-image inside dt_restore_process_tiled() when capable, based - // on a luminance check (bright images skip the curve) - gboolean shadow_boost; - gint ref_count; -}; - -// default multipliers of residual sigma for each wavelet band. -// band 0 (finest) gets the strongest suppression since fine-scale -// features are hardest to distinguish from noise. coarser bands -// preserve more because they capture real texture. -// tunable via darktablerc: plugins/lighttable/neural_restore/detail_recovery_bands -static const float _dwt_sigma_mul_default[DWT_DETAIL_BANDS] = { - 0.5f, // band 0 (finest) — suppress fine luminance noise - 0.3f, // band 1 - 0.1f, // band 2 - 0.05f, // band 3 - 0.02f // band 4 (coarsest) — keep almost everything -}; - -// compute adaptive noise thresholds from residual standard deviation -static void _compute_adaptive_noise(const float *const restrict buf, - const size_t npix, - float noise[DWT_DETAIL_BANDS]) -{ - // read band multipliers from config (comma-separated list). - // e.g. 
"0.5,0.3,0.1,0.05,0.02" in darktablerc - float sigma_mul[DWT_DETAIL_BANDS]; - memcpy(sigma_mul, _dwt_sigma_mul_default, sizeof(sigma_mul)); - gchar *val = dt_conf_get_string("plugins/lighttable/neural_restore/detail_recovery_bands"); - if(val && val[0]) - { - gchar **parts = g_strsplit(val, ",", DWT_DETAIL_BANDS); - for(int b = 0; parts[b] && b < DWT_DETAIL_BANDS; b++) - sigma_mul[b] = g_ascii_strtod(g_strstrip(parts[b]), NULL); - g_strfreev(parts); - } - g_free(val); - - double sum = 0.0, sum2 = 0.0; - for(size_t i = 0; i < npix; i++) - { - sum += (double)buf[i]; - sum2 += (double)buf[i] * (double)buf[i]; - } - const double mean = sum / (double)npix; - const float sigma = (float)sqrt(sum2 / (double)npix - mean * mean); - - for(int b = 0; b < DWT_DETAIL_BANDS; b++) - noise[b] = sigma * sigma_mul[b]; -} - -/* --- environment lifecycle --- */ +// --- environment lifecycle --- dt_restore_env_t *dt_restore_env_init(void) { @@ -156,12 +70,131 @@ void dt_restore_env_destroy(dt_restore_env_t *env) g_free(env); } -/* --- model lifecycle --- */ +// --- model lifecycle --- + +#define TASK_DENOISE "denoise" +#define TASK_RAWDENOISE "rawdenoise" +#define TASK_UPSCALE "upscale" + +// --- manifest policy parsers --- +// +// parse a variant's string attribute into the matching enum. 
unknown +// values return UNKNOWN (for input_kind — caller validates), or the +// supplied default (for the other three — caller has already decided +// the default matches today's RawNIND behavior) + +static dt_restore_input_kind_t _parse_input_kind(const char *s) +{ + if(!s) return DT_RESTORE_INPUT_KIND_UNKNOWN; + if(!g_strcmp0(s, "bayer_v1")) return DT_RESTORE_INPUT_KIND_BAYER_V1; + if(!g_strcmp0(s, "xtrans_v1")) return DT_RESTORE_INPUT_KIND_XTRANS_V1; + if(!g_strcmp0(s, "linear_v1")) return DT_RESTORE_INPUT_KIND_LINEAR_V1; + return DT_RESTORE_INPUT_KIND_UNKNOWN; +} + +static const char *_input_kind_name(dt_restore_input_kind_t k) +{ + switch(k) + { + case DT_RESTORE_INPUT_KIND_BAYER_V1: return "bayer_v1"; + case DT_RESTORE_INPUT_KIND_XTRANS_V1: return "xtrans_v1"; + case DT_RESTORE_INPUT_KIND_LINEAR_V1: return "linear_v1"; + default: return "unknown"; + } +} + +static dt_restore_colorspace_t _parse_colorspace(const char *s, + dt_restore_colorspace_t dflt) +{ + if(!s) return dflt; + if(!g_strcmp0(s, "lin_rec2020")) return DT_RESTORE_CS_LIN_REC2020; + if(!g_strcmp0(s, "camRGB")) return DT_RESTORE_CS_CAMRGB; + if(!g_strcmp0(s, "srgb_linear")) return DT_RESTORE_CS_SRGB_LINEAR; + dt_print(DT_DEBUG_AI, + "[restore] unknown input_colorspace '%s', using default", s); + return dflt; +} + +static dt_restore_wb_mode_t _parse_wb_mode(const char *s, + dt_restore_wb_mode_t dflt) +{ + if(!s) return dflt; + if(!g_strcmp0(s, "daylight")) return DT_RESTORE_WB_DAYLIGHT; + if(!g_strcmp0(s, "as_shot")) return DT_RESTORE_WB_AS_SHOT; + if(!g_strcmp0(s, "none")) return DT_RESTORE_WB_NONE; + dt_print(DT_DEBUG_AI, + "[restore] unknown wb_norm '%s', using default", s); + return dflt; +} + +static dt_restore_output_scale_t _parse_output_scale(const char *s, + dt_restore_output_scale_t dflt) +{ + if(!s) return dflt; + if(!g_strcmp0(s, "match_gain")) return DT_RESTORE_OUT_MATCH_GAIN; + if(!g_strcmp0(s, "absolute")) return DT_RESTORE_OUT_ABSOLUTE; + dt_print(DT_DEBUG_AI, + "[restore] unknown 
output_scale '%s', using default", s); + return dflt; +} + +static dt_restore_bayer_orientation_t _parse_bayer_orientation( + const char *s, dt_restore_bayer_orientation_t dflt) +{ + if(!s) return dflt; + if(!g_strcmp0(s, "force_rggb")) return DT_RESTORE_BAYER_FORCE_RGGB; + if(!g_strcmp0(s, "native")) return DT_RESTORE_BAYER_NATIVE; + dt_print(DT_DEBUG_AI, + "[restore] unknown bayer_orientation '%s', using default", s); + return dflt; +} -#define TASK_DENOISE "denoise" -#define TASK_UPSCALE "upscale" +static dt_restore_edge_pad_t _parse_edge_pad(const char *s, + dt_restore_edge_pad_t dflt) +{ + if(!s) return dflt; + if(!g_strcmp0(s, "mirror_cropped")) return DT_RESTORE_EDGE_MIRROR_CROPPED; + if(!g_strcmp0(s, "mirror")) return DT_RESTORE_EDGE_MIRROR; + dt_print(DT_DEBUG_AI, + "[restore] unknown edge_pad '%s', using default", s); + return dflt; +} + +// target_mean accepts "null" as an explicit disable; missing key falls +// back to the per-variant default (NAN for bayer, 0.3 for linear). +// a numeric string parses via g_ascii_strtod +static float _parse_target_mean(const dt_ai_model_info_t *info, + const char *key, float dflt) +{ + char *s = dt_ai_model_attribute_string(info, key); + if(!s) return dflt; + if(!g_strcmp0(s, "null") || !g_strcmp0(s, "none")) + { + g_free(s); + return NAN; + } + char *endp = NULL; + const double v = g_ascii_strtod(s, &endp); + if(endp == s || !endp || *endp != '\0') + { + dt_print(DT_DEBUG_AI, + "[restore] target_mean '%s' not parseable, using default", s); + g_free(s); + return dflt; + } + g_free(s); + return (float)v; +} -static int _select_tile_size(int scale); +static int _select_tile_size(const int *ladder, int n_ladder, int scale); +// resolve the tile ladder for a model: prefer the "input_sizes" JSON +// array attribute from config.json when present, otherwise fall back +// to the built-in ladder for the model's scale. 
always returns a +// freshly-allocated int[] + count that the caller owns and g_free()s +static void _resolve_tile_ladder(const dt_ai_model_info_t *info, + int scale, + int **out_sizes, + int *out_count); // returns the cached tile size for model_id+scale+provider combo, or 0 if not set static int _get_cached_tile_size(const char *model_id, int scale) @@ -187,13 +220,14 @@ static void _set_cached_tile_size(const char *model_id, int scale, int tile_size } // internal: create an ORT session for model_id/model_file with spatial dims -// fixed to tile_size. returns a new ai_ctx, or NULL on failure. +// fixed to tile_size. returns a new ai_ctx, or NULL on failure static dt_ai_context_t *_create_session(dt_ai_environment_t *ai_env, const char *model_id, const char *model_file, const char *dim_h, const char *dim_w, - int tile_size) + int tile_size, + uint32_t ep_flags) { const dt_ai_dim_override_t overrides[] = { { "batch_size", 1 }, @@ -204,13 +238,14 @@ static dt_ai_context_t *_create_session(dt_ai_environment_t *ai_env, return dt_ai_load_model_ext( ai_env, model_id, model_file, DT_AI_PROVIDER_CONFIGURED, DT_AI_OPT_ALL, - overrides, (int)G_N_ELEMENTS(overrides)); + overrides, (int)G_N_ELEMENTS(overrides), ep_flags); } // internal: resolve task -> model_id -> load with tile size dim overrides static dt_restore_context_t *_load(dt_restore_env_t *env, const char *task, - const char *model_file, + const char *variant, + const char *default_file, int scale) { if(!env) return NULL; @@ -227,16 +262,158 @@ static dt_restore_context_t *_load(dt_restore_env_t *env, dt_ai_models_get_spatial_dims(darktable.ai_registry, model_id, &dim_h, &dim_w); - // select tile size from cache or memory budget + const dt_ai_model_info_t *info + = dt_ai_get_model_info_by_id(env->ai_env, model_id); + + // variant-aware config lookup: variant models must declare their ONNX + // filename under variants..onnx. 
input_kind is stashed on ctx + // so raw paths can sanity-check they're pointing at the right model. + // non-variant models (denoise, upscale) pass variant=NULL and supply + // the filename directly via default_file + char *variant_file = NULL; + char *input_kind = NULL; + // policy strings (all optional; NULL falls through to defaults) + char *cs_str = NULL, *wb_str = NULL, *scale_str = NULL; + char *bo_str = NULL, *edge_str = NULL; + // expected input_kind for this variant slot. raw variants MUST match + // one of the declared v1 contracts; non-variant tasks pass UNKNOWN + // and skip the contract check entirely + dt_restore_input_kind_t expected_kind = DT_RESTORE_INPUT_KIND_UNKNOWN; + if(variant) + { + char *k_file = g_strdup_printf("variants.%s.onnx", variant); + char *k_kind = g_strdup_printf("variants.%s.input_kind", variant); + char *k_cs = g_strdup_printf("variants.%s.input_colorspace", variant); + char *k_wb = g_strdup_printf("variants.%s.wb_norm", variant); + char *k_scale = g_strdup_printf("variants.%s.output_scale", variant); + char *k_bo = g_strdup_printf("variants.%s.bayer_orientation", variant); + char *k_edge = g_strdup_printf("variants.%s.edge_pad", variant); + variant_file = dt_ai_model_attribute_string(info, k_file); + input_kind = dt_ai_model_attribute_string(info, k_kind); + cs_str = dt_ai_model_attribute_string(info, k_cs); + wb_str = dt_ai_model_attribute_string(info, k_wb); + scale_str = dt_ai_model_attribute_string(info, k_scale); + bo_str = dt_ai_model_attribute_string(info, k_bo); + edge_str = dt_ai_model_attribute_string(info, k_edge); + g_free(k_file); + g_free(k_kind); + g_free(k_cs); + g_free(k_wb); + g_free(k_scale); + g_free(k_bo); + g_free(k_edge); + + if(!variant_file) + { + dt_print(DT_DEBUG_AI, + "[restore] model %s declares no variants.%s.onnx — " + "cannot load variant", + model_id, variant); + g_free(input_kind); + g_free(cs_str); + g_free(wb_str); + g_free(scale_str); + g_free(bo_str); + g_free(edge_str); + 
g_free(model_id); + return NULL; + } + + // contract check: the variant slot name pins which input_kind we + // expect. older manifests predate the label; if unset, assume the + // expected one (back-compat). a declared-but-wrong label is a hard + // error — refusing to load keeps mis-packaged ONNX from crashing + // at inference with a confusing shape-mismatch + if(!g_strcmp0(task, TASK_RAWDENOISE)) + { + if(!g_strcmp0(variant, "bayer")) + expected_kind = DT_RESTORE_INPUT_KIND_BAYER_V1; + else if(!g_strcmp0(variant, "xtrans")) + expected_kind = DT_RESTORE_INPUT_KIND_XTRANS_V1; + else if(!g_strcmp0(variant, "linear")) + expected_kind = DT_RESTORE_INPUT_KIND_LINEAR_V1; + } + if(expected_kind != DT_RESTORE_INPUT_KIND_UNKNOWN) + { + const dt_restore_input_kind_t declared = _parse_input_kind(input_kind); + const gboolean missing = (input_kind == NULL); + const gboolean mismatch + = !missing && declared != expected_kind; + if(mismatch || (!missing && declared == DT_RESTORE_INPUT_KIND_UNKNOWN)) + { + dt_print(DT_DEBUG_AI, + "[restore] model %s variant '%s': input_kind '%s' " + "does not match expected '%s' — refusing to load", + model_id, variant, input_kind, + _input_kind_name(expected_kind)); + dt_control_log(_("raw denoise model %s: incompatible input_kind"), + model_id); + g_free(input_kind); + g_free(cs_str); + g_free(wb_str); + g_free(scale_str); + g_free(bo_str); + g_free(edge_str); + g_free(variant_file); + g_free(model_id); + return NULL; + } + } + + dt_print(DT_DEBUG_AI, + "[restore] variant '%s': file=%s input_kind=%s", + variant, variant_file, + input_kind ? input_kind : "(none)"); + } + const char *model_file = variant ? 
variant_file : default_file; + + // resolve the tile ladder: model-declared input_sizes if present, + // otherwise a copy of the built-in ladder for this scale + int *tile_ladder = NULL; + int n_tile_ladder = 0; + _resolve_tile_ladder(info, scale, &tile_ladder, &n_tile_ladder); + + // select tile size from cache, but only if the cached value is still + // a member of the ladder — otherwise a model upgrade that narrowed + // its supported input_sizes would load with a stale size and fail + // at graph shape inference (U-Nets are strict about spatial dims) int tile_size = _get_cached_tile_size(model_id, scale); - if(tile_size <= 0) - tile_size = _select_tile_size(scale); + gboolean cached_ok = FALSE; + for(int i = 0; i < n_tile_ladder && !cached_ok; i++) + if(tile_ladder[i] == tile_size) cached_ok = TRUE; + if(!cached_ok) + { + if(tile_size > 0) + dt_print(DT_DEBUG_AI, + "[restore] cached tile size %d not in ladder, re-selecting", + tile_size); + tile_size = _select_tile_size(tile_ladder, n_tile_ladder, scale); + } + + // CoreML CPU-only flag: models whose intermediate activations + // overflow FP16 (e.g. raw denoise) declare this in config.json + // to force CoreML's CPU path which runs FP32 + const uint32_t ep_flags + = dt_ai_model_attribute_bool(info, "coreml_cpu_only") ? 
1 : 0; + if(ep_flags) + dt_print(DT_DEBUG_AI, + "[restore] model %s: coreml_cpu_only=true (ep_flags=%u)", + model_id, ep_flags); dt_ai_context_t *ai_ctx = _create_session( - env->ai_env, model_id, model_file, dim_h, dim_w, tile_size); + env->ai_env, model_id, model_file, dim_h, dim_w, tile_size, + ep_flags); if(!ai_ctx) { g_free(model_id); + g_free(tile_ladder); + g_free(variant_file); + g_free(input_kind); + g_free(cs_str); + g_free(wb_str); + g_free(scale_str); + g_free(bo_str); + g_free(edge_str); return NULL; } @@ -245,18 +422,78 @@ static dt_restore_context_t *_load(dt_restore_env_t *env, ctx->ai_ctx = ai_ctx; ctx->env = env; ctx->task = g_strdup(task); + ctx->input_kind = input_kind; // take ownership + ctx->scale = scale; ctx->model_id = model_id; ctx->model_file = g_strdup(model_file); ctx->tile_size = tile_size; + ctx->tile_ladder = tile_ladder; + ctx->n_tile_ladder = n_tile_ladder; + ctx->ep_flags = ep_flags; ctx->dim_h = g_strdup(dim_h); ctx->dim_w = g_strdup(dim_w); ctx->preserve_wide_gamut = TRUE; + + // resolve policy enums: per-variant defaults reproduce today's + // RawNIND behavior exactly, so manifests that declare none of these + // keys keep working unchanged. bayer path defaults to daylight WB + // (training distribution); linear path defaults to as-shot WB (its + // re-imported DNG benefits from matching the source tonemap — see + // the rationale in dt_restore_raw_linear). output_scale defaults to + // match_gain for both. linear gets a 0.30 exposure target; bayer + // doesn't use one (NAN = disabled). input_colorspace only applies + // to the linear path + ctx->input_kind_enum = expected_kind; + { + // linear_v1 and xtrans_v1 share the demosaic-based pipeline + // defaults (as-shot WB, lin_rec2020 colorspace, 0.30 training- + // brightness exposure target). 
When a dedicated xtrans model + // ships these defaults may need to diverge — override in the + // manifest if so + const gboolean demosaic_pipeline + = (expected_kind == DT_RESTORE_INPUT_KIND_LINEAR_V1) + || (expected_kind == DT_RESTORE_INPUT_KIND_XTRANS_V1); + const dt_restore_wb_mode_t default_wb + = demosaic_pipeline ? DT_RESTORE_WB_AS_SHOT : DT_RESTORE_WB_DAYLIGHT; + const dt_restore_colorspace_t default_cs + = demosaic_pipeline ? DT_RESTORE_CS_LIN_REC2020 : DT_RESTORE_CS_CAMRGB; + const float default_tm = demosaic_pipeline ? 0.30f : NAN; + ctx->wb_mode = _parse_wb_mode(wb_str, default_wb); + ctx->output_scale = _parse_output_scale(scale_str, DT_RESTORE_OUT_MATCH_GAIN); + ctx->input_colorspace = _parse_colorspace(cs_str, default_cs); + char *k_tm = variant + ? g_strdup_printf("variants.%s.target_mean", variant) : NULL; + ctx->target_mean = k_tm + ? _parse_target_mean(info, k_tm, default_tm) : default_tm; + g_free(k_tm); + + // bayer-only packing knobs. bayer_v1's contract pairs with + // force_rggb + mirror_cropped (matches RawNIND training which + // physically crops to RGGB before tiling — so corner-tile mirror + // reflections must happen in the cropped frame). a future + // 'native' orientation would let a model see non-RGGB sensors + // without any origin shift; paired default is mirror_absolute + // since there's no cropped frame to reflect within + const dt_restore_bayer_orientation_t default_bo + = (expected_kind == DT_RESTORE_INPUT_KIND_BAYER_V1) + ? DT_RESTORE_BAYER_FORCE_RGGB + : DT_RESTORE_BAYER_NATIVE; + ctx->bayer_orientation = _parse_bayer_orientation(bo_str, default_bo); + const dt_restore_edge_pad_t default_edge + = (ctx->bayer_orientation == DT_RESTORE_BAYER_FORCE_RGGB) + ? 
DT_RESTORE_EDGE_MIRROR_CROPPED + : DT_RESTORE_EDGE_MIRROR; + ctx->edge_pad = _parse_edge_pad(edge_str, default_edge); + } + g_free(cs_str); + g_free(wb_str); + g_free(scale_str); + g_free(bo_str); + g_free(edge_str); // shadow boost capability is declared per-model via the // "attributes": { "shadow_boost": true } object in config.json; // models that hallucinate in dark patches opt in this way; // other models run as-is - const dt_ai_model_info_t *info - = dt_ai_get_model_info_by_id(env->ai_env, model_id); ctx->shadow_boost_capable = dt_ai_model_attribute_bool(info, "shadow_boost"); ctx->shadow_boost = ctx->shadow_boost_capable; @@ -264,19 +501,28 @@ static dt_restore_context_t *_load(dt_restore_env_t *env, dt_print(DT_DEBUG_AI, "[restore] model %s declares shadow_boost attribute", model_id); + g_free(variant_file); return ctx; } // internal: recreate the ORT session with a smaller tile size after OOM. // updates ctx->ai_ctx and ctx->tile_size in place. // returns TRUE on success, FALSE if the reload also fails. +// +// unload the old session BEFORE creating the new one: after a GPU OOM +// the old session is still holding VRAM, and trying to allocate even +// a tiny new session on top triggers a cascade of init failures in +// ORT's provider-fallback retry path. 
freeing first lets the new +// session fit without the retries static gboolean _reload_session(dt_restore_context_t *ctx, int new_tile_size) { + dt_ai_unload_model(ctx->ai_ctx); + ctx->ai_ctx = NULL; + dt_ai_context_t *new_ctx = _create_session( ctx->env->ai_env, ctx->model_id, ctx->model_file, - ctx->dim_h, ctx->dim_w, new_tile_size); + ctx->dim_h, ctx->dim_w, new_tile_size, ctx->ep_flags); if(!new_ctx) return FALSE; - dt_ai_unload_model(ctx->ai_ctx); ctx->ai_ctx = new_ctx; ctx->tile_size = new_tile_size; return TRUE; @@ -284,17 +530,63 @@ static gboolean _reload_session(dt_restore_context_t *ctx, int new_tile_size) dt_restore_context_t *dt_restore_load_denoise(dt_restore_env_t *env) { - return _load(env, TASK_DENOISE, NULL, 1); + return _load(env, TASK_DENOISE, NULL, NULL, 1); +} + +dt_restore_sensor_class_t dt_restore_classify_sensor(const dt_image_t *img) +{ + if(!img || !(img->flags & DT_IMAGE_RAW)) + return DT_RESTORE_SENSOR_CLASS_UNSUPPORTED; + if(img->flags & (DT_IMAGE_MONOCHROME | DT_IMAGE_MONOCHROME_BAYER)) + return DT_RESTORE_SENSOR_CLASS_UNSUPPORTED; + const uint32_t filters = img->buf_dsc.filters; + if(filters == 9u) return DT_RESTORE_SENSOR_CLASS_XTRANS; + if(filters != 0u) return DT_RESTORE_SENSOR_CLASS_BAYER; + return DT_RESTORE_SENSOR_CLASS_LINEAR; +} + +dt_restore_context_t *dt_restore_load_rawdenoise_bayer(dt_restore_env_t *env) +{ + // scale 1x, same pipeline as denoise; filename comes from the model's + // variants.bayer.onnx attribute. 
loading fails if the YAML doesn't + // declare it — no silent fallback for broken model packages + return _load(env, TASK_RAWDENOISE, "bayer", NULL, 1); +} + +dt_restore_context_t *dt_restore_load_rawdenoise_linear(dt_restore_env_t *env) +{ + // generic-demosaic fallback: Foveon, monochrome-with-pattern, and + // currently also X-Trans (until dt_restore_load_rawdenoise_xtrans + // gets a dedicated variant to load) + return _load(env, TASK_RAWDENOISE, "linear", NULL, 1); +} + +dt_restore_context_t *dt_restore_load_rawdenoise_xtrans(dt_restore_env_t *env) +{ + // prefer a dedicated xtrans variant when the manifest declares one; + // fall back to the linear pipeline otherwise. this lets a future + // RawNIND release ship a dedicated X-Trans model via just a manifest + // update — no code changes in darktable (assuming the dedicated model + // shares the linear pipeline; a structurally different X-Trans input + // format would still need its own preprocessing code) + dt_restore_context_t *ctx = _load(env, TASK_RAWDENOISE, "xtrans", NULL, 1); + if(!ctx) + { + dt_print(DT_DEBUG_AI, + "[restore] no dedicated xtrans variant; using linear as fallback"); + ctx = _load(env, TASK_RAWDENOISE, "linear", NULL, 1); + } + return ctx; } dt_restore_context_t *dt_restore_load_upscale_x2(dt_restore_env_t *env) { - return _load(env, TASK_UPSCALE, "model_x2.onnx", 2); + return _load(env, TASK_UPSCALE, NULL, "model_x2.onnx", 2); } dt_restore_context_t *dt_restore_load_upscale_x4(dt_restore_env_t *env) { - return _load(env, TASK_UPSCALE, "model_x4.onnx", 4); + return _load(env, TASK_UPSCALE, NULL, "model_x4.onnx", 4); } dt_restore_context_t *dt_restore_ref(dt_restore_context_t *ctx) @@ -310,76 +602,16 @@ void dt_restore_unref(dt_restore_context_t *ctx) { dt_ai_unload_model(ctx->ai_ctx); g_free(ctx->task); + g_free(ctx->input_kind); g_free(ctx->model_id); g_free(ctx->model_file); g_free(ctx->dim_h); g_free(ctx->dim_w); + g_free(ctx->tile_ladder); g_free(ctx); } } -void 
dt_restore_set_profile(dt_restore_context_t *ctx, void *profile) -{ - if(!ctx) return; - if(!profile) - { - ctx->has_profile = FALSE; - return; - } - - float primaries[3][2], whitepoint[2]; - if(!dt_colorspaces_get_primaries_and_whitepoint_from_profile( - (cmsHPROFILE)profile, primaries, whitepoint)) - { - dt_print(DT_DEBUG_AI, - "[restore] could not read primaries from working profile, " - "falling back to gamma-only conversion"); - ctx->has_profile = FALSE; - return; - } - - // build WP -> XYZ (stored transposed by dt, convert to row-major) - dt_colormatrix_t wp_to_xyz_T; - dt_make_transposed_matrices_from_primaries_and_whitepoint(primaries, - whitepoint, - wp_to_xyz_T); - float wp_to_xyz[9]; - for(int i = 0; i < 3; i++) - for(int j = 0; j < 3; j++) - wp_to_xyz[3 * i + j] = wp_to_xyz_T[j][i]; - - // transpose dt's sRGB<->XYZ matrices (Bradford D50) to row-major - float xyz_to_srgb[9], srgb_to_xyz[9]; - for(int i = 0; i < 3; i++) - for(int j = 0; j < 3; j++) - { - xyz_to_srgb[3 * i + j] = xyz_to_srgb_transposed[j][i]; - srgb_to_xyz[3 * i + j] = sRGB_to_xyz_transposed[j][i]; - } - - // WP -> sRGB = (XYZ -> sRGB) * (WP -> XYZ) - mat3mul(ctx->wp_to_srgb, xyz_to_srgb, wp_to_xyz); - - // invert WP -> XYZ to get XYZ -> WP, then compose sRGB -> WP - float xyz_to_wp[9]; - if(mat3inv(xyz_to_wp, wp_to_xyz) != 0) - { - dt_print(DT_DEBUG_AI, - "[restore] singular WP->XYZ matrix, falling back to gamma-only"); - ctx->has_profile = FALSE; - return; - } - mat3mul(ctx->srgb_to_wp, xyz_to_wp, srgb_to_xyz); - - ctx->has_profile = TRUE; - dt_print(DT_DEBUG_AI, "[restore] working profile color matrices ready"); -} - -void dt_restore_set_preserve_wide_gamut(dt_restore_context_t *ctx, gboolean preserve) -{ - if(ctx) ctx->preserve_wide_gamut = preserve; -} - static gboolean _model_available(dt_restore_env_t *env, const char *task) { @@ -403,64 +635,31 @@ gboolean dt_restore_denoise_available(dt_restore_env_t *env) return _model_available(env, TASK_DENOISE); } -gboolean 
dt_restore_upscale_available(dt_restore_env_t *env) +gboolean dt_restore_rawdenoise_available(dt_restore_env_t *env) { - return _model_available(env, TASK_UPSCALE); + return _model_available(env, TASK_RAWDENOISE); } -/* --- color conversion --- */ - -// sRGB transfer function (gamma curve only, no primaries change). -// values > 1.0 are allowed to preserve wide-gamut colors -static inline float _linear_to_srgb(const float v) -{ - if(v <= 0.0f) return 0.0f; - return (v <= 0.0031308f) - ? 12.92f * v - : 1.055f * powf(v, 1.0f / 2.4f) - 0.055f; -} - -static inline float _srgb_to_linear(const float v) -{ - if(v <= 0.0f) return 0.0f; - return (v <= 0.04045f) - ? v / 12.92f - : powf((v + 0.055f) / 1.055f, 2.4f); -} - -/* --- helpers --- */ - -static inline int _mirror(int v, int max) +gboolean dt_restore_upscale_available(dt_restore_env_t *env) { - if(v < 0) v = -v; - if(v >= max) v = 2 * max - 2 - v; - if(v < 0) return 0; - if(v >= max) return max - 1; - return v; + return _model_available(env, TASK_UPSCALE); } -/* --- public API --- */ +// --- public API --- int dt_restore_get_overlap(int scale) { return (scale > 1) ? OVERLAP_UPSCALE : OVERLAP_DENOISE; } -static int _select_tile_size(int scale) +static int _select_tile_size(const int *ladder, int n_ladder, int scale) { - const int ladder_1x[] = DT_RESTORE_TILE_LADDER_1X; - const int ladder_sr[] = DT_RESTORE_TILE_LADDER_SR; - const int *candidates = (scale > 1) ? ladder_sr : ladder_1x; - const int n_candidates = (scale > 1) - ? 
(int)(sizeof(ladder_sr) / sizeof(int)) - : (int)(sizeof(ladder_1x) / sizeof(int)); - const size_t avail = dt_get_available_mem(); const size_t budget = avail / 4; - for(int i = 0; i < n_candidates; i++) + for(int i = 0; i < n_ladder; i++) { - const size_t T = (size_t)candidates[i]; + const size_t T = (size_t)ladder[i]; const size_t T_out = T * scale; const size_t tile_in = T * T * 3 * sizeof(float); const size_t tile_out @@ -474,659 +673,299 @@ static int _select_tile_size(int scale) { dt_print(DT_DEBUG_AI, "[restore] tile size %d (scale=%d, need %zuMB, budget %zuMB)", - candidates[i], scale, + ladder[i], scale, total / (1024 * 1024), budget / (1024 * 1024)); - return candidates[i]; + return ladder[i]; } } dt_print(DT_DEBUG_AI, "[restore] using minimum tile size %d (budget %zuMB)", - candidates[n_candidates - 1], + ladder[n_ladder - 1], budget / (1024 * 1024)); - return candidates[n_candidates - 1]; + return ladder[n_ladder - 1]; } -// Rec.709 / sRGB luminance weights (Y row of sRGB->XYZ D65); -// applied to working-profile-linear pixels in the pass-through -// blending below; exact only when the working profile is -// sRGB/Rec.709, but correct enough for luminance deltas -static inline float _luma_rec709(float r, float g, float b) -{ - return 0.2126f * r + 0.7152f * g + 0.0722f * b; +static void _resolve_tile_ladder(const dt_ai_model_info_t *info, + int scale, + int **out_sizes, + int *out_count) +{ + // prefer the model's declared input_sizes if present: some exports + // ship with a fixed set of supported tile sizes (e.g. 
the model was + // compiled for specific spatial dims) and using anything outside + // that list will either refuse to run or produce garbage + int n = 0; + int *sizes = dt_ai_model_attribute_int_array(info, "input_sizes", &n); + if(sizes && n > 0) + { + *out_sizes = sizes; + *out_count = n; + return; + } + g_free(sizes); + + // fall back to the built-in ladder for the model's scale + static const int ladder_1x[] = DT_RESTORE_TILE_LADDER_1X; + static const int ladder_sr[] = DT_RESTORE_TILE_LADDER_SR; + const int *src = (scale > 1) ? ladder_sr : ladder_1x; + const int src_n = (scale > 1) + ? (int)(sizeof(ladder_sr) / sizeof(int)) + : (int)(sizeof(ladder_1x) / sizeof(int)); + int *copy = g_new(int, src_n); + memcpy(copy, src, src_n * sizeof(int)); + *out_sizes = copy; + *out_count = src_n; } -int dt_restore_run_patch(dt_restore_context_t *ctx, - const float *in_patch, - int w, int h, - float *out_patch, - int scale) +int dt_restore_run_patch_bayer(dt_restore_context_t *ctx, + const float *in_4ch, + int w, int h, + float *out_3ch) { if(!ctx || !ctx->ai_ctx) return 1; - const size_t in_pixels = (size_t)w * h * 3; - const int out_w = w * scale; - const int out_h = h * scale; - const size_t out_pixels = (size_t)out_w * out_h * 3; - const size_t plane = (size_t)w * h; - - // convert to sRGB gamma-encoded. If a working profile is set, - // first convert primaries (working profile -> sRGB linear) so the - // model sees the image as if it were native sRGB. Otherwise only - // apply the gamma curve (legacy path, shifts hues for wide-gamut). - // input layout is planar NCHW: R plane, then G plane, then B plane. 
- // in_gamut_mask records which pixels were in sRGB gamut (scale==1 - // only) so the output pass can skip recomputing WP->sRGB - float *srgb_in = g_try_malloc(in_pixels * sizeof(float)); - uint8_t *in_gamut_mask = NULL; - if(!srgb_in) return 1; - // only allocate the gamut mask when denoise pass-through is requested - const gboolean need_gamut_mask - = ctx->has_profile && scale == 1 && ctx->preserve_wide_gamut; - if(need_gamut_mask) - { - in_gamut_mask = g_try_malloc(plane); - if(!in_gamut_mask) - { - g_free(srgb_in); - return 1; - } - } - if(ctx->has_profile) - { - const float *M = ctx->wp_to_srgb; - const gboolean boost = ctx->shadow_boost; - for(size_t p = 0; p < plane; p++) - { - const float r = in_patch[p]; - const float g = in_patch[p + plane]; - const float b = in_patch[p + 2 * plane]; - float sr = M[0] * r + M[1] * g + M[2] * b; - float sg = M[3] * r + M[4] * g + M[5] * b; - float sb = M[6] * r + M[7] * g + M[8] * b; - // gamut check uses pre-boost values so pass-through decisions - // reflect the original color - if(in_gamut_mask) - { - const float m = 0.01f; // ~1% margin beyond [0, 1] - in_gamut_mask[p] = (sr >= -m && sr <= 1.0f + m - && sg >= -m && sg <= 1.0f + m - && sb >= -m && sb <= 1.0f + m) ? 1 : 0; - } - if(boost) - { - sr = sr > 0.0f ? sqrtf(sr) : 0.0f; - sg = sg > 0.0f ? sqrtf(sg) : 0.0f; - sb = sb > 0.0f ? sqrtf(sb) : 0.0f; - } - srgb_in[p] = _linear_to_srgb(sr); - srgb_in[p + plane] = _linear_to_srgb(sg); - srgb_in[p + 2 * plane] = _linear_to_srgb(sb); - } - } - else if(ctx->shadow_boost) - { - // no profile: still boost shadows so the model stays within its - // comfort zone, even though we treat WP values as sRGB - for(size_t i = 0; i < in_pixels; i++) - { - const float v = in_patch[i]; - const float boosted = v > 0.0f ? 
sqrtf(v) : 0.0f; - srgb_in[i] = _linear_to_srgb(boosted); - } - } - else - { - for(size_t i = 0; i < in_pixels; i++) - srgb_in[i] = _linear_to_srgb(in_patch[i]); - } - - const int num_inputs = dt_ai_get_input_count(ctx->ai_ctx); - if(num_inputs > MAX_MODEL_INPUTS) - { - g_free(srgb_in); - return 1; - } + int64_t in_shape[] = { 1, 4, h, w }; + int64_t out_shape[] = { 1, 3, 2 * h, 2 * w }; + dt_ai_tensor_t input = { + .data = (void *)in_4ch, + .shape = in_shape, + .ndim = 4, + .type = DT_AI_FLOAT, + }; + dt_ai_tensor_t output = { + .data = out_3ch, + .shape = out_shape, + .ndim = 4, + .type = DT_AI_FLOAT, + }; + return dt_ai_run(ctx->ai_ctx, &input, 1, &output, 1); +} - int64_t input_shape[] = {1, 3, h, w}; - dt_ai_tensor_t inputs[MAX_MODEL_INPUTS]; - memset(inputs, 0, sizeof(inputs)); - inputs[0] = (dt_ai_tensor_t){ - .data = (void *)srgb_in, - .shape = input_shape, - .ndim = 4, - .type = DT_AI_FLOAT}; - - // noise level map for multi-input models - float *noise_map = NULL; - int64_t noise_shape[] = {1, 1, h, w}; - if(num_inputs >= 2) - { - const size_t map_size = (size_t)w * h; - noise_map = g_try_malloc(map_size * sizeof(float)); - if(!noise_map) - { - g_free(srgb_in); - return 1; - } - const float sigma_norm = 25.0f / 255.0f; - for(size_t i = 0; i < map_size; i++) - noise_map[i] = sigma_norm; - inputs[1] = (dt_ai_tensor_t){ - .data = (void *)noise_map, - .shape = noise_shape, - .ndim = 4, - .type = DT_AI_FLOAT}; - } +int dt_restore_run_patch_3ch_raw(dt_restore_context_t *ctx, + const float *in_3ch, + int w, int h, + float *out_3ch) +{ + if(!ctx || !ctx->ai_ctx) return 1; - int64_t output_shape[] = {1, 3, out_h, out_w}; + int64_t in_shape[] = { 1, 3, h, w }; + int64_t out_shape[] = { 1, 3, h, w }; + dt_ai_tensor_t input = { + .data = (void *)in_3ch, + .shape = in_shape, + .ndim = 4, + .type = DT_AI_FLOAT, + }; dt_ai_tensor_t output = { - .data = (void *)out_patch, - .shape = output_shape, - .ndim = 4, - .type = DT_AI_FLOAT}; - - int ret = dt_ai_run(ctx->ai_ctx, 
inputs, num_inputs, - &output, 1); - g_free(srgb_in); - g_free(noise_map); - if(ret != 0) - { - g_free(in_gamut_mask); - return ret; - } + .data = out_3ch, + .shape = out_shape, + .ndim = 4, + .type = DT_AI_FLOAT, + }; + return dt_ai_run(ctx->ai_ctx, &input, 1, &output, 1); +} - // convert model output back to the working profile - // - // with profile: apply inverse sRGB gamma, then check if the ORIGINAL - // input pixel (converted to sRGB linear) is representable in sRGB - // gamut. if yes, use model output converted back to working profile. - // if no, pass through the original pixel (wide-gamut colors preserved, - // no denoising on those pixels). upscale has no pixel-to-pixel - // correspondence so pass-through is not possible — always use the - // model output - // - // without profile: fall back to per-channel pass-through in the - // original (working-profile-as-sRGB) space - const gboolean boost = ctx->shadow_boost; - if(ctx->has_profile && scale == 1 && ctx->preserve_wide_gamut) - { - const size_t out_plane = (size_t)out_w * out_h; - const float *Mi = ctx->srgb_to_wp; - // pass 1: write denoised values for in-gamut pixels; out-of-gamut - // pixels get plain pass-through as a fallback (used only when no - // in-gamut neighbors are found in pass 2) - for(size_t p = 0; p < out_plane; p++) - { - if(in_gamut_mask[p]) - { - float sr = _srgb_to_linear(out_patch[p]); - float sg = _srgb_to_linear(out_patch[p + out_plane]); - float sb = _srgb_to_linear(out_patch[p + 2 * out_plane]); - if(boost) { sr *= sr; sg *= sg; sb *= sb; } - out_patch[p] = Mi[0] * sr + Mi[1] * sg + Mi[2] * sb; - out_patch[p + out_plane] = Mi[3] * sr + Mi[4] * sg + Mi[5] * sb; - out_patch[p + 2 * out_plane] = Mi[6] * sr + Mi[7] * sg + Mi[8] * sb; - } - else - { - out_patch[p] = in_patch[p]; - out_patch[p + out_plane] = in_patch[p + plane]; - out_patch[p + 2 * out_plane] = in_patch[p + 2 * plane]; - } - } - // pass 2: luminance-only smoothing for out-of-gamut pixels. 
the - // original pixel keeps its chroma (wide-gamut color preserved - // exactly) but its brightness is shifted to match the local - // average luminance of denoised in-gamut neighbors; this kills - // the single-pixel speckles that pass-through would otherwise - // leave visible against the denoised background - const int radius = 2; // 5x5 window - for(int y = 0; y < out_h; y++) - { - for(int x = 0; x < out_w; x++) - { - const size_t p = (size_t)y * out_w + x; - if(in_gamut_mask[p]) continue; - const float r0 = in_patch[p]; - const float g0 = in_patch[p + plane]; - const float b0 = in_patch[p + 2 * plane]; - const float Y_orig = _luma_rec709(r0, g0, b0); - float sumY = 0.0f; - int count = 0; - const int y0 = y - radius < 0 ? 0 : y - radius; - const int y1 = y + radius >= out_h ? out_h - 1 : y + radius; - const int x0 = x - radius < 0 ? 0 : x - radius; - const int x1 = x + radius >= out_w ? out_w - 1 : x + radius; - for(int yy = y0; yy <= y1; yy++) - { - for(int xx = x0; xx <= x1; xx++) - { - const size_t q = (size_t)yy * out_w + xx; - if(!in_gamut_mask[q]) continue; - const float rq = out_patch[q]; - const float gq = out_patch[q + out_plane]; - const float bq = out_patch[q + 2 * out_plane]; - sumY += _luma_rec709(rq, gq, bq); - count++; - } - } - if(count > 0) - { - const float dY = sumY / (float)count - Y_orig; - out_patch[p] = r0 + dY; - out_patch[p + out_plane] = g0 + dY; - out_patch[p + 2 * out_plane] = b0 + dY; - } - } - } - } - else if(ctx->has_profile && scale == 1) - { - // denoise with profile but NO pass-through: apply the inverse - // matrix to every pixel. 
wide-gamut inputs will have been clipped - // by the model, but we get denoising everywhere - const size_t out_plane = (size_t)out_w * out_h; - const float *Mi = ctx->srgb_to_wp; - for(size_t p = 0; p < out_plane; p++) - { - float sr = _srgb_to_linear(out_patch[p]); - float sg = _srgb_to_linear(out_patch[p + out_plane]); - float sb = _srgb_to_linear(out_patch[p + 2 * out_plane]); - if(boost) { sr *= sr; sg *= sg; sb *= sb; } - out_patch[p] = Mi[0] * sr + Mi[1] * sg + Mi[2] * sb; - out_patch[p + out_plane] = Mi[3] * sr + Mi[4] * sg + Mi[5] * sb; - out_patch[p + 2 * out_plane] = Mi[6] * sr + Mi[7] * sg + Mi[8] * sb; - } - } - else if(scale == 1) - { - // no profile set: per-channel pass-through, treats working-profile - // numbers as if they were sRGB. colors will be slightly shifted - // for wide-gamut working profiles — rely on the profile path above - // when possible. pass-through still honored via preserve_wide_gamut - for(size_t i = 0; i < out_pixels; i++) - { - const float in = in_patch[i]; - if(ctx->preserve_wide_gamut && (in < 0.0f || in > 1.0f)) - { - out_patch[i] = in; - } - else - { - float v = _srgb_to_linear(out_patch[i]); - if(boost) v *= v; - out_patch[i] = v; - } - } - } - else +const int *dt_restore_get_tile_ladder(const dt_restore_context_t *ctx, + int *out_count) +{ + if(!ctx) { - // upscale: no pixel-to-pixel correspondence, use model output as-is - if(ctx->has_profile) - { - const size_t out_plane = (size_t)out_w * out_h; - const float *Mi = ctx->srgb_to_wp; - for(size_t p = 0; p < out_plane; p++) - { - float sr = _srgb_to_linear(out_patch[p]); - float sg = _srgb_to_linear(out_patch[p + out_plane]); - float sb = _srgb_to_linear(out_patch[p + 2 * out_plane]); - if(boost) { sr *= sr; sg *= sg; sb *= sb; } - out_patch[p] = Mi[0] * sr + Mi[1] * sg + Mi[2] * sb; - out_patch[p + out_plane] = Mi[3] * sr + Mi[4] * sg + Mi[5] * sb; - out_patch[p + 2 * out_plane] = Mi[6] * sr + Mi[7] * sg + Mi[8] * sb; - } - } - else - { - for(size_t i = 0; i < 
out_pixels; i++) - { - float v = _srgb_to_linear(out_patch[i]); - if(boost) v *= v; - out_patch[i] = v; - } - } + if(out_count) *out_count = 0; + return NULL; } + if(out_count) *out_count = ctx->n_tile_ladder; + return ctx->tile_ladder; +} - g_free(in_gamut_mask); - return 0; +int dt_restore_get_tile_size(const dt_restore_context_t *ctx) +{ + return ctx ? ctx->tile_size : 0; } -// per-image gate for the shadow-boost curve; enable only when the image -// has substantial near-black area to protect — bright images would only -// pay the curve cost (minor highlight compression) for no gain; -// thresholds tuned so localized very-dark features (a tree hollow, a -// silhouette) do NOT trigger; only broad noisy shadow regions do -// -// in_data is interleaved float4 RGBA -#define _SHADOW_BOOST_THRESHOLD 0.005f // 0.5% linear luminance -#define _SHADOW_BOOST_FRACTION 0.10f // 10% of sampled pixels -static gboolean _image_has_deep_shadows(const float *in_data, int w, int h) +gboolean dt_restore_reload_session(dt_restore_context_t *ctx, + int new_tile_size) { - const size_t stride = 16; // sample 1/256 of pixels for speed - size_t dark = 0, total = 0; - for(size_t y = 0; y < (size_t)h; y += stride) - for(size_t x = 0; x < (size_t)w; x += stride) - { - const size_t p = ((size_t)y * w + x) * 4; - const float luma = 0.2126f * in_data[p] - + 0.7152f * in_data[p + 1] - + 0.0722f * in_data[p + 2]; - if(luma < _SHADOW_BOOST_THRESHOLD) dark++; - total++; - } - return total > 0 && (float)dark / total >= _SHADOW_BOOST_FRACTION; + if(!ctx) return FALSE; + return _reload_session(ctx, new_tile_size); } -int dt_restore_process_tiled(dt_restore_context_t *ctx, - const float *in_data, - int width, int height, - int scale, - dt_restore_row_writer_t row_writer, - void *writer_data, - struct _dt_job_t *control_job) +void dt_restore_persist_tile_size(const dt_restore_context_t *ctx) { - if(!ctx || !in_data || !row_writer) - return 1; + if(ctx && ctx->model_id) + 
_set_cached_tile_size(ctx->model_id, ctx->scale, ctx->tile_size); +} - // for shadow-boost-capable models, decide per-image whether the - // curve is worth applying; one analysis per call, before tiling, - // so all tiles see the same flag (avoids per-tile seams) - if(ctx->shadow_boost_capable) - { - const gboolean dark = _image_has_deep_shadows(in_data, width, height); - ctx->shadow_boost = dark; - dt_print(DT_DEBUG_AI, "[restore] shadow boost %s", - dark ? "enabled" : "disabled"); - } +// shared bridge: run the user's darktable pixelpipe on an arbitrary sensor +// buffer, capture the display-referred RGB at an ROI. used by both raw- +// denoise preview paths (Bayer CFA after re-mosaic, X-Trans CFA after +// re-mosaic) so the preview before/after match what the user sees in +// darkroom after Process + DNG re-import +int dt_restore_run_user_pipe_roi(dt_imgid_t imgid, + void *input_native, + int iw, + int ih, + int roi_x, int roi_y, + int roi_w, int roi_h, + int *out_w, int *out_h, + float **out_rgb) +{ + if(out_rgb) *out_rgb = NULL; + if(out_w) *out_w = 0; + if(out_h) *out_h = 0; + if(!input_native || iw <= 0 || ih <= 0 + || roi_w <= 0 || roi_h <= 0) + return 1; - const int O = (scale > 1) ? OVERLAP_UPSCALE : OVERLAP_DENOISE; - const int S = scale; - const int out_w = width * S; - const int ladder_1x[] = DT_RESTORE_TILE_LADDER_1X; - const int ladder_sr[] = DT_RESTORE_TILE_LADDER_SR; - const int *ladder = (scale > 1) ? ladder_sr : ladder_1x; - const int n_ladder = (scale > 1) - ? (int)(sizeof(ladder_sr) / sizeof(int)) - : (int)(sizeof(ladder_1x) / sizeof(int)); - int T = ctx->tile_size; - - // outer retry loop: on inference failure (e.g. 
GPU OOM) drop to the - // next smaller candidate in the shared ladder and try again -retry:; - int step = T - 2 * O; - int T_out = T * S; - int O_out = O * S; - int step_out = step * S; - size_t in_plane = (size_t)T * T; - size_t out_plane = (size_t)T_out * T_out; - int cols = (width + step - 1) / step; - int rows = (height + step - 1) / step; - int total_tiles = cols * rows; + dt_develop_t dev; + dt_dev_init(&dev, FALSE); + dt_dev_load_image(&dev, imgid); - dt_print(DT_DEBUG_AI, - "[restore] tiling %dx%d (scale=%d)" - " -> %dx%d, %dx%d grid (%d tiles, T=%d)", - width, height, S, out_w, height * S, - cols, rows, total_tiles, T); - - float *tile_in = g_try_malloc( - in_plane * 3 * sizeof(float)); - float *tile_out = g_try_malloc( - out_plane * 3 * sizeof(float)); - float *row_buf = g_try_malloc( - (size_t)out_w * step_out * 3 * sizeof(float)); - if(!tile_in || !tile_out || !row_buf) + dt_dev_pixelpipe_t pipe; + if(!dt_dev_pixelpipe_init_export(&pipe, iw, ih, IMAGEIO_FLOAT, FALSE)) { - g_free(tile_in); - g_free(tile_out); - g_free(row_buf); + dt_dev_cleanup(&dev); return 1; } - int res = 0; - int tile_count = 0; - - for(int ty = 0; ty < rows; ty++) + // force output to linear Rec.709 (sRGB primaries, linear transfer) + // so the widget's sRGB-gamma encoder displays the right colours. + // MUST be called before create_nodes / synch_all: colorout reads + // pipe->icc_type during commit_params at synch_all time. setting it + // afterwards leaves colorout committed with the user's working + // profile (often Rec.2020 / ProPhoto) → the cairo path then + // applies sRGB gamma to wrong-primaries numbers → preview comes + // out noticeably brighter / wrong colours vs. 
the batch DNG that + // re-imports through the normal pipe + dt_dev_pixelpipe_set_icc(&pipe, DT_COLORSPACE_LIN_REC709, NULL, + DT_INTENT_PERCEPTUAL); + + dt_ioppr_resync_modules_order(&dev); + dt_dev_pixelpipe_set_input(&pipe, &dev, (float *)input_native, + iw, ih, 1.0f); + dt_dev_pixelpipe_create_nodes(&pipe, &dev); + dt_dev_pixelpipe_synch_all(&pipe, &dev); + + // skip rawdenoise — neural denoise already happened upstream. + // safe to do this after synch_all: this only flips piece->enabled, + // which the per-iop process loop checks at run time + for(GList *n = pipe.nodes; n; n = g_list_next(n)) { - const int y = ty * step; - const int valid_h = (y + step > height) - ? height - y : step; - const int valid_h_out = valid_h * S; - - memset(row_buf, 0, - (size_t)out_w * valid_h_out * 3 - * sizeof(float)); - - for(int tx = 0; tx < cols; tx++) - { - if(control_job - && dt_control_job_get_state(control_job) - == DT_JOB_STATE_CANCELLED) - { - res = 1; - goto cleanup; - } - - const int x = tx * step; - const int in_x = x - O; - const int in_y = y - O; - const int needs_mirror - = (in_x < 0 || in_y < 0 - || in_x + T > width - || in_y + T > height); - - // interleaved RGBx -> planar RGB - if(needs_mirror) - { - for(int dy = 0; dy < T; ++dy) - { - const int sy = _mirror(in_y + dy, height); - for(int dx = 0; dx < T; ++dx) - { - const int sx - = _mirror(in_x + dx, width); - const size_t po = (size_t)dy * T + dx; - const size_t si - = ((size_t)sy * width + sx) * 4; - tile_in[po] = in_data[si + 0]; - tile_in[po + in_plane] - = in_data[si + 1]; - tile_in[po + 2 * in_plane] - = in_data[si + 2]; - } - } - } - else - { - for(int dy = 0; dy < T; ++dy) - { - const float *row - = in_data - + ((size_t)(in_y + dy) * width - + in_x) * 4; - const size_t ro = (size_t)dy * T; - for(int dx = 0; dx < T; ++dx) - { - tile_in[ro + dx] = row[dx * 4 + 0]; - tile_in[ro + dx + in_plane] - = row[dx * 4 + 1]; - tile_in[ro + dx + 2 * in_plane] - = row[dx * 4 + 2]; - } - } - } - - 
if(dt_restore_run_patch( - ctx, tile_in, T, T, tile_out, S) != 0) - { - // retry with the next smaller ladder entry if no rows have - // been delivered yet (safe to restart). once rows are written - // we can't rewind the row_writer (e.g. TIFF is sequential). - // _reload_session() recreates the ORT session for the smaller - // tile size (dim overrides are shape-specific). - int next_T = 0; - for(int i = 0; i < n_ladder; i++) - if(ladder[i] < T) { next_T = ladder[i]; break; } - if(next_T > 0 && ty == 0 - && _reload_session(ctx, next_T)) - { - dt_print(DT_DEBUG_AI, - "[restore] inference failed at tile %d,%d " - "(T=%d), retrying with T=%d", - x, y, T, next_T); - g_free(tile_in); - g_free(tile_out); - g_free(row_buf); - T = next_T; - goto retry; - } - dt_print(DT_DEBUG_AI, - "[restore] inference failed at" - " tile %d,%d (T=%d, minimum reached)", x, y, T); - res = 1; - goto cleanup; - } - - // valid region -> row buffer - const int valid_w = (x + step > width) - ? width - x : step; - const int valid_w_out = valid_w * S; - - for(int dy = 0; dy < valid_h_out; ++dy) - { - const size_t src_row - = (size_t)(O_out + dy) * T_out + O_out; - const size_t dst_row - = ((size_t)dy * out_w + x * S) * 3; - for(int dx = 0; dx < valid_w_out; ++dx) - { - row_buf[dst_row + dx * 3 + 0] - = tile_out[src_row + dx]; - row_buf[dst_row + dx * 3 + 1] - = tile_out[src_row + dx + out_plane]; - row_buf[dst_row + dx * 3 + 2] - = tile_out[src_row + dx - + 2 * out_plane]; - } - } - - tile_count++; - if(control_job) - dt_control_job_set_progress(control_job, - (double)tile_count / total_tiles); - } + dt_dev_pixelpipe_iop_t *piece = n->data; + if(dt_iop_module_is(piece->module->so, "rawdenoise")) + piece->enabled = FALSE; + } - // deliver completed scanlines via callback - for(int dy = 0; dy < valid_h_out; dy++) + int pw = 0, ph = 0; + dt_dev_pixelpipe_get_dimensions(&pipe, &dev, iw, ih, &pw, &ph); + if(pw <= 0 || ph <= 0) + { + dt_dev_pixelpipe_cleanup(&pipe); + dt_dev_cleanup(&dev); + return 1; + 
} + pipe.processed_width = pw; + pipe.processed_height = ph; + + // the ROI passed to process_no_gamma is in POST-pipe (final output) + // coords, but the caller hands us sensor (input) coords so the ROI + // lines up with the denoised CFA patch it built. forward-transform + // the crop rectangle's 4 corners through the user's full geometry + // chain (rawprepare + clipping + ashift + lens + rotatepixels + ...) + // and use the INSCRIBED axis-aligned rectangle of the transformed + // quad as the pipe ROI. the circumscribed AABB would include corner + // triangles that back-project to sensor positions OUTSIDE the + // denoised region — they'd render as noisy strips at the edges of + // the preview. the inscribed rect is strictly inside the quad so + // every sample back-projects within the patched region + float corners[8] = { + (float)roi_x, (float)roi_y, + (float)(roi_x + roi_w), (float)roi_y, + (float)roi_x, (float)(roi_y + roi_h), + (float)(roi_x + roi_w), (float)(roi_y + roi_h), + }; + dt_dev_distort_transform_plus(&dev, &pipe, 0.0, + DT_DEV_TRANSFORM_DIR_ALL_GEOMETRY, + corners, 4); + + // inscribed AABB: second-smallest x/y and second-largest x/y of the + // 4 transformed corners. for a parallelogram these are the innermost + // of each pair; for small lens distortions they're still safe (i.e. 
+ // lie inside the quad) because the quad stays nearly rectangular + float xs[4] = { corners[0], corners[2], corners[4], corners[6] }; + float ys[4] = { corners[1], corners[3], corners[5], corners[7] }; + for(int i = 0; i < 3; i++) + for(int j = i + 1; j < 4; j++) { - const float *src = row_buf + (size_t)dy * out_w * 3; - if(row_writer(src, out_w, y * S + dy, - writer_data) != 0) - { - res = 1; - goto cleanup; - } + if(xs[i] > xs[j]) { float t = xs[i]; xs[i] = xs[j]; xs[j] = t; } + if(ys[i] > ys[j]) { float t = ys[i]; ys[i] = ys[j]; ys[j] = t; } } - } - - // persist tile size on first full success so subsequent runs skip OOM retry - if(res == 0) - _set_cached_tile_size(ctx->model_id, S, ctx->tile_size); - -cleanup: - g_free(tile_in); - g_free(tile_out); - g_free(row_buf); - return res; -} - -void dt_restore_apply_detail_recovery(const float *original_4ch, - float *denoised_4ch, - int width, int height, - float alpha) -{ - const size_t npix = (size_t)width * height; - - float *const restrict lum_residual - = dt_alloc_align_float(npix); - if(!lum_residual) return; - -#ifdef _OPENMP -#pragma omp parallel for simd default(none) \ - dt_omp_firstprivate(original_4ch, denoised_4ch, \ - lum_residual, npix) \ - schedule(simd:static) \ - aligned(original_4ch, denoised_4ch, lum_residual:64) -#endif - for(size_t i = 0; i < npix; i++) + // round inward (ceil for inner min, floor for inner max) so the + // chosen rect stays strictly inside the transformed quad + int pipe_roi_x = (int)ceilf(xs[1]); + int pipe_roi_y = (int)ceilf(ys[1]); + int pipe_roi_w = (int)floorf(xs[2]) - pipe_roi_x; + int pipe_roi_h = (int)floorf(ys[2]) - pipe_roi_y; + + // clamp to the pipe's actual processed extent; a sensor ROI near + // the edge may transform to a post-pipe ROI that spills past pw/ph + if(pipe_roi_x < 0) { pipe_roi_w += pipe_roi_x; pipe_roi_x = 0; } + if(pipe_roi_y < 0) { pipe_roi_h += pipe_roi_y; pipe_roi_y = 0; } + if(pipe_roi_x + pipe_roi_w > pw) pipe_roi_w = pw - pipe_roi_x; + 
if(pipe_roi_y + pipe_roi_h > ph) pipe_roi_h = ph - pipe_roi_y; + if(pipe_roi_w <= 0 || pipe_roi_h <= 0) { - const size_t p = i * 4; - const float lum_orig - = 0.2126f * original_4ch[p + 0] - + 0.7152f * original_4ch[p + 1] - + 0.0722f * original_4ch[p + 2]; - const float lum_den - = 0.2126f * denoised_4ch[p + 0] - + 0.7152f * denoised_4ch[p + 1] - + 0.0722f * denoised_4ch[p + 2]; - lum_residual[i] = lum_orig - lum_den; + dt_dev_pixelpipe_cleanup(&pipe); + dt_dev_cleanup(&dev); + return 1; } - float noise[DWT_DETAIL_BANDS]; - _compute_adaptive_noise(lum_residual, npix, noise); - dwt_denoise(lum_residual, width, height, - DWT_DETAIL_BANDS, noise); - -#ifdef _OPENMP -#pragma omp parallel for simd default(none) \ - dt_omp_firstprivate(denoised_4ch, lum_residual, \ - npix, alpha) \ - schedule(simd:static) \ - aligned(denoised_4ch, lum_residual:64) -#endif - for(size_t i = 0; i < npix; i++) + // NB: process_no_gamma's return value signals "pipe altered + // mid-flight", NOT success — check backbuf instead + dt_dev_pixelpipe_process_no_gamma(&pipe, &dev, + pipe_roi_x, pipe_roi_y, + pipe_roi_w, pipe_roi_h, 1.0f); + + const int bw = pipe.backbuf_width; + const int bh = pipe.backbuf_height; + if(!pipe.backbuf || bw <= 0 || bh <= 0) { - const size_t p = i * 4; - const float d = alpha * lum_residual[i]; - denoised_4ch[p + 0] += d; - denoised_4ch[p + 1] += d; - denoised_4ch[p + 2] += d; + dt_dev_pixelpipe_cleanup(&pipe); + dt_dev_cleanup(&dev); + return 1; } - dt_free_align(lum_residual); -} - -float *dt_restore_compute_dwt_detail(const float *before_3ch, - const float *after_3ch, - int width, int height) -{ - const size_t npix = (size_t)width * height; - float *lum_residual = dt_alloc_align_float(npix); - if(!lum_residual) return NULL; + // actual rendered dims may differ from the geometry-transformed + // pipe ROI if the pipe is trimmed mid-chain (rare but possible). 
+ // callers must read *out_w / *out_h instead of assuming anything + if(bw != pipe_roi_w || bh != pipe_roi_h) + dt_print(DT_DEBUG_AI, + "[restore] pipe ROI %dx%d -> backbuf %dx%d", + pipe_roi_w, pipe_roi_h, bw, bh); - for(size_t i = 0; i < npix; i++) + // pipe.backbuf is 4ch interleaved RGBA; repack to 3ch for the + // preview blend / display path + float *rgb = g_try_malloc((size_t)bw * bh * 3 * sizeof(float)); + if(rgb) { - const size_t si = i * 3; - const float lum_orig - = 0.2126f * before_3ch[si + 0] - + 0.7152f * before_3ch[si + 1] - + 0.0722f * before_3ch[si + 2]; - const float lum_den - = 0.2126f * after_3ch[si + 0] - + 0.7152f * after_3ch[si + 1] - + 0.0722f * after_3ch[si + 2]; - lum_residual[i] = lum_orig - lum_den; + const float *src = (const float *)pipe.backbuf; + for(size_t i = 0; i < (size_t)bw * bh; i++) + { + rgb[i * 3 + 0] = src[i * 4 + 0]; + rgb[i * 3 + 1] = src[i * 4 + 1]; + rgb[i * 3 + 2] = src[i * 4 + 2]; + } } - float noise[DWT_DETAIL_BANDS]; - _compute_adaptive_noise(lum_residual, npix, noise); - dwt_denoise(lum_residual, width, height, - DWT_DETAIL_BANDS, noise); + dt_dev_pixelpipe_cleanup(&pipe); + dt_dev_cleanup(&dev); - return lum_residual; + if(!rgb) return 1; + *out_rgb = rgb; + if(out_w) *out_w = bw; + if(out_h) *out_h = bh; + return 0; } // clang-format off diff --git a/src/common/ai/restore.h b/src/common/ai/restore.h index 7d1587846ac4..e98e6d4a15bc 100644 --- a/src/common/ai/restore.h +++ b/src/common/ai/restore.h @@ -16,268 +16,372 @@ along with darktable. If not, see . */ -/* - restore — reusable AI denoise and upscale processing - - this module provides the core inference, tiling, and detail - recovery logic for AI-based image restoration. it is part of - the darktable_ai shared library and has no GUI dependencies. - - consumers: - - src/libs/neural_restore.c (lighttable batch + preview) - - pixel pipeline: - input is linear Rec.709 float4 RGBA (from darktable export). 
- dt_restore_run_patch() converts linear->sRGB before inference - and sRGB->linear after. models operate in planar NCHW layout. - dt_restore_process_tiled() handles interleaved-to-planar - conversion, mirror padding at boundaries, and overlap blending. - - detail recovery: - dt_restore_apply_detail_recovery() uses wavelet (DWT) - decomposition to separate noise from texture in the luminance - residual (original - denoised). fine bands are thresholded; - coarser bands are preserved and blended back. -*/ +// restore — generic AI restore environment and model lifecycle. +// +// this module provides the shared scaffolding that all AI restore +// paths sit on top of: environment init, model loading with tile +// ladder selection + OOM retry, reference-counted contexts, tile +// size persistence, and the user-pipe ROI bridge used by raw- +// denoise previews. RGB denoise/upscale inference lives in +// restore_rgb.{c,h}; raw variants live in restore_raw_*.{c,h}. +// +// consumers: +// - src/libs/neural_restore.c (lighttable batch + preview) +// - src/common/ai/restore_rgb.c (RGB denoise + upscale) +// - src/common/ai/restore_raw_bayer.c (RawNIND Bayer) +// - src/common/ai/restore_raw_linear.c (RawNIND linear/X-Trans) #pragma once #include -struct _dt_job_t; +#include "common/image.h" // for dt_imgid_t -/* --- opaque types --- */ +// --- opaque types --- typedef struct dt_restore_env_t dt_restore_env_t; typedef struct dt_restore_context_t dt_restore_context_t; -/* --- environment lifecycle --- */ - -/** - * @brief initialize the restore environment - * - * wraps dt_ai_env_init(). returns NULL when AI is disabled. - * - * @return environment handle, or NULL - */ +// --- sensor classification --- + +// BAYER is any standard 2x2 Bayer (RGGB / BGGR / GRBG / GBRG). +// XTRANS is Fuji's 6x6 pattern (filters == 9u). LINEAR is the +// generic-demosaic fallback used for Foveon, monochrome-with-pattern, +// and anything else without a dedicated pipeline. 
UNSUPPORTED means +// the image can't be routed to any denoise variant (non-raw, pure +// monochrome, etc.). pick the loader matching the class: +// BAYER -> dt_restore_load_rawdenoise_bayer +// XTRANS -> dt_restore_load_rawdenoise_xtrans +// LINEAR -> dt_restore_load_rawdenoise_linear +typedef enum +{ + DT_RESTORE_SENSOR_CLASS_BAYER = 0, + DT_RESTORE_SENSOR_CLASS_XTRANS, + DT_RESTORE_SENSOR_CLASS_LINEAR, + DT_RESTORE_SENSOR_CLASS_UNSUPPORTED, +} dt_restore_sensor_class_t; + +// classify a raw image by its CFA pattern. pure function of img flags +// and buf_dsc.filters; caller is expected to have a raw-loaded image +// (buf_dsc.filters populated by rawspeed). returns UNSUPPORTED when +// the image isn't a raw darktable can denoise +dt_restore_sensor_class_t dt_restore_classify_sensor(const dt_image_t *img); + +// --- environment lifecycle --- + +// @brief initialize the restore environment +// +// wraps dt_ai_env_init(). returns NULL when AI is disabled. +// +// @return environment handle, or NULL dt_restore_env_t *dt_restore_env_init(void); -/** - * @brief refresh model list after downloads/installs - * @param env environment handle - */ +// @brief refresh model list after downloads/installs +// @param env environment handle void dt_restore_env_refresh(dt_restore_env_t *env); -/** - * @brief destroy the environment and free resources - * @param env environment handle (NULL-safe) - */ +// @brief destroy the environment and free resources +// @param env environment handle (NULL-safe) void dt_restore_env_destroy(dt_restore_env_t *env); -/* --- model lifecycle --- */ +// --- model lifecycle --- -/** - * @brief load denoise model (scale 1x) - * @param env environment handle - * @return context handle, or NULL if no model available - */ +// @brief load denoise model (scale 1x) +// @param env environment handle +// @return context handle, or NULL if no model available dt_restore_context_t *dt_restore_load_denoise(dt_restore_env_t *env); -/** - * @brief load upscale model 
at 2x - * @param env environment handle - * @return context handle, or NULL if no model available - */ +// @brief load raw-denoise bayer model (scale 1x) +// +// raw denoise reuses the full scale==1 denoise pipeline (tile size, +// color conversion, shadow boost, wide-gamut pass-through); only the +// model's task string ("rawdenoise") differs. the bayer and linear +// ONNX files ship together in one "rawdenoise" package and the caller +// picks which variant to load. +// +// the filename is read from the model's variants.bayer.onnx attribute; +// a model package that doesn't declare this attribute fails to load +// (no silent fallback). +// +// --- bayer_v1 input contract --- +// +// variants declaring `input_kind: bayer_v1` must satisfy: +// +// INPUT: NCHW, 4 channels, T×T (packed half-resolution, where the +// sensor tile is 2T × 2T). channel order: R, G1, G2, B — +// extraction starts at the CFA's R origin so non-RGGB sensors +// (BGGR, GRBG, GBRG) get packed as if they were RGGB. this +// matches RawNIND training, which physically crops non-RGGB +// sensors to an RGGB origin before tiling. overridable via +// variants.bayer.bayer_orientation (force_rggb | native); +// default: force_rggb. +// values: (raw - black[site]) / range[site] * wb_norm[ch]. +// wb_norm defaults to daylight (D65 derived from the camera +// adobe_XYZ_to_CAM), overridable via variants.bayer.wb_norm +// (daylight | as_shot | none). +// edge tiles that extend past the image bounds are mirror- +// padded inside the effective-RGGB-cropped rectangle +// (variants.bayer.edge_pad: mirror_cropped | mirror). default +// for bayer_v1 is mirror_cropped so corner tiles see the same +// reflections the model's training did. +// +// OUTPUT: NCHW, 3 channels, 2T × 2T (model internally demosaics via +// PixelShuffle). values are camRGB in the same (WB, exposure) +// frame as the input. 
output scale is arbitrary unless the +// variant declares output_scale: absolute — by default the +// loader applies match_gain (scalar mean-match) before +// re-mosaicing. input_colorspace and target_mean are ignored +// on this path. +// +// a declared-but-unknown input_kind (or one that contradicts the slot) +// is a hard error — the loader refuses to open a mis-packaged ONNX. +// manifests predating the contract label (input_kind missing) are +// accepted for back-compat and treated as bayer_v1. +// +// @param env environment handle +// @return context handle, or NULL if no model available / misconfigured +dt_restore_context_t *dt_restore_load_rawdenoise_bayer(dt_restore_env_t *env); + +// @brief load raw-denoise X-Trans model (scale 1x) +// +// prefers a dedicated xtrans variant when the manifest declares +// variants.xtrans.onnx; falls back transparently to the linear variant +// otherwise. callers pick this loader for X-Trans sensors so a future +// RawNIND release can swap in a dedicated model via manifest-only +// changes. +// +// --- xtrans_v1 input contract (reserved) --- +// +// variants declaring `input_kind: xtrans_v1` are accepted by the +// loader but the actual preprocessing contract (channel layout, WB +// convention, output-space semantics) is TBD until Benoit's dedicated +// X-Trans model stabilizes. until then this loader's first call +// returns NULL for any manifest lacking a variants.xtrans slot, and +// the fallback path produces a linear_v1 context +// +// @param env environment handle +// @return context handle, or NULL if neither an xtrans nor a linear +// variant is available. +dt_restore_context_t *dt_restore_load_rawdenoise_xtrans(dt_restore_env_t *env); + +// @brief load raw-denoise linear model (scale 1x) +// +// generic-demosaic-based denoise: used for Foveon, monochrome sensors +// with a CFA-ish pattern, and any raw whose CFA pattern doesn't fit +// the bayer or xtrans pipelines. 
also the fallback pipeline for +// X-Trans sensors (via dt_restore_load_rawdenoise_xtrans) until a +// dedicated xtrans_v1 model is available. +// +// --- linear_v1 input contract --- +// +// variants declaring `input_kind: linear_v1` must satisfy: +// +// INPUT: NCHW, 3 channels, T × T planar. colorspace is +// variants.linear.input_colorspace (default lin_rec2020; +// alternatives: camRGB, srgb_linear). preprocessing applies +// WB in camRGB first — mode via variants.linear.wb_norm +// (default as_shot; see _resolve_linear_wb) — then the +// camRGB → input-space 3×3 matrix derived from +// adobe_XYZ_to_CAM, then an optional scalar exposure boost +// to variants.linear.target_mean (default 0.30 for the +// training distribution; set "null" to disable). +// +// OUTPUT: NCHW, 3 channels, T × T in the same input-space. output +// scale is arbitrary unless the variant declares +// output_scale: absolute — default is per-channel match_gain +// against the boosted input. the caller then inverts the +// exposure boost, the matrix, and the WB to recover a raw +// camRGB DNG that renders identically under the importing +// pipeline. +// +// same contract-label semantics as the bayer variant: missing label +// accepted as linear_v1, declared-but-mismatched label refuses +// to load with dt_control_log feedback. 
+// +// @param env environment handle +// @return context handle, or NULL if no model available / misconfigured +dt_restore_context_t *dt_restore_load_rawdenoise_linear(dt_restore_env_t *env); + +// @brief load upscale model at 2x +// @param env environment handle +// @return context handle, or NULL if no model available dt_restore_context_t *dt_restore_load_upscale_x2(dt_restore_env_t *env); -/** - * @brief load upscale model at 4x - * @param env environment handle - * @return context handle, or NULL if no model available - */ +// @brief load upscale model at 4x +// @param env environment handle +// @return context handle, or NULL if no model available dt_restore_context_t *dt_restore_load_upscale_x4(dt_restore_env_t *env); -/** - * @brief increment the reference count for shared ownership. - * multiple threads can share the same context for concurrent - * inference via dt_restore_run_patch(). - * @param ctx context handle - * @return the same pointer (for convenience) - */ +// @brief increment the reference count for shared ownership. +// multiple threads can share the same context for concurrent +// inference. +// @param ctx context handle +// @return the same pointer (for convenience) dt_restore_context_t *dt_restore_ref(dt_restore_context_t *ctx); -/** - * @brief decrement the reference count. frees the context and all - * resources when the count reaches zero. - * @param ctx context handle (NULL-safe) - */ +// @brief decrement the reference count. frees the context and all +// resources when the count reaches zero. +// @param ctx context handle (NULL-safe) void dt_restore_unref(dt_restore_context_t *ctx); -/** - * @brief Set the working color profile for the context. - * - * The AI model was trained on sRGB primaries. If the input pixels are - * in a different working profile (e.g. Rec.2020), we must convert to - * sRGB before inference and back after to avoid hue shifts. 
Call this - * before running inference on each image that may use a different - * working profile. - * - * If profile is NULL, the pipeline falls back to gamma-only conversion - * (treating working-profile numbers as if they were sRGB), which can - * cause color shifts for wide-gamut working profiles. - * - * Thread-safety: must not be called concurrently with - * dt_restore_run_patch() or dt_restore_process_tiled(). Set the - * profile before dispatching inference on a given image. - * - * @param ctx context handle (NULL-safe) - * @param profile lcms2 cmsHPROFILE handle cast to void*; NULL to disable - */ -void dt_restore_set_profile(dt_restore_context_t *ctx, - void *profile); - -/** - * @brief Enable/disable wide-gamut pass-through for denoise. - * - * When TRUE (default): pixels that would be out of sRGB gamut pass - * through unchanged, preserving color but not denoising them. When - * FALSE: all pixels use the model output, wide-gamut colors are - * clipped to sRGB but everything gets denoised. - * - * Affects denoise only (scale == 1). Upscale always uses the model - * output because there is no pixel-to-pixel correspondence to - * pass through. 
- * - * @param ctx context handle (NULL-safe) - * @param preserve TRUE to enable pass-through, FALSE to denoise everything - */ -void dt_restore_set_preserve_wide_gamut(dt_restore_context_t *ctx, - gboolean preserve); - -/** - * @brief check if a denoise model is available - * @param env environment handle - * @return TRUE if a denoise model is configured and present - */ +// @brief check if a denoise model is available +// @param env environment handle +// @return TRUE if a denoise model is configured and present gboolean dt_restore_denoise_available(dt_restore_env_t *env); -/** - * @brief check if an upscale model is available - * @param env environment handle - * @return TRUE if an upscale model is configured and present - */ +// @brief check if a raw-denoise model is available +// @param env environment handle +// @return TRUE if a raw-denoise model is configured and present +gboolean dt_restore_rawdenoise_available(dt_restore_env_t *env); + +// @brief check if an upscale model is available +// @param env environment handle +// @return TRUE if an upscale model is configured and present gboolean dt_restore_upscale_available(dt_restore_env_t *env); -/* --- tile size --- */ +// --- tile size --- -/** - * @brief get tile overlap for a given scale factor - * @param scale upscale factor (1 for denoise) - * @return overlap in pixels - */ +// @brief get tile overlap for a given scale factor +// @param scale upscale factor (1 for denoise) +// @return overlap in pixels int dt_restore_get_overlap(int scale); -/* --- inference --- */ - -/** - * @brief row writer callback for dt_restore_process_tiled - * - * called once per tile-row with 3ch interleaved float scanlines. - * the callback can write to a buffer, TIFF, or any other sink. 
- * - * @param scanline 3ch interleaved float data (out_w pixels) - * @param out_w output width in pixels - * @param y scanline index in the output image - * @param user_data caller-provided context - * @return 0 on success, non-zero to abort - */ -typedef int (*dt_restore_row_writer_t)(const float *scanline, - int out_w, - int y, - void *user_data); - -/** - * @brief run a single inference patch with sRGB conversion - * - * converts linear RGB input to sRGB, runs ONNX inference, - * converts output back to linear. input is planar NCHW float. - * - * @param ctx loaded restore context - * @param in_patch input tile (planar RGB, 3 * w * h floats) - * @param w tile width - * @param h tile height - * @param out_patch output buffer (planar RGB, 3 * w*s * h*s) - * @param scale upscale factor (1 for denoise) - * @return 0 on success - */ -int dt_restore_run_patch(dt_restore_context_t *ctx, - const float *in_patch, - int w, int h, - float *out_patch, - int scale); - -/** - * @brief process an image with tiled inference - * - * tiles the input, runs inference on each tile, and delivers - * completed scanlines via the row_writer callback. input is - * float4 RGBA interleaved (from dt export). 
- * - * @param ctx loaded restore context (tile_size is stored in ctx) - * @param in_data input pixels (float4 RGBA, width * height) - * @param width input width - * @param height input height - * @param scale upscale factor (1 for denoise) - * @param row_writer callback receiving 3ch float scanlines - * @param writer_data user data passed to row_writer - * @param control_job job handle for progress/cancellation (NULL-safe) - * @return 0 on success - */ -int dt_restore_process_tiled(dt_restore_context_t *ctx, - const float *in_data, - int width, int height, - int scale, - dt_restore_row_writer_t row_writer, - void *writer_data, - struct _dt_job_t *control_job); - -/* --- detail recovery --- */ - -/** - * @brief apply DWT-based detail recovery after denoising - * - * extracts luminance residual, filters noise with wavelet - * decomposition, and blends preserved texture back. - * both buffers are float4 RGBA at the same dimensions. - * - * @param original_4ch original input pixels (read-only) - * @param denoised_4ch denoised pixels (modified in-place) - * @param width image width - * @param height image height - * @param alpha blend strength (0 = none, 1 = full) - */ -void dt_restore_apply_detail_recovery(const float *original_4ch, - float *denoised_4ch, - int width, int height, - float alpha); - -/** - * @brief compute DWT-filtered luminance detail from 3ch buffers - * - * returns a 1ch float array with wavelet-filtered luminance - * residual (noise removed, texture preserved). used for - * preview split visualization. - * - * @param before_3ch original image (3ch interleaved float) - * @param after_3ch processed image (3ch interleaved float) - * @param width image width - * @param height image height - * @return newly allocated 1ch buffer, or NULL. 
caller frees - * with dt_free_align() - */ -float *dt_restore_compute_dwt_detail(const float *before_3ch, - const float *after_3ch, - int width, int height); +// --- inference --- + +// @brief run a single RawNIND bayer inference patch +// +// thin wrapper over dt_ai_run for bayer-packed input: NO colorspace +// or gamma conversion, NO WB handling, NO shadow boost. caller is +// responsible for black-subtract / normalize / WB / RGGB pack. +// input is planar 4ch NCHW at packed half-resolution, output is +// planar 3ch at full sensor resolution (model internally upscales +// 2x via PixelShuffle). output is in camRGB — the camera ColorMatrix +// is NOT applied in the graph (training applies it externally for +// loss, so re-mosaic + DNG write works natively). +// +// @param ctx loaded restore context (bayer model) +// @param in_4ch packed input (planar 4ch: R, G1, G2, B; 4 * w * h) +// @param w packed-space tile width (= sensor_w / 2) +// @param h packed-space tile height (= sensor_h / 2) +// @param out_3ch output buffer (planar 3ch at 2w * 2h) +// @return 0 on success +int dt_restore_run_patch_bayer(dt_restore_context_t *ctx, + const float *in_4ch, + int w, int h, + float *out_3ch); + +// @brief run a single RawNIND linear inference patch +// +// 3ch in, 3ch out, SAME spatial dims (no internal upscale). like +// _run_patch_bayer: no sRGB / gamma / WP conversion, no shadow boost. +// caller prepares input in the colorspace the linear model was +// trained on (lin_rec2020 per config.json) and gain-matches the +// output afterward (model output is arbitrary-scale camRGB-in-that- +// space, matching the behavior already observed on the bayer variant). 
+// +// @param ctx loaded restore context (linear model) +// @param in_3ch planar 3ch input (3 * w * h floats, NCHW order) +// @param w tile width +// @param h tile height +// @param out_3ch output buffer (planar 3ch, 3 * w * h floats) +// @return 0 on success +int dt_restore_run_patch_3ch_raw(dt_restore_context_t *ctx, + const float *in_3ch, + int w, int h, + float *out_3ch); + +// @brief look up the tile ladder for a restore context +// +// exposes the model-declared (or default) input_sizes list in +// packed-space. used by the bayer pipeline to pick a starting +// tile size that respects the model's declared shapes. +// +// @param ctx loaded restore context +// @param out_count filled with number of entries (may be NULL) +// @return pointer to the ladder (owned by ctx; do not free). NULL +// if ctx is NULL. +const int *dt_restore_get_tile_ladder(const dt_restore_context_t *ctx, + int *out_count); + +// @brief current tile size stored in the loaded session +// +// @param ctx loaded restore context +// @return tile size in packed-space, or 0 if ctx is NULL +int dt_restore_get_tile_size(const dt_restore_context_t *ctx); + +// @brief recreate the ORT session for a different tile size +// +// used by the bayer OOM-retry loop to step down the ladder +// when inference fails. keeps the same model/provider; only the +// H/W dim overrides change. the old session is unloaded first +// (avoids VRAM cascade on GPU OOM). +// +// @param ctx loaded restore context +// @param new_tile_size new tile size (must be a ladder member) +// @return TRUE on success +gboolean dt_restore_reload_session(dt_restore_context_t *ctx, + int new_tile_size); + +// @brief persist the current tile size to darktablerc +// +// once the bayer pipeline has processed an image end-to-end at +// ctx->tile_size without OOM, call this so the next run skips the +// retry loop and JIT-compiling providers don't pay the compile +// cost again. 
+// +// @param ctx loaded restore context +void dt_restore_persist_tile_size(const dt_restore_context_t *ctx); + +// @brief run darktable's real user pixelpipe on a sensor buffer, ROI-clipped. +// +// Shared bridge for the raw-denoise preview paths. Both Bayer and +// X-Trans previews need to run the user's full iop stack on a +// (possibly neural-denoised and re-mosaiced) raw buffer so the +// displayed before/after pixels match what the user would see after +// batch processing and DNG re-import. The pipe runs natively — +// rawprepare + demosaic + temperature + colorin + filmic + output +// profile — with rawdenoise skipped since the neural denoiser has +// already done its work. +// +// @param imgid image id (the pipe is built per image) +// @param input_native buffer matching the image's native raw format +// (uint16 CFA or 3ch float LinearRaw). pipe only +// reads this; caller retains ownership. +// @param iw buffer width in native samples +// @param ih buffer height +// @param roi_x ROI top-left x in sensor (input) coords — same +// coordinate system the caller used to patch the +// denoised CFA into input_native. the bridge +// forward-transforms this through the user's +// geometry chain (rawprepare + clipping + ashift + +// lens + ...) so the pipe renders the same sensor +// area the caller patched. +// @param roi_y ROI top-left y (sensor coords) +// @param roi_w ROI width (sensor coords) +// @param roi_h ROI height (sensor coords) +// @param out_w receives actual rendered width (may differ from +// roi_w when user history contains geometry- +// modifying modules like clipping/ashift/lens, or +// when rawprepare trims; NULL to skip) +// @param out_h receives actual rendered height (as out_w; NULL +// to skip) +// @param out_rgb caller-frees with g_free. 3ch interleaved +// (*out_w * *out_h * 3 floats) in linear Rec.709, +// ready for sRGB-gamma display. callers must use +// *out_w / *out_h (not the requested roi_w/roi_h) +// for subsequent indexing. 
+// @return 0 on success; *out_rgb set to NULL on failure. +int dt_restore_run_user_pipe_roi(dt_imgid_t imgid, + void *input_native, + int iw, + int ih, + int roi_x, + int roi_y, + int roi_w, + int roi_h, + int *out_w, + int *out_h, + float **out_rgb); // clang-format off // modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py diff --git a/src/common/ai/restore_common.h b/src/common/ai/restore_common.h new file mode 100644 index 000000000000..2a5138b6ac4d --- /dev/null +++ b/src/common/ai/restore_common.h @@ -0,0 +1,236 @@ +/* + This file is part of darktable, + Copyright (C) 2026 darktable developers. + + darktable is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + darktable is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with darktable. If not, see . +*/ + +// restore_common — internal shared definitions for the restore_* +// module family (restore.c, restore_rgb.c). NOT a public API: +// consumers must continue to treat dt_restore_context_t / +// dt_restore_env_t as opaque and go through the accessor functions +// declared in restore.h. + +#pragma once + +#include "ai/backend.h" +#include "common/image.h" + +#include +#include + +// --- preprocessing policy enums --- +// +// ctx fields keyed off these enums replace what used to be compile-time +// RawNIND assumptions. Manifest keys (variants..{input_kind, wb_norm, +// output_scale, input_colorspace, target_mean}) resolve to these values +// at load; defaults reproduce RawNIND v1 behavior so legacy manifests +// keep working. 
See restore.h for the per-variant contract. + +// identifies the full preprocessing contract (layout + WB + scaling + +// training distribution) the ONNX graph was trained against. treated as +// a string match, not a feature set: a model declaring bayer_v1 must +// comply with everything documented for that label in restore.h +typedef enum +{ + DT_RESTORE_INPUT_KIND_UNKNOWN = 0, + DT_RESTORE_INPUT_KIND_BAYER_V1, + // reserved for a future dedicated X-Trans denoise model. accepted as + // a manifest value so packages shipping an xtrans variant can be + // validated; the actual preprocessing pipeline is TBD and filed in + // restore_raw_xtrans.c when Benoit's model format stabilizes. until + // then X-Trans sensors route to the linear pipeline as a fallback + // via dt_restore_load_rawdenoise_xtrans + DT_RESTORE_INPUT_KIND_XTRANS_V1, + DT_RESTORE_INPUT_KIND_LINEAR_V1, +} dt_restore_input_kind_t; + +// color space the linear path feeds to the model. bayer path ignores +// this (4ch-packed layout fixes the space to camRGB by construction) +typedef enum +{ + DT_RESTORE_CS_LIN_REC2020 = 0, // default for linear path + DT_RESTORE_CS_CAMRGB, + DT_RESTORE_CS_SRGB_LINEAR, +} dt_restore_colorspace_t; + +// how WB is normalized before inference (and inverted after). DAYLIGHT +// uses the D65 coefficients derived from adobe_XYZ_to_CAM; AS_SHOT uses +// the raw's wb_coeffs; NONE leaves camRGB untouched +typedef enum +{ + DT_RESTORE_WB_DAYLIGHT = 0, + DT_RESTORE_WB_AS_SHOT, + DT_RESTORE_WB_NONE, +} dt_restore_wb_mode_t; + +// post-inference output scale handling. MATCH_GAIN rescales the model +// output so its mean matches the model input mean (compensates for the +// arbitrary output scale RawNIND's L1 loss produces); ABSOLUTE trusts +// the model output as-is +typedef enum +{ + DT_RESTORE_OUT_MATCH_GAIN = 0, + DT_RESTORE_OUT_ABSOLUTE, +} dt_restore_output_scale_t; + +// how the 4-channel packed Bayer input is oriented. 
FORCE_RGGB extracts +// from the CFA's R origin so channel 0 is always R regardless of the +// sensor pattern — matches RawNIND v1 training, which cropped non-RGGB +// sensors to an RGGB origin before packing. NATIVE packs in the sensor's +// own CFA order (channel 0 at the top-left of each 2x2 block) for models +// that accept any Bayer pattern unchanged +typedef enum +{ + DT_RESTORE_BAYER_FORCE_RGGB = 0, + DT_RESTORE_BAYER_NATIVE, +} dt_restore_bayer_orientation_t; + +// edge handling when a tile extends past the image boundary. MIRROR is +// darktable's historical periodic reflection on absolute sensor coords. +// MIRROR_CROPPED reflects in the effective-cropped frame (post FORCE_RGGB +// shift) so the reflected content matches what a training pipeline that +// physically crops the sensor before tiling would see — required for +// bit-identical corner tiles on non-RGGB sensors under bayer_v1 +typedef enum +{ + DT_RESTORE_EDGE_MIRROR_CROPPED = 0, + DT_RESTORE_EDGE_MIRROR, +} dt_restore_edge_pad_t; + +// dt_restore_sensor_class_t and _classify_sensor now live in restore.h +// (part of the public API so callers picking a variant loader can use +// it without pulling in restore_common.h's internal struct layouts) + +// --- struct definitions shared across the restore_* module family --- + +struct dt_restore_env_t +{ + dt_ai_environment_t *ai_env; +}; + +struct dt_restore_context_t +{ + dt_ai_context_t *ai_ctx; + struct dt_restore_env_t *env; + char *model_id; + char *model_file; + char *task; + char *input_kind; // variant-declared input kind (e.g. "packed_bayer", + // "lin_rec2020"); NULL if the model doesn't declare one + // policy enums resolved from the manifest at load time; see comments + // on each enum in this file and the per-variant contract in restore.h. 
+ // defaults (0-init from g_new0) reproduce RawNIND v1 behavior, except + // target_mean which needs explicit initialization — see _load + dt_restore_input_kind_t input_kind_enum; + dt_restore_colorspace_t input_colorspace; + dt_restore_wb_mode_t wb_mode; + dt_restore_output_scale_t output_scale; + dt_restore_bayer_orientation_t bayer_orientation; + dt_restore_edge_pad_t edge_pad; + float target_mean; // NAN = no exposure boost + int scale; // model upscale factor (1 for denoise, 2/4 for upscale) + int tile_size; // tile size used to create the current session + char *dim_h; // symbolic height dim name used for session overrides + char *dim_w; // symbolic width dim name used for session overrides + // color management (RGB path): convert working profile → sRGB before + // inference and back after. if has_profile is FALSE, fall back to + // gamma-only conversion (treats working-profile numbers as if sRGB). + gboolean has_profile; + float wp_to_srgb[9]; // working profile RGB -> sRGB linear + float srgb_to_wp[9]; // sRGB linear -> working profile RGB + // RGB path: when TRUE (default), out-of-sRGB-gamut pixels pass + // through unchanged during denoise. when FALSE, every pixel uses + // the model output and wide-gamut colors get clipped to sRGB. + gboolean preserve_wide_gamut; + // RGB path: shadow_boost_capable is set once at load from the + // model's "shadow_boost" attribute; shadow_boost is re-computed + // per image inside dt_restore_process_tiled() based on luminance. + gboolean shadow_boost_capable; + gboolean shadow_boost; + // tile ladder candidates from largest to smallest; either the + // model's "input_sizes" attribute from config.json (when declared) + // or a copy of the built-in ladder for the model's scale. both the + // startup budget selector and the runtime OOM retry loop iterate it + int *tile_ladder; + int n_tile_ladder; + uint32_t ep_flags; // execution provider flags (e.g. 
CoreML CPU-only) + gint ref_count; +}; + +// DWT detail-recovery band count (used by restore_rgb.c) +#define DWT_DETAIL_BANDS 5 + +// compute per-site black level (4 entries) and raw-ADC range +// (white − black) for this image. prefers the per-site +// raw_black_level_separate when any entry is non-zero, otherwise +// falls back to the single raw_black_level. range entries are +// guarded against non-positive values so callers can divide safely. +// shared by the Bayer prep helper (restore_raw_bayer.c) and by the +// linear path's raw re-mosaic step (restore_raw_linear.c) +static inline void _compute_cfa_black_range(const dt_image_t *img, + float black[4], + float range[4], + float *out_white) +{ + const float white = img->raw_white_point + ? (float)img->raw_white_point : 65535.0f; + if(out_white) *out_white = white; + + const gboolean have_separate + = (img->raw_black_level_separate[0] != 0 + || img->raw_black_level_separate[1] != 0 + || img->raw_black_level_separate[2] != 0 + || img->raw_black_level_separate[3] != 0); + for(int i = 0; i < 4; i++) + black[i] = have_separate + ? (float)img->raw_black_level_separate[i] + : (float)img->raw_black_level; + for(int i = 0; i < 4; i++) + { + range[i] = white - black[i]; + if(range[i] <= 0.0f) range[i] = 1.0f; + } +} + +// periodic mirror-pad index reflection, shared by every restore_* +// consumer that needs edge padding for tile reads (RGB, raw bayer, +// raw linear). fully periodic: any input index maps into [0, n) +static inline int _mirror(int i, int n) +{ + if(n <= 1) return 0; + if(i < 0) i = -i; + const int period = 2 * (n - 1); + i = i % period; + if(i < 0) i += period; + if(i >= n) i = period - i; + return i; +} + +// mirror-pad reflection within an arbitrary sub-range [lo, hi) of the +// underlying 1D array (exclusive hi). 
used by the Bayer edge-pad mode +// MIRROR_CROPPED so reflections happen inside the RGGB-forced crop +// rectangle rather than the original sensor buffer — matches training +// pipelines that physically crop the sensor to RGGB before tiling +static inline int _mirror_in_range(int i, int lo, int hi) +{ + const int n = hi - lo; + return lo + _mirror(i - lo, n); +} + +// clang-format off +// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py +// vim: shiftwidth=2 expandtab tabstop=2 cindent +// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified; +// clang-format on diff --git a/src/common/ai/restore_raw_bayer.c b/src/common/ai/restore_raw_bayer.c new file mode 100644 index 000000000000..91cff804936b --- /dev/null +++ b/src/common/ai/restore_raw_bayer.c @@ -0,0 +1,803 @@ +/* + This file is part of darktable, + Copyright (C) 2026 darktable developers. + + darktable is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + darktable is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with darktable. If not, see . 
+*/ + +#include "common/ai/restore_raw_bayer.h" +#include "common/ai/restore.h" +#include "common/ai/restore_common.h" +#include "common/colorspaces.h" +#include "common/darktable.h" +#include "common/image.h" +#include "common/image_cache.h" +#include "common/iop_order.h" +#include "common/mipmap_cache.h" +#include "control/jobs.h" +#include "develop/develop.h" +#include "develop/imageop.h" +#include "develop/imageop_math.h" +#include "develop/pixelpipe_hb.h" +#include "imageio/imageio_common.h" + +#include +#include +#include + +#define OVERLAP_PACKED 32 // tile overlap in packed (half-res) pixels + +// find (y0, x0) in {0,1}^2 such that FC(y0, x0, filters) == 0 (R). +// returns TRUE for standard Bayer patterns; FALSE for non-Bayer (filters +// == 0) or X-Trans (filters == 9u), which this pipeline does not handle +static gboolean _bayer_origin(uint32_t filters, int *y0, int *x0) +{ + if(filters == 0u || filters == 9u) return FALSE; + for(int y = 0; y < 2; y++) + for(int x = 0; x < 2; x++) + if(FC(y, x, filters) == 0) + { + *y0 = y; + *x0 = x; + return TRUE; + } + return FALSE; +} + +// shared prep data for Bayer batch + preview +// resolves everything the CFA→packed-input pipeline needs from the +// image metadata: CFA pattern + origin, per-site black/white/range, +// and daylight WB multipliers. both the batch path +// (dt_restore_raw_bayer) and the piped preview use this identical +// pre-processing; keeping it in one helper stops the two copies from +// drifting +typedef struct _bayer_prep_t +{ + uint32_t filters; + int y0, x0; + float white; + float black[4]; + float range[4]; + float wb_norm[3]; // daylight WB, G normalised to 1 + float clip_max; // = white; kept separate for readability +} _bayer_prep_t; + +// populate prep from img metadata. returns 0 on success, 1 when the +// CFA pattern is unsupported (X-Trans or monochrome) +// compute daylight WB (D65 derived from adobe_XYZ_to_CAM). 
on success +// writes R/B multipliers with G=1 into wb[0..2] and returns TRUE +static gboolean _bayer_wb_daylight(const dt_image_t *img, float wb[3]) +{ + const float D65[3] = { 0.9504f, 1.0f, 1.0889f }; + float resp[3] = { 0.0f, 0.0f, 0.0f }; + float mag = 0.0f; + for(int c = 0; c < 3; c++) + { + resp[c] = img->adobe_XYZ_to_CAM[c][0] * D65[0] + + img->adobe_XYZ_to_CAM[c][1] * D65[1] + + img->adobe_XYZ_to_CAM[c][2] * D65[2]; + mag += fabsf(img->adobe_XYZ_to_CAM[c][0]) + + fabsf(img->adobe_XYZ_to_CAM[c][1]) + + fabsf(img->adobe_XYZ_to_CAM[c][2]); + } + if(mag <= 0.0f || resp[0] <= 0.0f || resp[1] <= 0.0f || resp[2] <= 0.0f) + return FALSE; + wb[0] = resp[1] / resp[0]; + wb[1] = 1.0f; + wb[2] = resp[1] / resp[2]; + return TRUE; +} + +// as-shot WB from img->wb_coeffs normalized to G=1 +static gboolean _bayer_wb_as_shot(const dt_image_t *img, float wb[3]) +{ + if(img->wb_coeffs[0] <= 0.0f + || img->wb_coeffs[1] <= 0.0f + || img->wb_coeffs[2] <= 0.0f) + return FALSE; + const float g = img->wb_coeffs[1]; + wb[0] = img->wb_coeffs[0] / g; + wb[1] = 1.0f; + wb[2] = img->wb_coeffs[2] / g; + return TRUE; +} + +static int _compute_bayer_prep(const dt_restore_context_t *ctx, + const dt_image_t *img, _bayer_prep_t *p) +{ + if(!img || !p) return 1; + p->filters = img->buf_dsc.filters; + if(!_bayer_origin(p->filters, &p->y0, &p->x0)) + { + dt_print(DT_DEBUG_AI, + "[restore_raw_bayer] unsupported CFA pattern (filters=0x%x)", + p->filters); + return 1; + } + + _compute_cfa_black_range(img, p->black, p->range, &p->white); + p->clip_max = p->white; + + // WB normalization keyed off ctx->wb_mode. RawNIND's v1 weights were + // trained on daylight-WB'd data, so the default is DAYLIGHT (derive + // D65 multipliers from adobe_XYZ_to_CAM) with as-shot as the fallback + // when the matrix is missing. AS_SHOT flips the order (as-shot first, + // daylight fallback) for models trained on as-shot distributions. + // NONE leaves camRGB untouched. 
The same wb_norm is inverted in + // postprocess so the round-trip is consistent regardless of mode. + p->wb_norm[0] = p->wb_norm[1] = p->wb_norm[2] = 1.0f; + const dt_restore_wb_mode_t mode + = ctx ? ctx->wb_mode : DT_RESTORE_WB_DAYLIGHT; + if(mode == DT_RESTORE_WB_DAYLIGHT) + { + if(!_bayer_wb_daylight(img, p->wb_norm)) + _bayer_wb_as_shot(img, p->wb_norm); + } + else if(mode == DT_RESTORE_WB_AS_SHOT) + { + if(!_bayer_wb_as_shot(img, p->wb_norm)) + _bayer_wb_daylight(img, p->wb_norm); + } + // DT_RESTORE_WB_NONE: leave at {1, 1, 1} + return 0; +} + +// shared re-mosaic per-pixel math: model camRGB value → raw ADC +// value (reverses WB, normalisation and black-level shift). caller +// supplies (r, c, ch) from its own FC() dispatch and reads model_val +// from the 2T × 2T tile_out; the caller-side blend / clip / store +// differs per path (batch writes uint16 CFA with strength blend, +// preview writes uint16 or float into a patched sensor buffer) so we +// keep just the pure pixel math shared +static inline float _bayer_remosaic_raw(int r, int c, int ch, + float model_val, + const _bayer_prep_t *prep) +{ + const float normalized = model_val / prep->wb_norm[ch]; + const int bl_idx = ((r & 1) << 1) | (c & 1); + return normalized * prep->range[bl_idx] + prep->black[bl_idx]; +} + +// shared scalar match_gain: scales 3ch model output (2T × 2T) so +// its mean equals the 4ch input mean. 
identical algorithm for batch +// and preview; batch uses the returned means/gain to log a per-tile +// diagnostic +static void _bayer_gain_match(const float *tile_in, + float *tile_out, + int T, + double *out_in_mean, + double *out_out_mean, + float *out_gain) +{ + const size_t tile_in_plane = (size_t)T * T; + const size_t tile_out_plane = (size_t)(2 * T) * (size_t)(2 * T); + double in_sum = 0.0, out_sum = 0.0; + for(int k = 0; k < 4; k++) + { + const float *p = tile_in + (size_t)k * tile_in_plane; + for(size_t i = 0; i < tile_in_plane; i++) in_sum += p[i]; + } + for(int k = 0; k < 3; k++) + { + const float *p = tile_out + (size_t)k * tile_out_plane; + for(size_t i = 0; i < tile_out_plane; i++) out_sum += p[i]; + } + const double in_mean = in_sum / (double)(4 * tile_in_plane); + const double out_mean = out_sum / (double)(3 * tile_out_plane); + // allow negative gain too: the RawNIND model output scale is + // arbitrary by design (match_gain post-step during training absorbs + // it); in some variants the sign is also inverted. guard only + // against near-zero mean + const float gain = (fabsf((float)out_mean) > 1e-8f) + ? (float)(in_mean / out_mean) : 1.0f; + if(gain != 1.0f) + { + const size_t total_out = tile_out_plane * 3; + for(size_t i = 0; i < total_out; i++) tile_out[i] *= gain; + } + if(out_in_mean) *out_in_mean = in_mean; + if(out_out_mean) *out_out_mean = out_mean; + if(out_gain) *out_gain = gain; +} + +// shared 4ch packing: CFA → planar [R, G1, G2, B] at T×T packed +// compute the mirror-reflection bounds + oriented tile origin based on +// the packing policy on ctx. 
sr0_base / sc0_base are the caller's +// sensor-space base coords for the tile's top-left 2x2 *before* any +// RGGB-forcing shift (batch passes 2*(py_base - O), preview passes +// the user-centred even-snapped inf_y/inf_x) +// - FORCE_RGGB + MIRROR_CROPPED (bayer_v1 default): origin shifts by +// (y0, x0) so channel 0 always hits R; mirror reflects within the +// cropped [y0, H - y0?1:0) x [x0, W - x0?1:0) rectangle — matches +// training pipelines that physically crop to RGGB before tiling +// - FORCE_RGGB + MIRROR: same origin shift, but reflections happen +// against the full buffer (legacy darktable behavior; equivalent +// to training that doesn't use mirror padding at all) +// - NATIVE + *: no origin shift; each 4ch slot holds the sensor's +// native CFA position. mirror is always full-buffer +static void _bayer_tile_geometry(const dt_restore_context_t *ctx, + const _bayer_prep_t *prep, + int sr0_base, int sc0_base, + int width, int height, + int *sr0_origin, int *sc0_origin, + int *mir_y_lo, int *mir_y_hi, + int *mir_x_lo, int *mir_x_hi) +{ + const gboolean force_rggb + = !ctx || ctx->bayer_orientation == DT_RESTORE_BAYER_FORCE_RGGB; + const int y0 = force_rggb ? prep->y0 : 0; + const int x0 = force_rggb ? prep->x0 : 0; + *sr0_origin = sr0_base + y0; + *sc0_origin = sc0_base + x0; + + const gboolean cropped_mirror + = ctx && force_rggb + && ctx->edge_pad == DT_RESTORE_EDGE_MIRROR_CROPPED; + *mir_y_lo = cropped_mirror ? y0 : 0; + *mir_x_lo = cropped_mirror ? x0 : 0; + *mir_y_hi = cropped_mirror ? (height - (y0 ? 1 : 0)) : height; + *mir_x_hi = cropped_mirror ? (width - (x0 ? 1 : 0)) : width; +} + +// sr0_origin / sc0_origin are sensor-space top-left coords of the packed +// block's (0, 0); edges are mirror-padded via _mirror_in_range(). +// batch and preview paths call this with different origins (tile grid +// vs. a single centred inference tile) but the per-pixel math is identical +// pack a T x T packed-half-res 4-channel tile from the full CFA buffer. 
+// sr0_origin / sc0_origin is the starting sensor-space (row, col) of the +// tile's top-left 2x2 block; for force_rggb orientation the caller shifts +// by (y0, x0) so channel 0 always hits R. +// [mir_y_lo, mir_y_hi) and [mir_x_lo, mir_x_hi) are the mirror-reflection +// bounds. for EDGE_MIRROR these are [0, height) / [0, width); for +// EDGE_MIRROR_CROPPED they shrink to the effective-RGGB-cropped +// rectangle so reflections match what a crop-then-tile training pipeline +// would see +static void _pack_bayer_tile(const float *cfa, + int width, int height, + int sr0_origin, int sc0_origin, + int mir_y_lo, int mir_y_hi, + int mir_x_lo, int mir_x_hi, + int T, + const _bayer_prep_t *prep, + float *tile_in) +{ + const uint32_t filters = prep->filters; + const float *const black = prep->black; + const float *const range = prep->range; + const float *const wb_norm = prep->wb_norm; + const size_t tile_in_plane = (size_t)T * T; + + for(int dy = 0; dy < T; dy++) + { + const int sr0 = sr0_origin + 2 * dy; + for(int dx = 0; dx < T; dx++) + { + const int sc0 = sc0_origin + 2 * dx; + for(int k = 0; k < 4; k++) + { + const int dr = (k >> 1) & 1; + const int dc = k & 1; + const int r = _mirror_in_range(sr0 + dr, mir_y_lo, mir_y_hi); + const int c = _mirror_in_range(sc0 + dc, mir_x_lo, mir_x_hi); + const float val = cfa[(size_t)r * width + c]; + const int bl_idx = ((r & 1) << 1) | (c & 1); + const float normalized = (val - black[bl_idx]) / range[bl_idx]; + const int ch = FC(r, c, filters); + tile_in[k * tile_in_plane + (size_t)dy * T + dx] + = normalized * wb_norm[ch]; + } + } + } +} + +int dt_restore_raw_bayer(dt_restore_context_t *ctx, + const dt_image_t *img, + const float *cfa_in, + int width, + int height, + uint16_t *cfa_out, + float strength, + struct _dt_job_t *control_job) +{ + if(!ctx || !img || !cfa_in || !cfa_out + || width <= 0 || height <= 0) + return 1; + + const float alpha = strength < 0.0f ? 0.0f + : (strength > 1.0f ? 
1.0f : strength); + const float inv_alpha = 1.0f - alpha; + + _bayer_prep_t prep; + if(_compute_bayer_prep(ctx, img, &prep)) return 1; + const uint32_t filters = prep.filters; + const int y0 = prep.y0; + const int x0 = prep.x0; + const float white = prep.white; + const float *const black = prep.black; + const float *const wb_norm = prep.wb_norm; + const float clip_max = prep.clip_max; + + // initialize output with source CFA (covers margins directly) + // margins are the 0-2 rows/cols outside the bayer-aligned working + // region; the model doesn't see them, so we keep original sensor + // values there + for(size_t i = 0; i < (size_t)width * height; i++) + { + const float v = cfa_in[i]; + const float cv = v < 0.0f ? 0.0f : (v > clip_max ? clip_max : v); + cfa_out[i] = (uint16_t)(cv + 0.5f); + } + + // working region in sensor coords: [y0..y0+2*Hh) x [x0..x0+2*Wh) + const int Hh = (height - y0) / 2; + const int Wh = (width - x0) / 2; + if(Hh <= 0 || Wh <= 0) return 0; // too small; output == input + + // tile setup in packed (half-res) space + const int O = OVERLAP_PACKED; + int T = dt_restore_get_tile_size(ctx); + int n_ladder = 0; + const int *ladder = dt_restore_get_tile_ladder(ctx, &n_ladder); + if(T <= 2 * O) T = 256; // defensive fallback + +retry:; + const int step = T - 2 * O; + if(step <= 0) return 1; + const size_t tile_in_plane = (size_t)T * T; + const size_t tile_out_w = 2 * (size_t)T; + const size_t tile_out_plane = tile_out_w * tile_out_w; + const int cols = (Wh + step - 1) / step; + const int rows = (Hh + step - 1) / step; + const int total_tiles = cols * rows; + + dt_print(DT_DEBUG_AI, + "[restore_raw_bayer] %dx%d sensor (CFA origin %d,%d), " + "working %dx%d packed, tile T=%d, %dx%d grid (%d tiles)", + width, height, y0, x0, Wh, Hh, + T, cols, rows, total_tiles); + + // diagnostic: raw CFA range and preprocessing params + { + const size_t npix_dbg = (size_t)width * height; + float in_min = cfa_in[0], in_max = cfa_in[0]; + const size_t step_ = 
(npix_dbg < 1000000) ? 1 : (npix_dbg / 1000000); + for(size_t i = 0; i < npix_dbg; i += step_) + { + if(cfa_in[i] < in_min) in_min = cfa_in[i]; + if(cfa_in[i] > in_max) in_max = cfa_in[i]; + } + dt_print(DT_DEBUG_AI, + "[restore_raw_bayer] raw CFA range [%.1f, %.1f], " + "black=[%.0f,%.0f,%.0f,%.0f] white=%.0f " + "wb_coeffs=[%.3f,%.3f,%.3f,%.3f] wb_norm=[%.3f,%.3f,%.3f]", + in_min, in_max, + black[0], black[1], black[2], black[3], white, + img->wb_coeffs[0], img->wb_coeffs[1], + img->wb_coeffs[2], img->wb_coeffs[3], + wb_norm[0], wb_norm[1], wb_norm[2]); + } + + float *tile_in = g_try_malloc(tile_in_plane * 4 * sizeof(float)); + float *tile_out = g_try_malloc(tile_out_plane * 3 * sizeof(float)); + if(!tile_in || !tile_out) + { + g_free(tile_in); + g_free(tile_out); + return 1; + } + + int res = 0; + int tile_count = 0; + + for(int ty = 0; ty < rows && res == 0; ty++) + { + for(int tx = 0; tx < cols && res == 0; tx++) + { + if(control_job + && dt_control_job_get_state(control_job) + == DT_JOB_STATE_CANCELLED) + { + res = 1; + break; + } + + const int py_base = ty * step; // core-valid packed start (within working) + const int px_base = tx * step; + const int py_end = (py_base + step > Hh) ? Hh : py_base + step; + const int px_end = (px_base + step > Wh) ? Wh : px_base + step; + const int core_h = py_end - py_base; + const int core_w = px_end - px_base; + + // build 4ch input at packed half-res (T x T). 
geometry picks + // the right origin and mirror-reflection bounds based on + // ctx->bayer_orientation + ctx->edge_pad + int sr0_origin, sc0_origin; + int mir_y_lo, mir_y_hi, mir_x_lo, mir_x_hi; + _bayer_tile_geometry(ctx, &prep, + 2 * (py_base - O), 2 * (px_base - O), + width, height, + &sr0_origin, &sc0_origin, + &mir_y_lo, &mir_y_hi, &mir_x_lo, &mir_x_hi); + _pack_bayer_tile(cfa_in, width, height, + sr0_origin, sc0_origin, + mir_y_lo, mir_y_hi, mir_x_lo, mir_x_hi, + T, &prep, tile_in); + + // diagnostic: tile 0 pre-inference (4ch packed input) + if(tx == 0 && ty == 0) + { + float mn[4] = {tile_in[0], tile_in[0], tile_in[0], tile_in[0]}; + float mx[4] = {tile_in[0], tile_in[0], tile_in[0], tile_in[0]}; + for(int k = 0; k < 4; k++) + { + const float *p = tile_in + (size_t)k * tile_in_plane; + mn[k] = mx[k] = p[0]; + for(size_t i = 0; i < tile_in_plane; i++) + { + if(p[i] < mn[k]) mn[k] = p[i]; + if(p[i] > mx[k]) mx[k] = p[i]; + } + } + dt_print(DT_DEBUG_AI, + "[restore_raw_bayer] tile0 model_input range " + "R=[%.3f,%.3f] G1=[%.3f,%.3f] G2=[%.3f,%.3f] B=[%.3f,%.3f]", + mn[0], mx[0], mn[1], mx[1], + mn[2], mx[2], mn[3], mx[3]); + } + + // inference + if(dt_restore_run_patch_bayer(ctx, tile_in, T, T, tile_out) != 0) + { + // step down the ladder if possible. first tile only so we + // don't rewrite pixels we've already delivered + int next_T = 0; + for(int i = 0; i < n_ladder; i++) + if(ladder[i] < T) { next_T = ladder[i]; break; } + if(next_T > 0 && ty == 0 && tx == 0 + && dt_restore_reload_session(ctx, next_T)) + { + dt_print(DT_DEBUG_AI, + "[restore_raw_bayer] inference failed at T=%d, retrying T=%d", + T, next_T); + g_free(tile_in); + g_free(tile_out); + T = next_T; + goto retry; + } + dt_print(DT_DEBUG_AI, + "[restore_raw_bayer] inference failed at tile %d,%d (T=%d)", + tx, ty, T); + res = 1; + break; + } + + // match_gain: scale model output so its mean equals the + // preprocessed input mean. 
the RawNIND model output has an + // arbitrary scale (up to ~10^6) — the Python inference path + // applies match_gain() after every forward pass. we match + // per-tile which is stable: the gain factor is a property of + // the trained weights, approximately constant across tiles of + // the same image. applied in place in tile_out. skipped for + // ABSOLUTE-scale models whose output is already calibrated + double in_mean = 0.0, out_mean = 0.0; + float gain = 1.0f; + if(ctx->output_scale == DT_RESTORE_OUT_MATCH_GAIN) + _bayer_gain_match(tile_in, tile_out, T, + &in_mean, &out_mean, &gain); + + // diagnostic: tile 0 post-gain model-output ranges + gain info + if(tx == 0 && ty == 0) + { + float mn[3] = {tile_out[0], tile_out[0], tile_out[0]}; + float mx[3] = {tile_out[0], tile_out[0], tile_out[0]}; + for(int k = 0; k < 3; k++) + { + const float *p = tile_out + (size_t)k * tile_out_plane; + mn[k] = mx[k] = p[0]; + for(size_t i = 0; i < tile_out_plane; i++) + { + if(p[i] < mn[k]) mn[k] = p[i]; + if(p[i] > mx[k]) mx[k] = p[i]; + } + } + dt_print(DT_DEBUG_AI, + "[restore_raw_bayer] tile0 model_output range " + "R=[%.3f,%.3f] G=[%.3f,%.3f] B=[%.3f,%.3f] " + "in_mean=%.3f out_mean=%.3f gain=%.3e", + mn[0], mx[0], mn[1], mx[1], mn[2], mx[2], + in_mean, out_mean, (double)gain); + } + + // re-mosaic the core-valid region and un-preprocess + // model output dims: 2T x 2T (sensor pixels) for T x T packed tile. 
+ // core valid region in model output starts at (2*O, 2*O) and spans + // (2*core_h) x (2*core_w) sensor pixels + const int core_sh = 2 * core_h; // sensor height of core + const int core_sw = 2 * core_w; + for(int dy = 0; dy < core_sh; dy++) + { + const int r = y0 + 2 * py_base + dy; // sensor row + const int my = 2 * O + dy; // model-output row + const size_t row_off = (size_t)my * tile_out_w; + for(int dx = 0; dx < core_sw; dx++) + { + const int c = x0 + 2 * px_base + dx; // sensor col + const int mx = 2 * O + dx; + + const int ch = FC(r, c, filters); // 0=R, 1=G, 2=B + const float model_val + = tile_out[(size_t)ch * tile_out_plane + row_off + mx]; + + // reverse WB + normalisation → raw ADC + const float raw_val + = _bayer_remosaic_raw(r, c, ch, model_val, &prep); + + // strength blend: α=1 → denoised, α=0 → source CFA + const size_t pidx = (size_t)r * width + c; + const float blended + = alpha * raw_val + inv_alpha * cfa_in[pidx]; + + const float clipped + = blended < 0.0f ? 0.0f + : (blended > clip_max ? clip_max : blended); + cfa_out[pidx] = (uint16_t)(clipped + 0.5f); + } + } + + tile_count++; + if(control_job) + dt_control_job_set_progress(control_job, + (double)tile_count / total_tiles); + } + } + + g_free(tile_in); + g_free(tile_out); + + if(res == 0) + { + // diagnostic: sample cfa_out to confirm values are in a sensible + // raw-ADC range matching BlackLevel/WhiteLevel the DNG advertises + const size_t npix_dbg = (size_t)width * height; + uint16_t omin = cfa_out[0], omax = cfa_out[0]; + uint64_t osum = 0; + const size_t step_ = (npix_dbg < 1000000) ? 1 : (npix_dbg / 1000000); + size_t n = 0; + for(size_t i = 0; i < npix_dbg; i += step_) + { + if(cfa_out[i] < omin) omin = cfa_out[i]; + if(cfa_out[i] > omax) omax = cfa_out[i]; + osum += cfa_out[i]; + n++; + } + dt_print(DT_DEBUG_AI, + "[restore_raw_bayer] cfa_out u16 range [%u, %u] mean=%.0f " + "(DNG will advertise black~%.0f white=%.0f)", + (unsigned)omin, (unsigned)omax, + n ? 
(double)osum / n : 0.0, + black[0], white); + + dt_restore_persist_tile_size(ctx); + } + + return res; +} + +// preview: single-tile bayer inference + re-mosaic onto a patched CFA, +// then run the user's pipe (via dt_restore_run_user_pipe_roi) twice — +// once on the original mbuf for "before", once on the patched copy for +// "after". the pipe runs at ROI = displayed crop so refreshes stay fast. +// the "after" display-referred output matches what the user would see +// after Process + DNG re-import +int dt_restore_raw_bayer_preview_piped(dt_restore_context_t *ctx, + const dt_image_t *img, + dt_imgid_t imgid, + const float *cfa_full, + int width, int height, + int crop_x, int crop_y, + int crop_w, int crop_h, + float **out_before_rgb, + float **out_denoised_rgb, + int *out_w, + int *out_h) +{ + if(!ctx || !img || !cfa_full || !out_before_rgb || !out_denoised_rgb) + return 1; + *out_before_rgb = NULL; + *out_denoised_rgb = NULL; + if(out_w) *out_w = 0; + if(out_h) *out_h = 0; + + if(width <= 0 || height <= 0 || crop_w <= 0 || crop_h <= 0) return 1; + + _bayer_prep_t prep; + if(_compute_bayer_prep(ctx, img, &prep)) return 1; + const uint32_t filters = prep.filters; + const float clip_max = prep.clip_max; + + const int T = dt_restore_get_tile_size(ctx); + if(T <= 0) return 1; + const int sensor_T = 2 * T; + const int max_disp = sensor_T - 4 * OVERLAP_PACKED; + if(crop_w > max_disp || crop_h > max_disp) return 1; + + // snap crop to CFA grid + crop_x = (crop_x / 2) * 2; + crop_y = (crop_y / 2) * 2; + crop_w = (crop_w / 2) * 2; + crop_h = (crop_h / 2) * 2; + if(crop_w <= 0 || crop_h <= 0) return 1; + + int inf_x = crop_x + crop_w / 2 - sensor_T / 2; + int inf_y = crop_y + crop_h / 2 - sensor_T / 2; + inf_x = (inf_x / 2) * 2; + inf_y = (inf_y / 2) * 2; + + // inference (single tile) + const size_t tile_in_plane = (size_t)T * T; + const size_t tile_out_w = 2 * (size_t)T; + const size_t tile_out_plane = tile_out_w * tile_out_w; + + float *tile_in = 
g_try_malloc(tile_in_plane * 4 * sizeof(float)); + float *tile_out = g_try_malloc(tile_out_plane * 3 * sizeof(float)); + if(!tile_in || !tile_out) + { + g_free(tile_in); + g_free(tile_out); + return 1; + } + + // geometry applies the same orientation + mirror policy as the batch + // path. sr0_base / sc0_base for the preview is the user-centred, + // even-snapped inference tile origin in sensor coords + int pp_sr0, pp_sc0, pp_mir_y_lo, pp_mir_y_hi, pp_mir_x_lo, pp_mir_x_hi; + _bayer_tile_geometry(ctx, &prep, inf_y, inf_x, width, height, + &pp_sr0, &pp_sc0, + &pp_mir_y_lo, &pp_mir_y_hi, + &pp_mir_x_lo, &pp_mir_x_hi); + _pack_bayer_tile(cfa_full, width, height, + pp_sr0, pp_sc0, + pp_mir_y_lo, pp_mir_y_hi, pp_mir_x_lo, pp_mir_x_hi, + T, &prep, tile_in); + + if(dt_restore_run_patch_bayer(ctx, tile_in, T, T, tile_out) != 0) + { + g_free(tile_in); + g_free(tile_out); + return 1; + } + + // gain-match: same scalar correction as the batch path (gated on + // output_scale; ABSOLUTE-scale models skip it) + if(ctx->output_scale == DT_RESTORE_OUT_MATCH_GAIN) + _bayer_gain_match(tile_in, tile_out, T, NULL, NULL, NULL); + g_free(tile_in); + + // fetch source sensor buffer in native dtype + dt_mipmap_buffer_t mbuf; + dt_mipmap_cache_get(&mbuf, imgid, DT_MIPMAP_FULL, + DT_MIPMAP_BLOCKING, 'r'); + if(!mbuf.buf || mbuf.width != width || mbuf.height != height) + { + dt_mipmap_cache_release(&mbuf); + g_free(tile_out); + return 1; + } + + const int is_uint16 = (img->buf_dsc.datatype == TYPE_UINT16); + const int is_float = (img->buf_dsc.datatype == TYPE_FLOAT); + if(!is_uint16 && !is_float) + { + dt_print(DT_DEBUG_AI, + "[restore_raw_bayer] preview_piped: unsupported raw datatype %d", + img->buf_dsc.datatype); + dt_mipmap_cache_release(&mbuf); + g_free(tile_out); + return 1; + } + const size_t pixel_sz = is_uint16 ? 
2 : 4; + const size_t total_bytes = (size_t)width * height * pixel_sz; + + // build denoised-patched CFA: copy original, overwrite the + // entire inference region (2T × 2T sensor pixels) with denoised data. + // patching beyond the display crop gives the pipe's geometry chain + // ~64 px of slop on each side — enough to absorb the few-pixel ROI + // drift that the inscribed-AABB trick alone can't eliminate (pipe + // sampling slightly outside the quad's interior due to floor/ceil + // rounding or modules whose distort_transform returns approximations) + void *patched = g_try_malloc(total_bytes); + if(!patched) + { + dt_mipmap_cache_release(&mbuf); + g_free(tile_out); + return 1; + } + memcpy(patched, mbuf.buf, total_bytes); + + // patch the full 2T × 2T inference region, clamped to the sensor. + // pp_sr0 / pp_sc0 is the oriented tile origin (inf_y/x + y0/x0 under + // FORCE_RGGB, or inf_y/x under NATIVE) — this is where the *output* + // tile's (0, 0) lives in sensor coords, so the patch rectangle and + // the tile_out index must use it consistently + const int patch_x0 = (pp_sc0 < 0) ? 0 : pp_sc0; + const int patch_y0 = (pp_sr0 < 0) ? 0 : pp_sr0; + const int patch_x1 = (pp_sc0 + sensor_T > width) ? width : pp_sc0 + sensor_T; + const int patch_y1 = (pp_sr0 + sensor_T > height) ? height : pp_sr0 + sensor_T; + + for(int sr = patch_y0; sr < patch_y1; sr++) + { + const size_t mo_row = (size_t)(sr - pp_sr0) * tile_out_w; + for(int sc = patch_x0; sc < patch_x1; sc++) + { + const int ch = FC(sr, sc, filters); + const float model_val + = tile_out[(size_t)ch * tile_out_plane + mo_row + (sc - pp_sc0)]; + const float raw_val + = _bayer_remosaic_raw(sr, sc, ch, model_val, &prep); + const float clipped = raw_val < 0.0f ? 0.0f + : (raw_val > clip_max ? 
clip_max : raw_val); + const size_t idx = (size_t)sr * width + sc; + if(is_uint16) + ((uint16_t *)patched)[idx] = (uint16_t)(clipped + 0.5f); + else + ((float *)patched)[idx] = clipped; + } + } + + g_free(tile_out); + + // run pipe on patched CFA → out_denoised_rgb + int dw = 0, dh = 0, bw = 0, bh = 0; + int err = dt_restore_run_user_pipe_roi(imgid, patched, width, height, + crop_x, crop_y, crop_w, crop_h, + &dw, &dh, out_denoised_rgb); + g_free(patched); + + // run pipe on original mbuf → out_before_rgb + // mbuf.buf is const from our perspective (read-only cache entry) but the + // pipe set_input API isn't marked const; cast to writable pointer with + // the understanding that the pipe doesn't mutate its input buffer + if(err == 0) + { + err = dt_restore_run_user_pipe_roi(imgid, (void *)mbuf.buf, width, height, + crop_x, crop_y, crop_w, crop_h, + &bw, &bh, out_before_rgb); + } + + dt_mipmap_cache_release(&mbuf); + + if(err || dw != bw || dh != bh) + { + // dims must match between the two passes so the caller can blend + // them; mismatch shouldn't happen (same pipe, same ROI) but guard + // anyway so we never hand back inconsistent buffers + if(dw != bw || dh != bh) + dt_print(DT_DEBUG_AI, + "[restore_raw_bayer] preview_piped: before/after dim " + "mismatch (%dx%d vs %dx%d) — aborting", + bw, bh, dw, dh); + g_free(*out_before_rgb); *out_before_rgb = NULL; + g_free(*out_denoised_rgb); *out_denoised_rgb = NULL; + return 1; + } + if(out_w) *out_w = dw; + if(out_h) *out_h = dh; + return 0; +} + +// clang-format off +// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py +// vim: shiftwidth=2 expandtab tabstop=2 cindent +// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified; +// clang-format on diff --git a/src/common/ai/restore_raw_bayer.h b/src/common/ai/restore_raw_bayer.h new file mode 100644 index 000000000000..bd25ff48ae57 --- /dev/null +++ 
b/src/common/ai/restore_raw_bayer.h
@@ -0,0 +1,134 @@
+/*
+ This file is part of darktable,
+ Copyright (C) 2026 darktable developers.
+
+ darktable is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ darktable is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with darktable. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+// raw_restore — RawNIND bayer-denoise pipeline
+//
+// wraps a loaded RawNIND bayer model and runs the whole raw->raw
+// denoise pipeline: preprocessing (black level, normalize, per-channel
+// WB, 2x2 pack), tiled inference with overlap blending, postprocessing
+// (un-WB, un-normalize), and re-mosaic back to the original CFA
+// pattern. produces a uint16 sensor-sized mosaic that is written to
+// DNG by dt_dng_write_cfa_bayer().
+//
+// this is kept separate from the RGB denoise/upscale path in restore.c
+// because:
+// - input is single-channel CFA, not RGB; no sRGB gamma, no wide-gamut
+// - preprocessing is raw-specific (per-channel black, WB normalization)
+// - output is re-mosaiced to a CFA, not scanlines of interleaved RGB
+// - tile dims are in packed half-res space, not sensor resolution
+
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "common/ai/restore.h"
+#include "common/image.h" // for dt_imgid_t
+
+struct dt_image_t;
+struct _dt_job_t;
+
+// @brief Run the RawNIND bayer denoise pipeline end-to-end.
+// +// @param ctx loaded bayer context (dt_restore_load_rawdenoise_bayer) +// @param img source image (metadata for preprocessing + re-mosaic) +// @param cfa_in sensor CFA as float (full sensor resolution, row-major, +// unnormalized: values in raw ADC units, no black +// subtracted). This is what rawspeed delivers in +// DT_MIPMAP_FULL for raw images. +// @param width sensor width (img->width) +// @param height sensor height (img->height) +// @param cfa_out caller-allocated uint16 buffer of width*height samples. +// On success, contains the denoised mosaic in the same +// CFA layout and raw ADC range as the input. +// @param strength linear blend between original and denoised CFA in +// [0, 1]. 0 = pass-through the source CFA, 1 = full +// model output. Applied per sample at the end of the +// tile postprocess so tile boundaries stay seamless. +// @param control_job job handle for progress/cancellation (NULL-safe) +// @return 0 on success +int dt_restore_raw_bayer(dt_restore_context_t *ctx, + const struct dt_image_t *img, + const float *cfa_in, + int width, + int height, + uint16_t *cfa_out, + float strength, + struct _dt_job_t *control_job); + +// @brief Bayer preview through darktable's real pixelpipe — "preview = batch". +// +// Runs model inference on the displayed crop, re-mosaics the output back +// to CFA (same un-WB / un-normalise / clip logic as dt_restore_raw_bayer), +// patches it into a full-sensor copy of the source, then runs darktable's +// full pixelpipe TWICE — once on the patched (denoised) CFA for the +// "after" view, once on the original CFA for the "before" view. Both +// results go through the image's complete history stack (including +// temperature / filmic / output profile), so the displayed preview +// matches what the user will see after Process + re-import. +// +// The strength slider should blend out_before_rgb and out_denoised_rgb +// at display time; this entry always returns the "strength = 1" denoised +// result. 
+// +// Expensive: two full pipelined renders per refresh on top of the model +// inference. Typically 2–5 seconds depending on sensor size and iop stack +// complexity. Use dt_restore_raw_bayer_preview for cheaper (but +// colour-approximate) previews. +// +// @param ctx loaded bayer context +// @param img source image metadata +// @param imgid image id (used by the pixelpipe) +// @param cfa_full full-sensor CFA as float (cache in neural_restore) +// @param width sensor width +// @param height sensor height +// @param crop_x displayed crop top-left x (sensor coords, snapped mod 2) +// @param crop_y displayed crop top-left y (snapped mod 2) +// @param crop_w displayed crop width (snapped mod 2) +// @param crop_h displayed crop height (snapped mod 2) +// @param out_before_rgb caller-frees with g_free. 3ch interleaved +// (*out_w * *out_h * 3 floats), linear Rec.709, +// pipe output for the original CFA. +// @param out_denoised_rgb caller-frees with g_free. same shape, pipe +// output for the denoised-patched CFA at α=1. +// @param out_w receives actual rendered width (may differ +// from crop_w when user history contains +// geometry-modifying modules; both returned +// buffers share these dims). +// @param out_h receives actual rendered height. +// @return 0 on success; both outputs NULL on failure. 
+int dt_restore_raw_bayer_preview_piped(dt_restore_context_t *ctx,
+                                       const struct dt_image_t *img,
+                                       dt_imgid_t imgid,
+                                       const float *cfa_full,
+                                       int width,
+                                       int height,
+                                       int crop_x,
+                                       int crop_y,
+                                       int crop_w,
+                                       int crop_h,
+                                       float **out_before_rgb,
+                                       float **out_denoised_rgb,
+                                       int *out_w,
+                                       int *out_h);
+
+// clang-format off
+// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
+// vim: shiftwidth=2 expandtab tabstop=2 cindent
+// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified;
+// clang-format on
diff --git a/src/common/ai/restore_raw_linear.c b/src/common/ai/restore_raw_linear.c
new file mode 100644
index 000000000000..23a63ea378ef
--- /dev/null
+++ b/src/common/ai/restore_raw_linear.c
@@ -0,0 +1,1032 @@
+/*
+ This file is part of darktable,
+ Copyright (C) 2026 darktable developers.
+
+ darktable is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ darktable is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with darktable. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#include "common/ai/restore_raw_linear.h"
+#include "common/ai/restore.h"
+#include "common/ai/restore_common.h"
+#include "common/darktable.h"
+#include "common/image.h"
+#include "common/image_cache.h"
+#include "common/iop_order.h"
+#include "common/math.h"
+#include "common/matrices.h"
+#include "common/mipmap_cache.h"
+#include "control/jobs.h"
+#include "develop/develop.h"
+#include "develop/imageop.h"
+#include "develop/imageop_math.h"
+#include "develop/pixelpipe_hb.h"
+#include "imageio/imageio_common.h"
+
+#include <math.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define OVERLAP_LINEAR 32 // sensor pixels; same scale as input
+
+// derive daylight WB multipliers from the camera's XYZ->CAM matrix:
+// at D65 white, the camera response per channel is
+// resp[c] = sum_i M[c][i] * D65[i]
+// and wb_norm[c] = resp[G] / resp[c] normalizes green to 1.
+// returns TRUE when a usable matrix is available
+static gboolean _daylight_wb(const dt_image_t *img, float wb_norm[3])
+{
+  const float D65[3] = { 0.9504f, 1.0f, 1.0889f };
+  float resp[3];
+  float mag = 0.0f;
+  for(int c = 0; c < 3; c++)
+  {
+    resp[c] = img->adobe_XYZ_to_CAM[c][0] * D65[0]
+              + img->adobe_XYZ_to_CAM[c][1] * D65[1]
+              + img->adobe_XYZ_to_CAM[c][2] * D65[2];
+    mag += fabsf(img->adobe_XYZ_to_CAM[c][0])
+           + fabsf(img->adobe_XYZ_to_CAM[c][1])
+           + fabsf(img->adobe_XYZ_to_CAM[c][2]);
+  }
+  if(mag <= 0.0f || resp[0] <= 0.0f || resp[1] <= 0.0f || resp[2] <= 0.0f)
+  {
+    wb_norm[0] = wb_norm[1] = wb_norm[2] = 1.0f;
+    return FALSE;
+  }
+  wb_norm[0] = resp[1] / resp[0];
+  wb_norm[1] = 1.0f;
+  wb_norm[2] = resp[1] / resp[2];
+  return TRUE;
+}
+
+// build the combined "input-space → camRGB + undo exposure boost + undo
+// WB" 3×3 used in the final un-matrix pass. folds three linear ops into
+// one per-pixel multiplication for speed.
caller provides input_to_cam +// (built by _build_cam_matrices for the ctx's input_colorspace), +// inv_boost (= 1 / exposure_boost) and wb_norm +static void _linear_build_M_boosted(const float input_to_cam[9], + float inv_boost, + const float wb_norm[3], + float M[9]) +{ + for(int k = 0; k < 3; k++) + for(int i = 0; i < 3; i++) + M[k * 3 + i] = input_to_cam[k * 3 + i] * inv_boost / wb_norm[k]; +} + +// per-channel scalar match_gain: tile_out[c] *= in_mean[c]/out_mean[c]. +// RawNIND linear output is arbitrary-scale camRGB-in-lin_rec2020 and +// match_gain() is the canonical post-step that puts it back on the +// input's scale. applied in place. out_gain[3] optional (batch uses it +// for a tile0 diagnostic) +static void _linear_gain_match_3ch(const float *tile_in, + float *tile_out, + size_t per_ch, + float out_gain[3]) +{ + for(int k = 0; k < 3; k++) + { + const float *pi = tile_in + (size_t)k * per_ch; + float *po = tile_out + (size_t)k * per_ch; + double in_sum = 0.0, out_sum = 0.0; + for(size_t i = 0; i < per_ch; i++) + { + in_sum += pi[i]; + out_sum += po[i]; + } + const double im = in_sum / (double)per_ch; + const double om = out_sum / (double)per_ch; + const float g = (fabs(om) > 1e-8) ? (float)(im / om) : 1.0f; + if(g != 1.0f) + for(size_t i = 0; i < per_ch; i++) po[i] *= g; + if(out_gain) out_gain[k] = g; + } +} + +// derive + apply an exposure boost to a planar 3ch lin_rec2020 buffer. +// RawNIND training data was exposed at editorial brightness (mean ~0.3 +// in lin_rec2020); low-light raws land near ~0.02, which is >10× darker +// than the training distribution. the UtNet2 weights diverge on such +// OOD input (observed: model output range ±1e10 with negative mean, +// breaking match_gain). we boost to the training mean pre-inference +// and un-boost at the very end; the multiplication commutes with the +// linear un-matrix and un-WB steps so correctness holds. 
+// target_mean = NAN disables the boost entirely for models that don't +// need a brightness-normalized input. otherwise boost is capped at +// [1, 100] (never dim bright scenes). returned mean / boost are filled +// for optional diagnostics (boost=1 when disabled) +static void _linear_exposure_boost(const dt_restore_context_t *ctx, + float *rgb_planar, + size_t plane, + float *out_mean, + float *out_boost) +{ + const size_t total = plane * 3; + double sum = 0.0; + for(size_t i = 0; i < total; i++) sum += rgb_planar[i]; + const float scene_mean = (float)(sum / (double)total); + const float target = ctx ? ctx->target_mean : 0.30f; + float boost = 1.0f; + if(!isnan(target) && target > 0.0f && scene_mean > 1e-4f) + { + boost = target / scene_mean; + if(boost < 1.0f) boost = 1.0f; + if(boost > 100.0f) boost = 100.0f; + } + if(boost != 1.0f) + for(size_t i = 0; i < total; i++) rgb_planar[i] *= boost; + if(out_mean) *out_mean = scene_mean; + if(out_boost) *out_boost = boost; +} + +// as-shot WB from img->wb_coeffs normalized to G=1 +static gboolean _as_shot_wb(const dt_image_t *img, float wb_norm[3]) +{ + if(img->wb_coeffs[0] <= 0.0f + || img->wb_coeffs[1] <= 0.0f + || img->wb_coeffs[2] <= 0.0f) + return FALSE; + const float g = img->wb_coeffs[1]; + wb_norm[0] = img->wb_coeffs[0] / g; + wb_norm[1] = 1.0f; + wb_norm[2] = img->wb_coeffs[2] / g; + return TRUE; +} + +// resolve WB for the linear path keyed off ctx->wb_mode. Default for +// this path is AS_SHOT (as-shot beats daylight for re-imported DNGs +// because the denoised output's tonal character then matches the +// source — see the long rationale in dt_restore_raw_linear). Fallback +// order swaps per mode; NONE skips normalization entirely +static void _resolve_linear_wb(const dt_restore_context_t *ctx, + const dt_image_t *img, float wb_norm[3]) +{ + wb_norm[0] = wb_norm[1] = wb_norm[2] = 1.0f; + const dt_restore_wb_mode_t mode + = ctx ? 
ctx->wb_mode : DT_RESTORE_WB_AS_SHOT; + if(mode == DT_RESTORE_WB_AS_SHOT) + { + if(!_as_shot_wb(img, wb_norm)) + _daylight_wb(img, wb_norm); + } + else if(mode == DT_RESTORE_WB_DAYLIGHT) + { + if(!_daylight_wb(img, wb_norm)) + _as_shot_wb(img, wb_norm); + } + // DT_RESTORE_WB_NONE: leave at {1, 1, 1} +} + +// D65 XYZ -> linear Rec.2020 (ITU-R BT.2020), row-major 3x3. +// matches the lin_rec2020 color profile the RawNIND linear variant +// was trained on +static const float _xyz_to_rec2020[9] = { + 1.7166511880f, -0.3556707838f, -0.2533662814f, + -0.6666843518f, 1.6164812366f, 0.0157685458f, + 0.0176398574f, -0.0427706133f, 0.9421031212f, +}; + +static const float _rec2020_to_xyz[9] = { + 0.6369580483f, 0.1446169036f, 0.1688809752f, + 0.2627002120f, 0.6779980715f, 0.0593017165f, + 0.0000000000f, 0.0280726930f, 1.0609850577f, +}; + +// D65 XYZ -> linear sRGB / Rec.709 (IEC 61966-2-1), row-major 3x3. +// used when a variant declares input_colorspace: srgb_linear +static const float _xyz_to_srgb[9] = { + 3.2404542f, -1.5371385f, -0.4985314f, + -0.9692660f, 1.8760108f, 0.0415560f, + 0.0556434f, -0.2040259f, 1.0572252f, +}; + +static const float _srgb_to_xyz[9] = { + 0.4124564f, 0.3575761f, 0.1804375f, + 0.2126729f, 0.7151522f, 0.0721750f, + 0.0193339f, 0.1191920f, 0.9503041f, +}; + +// build the per-image camRGB<->input-space matrices, where input-space +// is chosen by ctx->input_colorspace: +// LIN_REC2020 (default): xyz_to_rec2020 · inverse(adobe_XYZ_to_CAM) +// SRGB_LINEAR: xyz_to_srgb · inverse(adobe_XYZ_to_CAM) +// CAMRGB: identity (model runs directly on camRGB) +// returns TRUE when the input-space transform could be built; FALSE +// when the camera's color matrix is absent or singular (CAMRGB always +// succeeds since it skips the matrix entirely). 
on FALSE the caller +// falls back to identity (color cast but at least no garbage) +static gboolean _build_cam_matrices(const dt_restore_context_t *ctx, + const dt_image_t *img, + float cam_to_input[9], + float input_to_cam[9]) +{ + const dt_restore_colorspace_t cs + = ctx ? ctx->input_colorspace : DT_RESTORE_CS_LIN_REC2020; + + if(cs == DT_RESTORE_CS_CAMRGB) + { + for(int i = 0; i < 9; i++) + cam_to_input[i] = input_to_cam[i] = (i % 4 == 0) ? 1.0f : 0.0f; + return TRUE; + } + + float cam_from_xyz[9]; + float mag = 0.0f; + for(int k = 0; k < 3; k++) + for(int i = 0; i < 3; i++) + { + const float v = img->adobe_XYZ_to_CAM[k][i]; + cam_from_xyz[k * 3 + i] = v; + mag += fabsf(v); + } + if(mag <= 0.0f) return FALSE; + + float xyz_from_cam[9]; + if(mat3inv(xyz_from_cam, cam_from_xyz) != 0) + return FALSE; + + const float *xyz_to_input = (cs == DT_RESTORE_CS_SRGB_LINEAR) + ? _xyz_to_srgb : _xyz_to_rec2020; + const float *input_to_xyz = (cs == DT_RESTORE_CS_SRGB_LINEAR) + ? _srgb_to_xyz : _rec2020_to_xyz; + + mat3mul(cam_to_input, xyz_to_input, xyz_from_cam); + mat3mul(input_to_cam, cam_from_xyz, input_to_xyz); + return TRUE; +} + +// run the minimal darktable pixelpipe: rawprepare + highlights + +// demosaic, nothing after, no temperature (so output is raw-native +// camRGB without WB applied). output is a newly-allocated 4ch float +// RGBA buffer at the pipeline's processed_{width,height}; caller frees +// with dt_free_align(). 
returns 0 on success +static int _run_demosaic_pipe(const dt_imgid_t imgid, + float **out_buf, + int *out_w, + int *out_h) +{ + dt_develop_t dev; + dt_dev_init(&dev, FALSE); + dt_dev_load_image(&dev, imgid); + + dt_mipmap_buffer_t mbuf; + dt_mipmap_cache_get(&mbuf, imgid, DT_MIPMAP_FULL, + DT_MIPMAP_BLOCKING, 'r'); + if(!mbuf.buf || !mbuf.width || !mbuf.height) + { + dt_print(DT_DEBUG_AI, + "[restore_raw_linear] could not load raw for imgid %d", + imgid); + dt_mipmap_cache_release(&mbuf); + dt_dev_cleanup(&dev); + return 1; + } + + const int iw = mbuf.width; + const int ih = mbuf.height; + + dt_dev_pixelpipe_t pipe; + if(!dt_dev_pixelpipe_init_export(&pipe, iw, ih, + IMAGEIO_FLOAT, FALSE)) + { + dt_print(DT_DEBUG_AI, + "[restore_raw_linear] pipe init_export failed (%dx%d)", iw, ih); + dt_mipmap_cache_release(&mbuf); + dt_dev_cleanup(&dev); + return 1; + } + + // the export code sequences this as: resync_modules_order -> set_input + // -> create_nodes -> synch_all. resync builds the iop-order table + // from the loaded image's history; without it, create_nodes sees an + // empty/misaligned list and leaves pipe->nodes NULL, which then + // crashes dt_dev_pixelpipe_disable_after when it dereferences + // g_list_last(pipe->nodes) + dt_ioppr_resync_modules_order(&dev); + dt_dev_pixelpipe_set_input(&pipe, &dev, (float *)mbuf.buf, + iw, ih, mbuf.iscale); + dt_dev_pixelpipe_create_nodes(&pipe, &dev); + dt_dev_pixelpipe_synch_all(&pipe, &dev); + + if(!pipe.nodes) + { + dt_print(DT_DEBUG_AI, + "[restore_raw_linear] pipe has no nodes — aborting"); + dt_dev_pixelpipe_cleanup(&pipe); + dt_mipmap_cache_release(&mbuf); + dt_dev_cleanup(&dev); + return 1; + } + + // keep rawprepare + highlights (clip) + demosaic; skip temperature + // (we apply our own daylight WB later) and everything after demosaic + dt_dev_pixelpipe_disable_after(&pipe, "demosaic"); + for(GList *n = pipe.nodes; n; n = g_list_next(n)) + { + dt_dev_pixelpipe_iop_t *piece = n->data; + 
if(dt_iop_module_is(piece->module->so, "temperature") + || dt_iop_module_is(piece->module->so, "rawdenoise")) + piece->enabled = FALSE; + } + + dt_dev_pixelpipe_get_dimensions(&pipe, &dev, iw, ih, + &pipe.processed_width, + &pipe.processed_height); + const int pw = pipe.processed_width; + const int ph = pipe.processed_height; + + // process CPU-side at full scale. no_gamma keeps float output + dt_dev_pixelpipe_process_no_gamma(&pipe, &dev, 0, 0, pw, ph, 1.0f); + + if(!pipe.backbuf || !pipe.backbuf_width || !pipe.backbuf_height) + { + dt_dev_pixelpipe_cleanup(&pipe); + dt_mipmap_cache_release(&mbuf); + dt_dev_cleanup(&dev); + dt_print(DT_DEBUG_AI, + "[restore_raw_linear] pipe produced no backbuffer"); + return 1; + } + + const int bw = pipe.backbuf_width; + const int bh = pipe.backbuf_height; + float *copy = dt_alloc_align_float((size_t)bw * bh * 4); + if(!copy) + { + dt_dev_pixelpipe_cleanup(&pipe); + dt_mipmap_cache_release(&mbuf); + dt_dev_cleanup(&dev); + return 1; + } + + memcpy(copy, pipe.backbuf, (size_t)bw * bh * 4 * sizeof(float)); + + *out_buf = copy; + *out_w = bw; + *out_h = bh; + + dt_dev_pixelpipe_cleanup(&pipe); + dt_mipmap_cache_release(&mbuf); + dt_dev_cleanup(&dev); + return 0; +} + +int dt_restore_raw_linear(dt_restore_context_t *ctx, + const dt_imgid_t imgid, + float **out_rgb, + int *out_w, + int *out_h, + float strength, + struct _dt_job_t *control_job) +{ + if(!ctx || !out_rgb || !out_w || !out_h) return 1; + *out_rgb = NULL; + + const float alpha = strength < 0.0f ? 0.0f + : (strength > 1.0f ? 1.0f : strength); + const float inv_alpha = 1.0f - alpha; + + // --- 1. 
produce demosaicked 4ch RGBA via minimal pipeline --- + float *rgba = NULL; + int w = 0, h = 0; + if(_run_demosaic_pipe(imgid, &rgba, &w, &h)) return 1; + + // snapshot image metadata for WB derivation (plain data members; + // don't touch heap pointers like profile/dng_gain_maps) + const dt_image_t *cached = dt_image_cache_get(imgid, 'r'); + if(!cached) + { + dt_free_align(rgba); + return 1; + } + dt_image_t img_meta = *cached; + dt_image_cache_read_release(cached); + + // WB normalization per ctx->wb_mode (default AS_SHOT; see + // _resolve_linear_wb). AS_SHOT beats DAYLIGHT for this path because + // match_gain + the negative-gain hack absorb the training-distribution + // mismatch, so the WB choice mostly shapes the final DNG's tonal look + // and we want the re-imported DNG to render with the same tone/contrast + // as the source + float wb_norm[3]; + _resolve_linear_wb(ctx, &img_meta, wb_norm); + + // feed the model in ctx->input_colorspace (default lin_rec2020, + // matches RawNIND training preprocessing). identity fallback when + // the camera's color matrix is absent (rare); CAMRGB always succeeds + float cam_to_input[9]; + float input_to_cam[9]; + const gboolean matrix_ok = + _build_cam_matrices(ctx, &img_meta, cam_to_input, input_to_cam); + if(!matrix_ok) + { + for(int i = 0; i < 9; i++) + cam_to_input[i] = input_to_cam[i] = (i % 4 == 0) ? 1.0f : 0.0f; + } + dt_print(DT_DEBUG_AI, + "[restore_raw_linear] wb_norm=[%.3f,%.3f,%.3f], " + "colorspace matrix: %s", + wb_norm[0], wb_norm[1], wb_norm[2], + matrix_ok ? "cam->input from adobe_XYZ_to_CAM" + : "identity (no color matrix)"); + + const size_t npix = (size_t)w * h; + + // allocate planar 3ch buffers for tile I/O + the preserved + // pre-inference source for the strength blend + float *rgb_src = dt_alloc_align_float(npix * 3); // planar R,G,B + if(!rgb_src) + { + dt_free_align(rgba); + return 1; + } + + // interleaved RGBA -> planar RGB. 
apply daylight WB first (matches + // RawNIND training: WB in camRGB space, then camRGB->lin_rec2020), + // then the matrix transform so the model sees lin_rec2020 directly + const size_t plane = npix; + for(size_t i = 0; i < npix; i++) + { + const float cam[3] = { + rgba[i * 4 + 0] * wb_norm[0], + rgba[i * 4 + 1] * wb_norm[1], + rgba[i * 4 + 2] * wb_norm[2], + }; + float input_rgb[3]; + mat3mulv(input_rgb, cam_to_input, cam); + rgb_src[i] = input_rgb[0]; + rgb_src[i + plane] = input_rgb[1]; + rgb_src[i + 2 * plane] = input_rgb[2]; + } + + // diagnostic min/max sweep + exposure boost. sweep is separate from + // the shared boost helper because only the batch diagnostic needs the + // per-channel min/max; the helper just computes the mean + float dbg_min[3], dbg_max[3]; + for(int k = 0; k < 3; k++) + { + const float *p = rgb_src + (size_t)k * plane; + dbg_min[k] = dbg_max[k] = p[0]; + for(size_t i = 0; i < plane; i++) + { + if(p[i] < dbg_min[k]) dbg_min[k] = p[i]; + if(p[i] > dbg_max[k]) dbg_max[k] = p[i]; + } + } + float scene_mean = 0.0f, exposure_boost = 1.0f; + _linear_exposure_boost(ctx, rgb_src, plane, &scene_mean, &exposure_boost); + dt_print(DT_DEBUG_AI, + "[restore_raw_linear] %dx%d, lin_rec2020 input range " + "R=[%.3f,%.3f] G=[%.3f,%.3f] B=[%.3f,%.3f] " + "mean=%.4f boost=%.2fx", + w, h, + dbg_min[0], dbg_max[0], dbg_min[1], dbg_max[1], + dbg_min[2], dbg_max[2], + scene_mean, exposure_boost); + + // allocate planar output buffer that tiles blend into + float *rgb_out = dt_alloc_align_float(npix * 3); + if(!rgb_out) + { + dt_free_align(rgb_src); + dt_free_align(rgba); + return 1; + } + + // initialize output with WB'd source so strength = 0 is exact + // pass-through and tile-edge gaps don't leave uninitialized data + memcpy(rgb_out, rgb_src, npix * 3 * sizeof(float)); + + // tile setup + const int O = OVERLAP_LINEAR; + int T = dt_restore_get_tile_size(ctx); + int n_ladder = 0; + const int *ladder = dt_restore_get_tile_ladder(ctx, &n_ladder); + if(T <= 2 * 
O) T = 256; + +retry:; + const int step = T - 2 * O; + if(step <= 0) + { + dt_free_align(rgb_src); + dt_free_align(rgb_out); + dt_free_align(rgba); + return 1; + } + const size_t tile_plane = (size_t)T * T; + const int cols = (w + step - 1) / step; + const int rows = (h + step - 1) / step; + const int total_tiles = cols * rows; + + dt_print(DT_DEBUG_AI, + "[restore_raw_linear] tile T=%d step=%d, grid %dx%d (%d tiles)", + T, step, cols, rows, total_tiles); + + float *tile_in = g_try_malloc(tile_plane * 3 * sizeof(float)); + float *tile_out = g_try_malloc(tile_plane * 3 * sizeof(float)); + if(!tile_in || !tile_out) + { + g_free(tile_in); + g_free(tile_out); + dt_free_align(rgb_src); + dt_free_align(rgb_out); + dt_free_align(rgba); + return 1; + } + + int res = 0; + int tile_count = 0; + + for(int ty = 0; ty < rows && res == 0; ty++) + { + for(int tx = 0; tx < cols && res == 0; tx++) + { + if(control_job + && dt_control_job_get_state(control_job) + == DT_JOB_STATE_CANCELLED) + { + res = 1; + break; + } + + const int y_base = ty * step; + const int x_base = tx * step; + const int y_end = (y_base + step > h) ? h : y_base + step; + const int x_end = (x_base + step > w) ? 
w : x_base + step; + const int core_h = y_end - y_base; + const int core_w = x_end - x_base; + + // extract T x T tile with mirror-pad at boundaries, planar + for(int dy = 0; dy < T; dy++) + { + const int sy = _mirror(y_base - O + dy, h); + for(int dx = 0; dx < T; dx++) + { + const int sx = _mirror(x_base - O + dx, w); + const size_t src = (size_t)sy * w + sx; + const size_t dst = (size_t)dy * T + dx; + tile_in[dst] = rgb_src[src]; + tile_in[dst + tile_plane] = rgb_src[src + plane]; + tile_in[dst + 2 * tile_plane] = rgb_src[src + 2 * plane]; + } + } + + // inference + if(dt_restore_run_patch_3ch_raw(ctx, tile_in, T, T, tile_out) != 0) + { + int next_T = 0; + for(int i = 0; i < n_ladder; i++) + if(ladder[i] < T) { next_T = ladder[i]; break; } + if(next_T > 0 && ty == 0 && tx == 0 + && dt_restore_reload_session(ctx, next_T)) + { + dt_print(DT_DEBUG_AI, + "[restore_raw_linear] inference failed at T=%d, retry T=%d", + T, next_T); + g_free(tile_in); + g_free(tile_out); + T = next_T; + goto retry; + } + dt_print(DT_DEBUG_AI, + "[restore_raw_linear] inference failed at tile %d,%d " + "(T=%d)", tx, ty, T); + res = 1; + break; + } + + // scalar match_gain per channel: tile_out *= in_mean / out_mean + // (applied in place by the helper). skipped for ABSOLUTE-scale + // models whose output is already calibrated + const size_t per_ch = tile_plane; + float gain_ch[3] = { 1.0f, 1.0f, 1.0f }; + if(ctx->output_scale == DT_RESTORE_OUT_MATCH_GAIN) + _linear_gain_match_3ch(tile_in, tile_out, per_ch, gain_ch); + if(tx == 0 && ty == 0) + { + dt_print(DT_DEBUG_AI, + "[restore_raw_linear] tile0 match_gain " + "R=%.3e G=%.3e B=%.3e", + gain_ch[0], gain_ch[1], gain_ch[2]); + } + + // blend: write (α·denoised + (1-α)·source) per channel into + // the core-valid region. 
rgb_out was pre-filled with rgb_src + // so overlap gaps stay as source + for(int dy = 0; dy < core_h; dy++) + { + const int y = y_base + dy; + const int my = O + dy; + for(int dx = 0; dx < core_w; dx++) + { + const int x = x_base + dx; + const int mx = O + dx; + const size_t tloc = (size_t)my * T + mx; + const size_t dst = (size_t)y * w + x; + + for(int k = 0; k < 3; k++) + { + const float model_v + = tile_out[tloc + (size_t)k * per_ch]; + const float src_v = rgb_src[dst + (size_t)k * plane]; + rgb_out[dst + (size_t)k * plane] + = alpha * model_v + inv_alpha * src_v; + } + } + } + + tile_count++; + if(control_job) + dt_control_job_set_progress(control_job, + (double)tile_count / total_tiles); + } + } + + g_free(tile_in); + g_free(tile_out); + + if(res == 0) + { + // final undo pass: input-space -> camRGB (matrix), divide by + // exposure boost, divide by WB. the DNG writer expects un-WB'd + // normalized camRGB in [0, 1] — AsShotNeutral tells the consumer + // what WB to apply + // out = (input_to_cam · in) / (boost · wb_norm[k]) + // all ops are linear, fold into a single per-pixel 3x3 mul + const float inv_boost = 1.0f / exposure_boost; + float M[9]; + _linear_build_M_boosted(input_to_cam, inv_boost, wb_norm, M); + + for(size_t i = 0; i < npix; i++) + { + const float input_rgb[3] = { + rgb_out[i], + rgb_out[i + plane], + rgb_out[i + 2 * plane], + }; + float cam[3]; + mat3mulv(cam, M, input_rgb); + rgb_out[i] = cam[0]; + rgb_out[i + plane] = cam[1]; + rgb_out[i + 2 * plane] = cam[2]; + } + + dt_restore_persist_tile_size(ctx); + } + + dt_free_align(rgb_src); + dt_free_align(rgba); + + if(res != 0) + { + dt_free_align(rgb_out); + return res; + } + + // convert planar RGB back to interleaved for caller convenience + float *interleaved = dt_alloc_align_float(npix * 3); + if(!interleaved) + { + dt_free_align(rgb_out); + return 1; + } + for(size_t i = 0; i < npix; i++) + { + interleaved[i * 3 + 0] = rgb_out[i]; + interleaved[i * 3 + 1] = rgb_out[i + plane]; + 
interleaved[i * 3 + 2] = rgb_out[i + 2 * plane]; + } + dt_free_align(rgb_out); + + *out_rgb = interleaved; + *out_w = w; + *out_h = h; + return 0; +} + +// preview prep: demosaic-once per image +// +// dt_restore_raw_linear_prepare runs the full per-image demosaic + +// WB + camRGB->lin_rec2020 once and returns a 3ch interleaved buffer at +// sensor resolution; neural_restore.c caches it across previews of the +// same image +int dt_restore_raw_linear_prepare(const dt_imgid_t imgid, + float **out_rgb, + int *out_w, + int *out_h) +{ + if(!out_rgb || !out_w || !out_h) return 1; + *out_rgb = NULL; + + // 1. demosaic via minimal darktable pipe (rawprepare + highlights + + // demosaic; no temperature, no post-demosaic modules) + float *rgba = NULL; + int w = 0, h = 0; + if(_run_demosaic_pipe(imgid, &rgba, &w, &h)) return 1; + + // 2. snapshot image metadata for WB + matrix derivation + const dt_image_t *cached = dt_image_cache_get(imgid, 'r'); + if(!cached) { dt_free_align(rgba); return 1; } + dt_image_t img_meta = *cached; + dt_image_cache_read_release(cached); + + // this prepare path has no ctx; use the default (AS_SHOT) WB. the + // cached lin_rec2020 buffer assumes this mode, so a future model + // that needs a different WB would require keying the cache on ctx + // too (or adding ctx to this API) + float wb_norm[3]; + _resolve_linear_wb(NULL, &img_meta, wb_norm); + + // 3. camRGB -> input-space matrix. this prepare path has no ctx + // so we use the default (LIN_REC2020); a second model expecting a + // different input space would need the cache keyed on it too + float cam_to_input[9]; + float input_to_cam[9]; + if(!_build_cam_matrices(NULL, &img_meta, cam_to_input, input_to_cam)) + { + for(int i = 0; i < 9; i++) + cam_to_input[i] = (i % 4 == 0) ? 1.0f : 0.0f; + } + + // 4. 
interleaved RGBA -> interleaved RGB in input-space + WB + const size_t npix = (size_t)w * h; + float *interleaved = dt_alloc_align_float(npix * 3); + if(!interleaved) { dt_free_align(rgba); return 1; } + + for(size_t i = 0; i < npix; i++) + { + const float cam[3] = { + rgba[i * 4 + 0] * wb_norm[0], + rgba[i * 4 + 1] * wb_norm[1], + rgba[i * 4 + 2] * wb_norm[2], + }; + float input_rgb[3]; + mat3mulv(input_rgb, cam_to_input, cam); + interleaved[i * 3 + 0] = input_rgb[0]; + interleaved[i * 3 + 1] = input_rgb[1]; + interleaved[i * 3 + 2] = input_rgb[2]; + } + dt_free_align(rgba); + + *out_rgb = interleaved; + *out_w = w; + *out_h = h; + return 0; +} + +// preview: single-tile X-Trans/linear inference, un-matrix + un-WB + +// un-boost back to raw-ADC, re-mosaic onto the X-Trans CFA, then run +// the user's pipe twice (via dt_restore_run_user_pipe_roi) on the +// patched vs. original CFA to produce display-referred before/after +// crops matching the darkroom render +int dt_restore_raw_linear_preview_piped(dt_restore_context_t *ctx, + const dt_image_t *img, + dt_imgid_t imgid, + const float *full_rgb, + int width, int height, + int crop_x, int crop_y, + int crop_w, int crop_h, + float **out_before_rgb, + float **out_denoised_rgb, + int *out_w, + int *out_h) +{ + if(!ctx || !img || !full_rgb || !out_before_rgb || !out_denoised_rgb) + return 1; + *out_before_rgb = NULL; + *out_denoised_rgb = NULL; + if(out_w) *out_w = 0; + if(out_h) *out_h = 0; + + if(width <= 0 || height <= 0 || crop_w <= 0 || crop_h <= 0) return 1; + + const int T = dt_restore_get_tile_size(ctx); + if(T <= 0) return 1; + const int max_disp = T - 2 * OVERLAP_LINEAR; + if(crop_w > max_disp || crop_h > max_disp) return 1; + + int inf_x = crop_x + crop_w / 2 - T / 2; + int inf_y = crop_y + crop_h / 2 - T / 2; + + // WB + matrix prep (same as dt_restore_raw_linear_prepare / + // dt_restore_raw_linear_preview — but we also need the REVERSE + // transforms to go back to camRGB raw for pipe input) + float 
wb_norm[3]; + _resolve_linear_wb(ctx, img, wb_norm); + + // NOTE: full_rgb comes from dt_restore_raw_linear_prepare, which + // always caches in LIN_REC2020. if ctx->input_colorspace is something + // else, the reverse-matrix below won't undo what _prepare did and + // output will be wrong. until the cache keys on colorspace, this + // branch is only correct for LIN_REC2020. we still thread ctx so + // the invocation shape is right for future work + float cam_to_input[9]; + float input_to_cam[9]; + if(!_build_cam_matrices(ctx, img, cam_to_input, input_to_cam)) + { + for(int i = 0; i < 9; i++) + cam_to_input[i] = input_to_cam[i] = (i % 4 == 0) ? 1.0f : 0.0f; + } + + // extract crop + overlap from cached full lin_rec2020 -> tile_in + // apply exposure boost (same as preview), run inference + const size_t tile_plane = (size_t)T * T; + float *tile_in = g_try_malloc(tile_plane * 3 * sizeof(float)); + float *tile_out = g_try_malloc(tile_plane * 3 * sizeof(float)); + if(!tile_in || !tile_out) + { + g_free(tile_in); + g_free(tile_out); + return 1; + } + + for(int dy = 0; dy < T; dy++) + { + const int sy = _mirror(inf_y + dy, height); + for(int dx = 0; dx < T; dx++) + { + const int sx = _mirror(inf_x + dx, width); + const size_t src = ((size_t)sy * width + sx) * 3; + const size_t dst = (size_t)dy * T + dx; + tile_in[dst] = full_rgb[src + 0]; + tile_in[dst + tile_plane] = full_rgb[src + 1]; + tile_in[dst + 2 * tile_plane] = full_rgb[src + 2]; + } + } + + float exposure_boost = 1.0f; + _linear_exposure_boost(ctx, tile_in, tile_plane, NULL, &exposure_boost); + + if(dt_restore_run_patch_3ch_raw(ctx, tile_in, T, T, tile_out) != 0) + { + g_free(tile_in); + g_free(tile_out); + return 1; + } + + if(ctx->output_scale == DT_RESTORE_OUT_MATCH_GAIN) + _linear_gain_match_3ch(tile_in, tile_out, tile_plane, NULL); + g_free(tile_in); + + // build matrix to reverse matrix + WB + boost + normalise + // tile_out came from a boosted tile_in (gain_match matches boosted + // magnitudes). 
to write it back to the native CFA, we reverse the + // whole prepare chain: + // input-space → (input_to_cam) → cam[k] = sum_i M[k][i] * in[i] + // → /wb_norm[k] → un-WB'd raw scale (normalised) + // → *range[?]+black[?] → raw ADC range + // → rounded uint16 CFA value + // folding WB undo and boost undo into one matrix applied per-pixel + const float inv_boost = 1.0f / exposure_boost; + float M_boosted[9]; + _linear_build_M_boosted(input_to_cam, inv_boost, wb_norm, M_boosted); + + // tile_in was already freed after the gain-match loop above + + // fetch native raw buffer + rawprepare params for un-normalise + // mbuf is at RAW sensor dims (e.g. 6336x4182), which are larger than + // the post-rawprepare dims the caller passed (e.g. 6240x4160). the + // rawprepare crop offset lives in img->crop_x / img->crop_y + dt_mipmap_buffer_t mbuf; + dt_mipmap_cache_get(&mbuf, imgid, DT_MIPMAP_FULL, + DT_MIPMAP_BLOCKING, 'r'); + if(!mbuf.buf || mbuf.width <= 0 || mbuf.height <= 0) + { + dt_mipmap_cache_release(&mbuf); + g_free(tile_out); + return 1; + } + const int raw_w = mbuf.width; + const int raw_h = mbuf.height; + const int raw_off_x = img->crop_x; + const int raw_off_y = img->crop_y; + const int is_uint16 = (img->buf_dsc.datatype == TYPE_UINT16); + const int is_float = (img->buf_dsc.datatype == TYPE_FLOAT); + if(!is_uint16 && !is_float) + { + dt_mipmap_cache_release(&mbuf); + g_free(tile_out); + return 1; + } + const size_t pixel_sz = is_uint16 ? 2 : 4; + const size_t total_bytes = (size_t)raw_w * raw_h * pixel_sz; + + // rawprepare's normalisation: pipe will do (value - sub) / div where + // sub is per-CFA-site black level and div is (white - black). 
to get + // back to raw ADC space we compute per-site (value * range[idx]) + black[idx] + // NOTE: raw_black_level_separate is indexed by CFA position k in 0..3 + // (even if X-Trans has 6 colours, darktable's per-sensel black is 4- + // entry; typical cameras use one value for all positions anyway) + float black[4], range[4], white; + _compute_cfa_black_range(img, black, range, &white); + + // build patched CFA: copy original, overwrite crop with re-mosaiced denoised + void *patched = g_try_malloc(total_bytes); + if(!patched) + { + dt_mipmap_cache_release(&mbuf); + g_free(tile_out); + return 1; + } + memcpy(patched, mbuf.buf, total_bytes); + + // patch the full T × T inference region (clamped to the post-rawprepare + // buffer extent) rather than just the display crop. this gives the + // pipe's geometry chain ~tile-size/2 pixels of slop on each side so + // any residual coordinate drift falls inside denoised data instead of + // showing original CFA at the preview edge + const int patch_x0 = (inf_x < 0) ? 0 : inf_x; + const int patch_y0 = (inf_y < 0) ? 0 : inf_y; + const int patch_x1 = (inf_x + T > width) ? width : inf_x + T; + const int patch_y1 = (inf_y + T > height) ? height : inf_y + T; + + for(int py = patch_y0; py < patch_y1; py++) + { + const int sr_raw = raw_off_y + py; + const size_t mo_row = (size_t)(py - inf_y) * T; + for(int px = patch_x0; px < patch_x1; px++) + { + const int sc_raw = raw_off_x + px; + const size_t mx = (size_t)(px - inf_x); + const float rec[3] = { + tile_out[0 * tile_plane + mo_row + mx], + tile_out[1 * tile_plane + mo_row + mx], + tile_out[2 * tile_plane + mo_row + mx], + }; + // rec → cam (un-matrix + un-WB + un-boost); clamp to [0, 1] + float cam[3]; + mat3mulv(cam, M_boosted, rec); + for(int c = 0; c < 3; c++) + { + if(cam[c] < 0.0f) cam[c] = 0.0f; + if(cam[c] > 1.0f) cam[c] = 1.0f; + } + // re-mosaic: pick the single colour that the X-Trans pattern + // wants at this sensor position, scaled back to raw ADC range. 
+ // FCxtrans uses raw-sensor parity (since xtrans[6][6] is aligned + // with the raw, not the post-crop buffer) + const int ch = FCxtrans(sr_raw, sc_raw, NULL, img->buf_dsc.xtrans); + const int bl_idx = ((sr_raw & 1) << 1) | (sc_raw & 1); + const float adc = cam[ch] * range[bl_idx] + black[bl_idx]; + const float clipped + = adc < 0.0f ? 0.0f : (adc > white ? white : adc); + const size_t idx = (size_t)sr_raw * raw_w + sc_raw; + if(is_uint16) + ((uint16_t *)patched)[idx] = (uint16_t)(clipped + 0.5f); + else + ((float *)patched)[idx] = clipped; + } + } + + g_free(tile_out); + + // run pipe twice on raw-sensor-sized buffers + // ROI is in sensor coords (matching the patched region we built + // above); dt_restore_run_user_pipe_roi forward-transforms it + // through the user's geometry chain before handing to the pipe + int dw = 0, dh = 0, bw = 0, bh = 0; + int err = dt_restore_run_user_pipe_roi(imgid, patched, raw_w, raw_h, + crop_x, crop_y, crop_w, crop_h, + &dw, &dh, out_denoised_rgb); + g_free(patched); + + if(err == 0) + { + err = dt_restore_run_user_pipe_roi(imgid, (void *)mbuf.buf, raw_w, raw_h, + crop_x, crop_y, crop_w, crop_h, + &bw, &bh, out_before_rgb); + } + dt_mipmap_cache_release(&mbuf); + + if(err || dw != bw || dh != bh) + { + if(dw != bw || dh != bh) + dt_print(DT_DEBUG_AI, + "[restore_raw_linear] preview_piped: before/after dim " + "mismatch (%dx%d vs %dx%d) — aborting", + bw, bh, dw, dh); + g_free(*out_before_rgb); *out_before_rgb = NULL; + g_free(*out_denoised_rgb); *out_denoised_rgb = NULL; + return 1; + } + if(out_w) *out_w = dw; + if(out_h) *out_h = dh; + return 0; +} + +// clang-format off +// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py +// vim: shiftwidth=2 expandtab tabstop=2 cindent +// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified; +// clang-format on diff --git a/src/common/ai/restore_raw_linear.h 
b/src/common/ai/restore_raw_linear.h new file mode 100644 index 000000000000..53f3eaeec980 --- /dev/null +++ b/src/common/ai/restore_raw_linear.h @@ -0,0 +1,161 @@ +/* + This file is part of darktable, + Copyright (C) 2026 darktable developers. + + darktable is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + darktable is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with darktable. If not, see . +*/ + +// raw_restore_linear — RawNIND linear-variant denoise pipeline +// +// for sensors the bayer variant can't handle (X-Trans in particular), +// we feed the linear variant of the RawNIND model, which expects a +// 3-channel demosaicked image in lin_rec2020 space at raw scale. +// +// input is produced by running a minimal darktable pipeline: +// rawprepare -> highlights -> demosaic +// while skipping temperature (so we apply our own daylight WB later) +// and every iop after demosaic. this reuses darktable's sensor-aware +// demosaic (AMaZE / VNG / Markesteijn / …) instead of rolling our own, +// which matters for X-Trans quality. +// +// output is a 3ch float RGB buffer at full sensor resolution, in the +// same camRGB + raw ADC range as the source. the neural_restore batch +// path re-mosaics nothing (this sensor type can't be round-tripped +// through a CFA DNG) and writes a LinearRaw DNG via dng_writer. + +#pragma once + +#include + +#include "common/ai/restore.h" +#include "common/darktable.h" + +struct _dt_job_t; +struct dt_image_t; + +// @brief Run the RawNIND linear denoise pipeline end-to-end. +// +// Internally: +// 1. 
builds a minimal darktable pixelpipe (rawprepare + highlights +// + demosaic, nothing after), disables temperature so no WB is +// baked in; +// 2. allocates the 3ch float demosaicked output at sensor res; +// 3. applies daylight WB + camRGB -> lin_rec2020 matrix; +// 4. tiles the image and calls dt_restore_run_patch_3ch_raw on each, +// gain-matching the output per tile; +// 5. inverts the matrix + WB; +// 6. strength-blends with the pre-inference demosaicked buffer. +// +// @param ctx loaded linear context +// (dt_restore_load_rawdenoise_linear) +// @param imgid image id (pipeline is built per image) +// @param out_rgb caller-allocated 3ch float buffer, +// 3 * sensor_w * sensor_h floats (interleaved RGB). +// on success contains the denoised image in camRGB +// raw-ADC units (same range as the source pre-demosaic +// pipeline would produce). +// @param out_w out: sensor width at which the buffer is filled +// @param out_h out: sensor height at which the buffer is filled +// @param strength 0..1 blend between the demosaicked source (0) and +// the denoised result (1) +// @param control_job job handle for progress/cancellation (NULL-safe) +// @return 0 on success; out_rgb left untouched on failure +int dt_restore_raw_linear(dt_restore_context_t *ctx, + const dt_imgid_t imgid, + float **out_rgb, + int *out_w, + int *out_h, + float strength, + struct _dt_job_t *control_job); + +// @brief Once-per-image demosaic + WB + camRGB->lin_rec2020 prep. +// +// Runs the same minimal pipeline as dt_restore_raw_linear (rawprepare + +// highlights + demosaic, no temperature, no post-demosaic modules) and +// returns a 3ch interleaved lin_rec2020 buffer at sensor resolution. +// +// Slow (full-image demosaic via darktable's pipeline). neural_restore +// caches the result across multiple preview refreshes of the same image. +// +// @param imgid image id +// @param out_rgb caller-frees with dt_free_align. 
3ch interleaved +// (sensor_w * sensor_h * 3 floats), in lin_rec2020 with +// as-shot WB applied. +// @param out_w out: sensor width +// @param out_h out: sensor height +// @return 0 on success +int dt_restore_raw_linear_prepare(const dt_imgid_t imgid, + float **out_rgb, + int *out_w, + int *out_h); + +// @brief Linear preview through darktable's real pixelpipe — "preview = +// batch" for X-Trans / non-Bayer sensors. +// +// Runs inference on the crop, un-matrix / un-WB / un-boost the denoised +// crop back to raw-ADC space, re-mosaics onto the X-Trans CFA grid at +// the original sensor positions, then runs darktable's full pixelpipe +// twice on the raw-sensor-sized CFA — once on the patched CFA for +// "after", once on the original for "before". The pipe runs natively +// (rawprepare + highlights + X-Trans demosaic + temperature + colorin +// + filmic + output profile), so the output matches what the user sees +// in darkroom. +// +// Expensive: two full pipelined renders per refresh plus a full-sensor +// un-matrix pass. First refresh on a new image also pays one demosaic +// via dt_restore_raw_linear_prepare. +// +// @param ctx loaded linear context +// @param img source image metadata (for WB / matrix derivation) +// @param imgid image id (used by the pixelpipe) +// @param full_rgb 3ch interleaved lin_rec2020 buffer covering the +// whole sensor (from dt_restore_raw_linear_prepare) +// @param width sensor width +// @param height sensor height +// @param crop_x displayed crop top-left x +// @param crop_y displayed crop top-left y +// @param crop_w displayed crop width (≤ tile_size - 2*OVERLAP_LINEAR) +// @param crop_h displayed crop height +// @param out_before_rgb caller-frees with g_free. 3ch interleaved +// (*out_w * *out_h * 3 floats), linear Rec.709, +// pipe output on the original camRGB raw. +// @param out_denoised_rgb caller-frees with g_free. same shape, pipe +// output on the denoised-patched camRGB raw +// at α = 1. 
+// @param out_w receives actual rendered width (may differ +// from crop_w when user history contains +// geometry-modifying modules; both returned +// buffers share these dims). +// @param out_h receives actual rendered height. +// @return 0 on success; both outputs NULL on failure. +int dt_restore_raw_linear_preview_piped(dt_restore_context_t *ctx, + const struct dt_image_t *img, + dt_imgid_t imgid, + const float *full_rgb, + int width, + int height, + int crop_x, + int crop_y, + int crop_w, + int crop_h, + float **out_before_rgb, + float **out_denoised_rgb, + int *out_w, + int *out_h); + +// clang-format off +// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py +// vim: shiftwidth=2 expandtab tabstop=2 cindent +// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified; +// clang-format on diff --git a/src/common/ai/restore_rgb.c b/src/common/ai/restore_rgb.c new file mode 100644 index 000000000000..640486208aaf --- /dev/null +++ b/src/common/ai/restore_rgb.c @@ -0,0 +1,832 @@ +/* + This file is part of darktable, + Copyright (C) 2026 darktable developers. + + darktable is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + darktable is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with darktable. If not, see . +*/ + +// restore_rgb — RGB-path glue for the AI denoise + upscale tasks. +// +// inputs here are linear-working-profile float4 RGBA (from darktable +// export). 
this file owns: +// - color management: working-profile → sRGB before inference and +// back after, with optional wide-gamut preservation mask +// - shadow boost: per-image luminance curve to protect deep shadows +// during sRGB round-trip (opt-in via model attribute) +// - wavelet (DWT) detail recovery: preserve high-frequency texture +// in the luminance residual after denoise +// - dt_restore_process_tiled driver that ties together tiling, +// gamut masking, shadow boost and the per-patch inference call +// (dt_restore_run_patch). the low-level inference helpers live in +// restore.c; this file composes them for RGB. +// +// the raw denoise variants (Bayer / X-Trans) do their own pre/post- +// processing (per-CFA-site black / WB / re-mosaic) and live in +// restore_raw_bayer.c / restore_raw_linear.c. they share the generic +// pipeline-bridge dt_restore_run_user_pipe_roi() in restore.c + +#include "common/ai/restore_rgb.h" +#include "common/ai/restore_common.h" +#include "ai/backend.h" +#include "common/darktable.h" +#include "common/colorspaces.h" +#include "common/colorspaces_inline_conversions.h" +#include "common/imagebuf.h" +#include "common/math.h" +#include "common/matrices.h" +#include "control/conf.h" +#include "control/jobs.h" + +#include +#include +#include + +// forward-declare to avoid pulling in dwt.h (which includes OpenCL +// types when HAVE_OPENCL is defined — and the AI shared library +// is built without OpenCL) +extern void dwt_denoise(float *buf, int width, int height, + int bands, const float *noise); + +#define MAX_MODEL_INPUTS 4 + +// default multipliers of residual sigma for each wavelet band. +// band 0 (finest) gets the strongest suppression since fine-scale +// features are hardest to distinguish from noise. coarser bands +// preserve more because they capture real texture. 
+// tunable via darktablerc: plugins/lighttable/neural_restore/detail_recovery_bands +static const float _dwt_sigma_mul_default[DWT_DETAIL_BANDS] = { + 0.25f, // band 0 (finest) — suppress fine luminance noise + 0.15f, // band 1 + 0.05f, // band 2 + 0.02f, // band 3 + 0.01f // band 4 (coarsest) — keep almost everything +}; + +// sRGB transfer function (gamma curve only, no primaries change). +// values > 1.0 are allowed to preserve wide-gamut colors +static inline float _linear_to_srgb(const float v) +{ + if(v <= 0.0f) return 0.0f; + return (v <= 0.0031308f) + ? 12.92f * v + : 1.055f * powf(v, 1.0f / 2.4f) - 0.055f; +} + +static inline float _srgb_to_linear(const float v) +{ + if(v <= 0.0f) return 0.0f; + return (v <= 0.04045f) + ? v / 12.92f + : powf((v + 0.055f) / 1.055f, 2.4f); +} + +// Rec.709 / sRGB luminance weights (Y row of sRGB->XYZ D65); +// applied to working-profile-linear pixels in the pass-through +// blending below; exact only when the working profile is +// sRGB/Rec.709, but correct enough for luminance deltas +static inline float _luma_rec709(float r, float g, float b) +{ + return 0.2126f * r + 0.7152f * g + 0.0722f * b; +} + +// compute adaptive noise thresholds from residual standard deviation +static void _compute_adaptive_noise(const float *const restrict buf, + const size_t npix, + float noise[DWT_DETAIL_BANDS]) +{ + // read band multipliers from config (comma-separated list). + // e.g. 
"0.5,0.3,0.1,0.05,0.02" in darktablerc + float sigma_mul[DWT_DETAIL_BANDS]; + memcpy(sigma_mul, _dwt_sigma_mul_default, sizeof(sigma_mul)); + gchar *val = dt_conf_get_string("plugins/lighttable/neural_restore/detail_recovery_bands"); + if(val && val[0]) + { + gchar **parts = g_strsplit(val, ",", DWT_DETAIL_BANDS); + for(int b = 0; parts[b] && b < DWT_DETAIL_BANDS; b++) + sigma_mul[b] = g_ascii_strtod(g_strstrip(parts[b]), NULL); + g_strfreev(parts); + } + g_free(val); + + double sum = 0.0, sum2 = 0.0; + for(size_t i = 0; i < npix; i++) + { + sum += (double)buf[i]; + sum2 += (double)buf[i] * (double)buf[i]; + } + const double mean = sum / (double)npix; + const float sigma = (float)sqrt(sum2 / (double)npix - mean * mean); + + for(int b = 0; b < DWT_DETAIL_BANDS; b++) + noise[b] = sigma * sigma_mul[b]; +} + +void dt_restore_set_profile(dt_restore_context_t *ctx, void *profile) +{ + if(!ctx) return; + if(!profile) + { + ctx->has_profile = FALSE; + return; + } + + float primaries[3][2], whitepoint[2]; + if(!dt_colorspaces_get_primaries_and_whitepoint_from_profile( + (cmsHPROFILE)profile, primaries, whitepoint)) + { + dt_print(DT_DEBUG_AI, + "[restore_rgb] could not read primaries from working profile, " + "falling back to gamma-only conversion"); + ctx->has_profile = FALSE; + return; + } + + // build WP -> XYZ (stored transposed by dt, convert to row-major) + dt_colormatrix_t wp_to_xyz_T; + dt_make_transposed_matrices_from_primaries_and_whitepoint(primaries, + whitepoint, + wp_to_xyz_T); + float wp_to_xyz[9]; + for(int i = 0; i < 3; i++) + for(int j = 0; j < 3; j++) + wp_to_xyz[3 * i + j] = wp_to_xyz_T[j][i]; + + // transpose dt's sRGB<->XYZ matrices (Bradford D50) to row-major + float xyz_to_srgb[9], srgb_to_xyz[9]; + for(int i = 0; i < 3; i++) + for(int j = 0; j < 3; j++) + { + xyz_to_srgb[3 * i + j] = xyz_to_srgb_transposed[j][i]; + srgb_to_xyz[3 * i + j] = sRGB_to_xyz_transposed[j][i]; + } + + // WP -> sRGB = (XYZ -> sRGB) * (WP -> XYZ) + mat3mul(ctx->wp_to_srgb, 
xyz_to_srgb, wp_to_xyz); + + // invert WP -> XYZ to get XYZ -> WP, then compose sRGB -> WP + float xyz_to_wp[9]; + if(mat3inv(xyz_to_wp, wp_to_xyz) != 0) + { + dt_print(DT_DEBUG_AI, + "[restore_rgb] singular WP->XYZ matrix, falling back to gamma-only"); + ctx->has_profile = FALSE; + return; + } + mat3mul(ctx->srgb_to_wp, xyz_to_wp, srgb_to_xyz); + + ctx->has_profile = TRUE; + dt_print(DT_DEBUG_AI, "[restore_rgb] working profile color matrices ready"); +} + +void dt_restore_set_preserve_wide_gamut(dt_restore_context_t *ctx, gboolean preserve) +{ + if(ctx) ctx->preserve_wide_gamut = preserve; +} + +int dt_restore_run_patch(dt_restore_context_t *ctx, + const float *in_patch, + int w, int h, + float *out_patch, + int scale) +{ + if(!ctx || !ctx->ai_ctx) return 1; + const size_t in_pixels = (size_t)w * h * 3; + const int out_w = w * scale; + const int out_h = h * scale; + const size_t out_pixels = (size_t)out_w * out_h * 3; + const size_t plane = (size_t)w * h; + + // convert to sRGB gamma-encoded. If a working profile is set, + // first convert primaries (working profile -> sRGB linear) so the + // model sees the image as if it were native sRGB. Otherwise only + // apply the gamma curve (legacy path, shifts hues for wide-gamut). + // input layout is planar NCHW: R plane, then G plane, then B plane. 
+ // in_gamut_mask records which pixels were in sRGB gamut (scale==1 + // only) so the output pass can skip recomputing WP->sRGB + float *srgb_in = g_try_malloc(in_pixels * sizeof(float)); + uint8_t *in_gamut_mask = NULL; + if(!srgb_in) return 1; + // only allocate the gamut mask when denoise pass-through is requested + const gboolean need_gamut_mask + = ctx->has_profile && scale == 1 && ctx->preserve_wide_gamut; + if(need_gamut_mask) + { + in_gamut_mask = g_try_malloc(plane); + if(!in_gamut_mask) + { + g_free(srgb_in); + return 1; + } + } + + if(ctx->has_profile) + { + const float *M = ctx->wp_to_srgb; + const gboolean boost = ctx->shadow_boost; + for(size_t p = 0; p < plane; p++) + { + const float r = in_patch[p]; + const float g = in_patch[p + plane]; + const float b = in_patch[p + 2 * plane]; + float sr = M[0] * r + M[1] * g + M[2] * b; + float sg = M[3] * r + M[4] * g + M[5] * b; + float sb = M[6] * r + M[7] * g + M[8] * b; + // gamut check uses pre-boost values so pass-through decisions + // reflect the original color + if(in_gamut_mask) + { + const float m = 0.01f; // ~1% margin beyond [0, 1] + in_gamut_mask[p] = (sr >= -m && sr <= 1.0f + m + && sg >= -m && sg <= 1.0f + m + && sb >= -m && sb <= 1.0f + m) ? 1 : 0; + } + if(boost) + { + sr = sr > 0.0f ? sqrtf(sr) : 0.0f; + sg = sg > 0.0f ? sqrtf(sg) : 0.0f; + sb = sb > 0.0f ? sqrtf(sb) : 0.0f; + } + srgb_in[p] = _linear_to_srgb(sr); + srgb_in[p + plane] = _linear_to_srgb(sg); + srgb_in[p + 2 * plane] = _linear_to_srgb(sb); + } + } + else if(ctx->shadow_boost) + { + // no profile: still boost shadows so the model stays within its + // comfort zone, even though we treat WP values as sRGB + for(size_t i = 0; i < in_pixels; i++) + { + const float v = in_patch[i]; + const float boosted = v > 0.0f ? 
sqrtf(v) : 0.0f; + srgb_in[i] = _linear_to_srgb(boosted); + } + } + else + { + for(size_t i = 0; i < in_pixels; i++) + srgb_in[i] = _linear_to_srgb(in_patch[i]); + } + + const int num_inputs = dt_ai_get_input_count(ctx->ai_ctx); + if(num_inputs > MAX_MODEL_INPUTS) + { + g_free(srgb_in); + return 1; + } + + int64_t input_shape[] = {1, 3, h, w}; + dt_ai_tensor_t inputs[MAX_MODEL_INPUTS]; + memset(inputs, 0, sizeof(inputs)); + inputs[0] = (dt_ai_tensor_t){ + .data = (void *)srgb_in, + .shape = input_shape, + .ndim = 4, + .type = DT_AI_FLOAT}; + + // noise level map for multi-input models + float *noise_map = NULL; + int64_t noise_shape[] = {1, 1, h, w}; + if(num_inputs >= 2) + { + const size_t map_size = (size_t)w * h; + noise_map = g_try_malloc(map_size * sizeof(float)); + if(!noise_map) + { + g_free(srgb_in); + return 1; + } + const float sigma_norm = 25.0f / 255.0f; + for(size_t i = 0; i < map_size; i++) + noise_map[i] = sigma_norm; + inputs[1] = (dt_ai_tensor_t){ + .data = (void *)noise_map, + .shape = noise_shape, + .ndim = 4, + .type = DT_AI_FLOAT}; + } + + int64_t output_shape[] = {1, 3, out_h, out_w}; + dt_ai_tensor_t output = { + .data = (void *)out_patch, + .shape = output_shape, + .ndim = 4, + .type = DT_AI_FLOAT}; + + int ret = dt_ai_run(ctx->ai_ctx, inputs, num_inputs, + &output, 1); + g_free(srgb_in); + g_free(noise_map); + if(ret != 0) + { + g_free(in_gamut_mask); + return ret; + } + + // convert model output back to the working profile + // + // with profile: apply inverse sRGB gamma, then check if the ORIGINAL + // input pixel (converted to sRGB linear) is representable in sRGB + // gamut. if yes, use model output converted back to working profile. + // if no, pass through the original pixel (wide-gamut colors preserved, + // no denoising on those pixels). 
upscale has no pixel-to-pixel + // correspondence so pass-through is not possible — always use the + // model output + // + // without profile: fall back to per-channel pass-through in the + // original (working-profile-as-sRGB) space + const gboolean boost = ctx->shadow_boost; + if(ctx->has_profile && scale == 1 && ctx->preserve_wide_gamut) + { + const size_t out_plane = (size_t)out_w * out_h; + const float *Mi = ctx->srgb_to_wp; + // pass 1: write denoised values for in-gamut pixels; out-of-gamut + // pixels get plain pass-through as a fallback (used only when no + // in-gamut neighbors are found in pass 2) + for(size_t p = 0; p < out_plane; p++) + { + if(in_gamut_mask[p]) + { + float sr = _srgb_to_linear(out_patch[p]); + float sg = _srgb_to_linear(out_patch[p + out_plane]); + float sb = _srgb_to_linear(out_patch[p + 2 * out_plane]); + if(boost) { sr *= sr; sg *= sg; sb *= sb; } + out_patch[p] = Mi[0] * sr + Mi[1] * sg + Mi[2] * sb; + out_patch[p + out_plane] = Mi[3] * sr + Mi[4] * sg + Mi[5] * sb; + out_patch[p + 2 * out_plane] = Mi[6] * sr + Mi[7] * sg + Mi[8] * sb; + } + else + { + out_patch[p] = in_patch[p]; + out_patch[p + out_plane] = in_patch[p + plane]; + out_patch[p + 2 * out_plane] = in_patch[p + 2 * plane]; + } + } + // pass 2: luminance-only smoothing for out-of-gamut pixels. 
the + // original pixel keeps its chroma (wide-gamut color preserved + // exactly) but its brightness is shifted to match the local + // average luminance of denoised in-gamut neighbors; this kills + // the single-pixel speckles that pass-through would otherwise + // leave visible against the denoised background + const int radius = 2; // 5x5 window + for(int y = 0; y < out_h; y++) + { + for(int x = 0; x < out_w; x++) + { + const size_t p = (size_t)y * out_w + x; + if(in_gamut_mask[p]) continue; + const float r0 = in_patch[p]; + const float g0 = in_patch[p + plane]; + const float b0 = in_patch[p + 2 * plane]; + const float Y_orig = _luma_rec709(r0, g0, b0); + float sumY = 0.0f; + int count = 0; + const int y0 = y - radius < 0 ? 0 : y - radius; + const int y1 = y + radius >= out_h ? out_h - 1 : y + radius; + const int x0 = x - radius < 0 ? 0 : x - radius; + const int x1 = x + radius >= out_w ? out_w - 1 : x + radius; + for(int yy = y0; yy <= y1; yy++) + { + for(int xx = x0; xx <= x1; xx++) + { + const size_t q = (size_t)yy * out_w + xx; + if(!in_gamut_mask[q]) continue; + const float rq = out_patch[q]; + const float gq = out_patch[q + out_plane]; + const float bq = out_patch[q + 2 * out_plane]; + sumY += _luma_rec709(rq, gq, bq); + count++; + } + } + if(count > 0) + { + const float dY = sumY / (float)count - Y_orig; + out_patch[p] = r0 + dY; + out_patch[p + out_plane] = g0 + dY; + out_patch[p + 2 * out_plane] = b0 + dY; + } + } + } + } + else if(ctx->has_profile && scale == 1) + { + // denoise with profile but NO pass-through: apply the inverse + // matrix to every pixel. 
wide-gamut inputs will have been clipped + // by the model, but we get denoising everywhere + const size_t out_plane = (size_t)out_w * out_h; + const float *Mi = ctx->srgb_to_wp; + for(size_t p = 0; p < out_plane; p++) + { + float sr = _srgb_to_linear(out_patch[p]); + float sg = _srgb_to_linear(out_patch[p + out_plane]); + float sb = _srgb_to_linear(out_patch[p + 2 * out_plane]); + if(boost) { sr *= sr; sg *= sg; sb *= sb; } + out_patch[p] = Mi[0] * sr + Mi[1] * sg + Mi[2] * sb; + out_patch[p + out_plane] = Mi[3] * sr + Mi[4] * sg + Mi[5] * sb; + out_patch[p + 2 * out_plane] = Mi[6] * sr + Mi[7] * sg + Mi[8] * sb; + } + } + else if(scale == 1) + { + // no profile set: per-channel pass-through, treats working-profile + // numbers as if they were sRGB. colors will be slightly shifted + // for wide-gamut working profiles — rely on the profile path above + // when possible. pass-through still honored via preserve_wide_gamut + for(size_t i = 0; i < out_pixels; i++) + { + const float in = in_patch[i]; + if(ctx->preserve_wide_gamut && (in < 0.0f || in > 1.0f)) + { + out_patch[i] = in; + } + else + { + float v = _srgb_to_linear(out_patch[i]); + if(boost) v *= v; + out_patch[i] = v; + } + } + } + else + { + // upscale: no pixel-to-pixel correspondence, use model output as-is + if(ctx->has_profile) + { + const size_t out_plane = (size_t)out_w * out_h; + const float *Mi = ctx->srgb_to_wp; + for(size_t p = 0; p < out_plane; p++) + { + float sr = _srgb_to_linear(out_patch[p]); + float sg = _srgb_to_linear(out_patch[p + out_plane]); + float sb = _srgb_to_linear(out_patch[p + 2 * out_plane]); + if(boost) { sr *= sr; sg *= sg; sb *= sb; } + out_patch[p] = Mi[0] * sr + Mi[1] * sg + Mi[2] * sb; + out_patch[p + out_plane] = Mi[3] * sr + Mi[4] * sg + Mi[5] * sb; + out_patch[p + 2 * out_plane] = Mi[6] * sr + Mi[7] * sg + Mi[8] * sb; + } + } + else + { + for(size_t i = 0; i < out_pixels; i++) + { + float v = _srgb_to_linear(out_patch[i]); + if(boost) v *= v; + out_patch[i] = v; + } + } 
+ } + + g_free(in_gamut_mask); + return 0; +} + +// per-image gate for the shadow-boost curve; enable only when the image +// has substantial near-black area to protect — bright images would only +// pay the curve cost (minor highlight compression) for no gain; +// thresholds tuned so localized very-dark features (a tree hollow, a +// silhouette) do NOT trigger; only broad noisy shadow regions do +// +// in_data is interleaved float4 RGBA +#define _SHADOW_BOOST_THRESHOLD 0.005f // 0.5% linear luminance +#define _SHADOW_BOOST_FRACTION 0.10f // 10% of sampled pixels +static gboolean _image_has_deep_shadows(const float *in_data, int w, int h) +{ + const size_t stride = 16; // sample 1/256 of pixels for speed + size_t dark = 0, total = 0; + for(size_t y = 0; y < (size_t)h; y += stride) + for(size_t x = 0; x < (size_t)w; x += stride) + { + const size_t p = ((size_t)y * w + x) * 4; + const float luma = 0.2126f * in_data[p] + + 0.7152f * in_data[p + 1] + + 0.0722f * in_data[p + 2]; + if(luma < _SHADOW_BOOST_THRESHOLD) dark++; + total++; + } + return total > 0 && (float)dark / total >= _SHADOW_BOOST_FRACTION; +} + +int dt_restore_process_tiled(dt_restore_context_t *ctx, + const float *in_data, + int width, int height, + int scale, + dt_restore_row_writer_t row_writer, + void *writer_data, + struct _dt_job_t *control_job) +{ + if(!ctx || !ctx->ai_ctx || !in_data || !row_writer) + return 1; + + // for shadow-boost-capable models, decide per-image whether the + // curve is worth applying; one analysis per call, before tiling, + // so all tiles see the same flag (avoids per-tile seams) + if(ctx->shadow_boost_capable) + { + const gboolean dark = _image_has_deep_shadows(in_data, width, height); + ctx->shadow_boost = dark; + dt_print(DT_DEBUG_AI, "[restore_rgb] shadow boost %s", + dark ? 
"enabled" : "disabled"); + } + + const int O = dt_restore_get_overlap(scale); + const int S = scale; + const int out_w = width * S; + // ladder was resolved at load time (either model's input_sizes or + // the built-in default for this scale) and travels with the context + const int *ladder = ctx->tile_ladder; + const int n_ladder = ctx->n_tile_ladder; + int T = ctx->tile_size; + + // outer retry loop: on inference failure (e.g. GPU OOM) drop to the + // next smaller candidate in the shared ladder and try again +retry:; + int step = T - 2 * O; + int T_out = T * S; + int O_out = O * S; + int step_out = step * S; + size_t in_plane = (size_t)T * T; + size_t out_plane = (size_t)T_out * T_out; + int cols = (width + step - 1) / step; + int rows = (height + step - 1) / step; + int total_tiles = cols * rows; + + dt_print(DT_DEBUG_AI, + "[restore_rgb] tiling %dx%d (scale=%d)" + " -> %dx%d, %dx%d grid (%d tiles, T=%d)", + width, height, S, out_w, height * S, + cols, rows, total_tiles, T); + + float *tile_in = g_try_malloc( + in_plane * 3 * sizeof(float)); + float *tile_out = g_try_malloc( + out_plane * 3 * sizeof(float)); + float *row_buf = g_try_malloc( + (size_t)out_w * step_out * 3 * sizeof(float)); + if(!tile_in || !tile_out || !row_buf) + { + g_free(tile_in); + g_free(tile_out); + g_free(row_buf); + return 1; + } + + int res = 0; + int tile_count = 0; + + for(int ty = 0; ty < rows; ty++) + { + const int y = ty * step; + const int valid_h = (y + step > height) + ? 
height - y : step; + const int valid_h_out = valid_h * S; + + memset(row_buf, 0, + (size_t)out_w * valid_h_out * 3 + * sizeof(float)); + + for(int tx = 0; tx < cols; tx++) + { + if(control_job + && dt_control_job_get_state(control_job) + == DT_JOB_STATE_CANCELLED) + { + res = 1; + goto cleanup; + } + + const int x = tx * step; + const int in_x = x - O; + const int in_y = y - O; + const int needs_mirror + = (in_x < 0 || in_y < 0 + || in_x + T > width + || in_y + T > height); + + // interleaved RGBx -> planar RGB + if(needs_mirror) + { + for(int dy = 0; dy < T; ++dy) + { + const int sy = _mirror(in_y + dy, height); + for(int dx = 0; dx < T; ++dx) + { + const int sx + = _mirror(in_x + dx, width); + const size_t po = (size_t)dy * T + dx; + const size_t si + = ((size_t)sy * width + sx) * 4; + tile_in[po] = in_data[si + 0]; + tile_in[po + in_plane] + = in_data[si + 1]; + tile_in[po + 2 * in_plane] + = in_data[si + 2]; + } + } + } + else + { + for(int dy = 0; dy < T; ++dy) + { + const float *row + = in_data + + ((size_t)(in_y + dy) * width + + in_x) * 4; + const size_t ro = (size_t)dy * T; + for(int dx = 0; dx < T; ++dx) + { + tile_in[ro + dx] = row[dx * 4 + 0]; + tile_in[ro + dx + in_plane] + = row[dx * 4 + 1]; + tile_in[ro + dx + 2 * in_plane] + = row[dx * 4 + 2]; + } + } + } + + if(dt_restore_run_patch( + ctx, tile_in, T, T, tile_out, S) != 0) + { + // retry with the next smaller ladder entry if no rows have + // been delivered yet (safe to restart). once rows are written + // we can't rewind the row_writer (e.g. TIFF is sequential). + // _reload_session() recreates the ORT session for the smaller + // tile size (dim overrides are shape-specific). 
+ int next_T = 0; + for(int i = 0; i < n_ladder; i++) + if(ladder[i] < T) { next_T = ladder[i]; break; } + if(next_T > 0 && ty == 0 + && dt_restore_reload_session(ctx, next_T)) + { + dt_print(DT_DEBUG_AI, + "[restore_rgb] inference failed at tile %d,%d " + "(T=%d), retrying with T=%d", + x, y, T, next_T); + g_free(tile_in); + g_free(tile_out); + g_free(row_buf); + T = next_T; + goto retry; + } + dt_print(DT_DEBUG_AI, + "[restore_rgb] inference failed at" + " tile %d,%d (T=%d, minimum reached)", x, y, T); + res = 1; + goto cleanup; + } + + // valid region -> row buffer + const int valid_w = (x + step > width) + ? width - x : step; + const int valid_w_out = valid_w * S; + + for(int dy = 0; dy < valid_h_out; ++dy) + { + const size_t src_row + = (size_t)(O_out + dy) * T_out + O_out; + const size_t dst_row + = ((size_t)dy * out_w + x * S) * 3; + for(int dx = 0; dx < valid_w_out; ++dx) + { + row_buf[dst_row + dx * 3 + 0] + = tile_out[src_row + dx]; + row_buf[dst_row + dx * 3 + 1] + = tile_out[src_row + dx + out_plane]; + row_buf[dst_row + dx * 3 + 2] + = tile_out[src_row + dx + + 2 * out_plane]; + } + } + + tile_count++; + if(control_job) + dt_control_job_set_progress(control_job, + (double)tile_count / total_tiles); + } + + // deliver completed scanlines via callback + for(int dy = 0; dy < valid_h_out; dy++) + { + const float *src = row_buf + (size_t)dy * out_w * 3; + if(row_writer(src, out_w, y * S + dy, + writer_data) != 0) + { + res = 1; + goto cleanup; + } + } + } + + // persist tile size on first full success so subsequent runs skip OOM retry + if(res == 0) + dt_restore_persist_tile_size(ctx); + +cleanup: + g_free(tile_in); + g_free(tile_out); + g_free(row_buf); + return res; +} + +void dt_restore_apply_detail_recovery(const float *original_4ch, + float *denoised_4ch, + int width, int height, + float alpha) +{ + const size_t npix = (size_t)width * height; + + float *const restrict lum_residual + = dt_alloc_align_float(npix); + if(!lum_residual) return; + +#ifdef 
_OPENMP +#pragma omp parallel for simd default(none) \ + dt_omp_firstprivate(original_4ch, denoised_4ch, \ + lum_residual, npix) \ + schedule(simd:static) \ + aligned(original_4ch, denoised_4ch, lum_residual:64) +#endif + for(size_t i = 0; i < npix; i++) + { + const size_t p = i * 4; + const float lum_orig + = 0.2126f * original_4ch[p + 0] + + 0.7152f * original_4ch[p + 1] + + 0.0722f * original_4ch[p + 2]; + const float lum_den + = 0.2126f * denoised_4ch[p + 0] + + 0.7152f * denoised_4ch[p + 1] + + 0.0722f * denoised_4ch[p + 2]; + lum_residual[i] = lum_orig - lum_den; + } + + float noise[DWT_DETAIL_BANDS]; + _compute_adaptive_noise(lum_residual, npix, noise); + dwt_denoise(lum_residual, width, height, + DWT_DETAIL_BANDS, noise); + +#ifdef _OPENMP +#pragma omp parallel for simd default(none) \ + dt_omp_firstprivate(denoised_4ch, lum_residual, \ + npix, alpha) \ + schedule(simd:static) \ + aligned(denoised_4ch, lum_residual:64) +#endif + for(size_t i = 0; i < npix; i++) + { + const size_t p = i * 4; + const float d = alpha * lum_residual[i]; + denoised_4ch[p + 0] += d; + denoised_4ch[p + 1] += d; + denoised_4ch[p + 2] += d; + } + + dt_free_align(lum_residual); +} + + +float *dt_restore_compute_dwt_detail(const float *before_3ch, + const float *after_3ch, + int width, int height) +{ + const size_t npix = (size_t)width * height; + float *lum_residual = dt_alloc_align_float(npix); + if(!lum_residual) return NULL; + + for(size_t i = 0; i < npix; i++) + { + const size_t si = i * 3; + const float lum_orig + = 0.2126f * before_3ch[si + 0] + + 0.7152f * before_3ch[si + 1] + + 0.0722f * before_3ch[si + 2]; + const float lum_den + = 0.2126f * after_3ch[si + 0] + + 0.7152f * after_3ch[si + 1] + + 0.0722f * after_3ch[si + 2]; + lum_residual[i] = lum_orig - lum_den; + } + + float noise[DWT_DETAIL_BANDS]; + _compute_adaptive_noise(lum_residual, npix, noise); + dwt_denoise(lum_residual, width, height, + DWT_DETAIL_BANDS, noise); + + return lum_residual; +} + +// clang-format off 
+// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py +// vim: shiftwidth=2 expandtab tabstop=2 cindent +// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified; +// clang-format on diff --git a/src/common/ai/restore_rgb.h b/src/common/ai/restore_rgb.h new file mode 100644 index 000000000000..fa922fca5275 --- /dev/null +++ b/src/common/ai/restore_rgb.h @@ -0,0 +1,180 @@ +/* + This file is part of darktable, + Copyright (C) 2026 darktable developers. + + darktable is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + darktable is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with darktable. If not, see . +*/ + +// restore_rgb — public API for the RGB-path AI tasks (denoise + upscale). +// +// consumers: +// - src/libs/neural_restore.c +// +// pixel pipeline: +// input is linear working-profile float4 RGBA (from darktable export). +// dt_restore_run_patch() converts linear→sRGB before inference and +// sRGB→linear after. models operate in planar NCHW layout. +// dt_restore_process_tiled() handles interleaved→planar conversion, +// mirror padding at boundaries, gamut masking, shadow boost, and +// overlap blending. +// +// detail recovery: +// dt_restore_apply_detail_recovery() uses wavelet (DWT) decomposition +// to separate noise from texture in the luminance residual (original +// − denoised). fine bands are thresholded; coarser bands are preserved +// and blended back. 
+ +#pragma once + +#include "common/ai/restore.h" + +#include + +struct _dt_job_t; + +// --- color management (RGB path) --- + +// @brief Set the working color profile for the context. +// +// The AI model was trained on sRGB primaries. If the input pixels are +// in a different working profile (e.g. Rec.2020), we must convert to +// sRGB before inference and back after to avoid hue shifts. Call this +// before running inference on each image that may use a different +// working profile. +// +// If profile is NULL, the pipeline falls back to gamma-only conversion +// (treating working-profile numbers as if they were sRGB), which can +// cause color shifts for wide-gamut working profiles. +// +// Thread-safety: must not be called concurrently with +// dt_restore_run_patch() or dt_restore_process_tiled(). Set the +// profile before dispatching inference on a given image. +// +// @param ctx context handle (NULL-safe) +// @param profile lcms2 cmsHPROFILE handle cast to void*; NULL to disable +void dt_restore_set_profile(dt_restore_context_t *ctx, void *profile); + +// @brief Enable/disable wide-gamut pass-through for denoise. +// +// When TRUE (default): pixels that would be out of sRGB gamut pass +// through unchanged, preserving color but not denoising them. When +// FALSE: all pixels use the model output, wide-gamut colors are +// clipped to sRGB but everything gets denoised. +// +// Affects denoise only (scale == 1). Upscale always uses the model +// output because there is no pixel-to-pixel correspondence to +// pass through. +// +// @param ctx context handle (NULL-safe) +// @param preserve TRUE to enable pass-through, FALSE to denoise everything +void dt_restore_set_preserve_wide_gamut(dt_restore_context_t *ctx, + gboolean preserve); + +// --- inference --- + +// @brief row writer callback for dt_restore_process_tiled +// +// called once per tile-row with 3ch interleaved float scanlines. +// the callback can write to a buffer, TIFF, or any other sink. 
+// +// @param scanline 3ch interleaved float data (out_w pixels) +// @param out_w output width in pixels +// @param y scanline index in the output image +// @param user_data caller-provided context +// @return 0 on success, non-zero to abort +typedef int (*dt_restore_row_writer_t)(const float *scanline, + int out_w, + int y, + void *user_data); + +// @brief run a single inference patch with sRGB conversion +// +// converts linear RGB input to sRGB, runs ONNX inference, +// converts output back to linear. input is planar NCHW float. +// +// @param ctx loaded restore context +// @param in_patch input tile (planar RGB, 3 * w * h floats) +// @param w tile width +// @param h tile height +// @param out_patch output buffer (planar RGB, 3 * w*s * h*s) +// @param scale upscale factor (1 for denoise) +// @return 0 on success +int dt_restore_run_patch(dt_restore_context_t *ctx, + const float *in_patch, + int w, int h, + float *out_patch, + int scale); + +// @brief process an image with tiled inference +// +// tiles the input, runs inference on each tile, and delivers +// completed scanlines via the row_writer callback. input is +// float4 RGBA interleaved (from dt export). 
+// +// @param ctx loaded restore context (tile_size is stored in ctx) +// @param in_data input pixels (float4 RGBA, width * height) +// @param width input width +// @param height input height +// @param scale upscale factor (1 for denoise) +// @param row_writer callback receiving 3ch float scanlines +// @param writer_data user data passed to row_writer +// @param control_job job handle for progress/cancellation (NULL-safe) +// @return 0 on success +int dt_restore_process_tiled(dt_restore_context_t *ctx, + const float *in_data, + int width, int height, + int scale, + dt_restore_row_writer_t row_writer, + void *writer_data, + struct _dt_job_t *control_job); + +// --- detail recovery --- + +// @brief apply DWT-based detail recovery after denoising +// +// extracts luminance residual, filters noise with wavelet +// decomposition, and blends preserved texture back. +// both buffers are float4 RGBA at the same dimensions. +// +// @param original_4ch original input pixels (read-only) +// @param denoised_4ch denoised pixels (modified in-place) +// @param width image width +// @param height image height +// @param alpha blend strength (0 = none, 1 = full) +void dt_restore_apply_detail_recovery(const float *original_4ch, + float *denoised_4ch, + int width, int height, + float alpha); + +// @brief compute DWT-filtered luminance detail from 3ch buffers +// +// returns a 1ch float array with wavelet-filtered luminance +// residual (noise removed, texture preserved). used for +// preview split visualization. +// +// @param before_3ch original image (3ch interleaved float) +// @param after_3ch processed image (3ch interleaved float) +// @param width image width +// @param height image height +// @return newly allocated 1ch buffer, or NULL. 
caller frees +// with dt_free_align() +float *dt_restore_compute_dwt_detail(const float *before_3ch, + const float *after_3ch, + int width, int height); + +// clang-format off +// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py +// vim: shiftwidth=2 expandtab tabstop=2 cindent +// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified; +// clang-format on diff --git a/src/common/ai/segmentation.c b/src/common/ai/segmentation.c index 946d26749d4c..13cf7c06f1ef 100644 --- a/src/common/ai/segmentation.c +++ b/src/common/ai/segmentation.c @@ -225,7 +225,7 @@ dt_seg_context_t *dt_seg_load(dt_ai_environment_t *env, const char *model_id) // issues with some decoder graphs (e.g. SegNext's Concat->Reshape) dt_ai_context_t *decoder = dt_ai_load_model_ext(env, model_id, "decoder.onnx", DT_AI_PROVIDER_CPU, - DT_AI_OPT_DISABLED, NULL, 0); + DT_AI_OPT_DISABLED, NULL, 0, 0); if(!decoder) { dt_print(DT_DEBUG_AI, "[segmentation] failed to load decoder for %s", model_id); @@ -388,7 +388,7 @@ dt_seg_context_t *dt_seg_load(dt_ai_environment_t *env, const char *model_id) const dt_ai_dim_override_t overrides[] = {{"num_labels", 1}}; ctx->decoder = dt_ai_load_model_ext(env, model_id, "decoder.onnx", DT_AI_PROVIDER_CPU, DT_AI_OPT_BASIC, - overrides, 1); + overrides, 1, 0); if(!ctx->decoder) { dt_print(DT_DEBUG_AI, "[segmentation] failed to reload decoder for %s", model_id); diff --git a/src/common/dng_writer.c b/src/common/dng_writer.c new file mode 100644 index 000000000000..9c537f56c7a4 --- /dev/null +++ b/src/common/dng_writer.c @@ -0,0 +1,373 @@ +/* + This file is part of darktable, + Copyright (C) 2026 darktable developers. + + darktable is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + darktable is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with darktable. If not, see . +*/ + +#include "common/dng_writer.h" +#include "common/darktable.h" +#include "common/exif.h" +#include "common/image.h" +#include "develop/imageop_math.h" + +#include +#include +#include +#include + +#ifdef _WIN32 +#include +#endif + +// DNG uses SRATIONAL / RATIONAL for matrix and WB tags. libtiff accepts +// these as float/double arrays and handles the conversion; we just pass +// the values as double + +// map the dcraw 2x2 CFA filters word to 4 single-byte channel indices +// for the DNG CFAPattern tag: 0=R, 1=G, 2=B, following DNG spec §A.3.1 +static void _cfa_bytes_from_filters(uint32_t filters, uint8_t out[4]) +{ + out[0] = FC(0, 0, filters); + out[1] = FC(0, 1, filters); + out[2] = FC(1, 0, filters); + out[3] = FC(1, 1, filters); +} + +int dt_dng_write_cfa_bayer(const char *filename, + const uint16_t *cfa, + int width, + int height, + const dt_image_t *img, + const void *exif_blob, + int exif_len) +{ + if(!filename || !cfa || !img || width <= 0 || height <= 0) + return 1; + +#ifdef _WIN32 + wchar_t *wfilename = g_utf8_to_utf16(filename, -1, NULL, NULL, NULL); + TIFF *tif = TIFFOpenW(wfilename, "wl"); + g_free(wfilename); +#else + TIFF *tif = TIFFOpen(filename, "wl"); +#endif + if(!tif) return 1; + + // required baseline TIFF tags for a single-plane raw image + TIFFSetField(tif, TIFFTAG_SUBFILETYPE, 0); + TIFFSetField(tif, TIFFTAG_IMAGEWIDTH, (uint32_t)width); + TIFFSetField(tif, TIFFTAG_IMAGELENGTH, (uint32_t)height); + TIFFSetField(tif, TIFFTAG_BITSPERSAMPLE, (uint16_t)16); + TIFFSetField(tif, TIFFTAG_SAMPLESPERPIXEL, (uint16_t)1); + TIFFSetField(tif, TIFFTAG_PLANARCONFIG, PLANARCONFIG_CONTIG); + 
TIFFSetField(tif, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_CFA); + TIFFSetField(tif, TIFFTAG_SAMPLEFORMAT, SAMPLEFORMAT_UINT); + TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_NONE); + TIFFSetField(tif, TIFFTAG_ORIENTATION, ORIENTATION_TOPLEFT); + TIFFSetField(tif, TIFFTAG_ROWSPERSTRIP, TIFFDefaultStripSize(tif, 0)); + TIFFSetField(tif, TIFFTAG_XRESOLUTION, 300.0); + TIFFSetField(tif, TIFFTAG_YRESOLUTION, 300.0); + TIFFSetField(tif, TIFFTAG_RESOLUTIONUNIT, RESUNIT_INCH); + { + gchar *software = g_strdup_printf("darktable %s", + darktable_package_version); + TIFFSetField(tif, TIFFTAG_SOFTWARE, software); + g_free(software); + } + + // camera identification + if(img->camera_maker[0]) + TIFFSetField(tif, TIFFTAG_MAKE, img->camera_maker); + if(img->camera_model[0]) + TIFFSetField(tif, TIFFTAG_MODEL, img->camera_model); + if(img->camera_makermodel[0]) + TIFFSetField(tif, TIFFTAG_UNIQUECAMERAMODEL, img->camera_makermodel); + + // DNG identification + const uint8_t dng_version[4] = { 1, 4, 0, 0 }; + const uint8_t dng_backward[4] = { 1, 2, 0, 0 }; + TIFFSetField(tif, TIFFTAG_DNGVERSION, dng_version); + TIFFSetField(tif, TIFFTAG_DNGBACKWARDVERSION, dng_backward); + + // CFA description + const uint16_t cfa_repeat_dim[2] = { 2, 2 }; + TIFFSetField(tif, TIFFTAG_CFAREPEATPATTERNDIM, cfa_repeat_dim); + + uint8_t cfa_pattern[4]; + _cfa_bytes_from_filters(img->buf_dsc.filters, cfa_pattern); + TIFFSetField(tif, TIFFTAG_CFAPATTERN, 4, cfa_pattern); + + const uint8_t cfa_plane_color[3] = { 0, 1, 2 }; // R, G, B + TIFFSetField(tif, TIFFTAG_CFAPLANECOLOR, 3, cfa_plane_color); + TIFFSetField(tif, TIFFTAG_CFALAYOUT, (uint16_t)1); // rectangular + + // black/white levels + // BlackLevel is declared as a 2x2 repeat over the CFA pattern. 
we + // honor per-channel values when rawspeed provided them, otherwise + // fall back to the single raw_black_level broadcast to all four + const uint16_t bl_repeat_dim[2] = { 2, 2 }; + TIFFSetField(tif, TIFFTAG_BLACKLEVELREPEATDIM, bl_repeat_dim); + + float black_level[4]; + const gboolean have_separate + = (img->raw_black_level_separate[0] != 0 + || img->raw_black_level_separate[1] != 0 + || img->raw_black_level_separate[2] != 0 + || img->raw_black_level_separate[3] != 0); + for(int i = 0; i < 4; i++) + { + black_level[i] = have_separate + ? (float)img->raw_black_level_separate[i] + : (float)img->raw_black_level; + } + TIFFSetField(tif, TIFFTAG_BLACKLEVEL, 4, black_level); + + const uint32_t white = img->raw_white_point + ? img->raw_white_point : 65535u; + TIFFSetField(tif, TIFFTAG_WHITELEVEL, 1, &white); + + // AsShotNeutral (derived from wb_coeffs) + // DNG AsShotNeutral encodes the neutral white balance as a + // cameraRGB triple where smaller values mean more amplification. + // darktable's wb_coeffs are raw-to-white multipliers; AsShotNeutral + // is their inverse, normalized so the maximum element is 1 + if(img->wb_coeffs[0] > 0.0f + && img->wb_coeffs[1] > 0.0f + && img->wb_coeffs[2] > 0.0f) + { + float inv[3]; + for(int i = 0; i < 3; i++) + inv[i] = 1.0f / img->wb_coeffs[i]; + const float m = fmaxf(inv[0], fmaxf(inv[1], inv[2])); + if(m > 0.0f) + for(int i = 0; i < 3; i++) inv[i] /= m; + TIFFSetField(tif, TIFFTAG_ASSHOTNEUTRAL, 3, inv); + } + + // ColorMatrix1 (XYZ D50 -> cameraRGB, 3x3 for trichromatic) + // darktable's adobe_XYZ_to_CAM is populated from the rawspeed + // cameras.xml matrix in row-major [camRGB][XYZ] layout, which + // matches the DNG ColorMatrix1 layout exactly (row = camera axis, + // column = XYZ axis) + { + float non_zero = 0.0f; + for(int k = 0; k < 3; k++) + for(int i = 0; i < 3; i++) + non_zero += fabsf(img->adobe_XYZ_to_CAM[k][i]); + + if(non_zero > 0.0f) + { + float color_matrix[9]; + for(int k = 0; k < 3; k++) + for(int i = 0; 
i < 3; i++) + color_matrix[k * 3 + i] = img->adobe_XYZ_to_CAM[k][i]; + TIFFSetField(tif, TIFFTAG_COLORMATRIX1, 9, color_matrix); + } + } + + // default scale / crop + // DefaultScale=1,1; DefaultCropOrigin=0,0; DefaultCropSize=W,H. + // this keeps the importer from applying any additional framing; + // darktable's crop module handles downstream framing + const float default_scale[2] = { 1.0f, 1.0f }; + const float default_crop_origin[2] = { 0.0f, 0.0f }; + const float default_crop_size[2] = { (float)width, (float)height }; + TIFFSetField(tif, TIFFTAG_DEFAULTSCALE, default_scale); + TIFFSetField(tif, TIFFTAG_DEFAULTCROPORIGIN, default_crop_origin); + TIFFSetField(tif, TIFFTAG_DEFAULTCROPSIZE, default_crop_size); + + // scanline write + int res = 0; + for(int y = 0; y < height && res == 0; y++) + { + const uint16_t *row = cfa + (size_t)y * width; + if(TIFFWriteScanline(tif, (void *)row, y, 0) < 0) + res = 1; + } + + TIFFClose(tif); + + // embed source EXIF (datetime, ISO, shutter, etc.) + // dt_exif_write_blob takes a non-const pointer; we don't modify it + if(res == 0 && exif_blob && exif_len > 0) + dt_exif_write_blob((uint8_t *)exif_blob, (uint32_t)exif_len, + filename, 0); + + if(res != 0) + g_unlink(filename); + + return res; +} + +int dt_dng_write_linear(const char *filename, + const float *rgb, + int width, + int height, + const dt_image_t *img, + const void *exif_blob, + int exif_len) +{ + if(!filename || !rgb || !img || width <= 0 || height <= 0) + return 1; + +#ifdef _WIN32 + wchar_t *wfilename = g_utf8_to_utf16(filename, -1, NULL, NULL, NULL); + TIFF *tif = TIFFOpenW(wfilename, "wl"); + g_free(wfilename); +#else + TIFF *tif = TIFFOpen(filename, "wl"); +#endif + if(!tif) return 1; + + // baseline TIFF tags, 3 samples per pixel (demosaicked) + TIFFSetField(tif, TIFFTAG_SUBFILETYPE, 0); + TIFFSetField(tif, TIFFTAG_IMAGEWIDTH, (uint32_t)width); + TIFFSetField(tif, TIFFTAG_IMAGELENGTH, (uint32_t)height); + TIFFSetField(tif, TIFFTAG_BITSPERSAMPLE, (uint16_t)16); 
+ TIFFSetField(tif, TIFFTAG_SAMPLESPERPIXEL, (uint16_t)3); + TIFFSetField(tif, TIFFTAG_PLANARCONFIG, PLANARCONFIG_CONTIG); + TIFFSetField(tif, TIFFTAG_PHOTOMETRIC, 34892); // LinearRaw + TIFFSetField(tif, TIFFTAG_SAMPLEFORMAT, SAMPLEFORMAT_UINT); + TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_NONE); + TIFFSetField(tif, TIFFTAG_ORIENTATION, ORIENTATION_TOPLEFT); + TIFFSetField(tif, TIFFTAG_ROWSPERSTRIP, TIFFDefaultStripSize(tif, 0)); + TIFFSetField(tif, TIFFTAG_XRESOLUTION, 300.0); + TIFFSetField(tif, TIFFTAG_YRESOLUTION, 300.0); + TIFFSetField(tif, TIFFTAG_RESOLUTIONUNIT, RESUNIT_INCH); + { + gchar *software = g_strdup_printf("darktable %s", + darktable_package_version); + TIFFSetField(tif, TIFFTAG_SOFTWARE, software); + g_free(software); + } + + // camera identification + if(img->camera_maker[0]) + TIFFSetField(tif, TIFFTAG_MAKE, img->camera_maker); + if(img->camera_model[0]) + TIFFSetField(tif, TIFFTAG_MODEL, img->camera_model); + if(img->camera_makermodel[0]) + TIFFSetField(tif, TIFFTAG_UNIQUECAMERAMODEL, img->camera_makermodel); + + // DNG identification + const uint8_t dng_version[4] = { 1, 4, 0, 0 }; + const uint8_t dng_backward[4] = { 1, 2, 0, 0 }; + TIFFSetField(tif, TIFFTAG_DNGVERSION, dng_version); + TIFFSetField(tif, TIFFTAG_DNGBACKWARDVERSION, dng_backward); + + // NO CFA tags: this is demosaicked data. + // encode as normalized: BlackLevel=0, WhiteLevel=65535. the + // pixel data is already un-WB'd camRGB in [0, 1] range (the + // raw_restore_linear pipeline does matrix + un-boost + un-WB + // before handing off). the consumer applies WB via + // AsShotNeutral, reads uint16 as [0, 65535] and normalizes to + // [0, 1] via black/white + const uint32_t white_norm = 65535u; + const float black3[3] = { 0.0f, 0.0f, 0.0f }; + TIFFSetField(tif, TIFFTAG_BLACKLEVEL, 3, black3); + TIFFSetField(tif, TIFFTAG_WHITELEVEL, 1, &white_norm); + + // AsShotNeutral = inverse of WB multipliers, normalized so max=1. 
+ // on re-import, darktable reads this and derives WB coeffs via + // wb[c] = 1/AsShotNeutral[c] / wb[G-normalized]. the temperature + // iop then applies this WB to our un-WB'd data, giving the standard + // raw-pipeline result + if(img->wb_coeffs[0] > 0.0f + && img->wb_coeffs[1] > 0.0f + && img->wb_coeffs[2] > 0.0f) + { + float inv[3]; + for(int i = 0; i < 3; i++) inv[i] = 1.0f / img->wb_coeffs[i]; + const float m = fmaxf(inv[0], fmaxf(inv[1], inv[2])); + if(m > 0.0f) for(int i = 0; i < 3; i++) inv[i] /= m; + TIFFSetField(tif, TIFFTAG_ASSHOTNEUTRAL, 3, inv); + } + else + { + const float neutral[3] = { 1.0f, 1.0f, 1.0f }; + TIFFSetField(tif, TIFFTAG_ASSHOTNEUTRAL, 3, neutral); + } + + // ColorMatrix1 from camera's XYZ->CAM (3x3 portion) + { + float non_zero = 0.0f; + for(int k = 0; k < 3; k++) + for(int i = 0; i < 3; i++) + non_zero += fabsf(img->adobe_XYZ_to_CAM[k][i]); + if(non_zero > 0.0f) + { + float color_matrix[9]; + for(int k = 0; k < 3; k++) + for(int i = 0; i < 3; i++) + color_matrix[k * 3 + i] = img->adobe_XYZ_to_CAM[k][i]; + TIFFSetField(tif, TIFFTAG_COLORMATRIX1, 9, color_matrix); + } + } + + // default scale / crop (full frame, no inset) + const float default_scale[2] = { 1.0f, 1.0f }; + const float default_crop_origin[2] = { 0.0f, 0.0f }; + const float default_crop_size[2] = { (float)width, (float)height }; + TIFFSetField(tif, TIFFTAG_DEFAULTSCALE, default_scale); + TIFFSetField(tif, TIFFTAG_DEFAULTCROPORIGIN, default_crop_origin); + TIFFSetField(tif, TIFFTAG_DEFAULTCROPSIZE, default_crop_size); + + // scanline write: float [0, 1] normalized camRGB -> uint16 + // [0, 65535]. 
BlackLevel=0 and WhiteLevel=65535 let the + // re-importer recover the [0, 1] range via the standard raw + // normalization (val - black) / (white - black) + const float clip_hi = 65535.0f; + uint16_t *scan = g_malloc((size_t)width * 3 * sizeof(uint16_t)); + int res = 0; + if(!scan) + { + TIFFClose(tif); + g_unlink(filename); + return 1; + } + for(int y = 0; y < height && res == 0; y++) + { + const float *row = rgb + (size_t)y * width * 3; + for(int x = 0; x < width; x++) + { + for(int c = 0; c < 3; c++) + { + float adc = row[x * 3 + c] * 65535.0f; + if(adc < 0.0f) adc = 0.0f; + if(adc > clip_hi) adc = clip_hi; + scan[x * 3 + c] = (uint16_t)(adc + 0.5f); + } + } + if(TIFFWriteScanline(tif, scan, y, 0) < 0) res = 1; + } + g_free(scan); + + TIFFClose(tif); + + if(res == 0 && exif_blob && exif_len > 0) + dt_exif_write_blob((uint8_t *)exif_blob, (uint32_t)exif_len, + filename, 0); + + if(res != 0) + g_unlink(filename); + + return res; +} + +// clang-format off +// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py +// vim: shiftwidth=2 expandtab tabstop=2 cindent +// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified; +// clang-format on diff --git a/src/common/dng_writer.h b/src/common/dng_writer.h new file mode 100644 index 000000000000..e039cc848a09 --- /dev/null +++ b/src/common/dng_writer.h @@ -0,0 +1,105 @@ +/* + This file is part of darktable, + Copyright (C) 2026 darktable developers. + + darktable is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + darktable is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with darktable. If not, see .
+*/
+
+// dng_writer — minimal DNG writer (bayer CFA + linear)
+//
+// writes a uint16 Bayer mosaic (or a demosaicked LinearRaw image)
+// plus enough DNG metadata
+// to let a raw processor (darktable, adobe, etc.) re-import the file
+// and run the normal raw pipeline: black/white level, CFA pattern,
+// AsShotNeutral (white balance), ColorMatrix1 (camRGB -> CIE XYZ),
+// camera make/model, and a pass-through of the source EXIF blob.
+//
+// this writer is intentionally narrow in scope:
+// - bayer CFA or demosaicked LinearRaw (dt_dng_write_linear) only
+// - uncompressed strip layout
+// - single IFD (no embedded JPEG preview or thumbnails)
+// - 16-bit integer data only
+//
+// the consumer of the DNG (darktable itself) does not need more than
+// this for the neural restore round-trip.
+
+#pragma once
+
+#include 
+#include 
+
+struct dt_image_t;
+
+// @brief Write a Bayer CFA mosaic as a DNG file.
+//
+// The output file contains a single IFD with PhotometricInterpretation=CFA. 
+// All DNG metadata required for darktable re-import is sourced from @p img: +// - BlackLevel[4] from img->raw_black_level_separate +// - WhiteLevel from img->raw_white_point +// - CFAPattern / CFARepeatDim from img->buf_dsc.filters (dcraw format) +// - AsShotNeutral from img->wb_coeffs (inverted) +// - ColorMatrix1 from img->adobe_XYZ_to_CAM +// - Make / Model / UniqueModel from img->camera_maker / camera_model +// +// @param filename output path (UTF-8) +// @param cfa Bayer mosaic (uint16, width * height samples, row-major) +// @param width image width in pixels (CFA samples per row) +// @param height image height in rows +// @param img source image, for DNG metadata +// @param exif_blob optional Exif blob to embed (NULL = skip) +// @param exif_len size of exif_blob in bytes +// @return 0 on success, non-zero on failure (file is removed on failure) +int dt_dng_write_cfa_bayer(const char *filename, + const uint16_t *cfa, + int width, + int height, + const struct dt_image_t *img, + const void *exif_blob, + int exif_len); + +// @brief Write a demosaicked 3-channel linear DNG. +// +// Used for sensors the bayer DNG round-trip can't handle (X-Trans, +// Foveon-like, pre-demosaicked raws). The output file has +// PhotometricInterpretation=LinearRaw, SamplesPerPixel=3, and carries +// the camera's ColorMatrix1 / AsShotNeutral / BlackLevel / WhiteLevel +// so darktable re-imports it as a raw-origin image and skips its own +// demosaic stage. +// +// Pixel data is interpreted as float-normalized camRGB in [0, ~1+] +// (1.0 = source sensor white point after black subtract). The writer +// scales that to uint16 using black = img->raw_black_level, +// white = img->raw_white_point, so the encoding matches what the +// corresponding raw CFA data would be in ADC units. 
+// +// @param filename output path (UTF-8) +// @param rgb interleaved 3ch float RGB, width*height*3 samples +// @param width image width in pixels +// @param height image height in pixels +// @param img source image, for DNG metadata + encoding range +// @param exif_blob optional Exif blob to embed (NULL = skip) +// @param exif_len size of exif_blob in bytes +// @return 0 on success, non-zero on failure (file removed on failure) +int dt_dng_write_linear(const char *filename, + const float *rgb, + int width, + int height, + const struct dt_image_t *img, + const void *exif_blob, + int exif_len); + +// clang-format off +// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py +// vim: shiftwidth=2 expandtab tabstop=2 cindent +// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified; +// clang-format on diff --git a/src/libs/neural_restore.c b/src/libs/neural_restore.c index e11360e960a5..07ca26834410 100644 --- a/src/libs/neural_restore.c +++ b/src/libs/neural_restore.c @@ -16,114 +16,195 @@ along with darktable. If not, see . */ -/* - neural restore — lighttable module for AI-based image restoration - - overview - -------- - provides two operations via a tabbed notebook UI: - - denoise: run an ONNX denoiser (e.g. NIND UNet) on selected images - - upscale: run an ONNX super-resolution model (e.g. BSRGAN) at 2x or 4x - - the module lives in the right panel (DT_UI_CONTAINER_PANEL_RIGHT_CENTER) - and is available in both lighttable and darkroom views. it is only built - when cmake option USE_AI=ON. - - architecture - ------------ - the core AI inference, tiling, and detail recovery logic lives in - src/common/ai/restore.c (the darktable_ai library). this module handles: - - 1. preview (interactive, single-image) - triggered by clicking the preview widget or switching tabs. 
- runs on a background GThread (_preview_thread): - - exports the selected image at reduced resolution via the - darktable export pipeline (captures fully-processed pixels) - - crops a patch matching the widget aspect ratio - - runs AI inference on the patch via dt_restore_run_patch() - - delivers before/after buffers to the main thread via g_idle_add - the preview widget draws a split before/after view with a draggable - divider. for denoise, DWT-filtered detail is pre-computed so the - detail recovery slider updates instantly without re-running inference. - cancellation uses an atomic sequence counter (preview_sequence): - the thread checks it at key points and discards stale results. - - 2. batch processing (multi-image) - runs as a dt_control_job on the user background queue. - for each selected image: - - exports via the darktable pipeline with a custom format module - that intercepts the pixel buffer in _ai_write_image() - - for denoise with detail recovery: buffers the full output via - dt_restore_process_tiled(), applies dt_restore_apply_detail_recovery(), - then writes TIFF - - for plain denoise/upscale: streams tiles directly to TIFF via - _process_tiled_tiff() to avoid buffering the full output - - output TIFF embeds the selected output ICC profile and source EXIF - - imports the result into the darktable library and groups it - with the source image - when the batch finishes, a single completion toast is shown via - dt_control_log (e.g. "neural denoise: 3 images processed"). the - module deliberately does NOT raise DT_SIGNAL_VIEWMANAGER_THUMBTABLE_ACTIVATE: - lighttable ignores that signal while darkroom / map / culling / - tethering / print_settings would swap the user's current view to - the freshly-imported image and clobber any in-progress edit. - - 3. 
output parameters (collapsible section) - - bit depth: 8/16/32-bit TIFF (default 16-bit) - - output ICC profile: pick any installed profile, or keep image settings - - preserve wide-gamut: when on, out-of-sRGB-gamut pixels pass through - the denoise model unchanged (wide-gamut colors preserved exactly); - when off, those pixels are clipped to sRGB and denoised like the rest - - add to catalog: auto-import output into darktable library - - output directory: supports darktable variables (e.g. $(FILE_FOLDER)) - - threading - --------- - - preview: background GThread, one at a time. joined before starting - a new preview and in gui_cleanup. stale results are discarded via - atomic preview_sequence counter - - batch: dt_control_job on DT_JOB_QUEUE_USER_BG. supports cancellation - via dt_control_job_get_state() - - ai_registry->lock: held briefly to read provider setting - - all GTK updates go through g_idle_add to stay on the main thread - - key structs - ----------- - dt_lib_neural_restore_t — module GUI state and preview data - dt_neural_job_t — batch processing job parameters - dt_neural_format_params_t — custom format module for export interception - dt_neural_preview_data_t — preview thread input parameters - dt_neural_preview_result_t — preview thread output (delivered via idle) - dt_neural_preview_capture_t — captures export pixels for preview - - preferences - ----------- - CONF_DETAIL_RECOVERY — detail recovery slider value - CONF_ACTIVE_PAGE — last active notebook tab - CONF_BIT_DEPTH — output TIFF bit depth (0=8, 1=16, 2=32) - CONF_ADD_CATALOG — auto-import output into library - CONF_OUTPUT_DIR — output directory pattern (supports variables) - CONF_ICC_TYPE — output ICC profile type (image settings by default) - CONF_ICC_FILE — filename for file-type ICC profiles - CONF_PRESERVE_WIDE_GAMUT — pass-through out-of-sRGB-gamut pixels during denoise - CONF_PREVIEW_EXPORT_SIZE — preview export longest-edge in pixels - CONF_PREVIEW_HEIGHT — preview widget height in 
pixels - CONF_EXPAND_OUTPUT — output section collapsed/expanded state -*/ +// neural restore — lighttable module for AI-based image restoration +// +// overview +// -------- +// provides three operations via a tabbed notebook UI: +// - raw denoise: run a RawNIND UtNet2 denoiser pre-demosaic (Bayer +// CFA) or post-demosaic (X-Trans / Foveon, via lin_rec2020). +// output is a DNG (CFA Bayer or LinearRaw) that re-imports into +// the user's full pipeline. +// - denoise: run an ONNX denoiser (e.g. NIND UNet) on the user's +// processed/exported image. output is a TIFF. +// - upscale: run an ONNX super-resolution model (e.g. BSRGAN) at +// 2x or 4x. output is a TIFF. +// +// the module lives in the right panel (DT_UI_CONTAINER_PANEL_RIGHT_CENTER) +// and is available in both lighttable and darkroom views. it is only built +// when cmake option USE_AI=ON. +// +// architecture +// ------------ +// the core AI inference, tiling, color management and DWT detail +// recovery logic lives in the darktable_ai library, split across: +// - src/common/ai/restore.{c,h} env/ctx lifecycle, model +// loaders, raw patch runners, +// shared user-pipe ROI bridge +// - src/common/ai/restore_rgb.{c,h} RGB denoise + upscale tiled +// driver, sRGB wrapper, shadow +// boost, DWT detail recovery +// - src/common/ai/restore_raw_bayer. RawNIND Bayer end-to-end +// {c,h} (batch + piped preview) +// - src/common/ai/restore_raw_linear. RawNIND linear / X-Trans +// {c,h} end-to-end +// +// this module handles UI, threading, output writing, and the per-task +// preview cache. +// +// 1. preview (interactive, single-image) +// triggered by clicking the picker thumbnail (which sets a +// "preview requested" flag) or by tab switching afterwards. +// two worker functions, dispatched via _preview_thread_dispatch: +// - _preview_thread: RGB denoise + upscale. exports the +// image at reduced resolution and runs +// dt_restore_run_patch() on a crop. +// - _preview_thread_raw: raw denoise. 
reads the mipmap CFA +// (Bayer) or runs darktable's demosaic +// pipe (X-Trans → lin_rec2020), feeds +// one inference tile, then runs the +// user's full pipe twice (with +// rawdenoise disabled) on the patched +// vs original CFA so before/after match +// a re-imported DNG. +// both deliver result buffers to the main thread via g_idle_add. +// the preview widget draws a split before/after view with a +// draggable divider. for RGB denoise, DWT-filtered detail is +// pre-computed once per inference so the strength slider can +// re-blend interactively without re-running the model. +// +// responsiveness: +// - tab switch routes through a 150 ms debounce +// (_schedule_preview_refresh) so rapid cycling collapses to +// one preview run +// - per-task preview cache keyed by (imgid, patch_center): on +// tab switch back to a previously-seen state we install the +// cached buffers and skip inference entirely +// - new triggers do NOT join the previous worker (would freeze +// the UI for the duration of an in-flight pipe call). the +// previous thread is detached; preview_inference_lock +// serialises inference so the new worker queues without +// fighting for the GPU; preview_sequence is bumped so any +// in-flight result is discarded by its idle callback when it +// eventually arrives. +// +// 2. batch processing (multi-image) +// runs as a dt_control_job on the user background queue. +// for each selected image, dispatches by task: +// - raw denoise (Bayer): pre-process the sensor CFA (black/WB/ +// pack), run tiled inference via dt_restore_raw_bayer(), un- +// process and re-mosaic, write a CFA DNG via +// dt_dng_write_cfa_bayer(). +// - raw denoise (X-Trans / linear): demosaic via the darktable +// pipe (rawprepare + highlights + demosaic only), run +// dt_restore_raw_linear(), write a LinearRaw DNG via +// dt_dng_write_linear(). 
+// - denoise / upscale (RGB): export via the darktable pipeline +// with a custom format module that intercepts the pixel +// buffer in _ai_write_image(). when strength < 100 (so the +// DWT detail recovery is active), buffer the full denoised +// output, apply dt_restore_apply_detail_recovery(), then +// write TIFF. otherwise stream tiles directly to TIFF via +// _process_tiled_tiff() to avoid buffering the full output. +// output TIFF embeds the selected output ICC profile and +// source EXIF. +// in all cases, the output is imported into the darktable library +// and grouped with the source image (when add-to-catalog is on). +// +// when the batch finishes, a single completion toast is shown via +// dt_control_log (e.g. "neural denoise: 3 images processed"). the +// module deliberately does NOT raise DT_SIGNAL_VIEWMANAGER_THUMBTABLE_ACTIVATE: +// lighttable ignores that signal while darkroom / map / culling / +// tethering / print_settings would swap the user's current view to +// the freshly-imported image and clobber any in-progress edit. +// +// 3. output parameters (collapsible section) +// common (all three tabs): +// - add to catalog: auto-import output into darktable library +// - output directory: supports darktable variables (e.g. $(FILE_FOLDER)) +// RGB tabs only (denoise / upscale — raw denoise writes DNG): +// - bit depth: 8/16/32-bit TIFF (default 16-bit) +// - output ICC profile: pick any installed profile, or keep +// image settings +// - preserve wide-gamut: when on, out-of-sRGB-gamut pixels pass +// through the denoise model unchanged (wide-gamut colors +// preserved exactly); when off, those pixels are clipped to +// sRGB and denoised like the rest +// +// threading +// --------- +// - preview: background GThread spawned per refresh. previous worker +// is detached (not joined) so the UI thread never blocks on tab +// switch. preview_inference_lock (GMutex) serialises the actual +// inference / pipe work so two workers don't fight for the GPU. 
+// stale workers are discarded via the atomic preview_sequence +// counter, checked at the dispatcher entry and at key points +// inside the worker; idle callbacks re-check before installing. +// gui_cleanup joins the latest worker and drains the main context +// to flush any pending idle callbacks before freeing module state. +// - batch: dt_control_job on DT_JOB_QUEUE_USER_BG. supports +// cancellation via dt_control_job_get_state(). +// - ai_registry->lock: held briefly to read provider setting. +// - all GTK updates go through g_idle_add to stay on the main thread. +// +// key structs +// ----------- +// dt_lib_neural_restore_t — module GUI state, preview cache, +// inference lock, debounce timer +// dt_neural_job_t — batch processing job parameters +// dt_neural_format_params_t — custom format module for export +// interception (RGB tabs) +// dt_neural_preview_data_t — preview thread input parameters +// (shared by both workers) +// dt_neural_preview_result_t — RGB-tab preview thread output +// dt_neural_preview_result_raw_t — raw-tab preview thread output +// (also carries the cached +// full-image buffers for re-pick) +// dt_neural_preview_capture_t — captures export pixels for the +// RGB-tab preview +// +// preferences +// ----------- +// CONF_STRENGTH — RGB denoise strength slider (0 = source, +// 100 = full denoise; internally mapped to +// a DWT-filtered residual recovery amount +// so lowering strength brings back texture +// without reintroducing noise-frequency +// content) +// CONF_RAW_STRENGTH — raw denoise strength slider (0 = source +// CFA, 100 = full denoised CFA; uniform +// blend at the re-mosaic sample level) +// CONF_ACTIVE_PAGE — last active notebook tab +// CONF_BIT_DEPTH — output TIFF bit depth (0=8, 1=16, 2=32) +// CONF_ADD_CATALOG — auto-import output into library +// CONF_OUTPUT_DIR — output directory pattern (supports variables) +// CONF_ICC_TYPE — output ICC profile type (image settings by default) +// CONF_ICC_FILE — filename for 
file-type ICC profiles +// CONF_PRESERVE_WIDE_GAMUT — pass-through out-of-sRGB-gamut pixels during denoise +// CONF_PREVIEW_EXPORT_SIZE — preview export longest-edge in pixels +// CONF_PREVIEW_HEIGHT — preview widget height in pixels +// CONF_EXPAND_OUTPUT — output section collapsed/expanded state #include "common/ai/restore.h" +#include "common/ai/restore_rgb.h" +#include "common/ai/restore_raw_bayer.h" +#include "common/ai/restore_raw_linear.h" #include "control/conf.h" #include "bauhaus/bauhaus.h" #include "common/act_on.h" #include "common/collection.h" #include "common/variables.h" #include "common/colorspaces.h" +#include "common/dng_writer.h" #include "common/exif.h" #include "common/film.h" #include "common/grouping.h" +#include "common/image_cache.h" +#include "common/mipmap_cache.h" #include "control/jobs/control_jobs.h" #include "control/signal.h" #include "develop/develop.h" +#include "develop/format.h" #include "dtgtk/button.h" #include "dtgtk/paint.h" #include "gui/accelerators.h" @@ -140,7 +221,8 @@ DT_MODULE(1) // warn the user when upscaled output exceeds this many megapixels #define LARGE_OUTPUT_MP 60.0 -#define CONF_DETAIL_RECOVERY "plugins/lighttable/neural_restore/detail_recovery" +#define CONF_STRENGTH "plugins/lighttable/neural_restore/strength" +#define CONF_RAW_STRENGTH "plugins/lighttable/neural_restore/raw_strength" #define CONF_ACTIVE_PAGE "plugins/lighttable/neural_restore/active_page" #define CONF_BIT_DEPTH "plugins/lighttable/neural_restore/bit_depth" #define CONF_ADD_CATALOG "plugins/lighttable/neural_restore/add_to_catalog" @@ -156,6 +238,8 @@ typedef enum dt_neural_task_t NEURAL_TASK_DENOISE = 0, NEURAL_TASK_UPSCALE_2X, NEURAL_TASK_UPSCALE_4X, + NEURAL_TASK_RAW_DENOISE, + NEURAL_TASK_COUNT, // sentinel; used to size per-task arrays } dt_neural_task_t; typedef enum dt_neural_bpp_t @@ -168,6 +252,7 @@ typedef enum dt_neural_bpp_t typedef struct dt_lib_neural_restore_t { GtkNotebook *notebook; + GtkWidget *raw_denoise_page; GtkWidget 
*denoise_page; GtkWidget *upscale_page; GtkWidget *scale_combo; @@ -177,6 +262,7 @@ typedef struct dt_lib_neural_restore_t char info_text_right[128]; char warning_text[128]; GtkWidget *recovery_slider; + GtkWidget *raw_strength_slider; dt_neural_task_t task; dt_restore_env_t *env; dt_restore_context_t *cached_ctx; @@ -212,6 +298,53 @@ typedef struct dt_lib_neural_restore_t unsigned char *export_cairo; int export_cairo_stride; + // raw denoise preview state — disjoint from the export-based preview + // above. cached per-image (CFA for Bayer, demosaicked lin_rec2020 for + // X-Trans / linear) so re-picking a new crop on the same image skips + // the slow load + demosaic; freed on imgid or sensor-type change. + dt_imgid_t preview_raw_imgid; + dt_restore_sensor_class_t preview_raw_sensor_class; + float *preview_full_cfa; // Bayer: full sensor (w*h floats) + int preview_full_w; + int preview_full_h; + float *preview_full_lin; // linear: 3ch interleaved (w*h*3 floats) + int preview_lin_w; + int preview_lin_h; + // per-refresh inference output (3ch interleaved at the displayed crop + // dims, both in lin_rec2020). cached so the strength slider can blend + // these without re-running inference. + float *preview_raw_src_rgb; + float *preview_raw_denoised_rgb; + int preview_raw_crop_w; + int preview_raw_crop_h; + // strength slider debounce: re-blend on UI thread 50 ms after the + // last value-changed emit. set/replaced via g_timeout_add. + guint preview_strength_timer; + // debounce timer for `_trigger_preview`. tab switches and rapid + // re-triggers schedule via this so a quick burst of switches + // doesn't pile up worker threads. set/replaced via g_timeout_add; + // 0 means no pending trigger + guint preview_trigger_timer; + // serializes the expensive inference / pipe work in worker threads. 
+ // a stale worker (sequence bumped while it was in flight) holds this + // until it finishes, so a freshly-spawned worker waits its turn + // rather than competing for the same GPU/CPU + GMutex preview_inference_lock; + // per-task cache of the last successful preview, keyed by + // (imgid, patch_center). on tab switch we look up the new task's + // slot; if it matches the current image+patch we install the + // cached buffers and skip inference entirely. invalidated on + // image change or patch move + struct { + gboolean valid; + dt_imgid_t imgid; + float patch_center[2]; + int crop_w, crop_h; + float *before_rgb; // 3ch interleaved, crop_w*crop_h*3 floats + float *after_rgb; // same + float *detail; // denoise: DWT luminance detail; NULL otherwise + } preview_cache[NEURAL_TASK_COUNT]; + // output settings (collapsible) dt_gui_collapsible_section_t cs_output; GtkWidget *bpp_combo; @@ -230,7 +363,12 @@ typedef struct dt_neural_job_t dt_job_t *control_job; dt_restore_context_t *ctx; int scale; - float detail_recovery; + float strength; + float raw_strength; // 0..1 blend for raw denoise + // raw denoise only: sensor class of the currently-loaded rawdenoise + // ctx. 
lets us reuse ctx across images of the same class in a batch + // and avoid reloading the ORT session for every image + dt_restore_sensor_class_t raw_ctx_sensor_class; dt_lib_module_t *self; dt_neural_bpp_t bpp; gboolean add_to_catalog; @@ -286,6 +424,14 @@ typedef struct dt_neural_preview_result_t int sequence; int width; int height; + // cache key components copied from the originating preview request: + // the worker may run after the user has switched tabs/images, so + // the idle callback uses these (not d->* live values) to decide + // whether the result is still applicable to the current state and + // to populate the per-task preview cache slot + dt_neural_task_t task; + dt_imgid_t imgid; + float patch_center[2]; } dt_neural_preview_result_t; const char *name(dt_lib_module_t *self) { return _("neural restore"); } @@ -428,6 +574,10 @@ static dt_restore_context_t *_load_for_task( { case NEURAL_TASK_DENOISE: return dt_restore_load_denoise(env); + case NEURAL_TASK_RAW_DENOISE: + // focus on bayer for now; auto-pick bayer vs linear per image + // sensor is a follow-up (see dt_restore_load_rawdenoise_linear) + return dt_restore_load_rawdenoise_bayer(env); case NEURAL_TASK_UPSCALE_2X: return dt_restore_load_upscale_x2(env); case NEURAL_TASK_UPSCALE_4X: @@ -437,6 +587,20 @@ static dt_restore_context_t *_load_for_task( } } +// short, untranslated task names for debug logs (use the localised +// labels at line ~1022 for user-visible strings) +static const char *_task_log_name(dt_neural_task_t task) +{ + switch(task) + { + case NEURAL_TASK_DENOISE: return "denoise"; + case NEURAL_TASK_RAW_DENOISE: return "raw denoise"; + case NEURAL_TASK_UPSCALE_2X: return "upscale 2x"; + case NEURAL_TASK_UPSCALE_4X: return "upscale 4x"; + default: return "?"; + } +} + // check if a model is available for a task static gboolean _task_model_available( dt_restore_env_t *env, @@ -446,6 +610,8 @@ static gboolean _task_model_available( { case NEURAL_TASK_DENOISE: return 
dt_restore_denoise_available(env); + case NEURAL_TASK_RAW_DENOISE: + return dt_restore_rawdenoise_available(env); default: return dt_restore_upscale_available(env); } @@ -579,7 +745,10 @@ static int _ai_write_image(dt_imageio_module_data_t *data, } } - const float recovery_alpha = job->detail_recovery / 100.0f; + // strength: 100 = full denoise (no recovery), 0 = source-like. + // DWT detail recovery runs whenever strength < 100, mixing + // (1 - strength/100) of the filtered residual back into the output. + const float recovery_alpha = 1.0f - job->strength / 100.0f; const gboolean need_buffer = (recovery_alpha > 0.0f && S == 1); int res; @@ -687,10 +856,11 @@ static const char *_task_suffix(dt_neural_task_t task) { switch(task) { - case NEURAL_TASK_DENOISE: return "_denoise"; - case NEURAL_TASK_UPSCALE_2X: return "_upscale-2x"; - case NEURAL_TASK_UPSCALE_4X: return "_upscale-4x"; - default: return "_restore"; + case NEURAL_TASK_DENOISE: return "_denoise"; + case NEURAL_TASK_RAW_DENOISE: return "_raw-denoise"; + case NEURAL_TASK_UPSCALE_2X: return "_upscale-2x"; + case NEURAL_TASK_UPSCALE_4X: return "_upscale-4x"; + default: return "_restore"; } } @@ -748,12 +918,237 @@ static void _job_cleanup(void *param) g_free(job); } +// raw-denoise batch path: bypasses the darktable export pipeline and +// goes directly from the source CFA mosaic to a denoised DNG. unlike +// RGB denoise/upscale, there is no demosaic / WB / tonemap / export +// involved — the raw pixels leave and re-enter the pipeline at the +// same stage, so the darktable re-import runs its normal pipeline on +// cleaner data. intentionally self-contained and free of interactions +// with the RGB denoise path +// ensure j->ctx is loaded with the rawdenoise variant matching the +// image's sensor class. reloads if needed; tracks the currently-loaded +// variant in j->raw_ctx_sensor_class so consecutive images of the same +// class don't pay the reload cost. 
returns 0 on success +static int _ensure_raw_ctx(dt_neural_job_t *j, + dt_restore_sensor_class_t cls) +{ + if(j->ctx && j->raw_ctx_sensor_class == cls) + return 0; + + if(j->ctx) + { + dt_restore_unref(j->ctx); + j->ctx = NULL; + } + const char *label = NULL; + switch(cls) + { + case DT_RESTORE_SENSOR_CLASS_BAYER: + j->ctx = dt_restore_load_rawdenoise_bayer(j->env); + label = _("bayer"); + break; + case DT_RESTORE_SENSOR_CLASS_XTRANS: + j->ctx = dt_restore_load_rawdenoise_xtrans(j->env); + label = _("x-trans"); + break; + case DT_RESTORE_SENSOR_CLASS_LINEAR: + j->ctx = dt_restore_load_rawdenoise_linear(j->env); + label = _("linear"); + break; + default: + return 1; + } + if(!j->ctx) + { + dt_control_log(_("failed to load AI raw denoise %s model"), label); + return 1; + } + j->raw_ctx_sensor_class = cls; + return 0; +} + +// bayer variant: source CFA (single-channel) -> denoise -> CFA DNG +static int _process_raw_denoise_bayer(dt_neural_job_t *j, + dt_imgid_t imgid, + const char *out_filename, + const char *src_path, + const dt_image_t *img_meta) +{ + dt_mipmap_buffer_t mbuf; + dt_mipmap_cache_get(&mbuf, imgid, DT_MIPMAP_FULL, + DT_MIPMAP_BLOCKING, 'r'); + if(!mbuf.buf) + { + dt_print(DT_DEBUG_AI, + "[neural_restore] failed to load raw mosaic for imgid %d", + imgid); + dt_mipmap_cache_release(&mbuf); + return 1; + } + + const int width = img_meta->width; + const int height = img_meta->height; + const size_t npix = (size_t)width * height; + float *cfa_in = g_try_malloc(npix * sizeof(float)); + if(!cfa_in) + { + dt_mipmap_cache_release(&mbuf); + return 1; + } + + if(img_meta->buf_dsc.datatype == TYPE_UINT16) + { + const uint16_t *const src = (const uint16_t *)mbuf.buf; + for(size_t i = 0; i < npix; i++) cfa_in[i] = (float)src[i]; + } + else if(img_meta->buf_dsc.datatype == TYPE_FLOAT) + { + memcpy(cfa_in, mbuf.buf, npix * sizeof(float)); + } + else + { + dt_control_log(_("raw denoise: unsupported raw datatype")); + g_free(cfa_in); + dt_mipmap_cache_release(&mbuf); + 
return 1; + } + + dt_mipmap_cache_release(&mbuf); + + uint16_t *cfa_out = g_try_malloc(npix * sizeof(uint16_t)); + if(!cfa_out) + { + g_free(cfa_in); + return 1; + } + + int res = dt_restore_raw_bayer(j->ctx, img_meta, cfa_in, + width, height, cfa_out, + j->raw_strength, + j->control_job); + g_free(cfa_in); + if(res != 0) + { + g_free(cfa_out); + return res; + } + + uint8_t *exif_blob = NULL; + const int exif_len = dt_exif_read_blob(&exif_blob, src_path, imgid, + FALSE, width, height, TRUE); + res = dt_dng_write_cfa_bayer(out_filename, cfa_out, + width, height, img_meta, + exif_blob, exif_len); + g_free(exif_blob); + g_free(cfa_out); + return res; +} + +// linear variant: darktable's demosaic runs inside raw_restore_linear, +// so there's no CFA buffer to hand in. output is a 3ch linear DNG +static int _process_raw_denoise_linear(dt_neural_job_t *j, + dt_imgid_t imgid, + const char *out_filename, + const char *src_path, + const dt_image_t *img_meta) +{ + float *rgb = NULL; + int w = 0, h = 0; + int res = dt_restore_raw_linear(j->ctx, imgid, &rgb, &w, &h, + j->raw_strength, j->control_job); + if(res != 0 || !rgb) + { + g_free(rgb); + return res ? res : 1; + } + + uint8_t *exif_blob = NULL; + const int exif_len = dt_exif_read_blob(&exif_blob, src_path, imgid, + FALSE, w, h, TRUE); + res = dt_dng_write_linear(out_filename, rgb, w, h, img_meta, + exif_blob, exif_len); + g_free(exif_blob); + dt_free_align(rgb); + return res; +} + +static int _process_raw_denoise_one(dt_neural_job_t *j, + dt_imgid_t imgid, + const char *out_filename, + const char *src_path) +{ + // force the raw to be loaded so buf_dsc.{filters,channels,datatype} + // are populated. 
for a fresh session, dt_image_cache_get alone may + // return a dt_image_t whose buf_dsc is zeroed because rawspeed has + // not been invoked on this id yet + dt_mipmap_buffer_t warmup; + dt_mipmap_cache_get(&warmup, imgid, DT_MIPMAP_FULL, + DT_MIPMAP_BLOCKING, 'r'); + const gboolean loaded = (warmup.buf != NULL); + dt_mipmap_cache_release(&warmup); + if(!loaded) + { + dt_print(DT_DEBUG_AI, + "[neural_restore] mipmap warmup failed for imgid %d", imgid); + dt_control_log(_("raw denoise: cannot load source image")); + return 1; + } + + const dt_image_t *cached = dt_image_cache_get(imgid, 'r'); + if(!cached) return 1; + dt_image_t img_meta = *cached; + dt_image_cache_read_release(cached); + + const uint32_t filters = img_meta.buf_dsc.filters; + const uint32_t channels = img_meta.buf_dsc.channels; + const uint32_t flags = img_meta.flags; + const dt_restore_sensor_class_t cls = dt_restore_classify_sensor(&img_meta); + + const char *cls_name + = (cls == DT_RESTORE_SENSOR_CLASS_BAYER) ? "bayer" + : (cls == DT_RESTORE_SENSOR_CLASS_XTRANS) ? "x-trans" + : (cls == DT_RESTORE_SENSOR_CLASS_LINEAR) ? "linear" + : "unsupported"; + dt_print(DT_DEBUG_AI, + "[neural_restore] imgid %d: flags=0x%x channels=%u " + "filters=0x%x (%s)", imgid, flags, channels, filters, + cls_name); + + if(cls == DT_RESTORE_SENSOR_CLASS_UNSUPPORTED) + { + dt_control_log(_("raw denoise: image is not a supported raw sensor format")); + return 1; + } + + if(_ensure_raw_ctx(j, cls)) return 1; + + switch(cls) + { + case DT_RESTORE_SENSOR_CLASS_BAYER: + return _process_raw_denoise_bayer(j, imgid, out_filename, + src_path, &img_meta); + case DT_RESTORE_SENSOR_CLASS_XTRANS: + // today: X-Trans runs through the linear pipeline. 
a future + // dedicated xtrans_v1 model with a different input format would + // get its own _process_raw_denoise_xtrans() here; the dispatch + // structure stays, just the target function swaps + return _process_raw_denoise_linear(j, imgid, out_filename, + src_path, &img_meta); + case DT_RESTORE_SENSOR_CLASS_LINEAR: + return _process_raw_denoise_linear(j, imgid, out_filename, + src_path, &img_meta); + default: + return 1; + } +} + static int32_t _process_job_run(dt_job_t *job) { dt_neural_job_t *j = dt_control_job_get_params(job); - const char *task_name = (j->task == NEURAL_TASK_DENOISE) - ? _("denoise") : _("upscale"); + const char *task_name = (j->task == NEURAL_TASK_DENOISE) ? _("denoise") + : (j->task == NEURAL_TASK_RAW_DENOISE) ? _("raw denoise") + : _("upscale"); char msg[256]; snprintf(msg, sizeof(msg), _("loading %s model..."), task_name); dt_control_job_set_progress_message(job, msg); @@ -863,16 +1258,20 @@ static int32_t _process_job_run(dt_job_t *job) } g_free(out_dir_resolved); - // find unique filename: base.tif, base_1.tif, ... + // raw denoise writes DNG; RGB denoise/upscale writes TIFF + const char *ext + = (j->task == NEURAL_TASK_RAW_DENOISE) ? "dng" : "tif"; + + // find unique filename: base., base_1., ... char filename[PATH_MAX]; - snprintf(filename, sizeof(filename), "%s.tif", base); + snprintf(filename, sizeof(filename), "%s.%s", base, ext); if(g_file_test(filename, G_FILE_TEST_EXISTS)) { gboolean found = FALSE; for(int s = 1; s < 10000; s++) { - snprintf(filename, sizeof(filename), "%s_%d.tif", base, s); + snprintf(filename, sizeof(filename), "%s_%d.%s", base, s, ext); if(!g_file_test(filename, G_FILE_TEST_EXISTS)) { found = TRUE; @@ -893,40 +1292,54 @@ static int32_t _process_job_run(dt_job_t *job) dt_print(DT_DEBUG_AI, "[neural_restore] processing imgid %d -> %s", imgid, filename); snprintf(msg, sizeof(msg), - (j->task == NEURAL_TASK_DENOISE) ? _("denoising image %d/%d...") - : (j->task == NEURAL_TASK_UPSCALE_2X) ? 
_("upscaling 2x image %d/%d...") - : _("upscaling 4x image %d/%d..."), + (j->task == NEURAL_TASK_DENOISE) ? _("denoising image %d/%d...") + : (j->task == NEURAL_TASK_RAW_DENOISE) ? _("raw denoising image %d/%d...") + : (j->task == NEURAL_TASK_UPSCALE_2X) ? _("upscaling 2x image %d/%d...") + : _("upscaling 4x image %d/%d..."), count + 1, total); dt_control_job_set_progress_message(job, msg); - const int export_err - = dt_imageio_export_with_flags(imgid, - filename, - &fmt, - (dt_imageio_module_data_t *)&fmt_params, - FALSE, // ignore_exif — pass EXIF to write_image - FALSE, // display_byteorder - TRUE, // high_quality - TRUE, // upscale - FALSE, // is_scaling - 1.0, // scale_factor - FALSE, // thumbnail_export - NULL, // filter - FALSE, // copy_metadata - FALSE, // export_masks - (j->icc_type == DT_COLORSPACE_NONE) - ? dt_colorspaces_get_work_profile(imgid)->type - : j->icc_type, - j->icc_filename, - DT_INTENT_PERCEPTUAL, - NULL, NULL, - count, total, NULL, -1); - - if(export_err) + int step_err = 0; + if(j->task == NEURAL_TASK_RAW_DENOISE) + { + step_err = _process_raw_denoise_one(j, imgid, filename, srcpath); + if(step_err) + dt_control_log(_("raw denoise failed for image %d"), imgid); + } + else + { + step_err = dt_imageio_export_with_flags( + imgid, + filename, + &fmt, + (dt_imageio_module_data_t *)&fmt_params, + FALSE, // ignore_exif — pass EXIF to write_image + FALSE, // display_byteorder + TRUE, // high_quality + TRUE, // upscale + FALSE, // is_scaling + 1.0, // scale_factor + FALSE, // thumbnail_export + NULL, // filter + FALSE, // copy_metadata + FALSE, // export_masks + (j->icc_type == DT_COLORSPACE_NONE) + ? 
dt_colorspaces_get_work_profile(imgid)->type + : j->icc_type, + j->icc_filename, + DT_INTENT_PERCEPTUAL, + NULL, NULL, + count, total, NULL, -1); + if(step_err) + { + dt_print(DT_DEBUG_AI, + "[neural_restore] export failed for imgid %d", imgid); + dt_control_log(_("neural restore: export failed")); + } + } + + if(step_err) { - dt_print(DT_DEBUG_AI, - "[neural_restore] export failed for imgid %d", imgid); - dt_control_log(_("neural restore: export failed")); dt_control_job_set_progress(job, (double)++count / total); continue; } @@ -1031,29 +1444,50 @@ static void _update_info_label(dt_lib_neural_restore_t *d) } } + // raw denoise: DNG variant batch will produce. Source of truth is + // preview_raw_sensor_class — buf_dsc.filters is zeroed until rawspeed + // decodes the image, so probing it here would misclassify an + // unloaded X-Trans RAF. both X-Trans and the linear-fallback class + // currently write LinearRaw DNG; only Bayer writes CFA DNG + if(d->task == NEURAL_TASK_RAW_DENOISE + && dt_is_valid_imgid(imgid) + && d->preview_raw_imgid == imgid) + { + const gboolean is_bayer_out + = (d->preview_raw_sensor_class == DT_RESTORE_SENSOR_CLASS_BAYER); + snprintf(d->info_text_left, sizeof(d->info_text_left), "%s", + is_bayer_out ? _("output: Bayer CFA DNG") + : _("output: LinearRaw DNG")); + } + // gamut note (informational, not a warning): reuse the same info // line as the upscale size display. for denoise, shows standalone - // in info_text_left; for upscale, appended to the size info - const dt_colorspaces_color_profile_type_t icc_type - = dt_conf_key_exists(CONF_ICC_TYPE) - ? dt_conf_get_int(CONF_ICC_TYPE) - : DT_COLORSPACE_NONE; - if(dt_image_has_wide_gamut_output_profile(imgid, icc_type)) - { - const gboolean preserve = dt_conf_key_exists(CONF_PRESERVE_WIDE_GAMUT) - ? dt_conf_get_bool(CONF_PRESERVE_WIDE_GAMUT) : TRUE; - const char *msg = (scale == 1 && preserve) - ? 
_("wide-gamut preserved, not denoised") - : _("wide-gamut clipped"); - if(d->info_text_right[0]) - { - const size_t used = strlen(d->info_text_right); - snprintf(d->info_text_right + used, sizeof(d->info_text_right) - used, - " · %s", msg); - } - else + // in info_text_left; for upscale, appended to the size info. not + // applicable to raw denoise — that path writes camRGB DNG without + // any sRGB wrapper, so there's no gamut clipping to warn about + if(d->task != NEURAL_TASK_RAW_DENOISE) + { + const dt_colorspaces_color_profile_type_t icc_type + = dt_conf_key_exists(CONF_ICC_TYPE) + ? dt_conf_get_int(CONF_ICC_TYPE) + : DT_COLORSPACE_NONE; + if(dt_image_has_wide_gamut_output_profile(imgid, icc_type)) { - snprintf(d->info_text_left, sizeof(d->info_text_left), "%s", msg); + const gboolean preserve = dt_conf_key_exists(CONF_PRESERVE_WIDE_GAMUT) + ? dt_conf_get_bool(CONF_PRESERVE_WIDE_GAMUT) : TRUE; + const char *msg = (scale == 1 && preserve) + ? _("wide-gamut preserved, not denoised") + : _("wide-gamut clipped"); + if(d->info_text_right[0]) + { + const size_t used = strlen(d->info_text_right); + snprintf(d->info_text_right + used, sizeof(d->info_text_right) - used, + " · %s", msg); + } + else + { + snprintf(d->info_text_left, sizeof(d->info_text_left), "%s", msg); + } } } @@ -1074,26 +1508,141 @@ static void _task_changed(dt_lib_neural_restore_t *d) gtk_widget_queue_draw(d->preview_area); } - // restore detail recovery slider from conf when switching to denoise, - // reset to 0 when switching away (upscale has no detail recovery). - // use _recovery_changing flag to avoid redundant conf writes from - // the slider's value-changed callback + // restore strength slider from conf when switching to a scale==1 + // task (denoise / raw denoise — both benefit from the strength + // knob), snap to 100 when switching to upscale (upscale has no + // strength semantics — see note below). 
use _strength_changing + // flag to avoid redundant conf writes from the slider's + // value-changed callback. + // raw denoise has no DWT analysis of exported preview pixels, so + // the slider is hidden there — but for DENOISE and UPSCALE we + // preserve the master behaviour (slider visible, value restored / + // reset). d->recovery_changing = TRUE; - if(d->task == NEURAL_TASK_DENOISE) + if(d->task == NEURAL_TASK_DENOISE || d->task == NEURAL_TASK_RAW_DENOISE) { - const float saved = dt_conf_get_float(CONF_DETAIL_RECOVERY); + const float saved = dt_conf_key_exists(CONF_STRENGTH) + ? dt_conf_get_float(CONF_STRENGTH) : 100.0f; dt_bauhaus_slider_set(d->recovery_slider, saved); } else { - dt_bauhaus_slider_set(d->recovery_slider, 0.0f); + dt_bauhaus_slider_set(d->recovery_slider, 100.0f); } + gtk_widget_set_visible(d->recovery_slider, + d->task != NEURAL_TASK_RAW_DENOISE); d->recovery_changing = FALSE; + // output settings that only apply to the RGB-export-based tasks: + // bit depth selects TIFF bits-per-pixel (raw denoise writes DNG), + // output ICC profile applies to the exported TIFF, and + // preserve-wide-gamut is part of the sRGB-gamma wrapper around the + // denoise-nind model. bpp + ICC apply to any TIFF-emitting task + // (denoise + upscale); preserve-wide-gamut only matters for denoise + // (upscale has no pixel-to-pixel correspondence to pass through). 
+ // raw denoise writes a DNG directly from the sensor-space inference + // result, so none of these knobs apply to it + const gboolean tiff_knobs_visible + = (d->task != NEURAL_TASK_RAW_DENOISE); + const gboolean wide_gamut_visible + = (d->task == NEURAL_TASK_DENOISE); + if(d->bpp_combo) + gtk_widget_set_visible(d->bpp_combo, tiff_knobs_visible); + if(d->profile_combo) + gtk_widget_set_visible(d->profile_combo, tiff_knobs_visible); + if(d->preserve_wide_gamut_toggle) + gtk_widget_set_visible(d->preserve_wide_gamut_toggle, wide_gamut_visible); + _update_info_label(d); _update_button_sensitivity(d); } +// per-task preview cache helpers +// +// the cache holds, per task, the buffers needed to redisplay the most +// recent successful preview without re-running inference. on tab switch +// we look up the new task's slot keyed by (imgid, patch_center). on +// hit we install the cached buffers and skip the worker; on miss we +// schedule one. invalidated wholesale on image / patch change +// +// for raw denoise, before_rgb / after_rgb hold the unblended source vs +// fully-denoised lin_rec2020 crops (= preview_raw_src_rgb / +// preview_raw_denoised_rgb). 
detail is always NULL there +// +// for RGB denoise + upscale, before_rgb / after_rgb hold the displayed +// preview_before / preview_after, and detail (denoise only) holds the +// DWT luminance residual used by the strength slider + +static void _preview_cache_free_slot(dt_lib_neural_restore_t *d, int task) +{ + g_free(d->preview_cache[task].before_rgb); + d->preview_cache[task].before_rgb = NULL; + g_free(d->preview_cache[task].after_rgb); + d->preview_cache[task].after_rgb = NULL; + dt_free_align(d->preview_cache[task].detail); + d->preview_cache[task].detail = NULL; + d->preview_cache[task].valid = FALSE; +} + +static void _preview_cache_invalidate_all(dt_lib_neural_restore_t *d) +{ + for(int t = 0; t < NEURAL_TASK_COUNT; t++) + _preview_cache_free_slot(d, t); +} + +static gboolean _preview_cache_hit(dt_lib_neural_restore_t *d, + dt_neural_task_t task, + dt_imgid_t imgid) +{ + if(task >= NEURAL_TASK_COUNT) return FALSE; + const __typeof__(d->preview_cache[0]) *e = &d->preview_cache[task]; + // exact match on patch_center (no fp tolerance: we store the exact + // value the worker received, so equality is reliable) + return e->valid + && e->imgid == imgid + && e->patch_center[0] == d->patch_center[0] + && e->patch_center[1] == d->patch_center[1]; +} + +// memcpy buffers into the cache slot for `task`. 
caller retains +// ownership of the source pointers (we duplicate) +static void _preview_cache_store(dt_lib_neural_restore_t *d, + dt_neural_task_t task, + dt_imgid_t imgid, + const float patch_center[2], + int crop_w, int crop_h, + const float *before, const float *after, + const float *detail) +{ + // task is an unsigned enum, no need for < 0 check + if(task >= NEURAL_TASK_COUNT) return; + if(crop_w <= 0 || crop_h <= 0 || !before || !after) return; + _preview_cache_free_slot(d, task); + const size_t n3 = (size_t)crop_w * crop_h * 3; + d->preview_cache[task].before_rgb = g_try_malloc(n3 * sizeof(float)); + d->preview_cache[task].after_rgb = g_try_malloc(n3 * sizeof(float)); + if(!d->preview_cache[task].before_rgb || !d->preview_cache[task].after_rgb) + { + _preview_cache_free_slot(d, task); + return; + } + memcpy(d->preview_cache[task].before_rgb, before, n3 * sizeof(float)); + memcpy(d->preview_cache[task].after_rgb, after, n3 * sizeof(float)); + if(detail) + { + const size_t n1 = (size_t)crop_w * crop_h; + d->preview_cache[task].detail = dt_alloc_align_float(n1); + if(d->preview_cache[task].detail) + memcpy(d->preview_cache[task].detail, detail, n1 * sizeof(float)); + } + d->preview_cache[task].imgid = imgid; + d->preview_cache[task].patch_center[0] = patch_center[0]; + d->preview_cache[task].patch_center[1] = patch_center[1]; + d->preview_cache[task].crop_w = crop_w; + d->preview_cache[task].crop_h = crop_h; + d->preview_cache[task].valid = TRUE; +} + // rebuild the "after" cairo surface from cached float buffers, applying // DWT-filtered detail recovery so that slider changes don't re-run inference static void _rebuild_cairo_after(dt_lib_neural_restore_t *d) @@ -1103,7 +1652,11 @@ static void _rebuild_cairo_after(dt_lib_neural_restore_t *d) const int w = d->preview_w; const int h = d->preview_h; const int stride = d->cairo_stride; - const float alpha = dt_conf_get_float(CONF_DETAIL_RECOVERY) / 100.0f; + // strength = 100 → no recovery (max denoise visible); 
strength = 0 + // → full filtered detail back. preview mirrors batch semantics. + const float strength = dt_conf_key_exists(CONF_STRENGTH) + ? dt_conf_get_float(CONF_STRENGTH) : 100.0f; + const float alpha = 1.0f - strength / 100.0f; const gboolean recover = (alpha > 0.0f && d->preview_detail); for(int y = 0; y < h; y++) @@ -1180,6 +1733,13 @@ static gboolean _preview_result_idle(gpointer data) res->width, res->height, stride); _rebuild_cairo_after(d); + // store this result in the per-task cache so a later tab switch + // back to the same task / image / patch skips inference + _preview_cache_store(d, res->task, res->imgid, res->patch_center, + res->width, res->height, + d->preview_before, d->preview_after, + d->preview_detail); + d->preview_ready = TRUE; d->preview_generating = FALSE; _update_button_sensitivity(d); @@ -1443,6 +2003,10 @@ static gpointer _preview_thread(gpointer data) result->sequence = pd->sequence; result->width = pw; result->height = ph; + result->task = pd->task; + result->imgid = pd->imgid; + result->patch_center[0] = pd->patch_center[0]; + result->patch_center[1] = pd->patch_center[1]; g_idle_add(_preview_result_idle, result); cleanup: @@ -1455,10 +2019,26 @@ static void _cancel_preview(dt_lib_module_t *self) dt_lib_neural_restore_t *d = (dt_lib_neural_restore_t *)self->data; d->preview_ready = FALSE; d->preview_generating = FALSE; + // bump sequence so any in-flight worker (and its idle callback) + // discards its result. we DO NOT join here — that would block the + // UI for the full duration of a running inference. the worker + // keeps running in the background and exits silently. but: we DO + // need to wait for it before freeing export_pixels below, since the + // worker may still be reading from them. 
take + release the + // inference lock as a synchronisation barrier — the worker holds + // it during the heavy work, so once we get it we know it's done + // touching shared buffers g_atomic_int_inc(&d->preview_sequence); + if(d->preview_trigger_timer) + { + g_source_remove(d->preview_trigger_timer); + d->preview_trigger_timer = 0; + } if(d->preview_thread) { - g_thread_join(d->preview_thread); + g_mutex_lock(&d->preview_inference_lock); + g_mutex_unlock(&d->preview_inference_lock); + g_thread_unref(d->preview_thread); d->preview_thread = NULL; } // invalidate cached export (image changed) @@ -1470,6 +2050,759 @@ static void _cancel_preview(dt_lib_module_t *self) gtk_widget_queue_draw(d->preview_area); } +// ============================================================================ +// raw denoise preview path. parallel to the export-based preview above: +// * pixel source: full-resolution CFA (Bayer) or demosaicked lin_rec2020 +// (X-Trans / linear), cached per image so re-picks reuse it +// * inference: dt_restore_raw_{bayer,linear}_preview returns 3ch +// source + 3ch denoised crops (both lin_rec2020) for the displayed +// region, using one fixed-size tile that matches the JIT-compiled +// batch session +// * strength slider: re-blends the cached src/denoised on the UI thread +// (microseconds) without touching the model — debounced to 50 ms so +// fast drags don't queue up redraws +// ============================================================================ + +#define RAW_PREVIEW_STRENGTH_DEBOUNCE_MS 50 + +typedef struct dt_neural_preview_result_raw_t +{ + dt_lib_module_t *self; + float *src_rgb; // crop_w * crop_h * 3, lin_rec2020 + float *denoised_rgb; // same dims, lin_rec2020 (gain-matched) + int width; + int height; + int sequence; + // optional: full-image buffers to install into the cache (NULL when + // the worker reused an already-cached buffer for this image). 
+ // ownership transfers to d on idle install + float *take_full_cfa; // Bayer; allocated with g_malloc + float *take_full_lin; // X-Trans/linear; allocated with dt_alloc_align + int full_w; + int full_h; + dt_imgid_t full_imgid; + dt_restore_sensor_class_t full_sensor_class; + // optional: picker thumbnail (4ch interleaved float) produced via + // dt_imageio_export_with_flags. matches whatever the user sees in + // darkroom — identical colours to the preview's before/after. NULL + // when we reused an already-cached export_pixels on d. + float *take_export_pixels; + int export_thumb_w; + int export_thumb_h; + // cache key (see comment on dt_neural_preview_result_t) + float patch_center[2]; +} dt_neural_preview_result_raw_t; + +// blend cached src + denoised crops at the given strength, write into +// preview_before/after (allocating fresh buffers), rebuild cairo, and +// queue a redraw. UI thread only. +static void _blend_raw_into_preview(dt_lib_neural_restore_t *d, + float strength) +{ + if(!d->preview_raw_src_rgb || !d->preview_raw_denoised_rgb) return; + const int w = d->preview_raw_crop_w; + const int h = d->preview_raw_crop_h; + if(w <= 0 || h <= 0) return; + + if(strength < 0.0f) strength = 0.0f; + if(strength > 1.0f) strength = 1.0f; + const float a = strength; + const float ia = 1.0f - strength; + + const size_t n3 = (size_t)w * h * 3; + + // preview_before stays at the source (split widget shows pre-denoise + // on one side, blended-strength on the other) + g_free(d->preview_before); + d->preview_before = g_malloc(n3 * sizeof(float)); + memcpy(d->preview_before, d->preview_raw_src_rgb, n3 * sizeof(float)); + + // preview_after = α · denoised + (1-α) · source, per channel + g_free(d->preview_after); + d->preview_after = g_malloc(n3 * sizeof(float)); + for(size_t i = 0; i < n3; i++) + d->preview_after[i] + = a * d->preview_raw_denoised_rgb[i] + + ia * d->preview_raw_src_rgb[i]; + + // detail-recovery DWT does not apply to raw (different pipeline + // 
position; would need its own analysis pass). leave NULL so + // _rebuild_cairo_after takes the no-recovery branch. + dt_free_align(d->preview_detail); + d->preview_detail = NULL; + + d->preview_w = w; + d->preview_h = h; + + // rebuild cached cairo surfaces + g_free(d->cairo_before); + g_free(d->cairo_after); + const int stride = cairo_format_stride_for_width(CAIRO_FORMAT_RGB24, w); + d->cairo_before = g_malloc(stride * h); + d->cairo_after = g_malloc(stride * h); + d->cairo_stride = stride; + _float_rgb_to_cairo(d->preview_before, d->cairo_before, w, h, stride); + _rebuild_cairo_after(d); + + d->preview_ready = TRUE; + gtk_widget_queue_draw(d->preview_area); +} + +// install a cached preview slot into the active preview buffers and +// rebuild cairo so the widget displays it. dispatches by task: raw +// denoise needs to repopulate preview_raw_src/denoised_rgb and re-blend +// at the current strength; RGB denoise / upscale just install +// preview_before/after/detail and rebuild the after surface +static void _install_cache_slot_raw(dt_lib_module_t *self, + dt_neural_task_t task) +{ + dt_lib_neural_restore_t *d = (dt_lib_neural_restore_t *)self->data; + const __typeof__(d->preview_cache[0]) *e = &d->preview_cache[task]; + if(!e->valid) return; + const size_t n3 = (size_t)e->crop_w * e->crop_h * 3; + g_free(d->preview_raw_src_rgb); + g_free(d->preview_raw_denoised_rgb); + d->preview_raw_src_rgb = g_malloc(n3 * sizeof(float)); + d->preview_raw_denoised_rgb = g_malloc(n3 * sizeof(float)); + memcpy(d->preview_raw_src_rgb, e->before_rgb, n3 * sizeof(float)); + memcpy(d->preview_raw_denoised_rgb, e->after_rgb, n3 * sizeof(float)); + d->preview_raw_crop_w = e->crop_w; + d->preview_raw_crop_h = e->crop_h; + const float strength + = dt_bauhaus_slider_get(d->raw_strength_slider) / 100.0f; + _blend_raw_into_preview(d, strength); +} + +static void _install_cache_slot_rgb(dt_lib_module_t *self, + dt_neural_task_t task) +{ + dt_lib_neural_restore_t *d = (dt_lib_neural_restore_t 
*)self->data; + const __typeof__(d->preview_cache[0]) *e = &d->preview_cache[task]; + if(!e->valid) return; + const size_t n3 = (size_t)e->crop_w * e->crop_h * 3; + g_free(d->preview_before); + g_free(d->preview_after); + dt_free_align(d->preview_detail); + d->preview_before = g_malloc(n3 * sizeof(float)); + d->preview_after = g_malloc(n3 * sizeof(float)); + memcpy(d->preview_before, e->before_rgb, n3 * sizeof(float)); + memcpy(d->preview_after, e->after_rgb, n3 * sizeof(float)); + d->preview_detail = NULL; + if(e->detail) + { + const size_t n1 = (size_t)e->crop_w * e->crop_h; + d->preview_detail = dt_alloc_align_float(n1); + if(d->preview_detail) + memcpy(d->preview_detail, e->detail, n1 * sizeof(float)); + } + d->preview_w = e->crop_w; + d->preview_h = e->crop_h; + // rebuild cairo surfaces + g_free(d->cairo_before); + g_free(d->cairo_after); + const int stride + = cairo_format_stride_for_width(CAIRO_FORMAT_RGB24, e->crop_w); + d->cairo_before = g_malloc(stride * e->crop_h); + d->cairo_after = g_malloc(stride * e->crop_h); + d->cairo_stride = stride; + _float_rgb_to_cairo(d->preview_before, d->cairo_before, + e->crop_w, e->crop_h, stride); + _rebuild_cairo_after(d); + d->preview_ready = TRUE; + gtk_widget_queue_draw(d->preview_area); +} + +static void _install_cache_slot(dt_lib_module_t *self, dt_neural_task_t task) +{ + if(task == NEURAL_TASK_RAW_DENOISE) _install_cache_slot_raw(self, task); + else _install_cache_slot_rgb(self, task); +} + +// debounced strength-slider re-blend. returns G_SOURCE_REMOVE so the +// timer fires once. 
+static gboolean _strength_blend_timer_cb(gpointer data) +{ + dt_lib_module_t *self = (dt_lib_module_t *)data; + dt_lib_neural_restore_t *d = (dt_lib_neural_restore_t *)self->data; + d->preview_strength_timer = 0; + + if(d->task != NEURAL_TASK_RAW_DENOISE) return G_SOURCE_REMOVE; + if(!d->preview_raw_src_rgb || !d->preview_raw_denoised_rgb) + return G_SOURCE_REMOVE; + + const float strength + = dt_bauhaus_slider_get(d->raw_strength_slider) / 100.0f; + _blend_raw_into_preview(d, strength); + return G_SOURCE_REMOVE; +} + +static void _schedule_raw_strength_reblend(dt_lib_module_t *self) +{ + dt_lib_neural_restore_t *d = (dt_lib_neural_restore_t *)self->data; + if(d->preview_strength_timer) + g_source_remove(d->preview_strength_timer); + d->preview_strength_timer + = g_timeout_add(RAW_PREVIEW_STRENGTH_DEBOUNCE_MS, + _strength_blend_timer_cb, self); +} + +// fired when the raw worker bails before producing a result, so the UI +// doesn't get stuck with preview_generating == TRUE forever. +static gboolean _preview_raw_failed_idle(gpointer data) +{ + dt_lib_module_t *self = (dt_lib_module_t *)data; + dt_lib_neural_restore_t *d = (dt_lib_neural_restore_t *)self->data; + d->preview_generating = FALSE; + _update_button_sensitivity(d); + gtk_widget_queue_draw(d->preview_area); + return G_SOURCE_REMOVE; +} + +static gboolean _preview_raw_result_idle(gpointer data) +{ + dt_neural_preview_result_raw_t *res + = (dt_neural_preview_result_raw_t *)data; + dt_lib_neural_restore_t *d + = (dt_lib_neural_restore_t *)res->self->data; + + // discard stale results + if(res->sequence != g_atomic_int_get(&d->preview_sequence)) + { + g_free(res->src_rgb); + g_free(res->denoised_rgb); + g_free(res->take_full_cfa); + dt_free_align(res->take_full_lin); + g_free(res->take_export_pixels); + g_free(res); + return G_SOURCE_REMOVE; + } + + // install the per-image full buffer if the worker freshly loaded one. 
+ // also (re)build the patch-picker thumbnail (export_pixels) from it — + // the raw path doesn't run a pipeline export so the picker needs us + // to synthesise a whole-image 4ch RGBA buffer. + gboolean refresh_thumbnail = FALSE; + if(res->take_full_cfa) + { + g_free(d->preview_full_cfa); + d->preview_full_cfa = res->take_full_cfa; + d->preview_full_w = res->full_w; + d->preview_full_h = res->full_h; + d->preview_raw_imgid = res->full_imgid; + d->preview_raw_sensor_class = res->full_sensor_class; + // free the other variant's cache (we switched sensor type) + dt_free_align(d->preview_full_lin); + d->preview_full_lin = NULL; + refresh_thumbnail = TRUE; + } + if(res->take_full_lin) + { + dt_free_align(d->preview_full_lin); + d->preview_full_lin = res->take_full_lin; + d->preview_lin_w = res->full_w; + d->preview_lin_h = res->full_h; + d->preview_raw_imgid = res->full_imgid; + d->preview_raw_sensor_class = res->full_sensor_class; + g_free(d->preview_full_cfa); + d->preview_full_cfa = NULL; + refresh_thumbnail = TRUE; + } + + // install the picker thumbnail when the worker produced a fresh + // export (triggered by new imgid / sensor-type change). matches + // exactly what the user sees in darkroom — same pipeline the RGB + // denoise preview uses for its picker thumbnail. 
+ if(res->take_export_pixels) + { + g_free(d->export_pixels); + g_free(d->export_cairo); + d->export_pixels = res->take_export_pixels; + d->export_w = res->export_thumb_w; + d->export_h = res->export_thumb_h; + d->export_cairo = NULL; // rebuilt on demand by picker + res->take_export_pixels = NULL; + } + (void)refresh_thumbnail; // legacy flag; export is handled above + + // install per-refresh inference output + g_free(d->preview_raw_src_rgb); + g_free(d->preview_raw_denoised_rgb); + d->preview_raw_src_rgb = res->src_rgb; + d->preview_raw_denoised_rgb = res->denoised_rgb; + d->preview_raw_crop_w = res->width; + d->preview_raw_crop_h = res->height; + + const float strength + = dt_bauhaus_slider_get(d->raw_strength_slider) / 100.0f; + _blend_raw_into_preview(d, strength); + + // preview_raw_sensor_class is now authoritative for this imgid; + // refresh the overlay so it shows the correct DNG output format + _update_info_label(d); + + // store unblended source / denoised in the cache. raw never has DWT + // detail; the strength slider blends src ↔ denoised on the fly via + // _blend_raw_into_preview, so the cache only needs the two anchors + _preview_cache_store(d, NEURAL_TASK_RAW_DENOISE, res->full_imgid, + res->patch_center, + res->width, res->height, + d->preview_raw_src_rgb, d->preview_raw_denoised_rgb, + NULL); + + d->preview_generating = FALSE; + _update_button_sensitivity(d); + g_free(res); + return G_SOURCE_REMOVE; +} + +static gpointer _preview_thread_raw(gpointer data) +{ + dt_neural_preview_data_t *pd = (dt_neural_preview_data_t *)data; + dt_lib_neural_restore_t *d = (dt_lib_neural_restore_t *)pd->self->data; + + + // 1. load source image metadata to determine sensor type. + // on a fresh session dt_image_cache_get returns img_meta with a + // zeroed buf_dsc until rawspeed has been invoked on this id. the + // batch path (_process_raw_denoise_one) does this same warmup + // BEFORE reading metadata for the same reason. 
+ if(pd->sequence != g_atomic_int_get(&d->preview_sequence)) goto cleanup; + + dt_mipmap_buffer_t warmup; + dt_mipmap_cache_get(&warmup, pd->imgid, DT_MIPMAP_FULL, + DT_MIPMAP_BLOCKING, 'r'); + const gboolean warm_loaded = (warmup.buf != NULL); + dt_mipmap_cache_release(&warmup); + if(!warm_loaded) + { + dt_print(DT_DEBUG_AI, + "[neural_restore] raw preview: mipmap warmup failed for imgid %d", + pd->imgid); + goto cleanup; + } + + const dt_image_t *cached = dt_image_cache_get(pd->imgid, 'r'); + if(!cached) goto cleanup; + dt_image_t img_meta = *cached; + dt_image_cache_read_release(cached); + + const uint32_t filters = img_meta.buf_dsc.filters; + const dt_restore_sensor_class_t cls = dt_restore_classify_sensor(&img_meta); + const gboolean is_xtrans = (cls == DT_RESTORE_SENSOR_CLASS_XTRANS); + if(cls != DT_RESTORE_SENSOR_CLASS_BAYER + && cls != DT_RESTORE_SENSOR_CLASS_XTRANS) + { + dt_print(DT_DEBUG_AI, + "[neural_restore] raw preview: imgid %d is not bayer/xtrans " + "(filters=0x%x class=%d)", + pd->imgid, filters, cls); + goto cleanup; + } + dt_print(DT_DEBUG_AI, + "[neural_restore] raw preview: imgid=%d %s patch=(%.3f,%.3f) " + "widget=%dx%d filters=0x%x", + pd->imgid, is_xtrans ? "x-trans" : "bayer", + pd->patch_center[0], pd->patch_center[1], + pd->preview_w, pd->preview_h, filters); + + // 2. ensure the right ctx is loaded (matches batch logic in + // _ensure_raw_ctx). reload if cached_task is wrong or if the + // cached sensor class doesn't match this image. 
+ dt_restore_context_t *ctx = NULL; + dt_pthread_mutex_lock(&d->ctx_lock); + { + const gboolean cached_is_raw_correct + = d->cached_ctx + && d->cached_task == NEURAL_TASK_RAW_DENOISE + && (cls == d->preview_raw_sensor_class); + if(!cached_is_raw_correct) + { + dt_restore_unref(d->cached_ctx); + switch(cls) + { + case DT_RESTORE_SENSOR_CLASS_BAYER: + d->cached_ctx = dt_restore_load_rawdenoise_bayer(pd->env); + break; + case DT_RESTORE_SENSOR_CLASS_XTRANS: + d->cached_ctx = dt_restore_load_rawdenoise_xtrans(pd->env); + break; + default: + d->cached_ctx = NULL; + break; + } + d->cached_task = NEURAL_TASK_RAW_DENOISE; + // mark cached sensor class so a follow-up preview matches; this + // does NOT update preview_raw_imgid because we may not have a + // fresh full-image buffer for this image yet + d->preview_raw_sensor_class = cls; + } + if(d->cached_ctx) ctx = dt_restore_ref(d->cached_ctx); + } + dt_pthread_mutex_unlock(&d->ctx_lock); + + if(!ctx) + { + dt_print(DT_DEBUG_AI, + "[neural_restore] raw preview: failed to load model"); + goto cleanup; + } + if(pd->sequence != g_atomic_int_get(&d->preview_sequence)) + { + dt_restore_unref(ctx); + goto cleanup; + } + + // 3. acquire the full-image buffer. reuse cache if it matches imgid + + // sensor type; otherwise load fresh and stage into result for the + // UI to install on idle. 
+ float *take_full_cfa = NULL; + float *take_full_lin = NULL; + int full_w = 0, full_h = 0; + const float *full_cfa_use = NULL; // borrowed pointer (cache or take_*) + const float *full_lin_use = NULL; + + const gboolean cache_matches + = d->preview_raw_imgid == pd->imgid + && d->preview_raw_sensor_class == cls + && ((is_xtrans && d->preview_full_lin) + || (!is_xtrans && d->preview_full_cfa)); + + if(cache_matches) + { + if(is_xtrans) + { + full_lin_use = d->preview_full_lin; + full_w = d->preview_lin_w; + full_h = d->preview_lin_h; + } + else + { + full_cfa_use = d->preview_full_cfa; + full_w = d->preview_full_w; + full_h = d->preview_full_h; + } + } + else if(is_xtrans) + { + if(dt_restore_raw_linear_prepare(pd->imgid, &take_full_lin, + &full_w, &full_h) != 0 + || !take_full_lin) + { + dt_restore_unref(ctx); + goto cleanup; + } + full_lin_use = take_full_lin; + } + else + { + // Bayer: read CFA from mipmap cache + dt_mipmap_buffer_t mbuf; + dt_mipmap_cache_get(&mbuf, pd->imgid, DT_MIPMAP_FULL, + DT_MIPMAP_BLOCKING, 'r'); + if(!mbuf.buf) + { + dt_mipmap_cache_release(&mbuf); + dt_restore_unref(ctx); + goto cleanup; + } + full_w = img_meta.width; + full_h = img_meta.height; + const size_t npix = (size_t)full_w * full_h; + take_full_cfa = g_try_malloc(npix * sizeof(float)); + if(!take_full_cfa) + { + dt_mipmap_cache_release(&mbuf); + dt_restore_unref(ctx); + goto cleanup; + } + if(img_meta.buf_dsc.datatype == TYPE_UINT16) + { + const uint16_t *src = (const uint16_t *)mbuf.buf; + for(size_t i = 0; i < npix; i++) take_full_cfa[i] = (float)src[i]; + } + else if(img_meta.buf_dsc.datatype == TYPE_FLOAT) + { + memcpy(take_full_cfa, mbuf.buf, npix * sizeof(float)); + } + else + { + dt_mipmap_cache_release(&mbuf); + g_free(take_full_cfa); + dt_restore_unref(ctx); + goto cleanup; + } + dt_mipmap_cache_release(&mbuf); + full_cfa_use = take_full_cfa; + } + + if(pd->sequence != g_atomic_int_get(&d->preview_sequence)) + { + g_free(take_full_cfa); + dt_free_align(take_full_lin); 
+ dt_restore_unref(ctx); + goto cleanup; + } + + // 3.5. refresh the picker-thumbnail export when we loaded a fresh + // full buffer (i.e. imgid / sensor-type changed). match the RGB + // preview path: dt_imageio_export_with_flags runs the user's + // full pipeline at ~1024 long edge, giving a display-accurate + // thumbnail whose colours match what the user sees in darkroom + // (and match our before/after ROI pipe outputs). + float *take_export_pixels = NULL; + int export_thumb_w = 0; + int export_thumb_h = 0; + if(!cache_matches) + { + dt_neural_preview_capture_t cap = {0}; + const int export_size = dt_conf_get_int(CONF_PREVIEW_EXPORT_SIZE); + cap.parent.max_width = export_size; + cap.parent.max_height = export_size; + + dt_imageio_module_format_t fmt = { + .mime = _ai_get_mime, + .levels = _ai_check_levels, + .bpp = _ai_check_bpp, + .write_image = _preview_capture_write_image}; + + const dt_colorspaces_color_profile_type_t cfg_type + = dt_conf_key_exists(CONF_ICC_TYPE) + ? dt_conf_get_int(CONF_ICC_TYPE) + : DT_COLORSPACE_NONE; + gchar *cfg_file = (cfg_type == DT_COLORSPACE_FILE) + ? dt_conf_get_string(CONF_ICC_FILE) + : NULL; + dt_imageio_export_with_flags( + pd->imgid, "unused", &fmt, + (dt_imageio_module_data_t *)&cap, + TRUE, // ignore_exif + FALSE, // display_byteorder + TRUE, // high_quality + FALSE, // upscale + FALSE, // is_scaling + 1.0, // scale_factor + FALSE, // thumbnail_export + NULL, // filter + FALSE, // copy_metadata + FALSE, // export_masks + (cfg_type == DT_COLORSPACE_NONE) + ? dt_colorspaces_get_work_profile(pd->imgid)->type + : cfg_type, + cfg_file, + DT_INTENT_PERCEPTUAL, + NULL, NULL, 1, 1, NULL, -1); + g_free(cfg_file); + + if(cap.pixels && cap.cap_w > 0 && cap.cap_h > 0) + { + take_export_pixels = cap.pixels; + export_thumb_w = cap.cap_w; + export_thumb_h = cap.cap_h; + } + } + + // 4. compute crop region. widget dims define the "100% preview" size, + // capped by the model's compiled tile size minus mandatory overlap. 
+ const int T = dt_restore_get_tile_size(ctx); + // Bayer model upscales 2x; linear is 1:1. so the maximum displayed + // crop in sensor pixels: + // bayer: 2*T - 4*overlap_packed = 2*T - 128 (for OVERLAP_PACKED=32) + // linear: T - 2*overlap_linear = T - 64 (for OVERLAP_LINEAR=32) + const int max_disp = is_xtrans ? (T - 64) : (2 * T - 128); + + // the raw buffer is always landscape (sensor layout), but the preview + // thumbnail the user clicks on is oriented per EXIF. un-rotate the + // widget dims + click position into sensor coords before picking the + // crop, otherwise portrait images end up sampling the wrong area + const dt_image_orientation_t ori = dt_image_orientation(&img_meta); + const gboolean swap_xy = (ori & ORIENTATION_SWAP_XY) != 0; + + int crop_w = MIN(swap_xy ? pd->preview_h : pd->preview_w, max_disp); + int crop_h = MIN(swap_xy ? pd->preview_w : pd->preview_h, max_disp); + // Bayer: snap to mod 2 (CFA grid) + if(!is_xtrans) + { + crop_w = (crop_w / 2) * 2; + crop_h = (crop_h / 2) * 2; + } + if(crop_w <= 0 || crop_h <= 0) + { + g_free(take_full_cfa); + dt_free_align(take_full_lin); + dt_restore_unref(ctx); + goto cleanup; + } + + // display-normalised click (u, v) -> sensor pixel, inverting whatever + // combination of swap/flip the flip iop will apply during display. + // matches dt_iop_flip:distort_backtransform semantics + const int disp_w = swap_xy ? full_h : full_w; + const int disp_h = swap_xy ? 
full_w : full_h; + float dx_disp = pd->patch_center[0] * disp_w; + float dy_disp = pd->patch_center[1] * disp_h; + float sx, sy; + if(swap_xy) { sx = dy_disp; sy = dx_disp; } + else { sx = dx_disp; sy = dy_disp; } + if(ori & ORIENTATION_FLIP_X) sx = (float)full_w - sx; + if(ori & ORIENTATION_FLIP_Y) sy = (float)full_h - sy; + + int crop_x = (int)sx - crop_w / 2; + int crop_y = (int)sy - crop_h / 2; + crop_x = CLAMP(crop_x, 0, full_w - crop_w); + crop_y = CLAMP(crop_y, 0, full_h - crop_h); + if(!is_xtrans) + { + crop_x = (crop_x / 2) * 2; + crop_y = (crop_y / 2) * 2; + } + + dt_print(DT_DEBUG_AI, + "[neural_restore] raw preview: full=%dx%d ori=0x%x " + "patch_center=(%.3f,%.3f) -> sensor=(%d,%d %dx%d) %s", + full_w, full_h, (unsigned)ori, + pd->patch_center[0], pd->patch_center[1], + crop_x, crop_y, crop_w, crop_h, + is_xtrans ? "linear" : "bayer"); + + // 5. inference + // Bayer path uses the _piped variant which runs darktable's full + // pixelpipe on both the original CFA and a denoised-patched CFA, so + // "before"/"after" match what the user would see after Process + + // re-import (same history stack, same filmic/tone curve, same output + // profile). Slower (~2-5 s for two pipes) but colour-accurate. + // Linear path still uses the simpler in-space blend for now. 
+ float *src_rgb = NULL; + float *denoised_rgb = NULL; + int actual_w = 0, actual_h = 0; + int err; + if(is_xtrans) + err = dt_restore_raw_linear_preview_piped(ctx, &img_meta, pd->imgid, + full_lin_use, + full_w, full_h, + crop_x, crop_y, + crop_w, crop_h, + &src_rgb, &denoised_rgb, + &actual_w, &actual_h); + else + err = dt_restore_raw_bayer_preview_piped(ctx, &img_meta, pd->imgid, + full_cfa_use, + full_w, full_h, + crop_x, crop_y, + crop_w, crop_h, + &src_rgb, &denoised_rgb, + &actual_w, &actual_h); + + dt_restore_unref(ctx); + + dt_print(DT_DEBUG_AI, + "[neural_restore] raw preview: inference returned err=%d " + "src=%p denoised=%p requested=%dx%d actual=%dx%d", + err, (void *)src_rgb, (void *)denoised_rgb, + crop_w, crop_h, actual_w, actual_h); + + if(err || !src_rgb || !denoised_rgb || actual_w <= 0 || actual_h <= 0) + { + g_free(src_rgb); + g_free(denoised_rgb); + g_free(take_full_cfa); + dt_free_align(take_full_lin); + goto cleanup; + } + + // 6. ship to UI thread. width/height carry the ACTUAL rendered dims + // from the pipe, which can be smaller than crop_w/crop_h when the + // user's history includes geometry-modifying modules (clipping, + // ashift, lens). downstream blend + cairo render must use these. 
+ dt_neural_preview_result_raw_t *res + = g_new0(dt_neural_preview_result_raw_t, 1); + res->self = pd->self; + res->src_rgb = src_rgb; + res->denoised_rgb = denoised_rgb; + res->width = actual_w; + res->height = actual_h; + res->sequence = pd->sequence; + res->take_full_cfa = take_full_cfa; + res->take_full_lin = take_full_lin; + res->full_w = full_w; + res->full_h = full_h; + res->full_imgid = pd->imgid; + res->full_sensor_class = cls; + res->patch_center[0] = pd->patch_center[0]; + res->patch_center[1] = pd->patch_center[1]; + res->take_export_pixels = take_export_pixels; + res->export_thumb_w = export_thumb_w; + res->export_thumb_h = export_thumb_h; + g_idle_add(_preview_raw_result_idle, res); + g_free(pd); + return NULL; + +cleanup: + // worker bailed before producing a result. clear preview_generating + // on the UI thread so the user can re-trigger and the button state + // reflects reality. only schedule when the sequence is still current + // (a stale bail means a newer trigger is already in flight). + if(pd->sequence == g_atomic_int_get(&d->preview_sequence)) + g_idle_add(_preview_raw_failed_idle, pd->self); + g_free(pd); + return NULL; +} + +// thread dispatcher: serialises the actual inference / pipe work via +// preview_inference_lock so that even when an old worker is still +// running, the new one queues up rather than fighting for the GPU. 
+// also re-checks the sequence after acquiring the lock — if the +// trigger that spawned us has already been superseded while we were +// waiting, drop on the floor without doing anything expensive +static gpointer _preview_thread(gpointer data); +static gpointer _preview_thread_raw(gpointer data); +static gpointer _preview_thread_dispatch(gpointer data) +{ + dt_neural_preview_data_t *pd = (dt_neural_preview_data_t *)data; + dt_lib_neural_restore_t *d = (dt_lib_neural_restore_t *)pd->self->data; + + g_mutex_lock(&d->preview_inference_lock); + + if(pd->sequence != g_atomic_int_get(&d->preview_sequence)) + { + g_mutex_unlock(&d->preview_inference_lock); + g_free(pd); + return NULL; + } + + gpointer res = (pd->task == NEURAL_TASK_RAW_DENOISE) + ? _preview_thread_raw(data) + : _preview_thread(data); + + g_mutex_unlock(&d->preview_inference_lock); + return res; +} + +// debounced trigger: rapid tab switches collapse to one preview run. +// the timer handle in d->preview_trigger_timer is replaced (and the +// previous one removed) so the trigger only fires after the user +// settles on a tab for `delay_ms` +static gboolean _trigger_preview_from_timer(gpointer user_data); +static void _trigger_preview(dt_lib_module_t *self); + +static void _schedule_preview_refresh(dt_lib_module_t *self, guint delay_ms) +{ + dt_lib_neural_restore_t *d = (dt_lib_neural_restore_t *)self->data; + if(!d->model_available || !d->preview_requested) return; + if(d->preview_trigger_timer) + g_source_remove(d->preview_trigger_timer); + d->preview_trigger_timer + = g_timeout_add(delay_ms, _trigger_preview_from_timer, self); +} + +static gboolean _trigger_preview_from_timer(gpointer user_data) +{ + dt_lib_module_t *self = (dt_lib_module_t *)user_data; + dt_lib_neural_restore_t *d = (dt_lib_neural_restore_t *)self->data; + d->preview_trigger_timer = 0; + _trigger_preview(self); + return G_SOURCE_REMOVE; +} + static void _trigger_preview(dt_lib_module_t *self) { dt_lib_neural_restore_t *d = 
(dt_lib_neural_restore_t *)self->data; @@ -1489,6 +2822,23 @@ static void _trigger_preview(dt_lib_module_t *self) if(!dt_is_valid_imgid(imgid)) return; + // per-task cache lookup: if we already have a result for this exact + // (task, imgid, patch_center) tuple, install it and skip the worker + if(_preview_cache_hit(d, d->task, imgid)) + { + dt_print(DT_DEBUG_AI, + "[neural_restore] preview cache hit for %s", + _task_log_name(d->task)); + _install_cache_slot(self, d->task); + d->preview_generating = FALSE; + _update_button_sensitivity(d); + // cache hit means preview_raw_sensor_class was set by an earlier + // worker run on this imgid — refresh the overlay so the DNG + // output label appears when _task_changed cleared it on tab switch + _update_info_label(d); + return; + } + // compute preview dimensions matching widget aspect ratio const int widget_w = gtk_widget_get_allocated_width(d->preview_area); const int widget_h = gtk_widget_get_allocated_height(d->preview_area); @@ -1518,36 +2868,49 @@ static void _trigger_preview(dt_lib_module_t *self) // borrow cached export pixels if available (re-pick scenario). // the pointer is valid for the thread's lifetime because - // _trigger_preview joins the previous thread before starting, - // and _cancel_preview joins before freeing export_pixels + // _cancel_preview joins before freeing export_pixels if(d->export_pixels) { pd->reuse_pixels = d->export_pixels; pd->reuse_w = d->export_w; pd->reuse_h = d->export_h; } - // join previous preview thread before starting a new one + // detach the previous worker (don't join — that would block the + // UI thread for the duration of the in-flight inference / pipe + // call). preview_inference_lock serialises the actual heavy work, + // and the bumped sequence + per-task cache lookup at the new + // worker's entry guarantees we don't run two inferences for the + // same target. gui_cleanup joins the latest worker for shutdown. 
if(d->preview_thread) { - g_thread_join(d->preview_thread); + g_thread_unref(d->preview_thread); d->preview_thread = NULL; } d->preview_thread = g_thread_new("neural_preview", - _preview_thread, pd); + _preview_thread_dispatch, pd); } -static void _update_task_from_ui(dt_lib_neural_restore_t *d) +// map notebook page index to task. pages are ordered in the notebook as: +// 0 = raw denoise, 1 = denoise, 2 = upscale (with scale_combo picking 2x/4x) +static dt_neural_task_t _task_from_page(dt_lib_neural_restore_t *d, int page) { - const int page = gtk_notebook_get_current_page(d->notebook); - if(page == 0) - d->task = NEURAL_TASK_DENOISE; - else + switch(page) { - const int scale_pos = dt_bauhaus_combobox_get(d->scale_combo); - d->task = (scale_pos == 1) ? NEURAL_TASK_UPSCALE_4X : NEURAL_TASK_UPSCALE_2X; + case 0: return NEURAL_TASK_RAW_DENOISE; + case 1: return NEURAL_TASK_DENOISE; + default: + { + const int scale_pos = dt_bauhaus_combobox_get(d->scale_combo); + return (scale_pos == 1) ? NEURAL_TASK_UPSCALE_4X : NEURAL_TASK_UPSCALE_2X; + } } } +static void _update_task_from_ui(dt_lib_neural_restore_t *d) +{ + d->task = _task_from_page(d, gtk_notebook_get_current_page(d->notebook)); +} + static void _notebook_page_changed(GtkNotebook *notebook, GtkWidget *page, guint page_num, @@ -1556,18 +2919,13 @@ static void _notebook_page_changed(GtkNotebook *notebook, dt_lib_neural_restore_t *d = (dt_lib_neural_restore_t *)self->data; // switch-page fires before the page changes, so use page_num - if(page_num == 0) - d->task = NEURAL_TASK_DENOISE; - else - { - const int scale_pos = dt_bauhaus_combobox_get(d->scale_combo); - d->task = (scale_pos == 1) ? 
NEURAL_TASK_UPSCALE_4X : NEURAL_TASK_UPSCALE_2X; - } + d->task = _task_from_page(d, page_num); dt_conf_set_int(CONF_ACTIVE_PAGE, page_num); _task_changed(d); + // debounced — rapid tab cycling won't pile up worker threads if(d->preview_requested) - _trigger_preview(self); + _schedule_preview_refresh(self, 150); } static void _scale_combo_changed(GtkWidget *widget, dt_lib_module_t *self) @@ -1576,14 +2934,14 @@ static void _scale_combo_changed(GtkWidget *widget, dt_lib_module_t *self) _update_task_from_ui(d); _task_changed(d); if(d->preview_requested) - _trigger_preview(self); + _schedule_preview_refresh(self, 150); } static void _recovery_slider_changed(GtkWidget *widget, dt_lib_module_t *self) { dt_lib_neural_restore_t *d = (dt_lib_neural_restore_t *)self->data; if(d->recovery_changing) return; - dt_conf_set_float(CONF_DETAIL_RECOVERY, dt_bauhaus_slider_get(d->recovery_slider)); + dt_conf_set_float(CONF_STRENGTH, dt_bauhaus_slider_get(d->recovery_slider)); if(d->preview_ready) { _rebuild_cairo_after(d); @@ -1591,6 +2949,22 @@ static void _recovery_slider_changed(GtkWidget *widget, dt_lib_module_t *self) } } +static void _raw_strength_slider_changed(GtkWidget *widget, + dt_lib_module_t *self) +{ + dt_lib_neural_restore_t *d = (dt_lib_neural_restore_t *)self->data; + dt_conf_set_float(CONF_RAW_STRENGTH, + dt_bauhaus_slider_get(d->raw_strength_slider)); + + // live preview re-blend (debounced). only fires when raw denoise tab + // is active and a preview is already cached — otherwise the model + // hasn't run yet and there's nothing to blend. 
+ if(d->task == NEURAL_TASK_RAW_DENOISE + && d->preview_raw_src_rgb + && d->preview_raw_denoised_rgb) + _schedule_raw_strength_reblend(self); +} + static void _process_clicked(GtkWidget *widget, gpointer user_data) { dt_lib_module_t *self = (dt_lib_module_t *)user_data; @@ -1608,7 +2982,12 @@ static void _process_clicked(GtkWidget *widget, gpointer user_data) job_data->env = d->env; job_data->images = images; job_data->scale = _task_scale(d->task); - job_data->detail_recovery = dt_conf_get_float(CONF_DETAIL_RECOVERY); + job_data->strength = dt_conf_key_exists(CONF_STRENGTH) + ? dt_conf_get_float(CONF_STRENGTH) : 100.0f; + // raw denoise strength: 0..100 in UI, 0..1 for the pipeline + job_data->raw_strength = dt_conf_key_exists(CONF_RAW_STRENGTH) + ? dt_conf_get_float(CONF_RAW_STRENGTH) / 100.0f + : 1.0f; job_data->bpp = dt_conf_key_exists(CONF_BIT_DEPTH) ? dt_conf_get_int(CONF_BIT_DEPTH) : NEURAL_BPP_16; @@ -1769,14 +3148,48 @@ static gboolean _preview_draw(GtkWidget *widget, cairo_t *cr, dt_lib_module_t *s cairo_paint(cr); cairo_restore(cr); - // draw crop rectangle at current patch_center - const int task_scale = _task_scale(d->task); - const int crop_w = w / task_scale; - const int crop_h = h / task_scale; - const double rw = (double)crop_w / d->export_w * img_w; - const double rh = (double)crop_h / d->export_h * img_h; - const double rx = ox + d->patch_center[0] * img_w - rw / 2.0; - const double ry = oy + d->patch_center[1] * img_h - rh / 2.0; + // draw crop rectangle at current patch_center. 
the rectangle + // represents the actual displayed patch, not the full tile size: + // * RGB denoise / upscale: the preview runs on the exported image + // at 1:1, so crop pixels == widget pixels, measured against the + // thumbnail's own resolution (export_w == source for this path) + // * raw denoise: preview_raw_crop_* carries the pipe's backbuf + // dims — already in display orientation (post-flip iop) — so we + // scale them against the display-oriented thumbnail (export_w / + // export_h), NOT the sensor buffer (preview_full_*). using the + // sensor dims would draw the wrong rectangle size on portrait + // images where sensor and display axes swap + double rw, rh; + if(d->task == NEURAL_TASK_RAW_DENOISE + && d->preview_raw_crop_w > 0 && d->preview_raw_crop_h > 0 + && d->export_w > 0 && d->export_h > 0) + { + rw = (double)d->preview_raw_crop_w / d->export_w * img_w; + rh = (double)d->preview_raw_crop_h / d->export_h * img_h; + } + else + { + const int task_scale = _task_scale(d->task); + const int crop_w = w / task_scale; + const int crop_h = h / task_scale; + rw = (double)crop_w / d->export_w * img_w; + rh = (double)crop_h / d->export_h * img_h; + } + // compute rectangle top-left. for RGB denoise / upscale, the click + // / motion handlers already clamp patch_center with inner margins, + // so the rectangle always fits — match master by NOT pushing here. + // for raw denoise, patch_center is free-range in [0, 1]; push the + // rectangle inward so it still fits (matches the worker's CLAMP on + // crop_x / crop_y and keeps the picker visually honest). 
+ double rx = ox + d->patch_center[0] * img_w - rw / 2.0; + double ry = oy + d->patch_center[1] * img_h - rh / 2.0; + if(d->task == NEURAL_TASK_RAW_DENOISE) + { + if(rx < ox) rx = ox; + if(ry < oy) ry = oy; + if(rx + rw > ox + img_w) rx = ox + img_w - rw; + if(ry + rh > oy + img_h) ry = oy + img_h - rh; + } // dim area outside the rectangle cairo_save(cr); @@ -1974,18 +3387,35 @@ static gboolean _preview_button_press(GtkWidget *widget, double img_w, img_h, ox, oy; _picking_geometry(d, w, h, &img_w, &img_h, &ox, &oy); - // convert click to normalized image coords, clamped so - // the crop rectangle stays within the image - const int task_scale = _task_scale(d->task); - const float half_w = (float)w / task_scale / (2.0f * d->export_w); - const float half_h = (float)h / task_scale / (2.0f * d->export_h); + // convert click to normalized image coords. + // * RGB denoise / upscale: clamp so the crop rectangle stays + // within the image (master behaviour — the export-based preview + // needs this because the worker and draw share a single + // export_w-based scale). + // * raw denoise: no inner-margin clamp — user can pick corners. + // the raw worker CLAMPs crop_x/y, and _preview_draw pushes the + // rectangle inward to match. 
const float nx = (float)((ex - ox) / img_w); const float ny = (float)((ey - oy) / img_h); if(nx < 0.0f || nx > 1.0f || ny < 0.0f || ny > 1.0f) return TRUE; - d->patch_center[0] = CLAMP(nx, half_w, 1.0f - half_w); - d->patch_center[1] = CLAMP(ny, half_h, 1.0f - half_h); + if(d->task == NEURAL_TASK_RAW_DENOISE) + { + d->patch_center[0] = CLAMP(nx, 0.0f, 1.0f); + d->patch_center[1] = CLAMP(ny, 0.0f, 1.0f); + } + else + { + const int task_scale = _task_scale(d->task); + const float half_w = (float)w / task_scale / (2.0f * d->export_w); + const float half_h = (float)h / task_scale / (2.0f * d->export_h); + d->patch_center[0] = CLAMP(nx, half_w, 1.0f - half_w); + d->patch_center[1] = CLAMP(ny, half_h, 1.0f - half_h); + } + + // patch moved — every cached preview is now stale (different crop) + _preview_cache_invalidate_all(d); // exit picking mode d->picking_thumbnail = FALSE; @@ -2057,16 +3487,24 @@ static gboolean _preview_motion(GtkWidget *widget, double img_w, img_h, ox, oy; _picking_geometry(d, w, h, &img_w, &img_h, &ox, &oy); - // clamp so the crop rectangle stays within the image - const int task_scale = _task_scale(d->task); - const float half_w = (float)w / task_scale / (2.0f * d->export_w); - const float half_h = (float)h / task_scale / (2.0f * d->export_h); - const float nx = CLAMP((float)((ex - ox) / img_w), - half_w, 1.0f - half_w); - const float ny = CLAMP((float)((ey - oy) / img_h), - half_h, 1.0f - half_h); - d->patch_center[0] = nx; - d->patch_center[1] = ny; + // motion follows the cursor, with clamping based on task: + // * RGB denoise / upscale: inner-margin clamp (master behaviour) + // * raw denoise: free-range in [0, 1] so corners are reachable + const float rx = (float)((ex - ox) / img_w); + const float ry = (float)((ey - oy) / img_h); + if(d->task == NEURAL_TASK_RAW_DENOISE) + { + d->patch_center[0] = CLAMP(rx, 0.0f, 1.0f); + d->patch_center[1] = CLAMP(ry, 0.0f, 1.0f); + } + else + { + const int task_scale = _task_scale(d->task); + const float 
half_w = (float)w / task_scale / (2.0f * d->export_w); + const float half_h = (float)h / task_scale / (2.0f * d->export_h); + d->patch_center[0] = CLAMP(rx, half_w, 1.0f - half_w); + d->patch_center[1] = CLAMP(ry, half_h, 1.0f - half_h); + } gtk_widget_queue_draw(widget); return TRUE; } @@ -2127,6 +3565,7 @@ static void _selection_changed_callback(gpointer instance, dt_lib_module_t *self dt_lib_neural_restore_t *d = (dt_lib_neural_restore_t *)self->data; d->preview_requested = FALSE; _cancel_preview(self); + _preview_cache_invalidate_all(d); _update_info_label(d); _update_button_sensitivity(d); } @@ -2136,6 +3575,7 @@ static void _image_changed_callback(gpointer instance, dt_lib_module_t *self) dt_lib_neural_restore_t *d = (dt_lib_neural_restore_t *)self->data; d->preview_requested = FALSE; _cancel_preview(self); + _preview_cache_invalidate_all(d); _update_info_label(d); _update_button_sensitivity(d); } @@ -2252,6 +3692,7 @@ void gui_init(dt_lib_module_t *self) d->env = dt_restore_env_init(); d->processing_images = g_hash_table_new(g_direct_hash, g_direct_equal); dt_pthread_mutex_init(&d->ctx_lock, NULL); + g_mutex_init(&d->preview_inference_lock); d->split_pos = 0.5f; // notebook tabs (denoise / upscale) @@ -2260,21 +3701,46 @@ void gui_init(dt_lib_module_t *self) dt_action_define(DT_ACTION(self), NULL, N_("page"), GTK_WIDGET(d->notebook), ¬ebook_def); - d->denoise_page = dt_ui_notebook_page(d->notebook, N_("denoise"), - _("AI denoising")); - d->upscale_page = dt_ui_notebook_page(d->notebook, N_("upscale"), - _("AI upscaling")); - - // denoise page: detail recovery slider - const float saved_recovery = dt_conf_get_float(CONF_DETAIL_RECOVERY); + // raw denoise sits first: it runs earliest in the denoise workflow + // (before demosaic-stage processing). 
bayer / linear variant selection + // is driven by the active "rawdenoise" model rather than a UI toggle + d->raw_denoise_page = dt_ui_notebook_page(d->notebook, N_("raw denoise"), + _("AI raw denoising")); + d->denoise_page = dt_ui_notebook_page(d->notebook, N_("denoise"), + _("AI denoising")); + d->upscale_page = dt_ui_notebook_page(d->notebook, N_("upscale"), + _("AI upscaling")); + + // raw denoise page: strength slider. 100 = full model output, + // 0 = unchanged source CFA, linear blend in raw ADC space + const float saved_raw_strength = dt_conf_key_exists(CONF_RAW_STRENGTH) + ? dt_conf_get_float(CONF_RAW_STRENGTH) : 100.0f; + d->raw_strength_slider = dt_bauhaus_slider_new_action(DT_ACTION(self), + 0.0f, 100.0f, 1.0f, + saved_raw_strength, 0); + dt_bauhaus_widget_set_label(d->raw_strength_slider, NULL, N_("strength")); + dt_bauhaus_slider_set_format(d->raw_strength_slider, "%"); + gtk_widget_set_tooltip_text(d->raw_strength_slider, + _("blend between the source CFA (0%) and " + "the denoised output (100%)")); + g_signal_connect(G_OBJECT(d->raw_strength_slider), "value-changed", + G_CALLBACK(_raw_strength_slider_changed), self); + dt_gui_box_add(d->raw_denoise_page, d->raw_strength_slider); + + // denoise page: strength slider. 100 = full denoise, 0 = source-like. + // dialing below 100 brings DWT-filtered texture back without + // reintroducing the noise-frequency content. + const float saved_strength = dt_conf_key_exists(CONF_STRENGTH) + ? 
dt_conf_get_float(CONF_STRENGTH) : 100.0f; d->recovery_slider = dt_bauhaus_slider_new_action(DT_ACTION(self), 0.0f, 100.0f, 1.0f, - saved_recovery, 0); - dt_bauhaus_widget_set_label(d->recovery_slider, NULL, N_("detail recovery")); + saved_strength, 0); + dt_bauhaus_widget_set_label(d->recovery_slider, NULL, N_("strength")); dt_bauhaus_slider_set_format(d->recovery_slider, "%"); gtk_widget_set_tooltip_text(d->recovery_slider, - _("recover fine texture lost during denoising " - "while suppressing noise")); + _("100% applies the full AI model output; " + "lower values bring back luminance texture " + "and grain while keeping color noise suppressed")); g_signal_connect(G_OBJECT(d->recovery_slider), "value-changed", G_CALLBACK(_recovery_slider_changed), self); dt_gui_box_add(d->denoise_page, d->recovery_slider); @@ -2470,8 +3936,10 @@ void gui_init(dt_lib_module_t *self) DT_CONTROL_SIGNAL_HANDLE(DT_SIGNAL_DEVELOP_IMAGE_CHANGED, _image_changed_callback); DT_CONTROL_SIGNAL_HANDLE(DT_SIGNAL_AI_MODELS_CHANGED, _ai_models_changed_callback); - _update_info_label(d); - _update_button_sensitivity(d); + // sync per-task widget visibility for the initially-active tab. + // _task_changed does detail-slider + output-knobs visibility and + // info/button state — safe to call here after all widgets exist. + _task_changed(d); } void gui_cleanup(dt_lib_module_t *self) @@ -2484,13 +3952,28 @@ void gui_cleanup(dt_lib_module_t *self) if(d) { - // signal preview thread to exit and wait for it + // cancel any pending debounced trigger before tearing down state + if(d->preview_trigger_timer) + { + g_source_remove(d->preview_trigger_timer); + d->preview_trigger_timer = 0; + } + // signal preview thread to exit and wait for it. 
join blocks + // here (unlike _cancel_preview during runtime, where we can't + // afford to freeze the UI) — happens once on shutdown only g_atomic_int_inc(&d->preview_sequence); if(d->preview_thread) { g_thread_join(d->preview_thread); d->preview_thread = NULL; } + // any worker idle callbacks queued just before the join may still + // fire after this point. they check sequence and discard, but they + // dereference `d` to do that — drain the main context once so they + // run while `d` is still alive + while(g_main_context_pending(NULL)) + g_main_context_iteration(NULL, FALSE); + g_mutex_clear(&d->preview_inference_lock); g_free(d->preview_before); g_free(d->preview_after); @@ -2499,6 +3982,18 @@ void gui_cleanup(dt_lib_module_t *self) g_free(d->cairo_after); g_free(d->export_pixels); g_free(d->export_cairo); + + // raw denoise preview cache + if(d->preview_strength_timer) + { + g_source_remove(d->preview_strength_timer); + d->preview_strength_timer = 0; + } + g_free(d->preview_full_cfa); + dt_free_align(d->preview_full_lin); + g_free(d->preview_raw_src_rgb); + g_free(d->preview_raw_denoised_rgb); + _preview_cache_invalidate_all(d); if(d->processing_images) g_hash_table_destroy(d->processing_images); dt_restore_unref(d->cached_ctx); diff --git a/src/tests/unittests/ai/test_ai_backend.c b/src/tests/unittests/ai/test_ai_backend.c index d226ad1bcd89..c9df71e0b68c 100644 --- a/src/tests/unittests/ai/test_ai_backend.c +++ b/src/tests/unittests/ai/test_ai_backend.c @@ -332,7 +332,7 @@ static void test_load_opt_levels(void **state) // DT_AI_OPT_BASIC dt_ai_context_t *ctx_basic = dt_ai_load_model_ext(env, "test-multiply", NULL, - DT_AI_PROVIDER_CPU, DT_AI_OPT_BASIC, NULL, 0); + DT_AI_PROVIDER_CPU, DT_AI_OPT_BASIC, NULL, 0, 0); assert_non_null(ctx_basic); // verify inference still works with basic optimization @@ -348,7 +348,7 @@ static void test_load_opt_levels(void **state) // DT_AI_OPT_DISABLED dt_ai_context_t *ctx_none = dt_ai_load_model_ext(env, "test-multiply", 
NULL, - DT_AI_PROVIDER_CPU, DT_AI_OPT_DISABLED, NULL, 0); + DT_AI_PROVIDER_CPU, DT_AI_OPT_DISABLED, NULL, 0, 0); assert_non_null(ctx_none); dt_ai_unload_model(ctx_none); } From 2291c1e3c15f4e1a1126e225506bf75fe6cf9153 Mon Sep 17 00:00:00 2001 From: Andrii Ryzhkov Date: Fri, 24 Apr 2026 08:16:36 +0200 Subject: [PATCH 2/9] Promote neural restore output as group leader when source leads --- src/libs/neural_restore.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/libs/neural_restore.c b/src/libs/neural_restore.c index 07ca26834410..17eb52dedf51 100644 --- a/src/libs/neural_restore.c +++ b/src/libs/neural_restore.c @@ -843,7 +843,17 @@ static void _import_image(const char *filename, dt_imgid_t source_imgid) { dt_print(DT_DEBUG_AI, "[neural_restore] imported imgid=%d: %s", newid, filename); if(dt_is_valid_imgid(source_imgid)) + { dt_grouping_add_to_group(source_imgid, newid); + // promote the output as group leader, but only when the source + // was the current leader — preserves any manually-set leader the + // user deliberately chose + const dt_image_t *src = dt_image_cache_get(source_imgid, 'r'); + const gboolean source_is_leader = src && src->group_id == source_imgid; + dt_image_cache_read_release(src); + if(source_is_leader) + dt_grouping_change_representative(newid); + } // refresh the collection so the new image appears in the thumb grid dt_collection_update_query(darktable.collection, DT_COLLECTION_CHANGE_RELOAD, From 6f37d5106327a38d185e765705959a0644918034 Mon Sep 17 00:00:00 2001 From: Andrii Ryzhkov Date: Sat, 25 Apr 2026 09:50:13 +0200 Subject: [PATCH 3/9] DNG writer: advertise visible area via ACTIVEAREA tag --- src/common/dng_writer.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/src/common/dng_writer.c b/src/common/dng_writer.c index 9c537f56c7a4..d26c6d27181e 100644 --- a/src/common/dng_writer.c +++ b/src/common/dng_writer.c @@ -177,13 +177,23 @@ int dt_dng_write_cfa_bayer(const char 
*filename, } } - // default scale / crop - // DefaultScale=1,1; DefaultCropOrigin=0,0; DefaultCropSize=W,H. - // this keeps the importer from applying any additional framing; - // darktable's crop module handles downstream framing + // advertise the visible region inside the full raw buffer; without + // these tags the importer renders the optical-black margins too + const int crop_x = (img->crop_x > 0) ? img->crop_x : 0; + const int crop_y = (img->crop_y > 0) ? img->crop_y : 0; + const int vis_w = (img->p_width > 0 && img->p_width <= width - crop_x) + ? img->p_width : (width - crop_x); + const int vis_h = (img->p_height > 0 && img->p_height <= height - crop_y) + ? img->p_height : (height - crop_y); + + const uint32_t active_area[4] = { + (uint32_t)crop_y, (uint32_t)crop_x, + (uint32_t)(crop_y + vis_h), (uint32_t)(crop_x + vis_w), + }; const float default_scale[2] = { 1.0f, 1.0f }; const float default_crop_origin[2] = { 0.0f, 0.0f }; - const float default_crop_size[2] = { (float)width, (float)height }; + const float default_crop_size[2] = { (float)vis_w, (float)vis_h }; + TIFFSetField(tif, TIFFTAG_ACTIVEAREA, active_area); TIFFSetField(tif, TIFFTAG_DEFAULTSCALE, default_scale); TIFFSetField(tif, TIFFTAG_DEFAULTCROPORIGIN, default_crop_origin); TIFFSetField(tif, TIFFTAG_DEFAULTCROPSIZE, default_crop_size); @@ -316,10 +326,15 @@ int dt_dng_write_linear(const char *filename, } } - // default scale / crop (full frame, no inset) + // linear DNG: buffer is already at visible dims (post-demosaic); + // ACTIVEAREA covers the full buffer, no margin to crop + const uint32_t active_area[4] = { + 0, 0, (uint32_t)height, (uint32_t)width, + }; const float default_scale[2] = { 1.0f, 1.0f }; const float default_crop_origin[2] = { 0.0f, 0.0f }; const float default_crop_size[2] = { (float)width, (float)height }; + TIFFSetField(tif, TIFFTAG_ACTIVEAREA, active_area); TIFFSetField(tif, TIFFTAG_DEFAULTSCALE, default_scale); TIFFSetField(tif, TIFFTAG_DEFAULTCROPORIGIN, 
default_crop_origin); TIFFSetField(tif, TIFFTAG_DEFAULTCROPSIZE, default_crop_size); From f9afb2ad38e13437f3191831e799ee70df8dbc65 Mon Sep 17 00:00:00 2001 From: Andrii Ryzhkov Date: Sat, 25 Apr 2026 20:39:46 +0200 Subject: [PATCH 4/9] AI raw denoise: blend tile overlaps to hide per-tile gain seams --- src/common/ai/restore_common.h | 34 +++++ src/common/ai/restore_raw_bayer.c | 206 +++++++++++++++++++++++++---- src/common/ai/restore_raw_linear.c | 206 ++++++++++++++++++++++++++--- 3 files changed, 402 insertions(+), 44 deletions(-) diff --git a/src/common/ai/restore_common.h b/src/common/ai/restore_common.h index 2a5138b6ac4d..1da0b2ee3219 100644 --- a/src/common/ai/restore_common.h +++ b/src/common/ai/restore_common.h @@ -229,6 +229,40 @@ static inline int _mirror_in_range(int i, int lo, int hi) return lo + _mirror(i - lo, n); } +// tile overlap blending weights: each tile contributes ax·ay; adjacent +// tiles' ramps sum to 1, so strip accumulators recover the blended value +// with no per-pixel division. 
seam = 2*sensor_O wide, centered on the +// core boundary; returns 1.0 outside the seam (pure interior) + +static inline float _seam_ramp(int d, int sensor_O) +{ + return ((float)d + 0.5f) / (float)(2 * sensor_O); +} + +static inline float _seam_ax(int sc, + int px_base, int px_end, + int sensor_O, + gboolean has_left, gboolean has_right) +{ + if(has_left && sc < px_base + sensor_O) + return _seam_ramp(sc - (px_base - sensor_O), sensor_O); + if(has_right && sc >= px_end - sensor_O) + return 1.0f - _seam_ramp(sc - (px_end - sensor_O), sensor_O); + return 1.0f; +} + +static inline float _seam_ay(int sr, + int py_base, int py_end, + int sensor_O, + gboolean has_top, gboolean has_bot) +{ + if(has_top && sr < py_base + sensor_O) + return _seam_ramp(sr - (py_base - sensor_O), sensor_O); + if(has_bot && sr >= py_end - sensor_O) + return 1.0f - _seam_ramp(sr - (py_end - sensor_O), sensor_O); + return 1.0f; +} + // clang-format off // modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py // vim: shiftwidth=2 expandtab tabstop=2 cindent diff --git a/src/common/ai/restore_raw_bayer.c b/src/common/ai/restore_raw_bayer.c index 91cff804936b..0f0d76cc294e 100644 --- a/src/common/ai/restore_raw_bayer.c +++ b/src/common/ai/restore_raw_bayer.c @@ -400,8 +400,34 @@ retry:; int res = 0; int tile_count = 0; + // overlap blending: at each tile boundary 2 (4 at corners) tiles emit + // ax·ay-weighted contributions whose ramps sum to 1. only the 2*sensor_O- + // wide seam regions accumulate; pure interior hard-writes. h-strips own + // corners. 
memory ~4 MB live, independent of image size + const int sensor_O = 2 * O; + const int hstrip_h = 2 * sensor_O; + + // h_strip_top = seam between (ty-1) and ty: built by ty-1 as bot, flushed by ty + float *h_strip_top = NULL; + int h_strip_top_sy0 = 0; + for(int ty = 0; ty < rows && res == 0; ty++) { + const gboolean has_top = ty > 0; + const gboolean has_bot = ty < rows - 1; + + float *h_strip_bot = NULL; + int h_strip_bot_sy0 = 0; + if(has_bot) + { + h_strip_bot = g_try_malloc0((size_t)width * hstrip_h * sizeof(float)); + if(!h_strip_bot) { res = 1; break; } + } + + // v_strip_left = seam (tx-1)↔tx: rotated in from tx-1's right, flushed by tx + float *v_strip_left = NULL; + int v_strip_left_sx0 = 0, v_strip_left_sy0 = 0, v_strip_left_h = 0; + for(int tx = 0; tx < cols && res == 0; tx++) { if(control_job @@ -416,8 +442,6 @@ retry:; const int px_base = tx * step; const int py_end = (py_base + step > Hh) ? Hh : py_base + step; const int px_end = (px_base + step > Wh) ? Wh : px_base + step; - const int core_h = py_end - py_base; - const int core_w = px_end - px_base; // build 4ch input at packed half-res (T x T). geometry picks // the right origin and mirror-reflection bounds based on @@ -472,6 +496,9 @@ retry:; T, next_T); g_free(tile_in); g_free(tile_out); + g_free(h_strip_top); + g_free(h_strip_bot); + g_free(v_strip_left); T = next_T; goto retry; } @@ -519,49 +546,176 @@ retry:; in_mean, out_mean, (double)gain); } - // re-mosaic the core-valid region and un-preprocess - // model output dims: 2T x 2T (sensor pixels) for T x T packed tile. 
- // core valid region in model output starts at (2*O, 2*O) and spans - // (2*core_h) x (2*core_w) sensor pixels - const int core_sh = 2 * core_h; // sensor height of core - const int core_sw = 2 * core_w; - for(int dy = 0; dy < core_sh; dy++) + const gboolean has_left = tx > 0; + const gboolean has_right = tx < cols - 1; + const int sensor_py_base = y0 + 2 * py_base; + const int sensor_py_end = y0 + 2 * py_end; + const int sensor_px_base = x0 + 2 * px_base; + const int sensor_px_end = x0 + 2 * px_end; + + // cores edge-to-edge in y → one shared h_strip_bot origin per row + if(has_bot && tx == 0) h_strip_bot_sy0 = sensor_py_end - sensor_O; + + // v-strip excludes top/bot corners (h-strips own them) → y extent = pure interior + float *v_strip_right = NULL; + int v_strip_right_sx0 = 0, v_strip_right_sy0 = 0, v_strip_right_h = 0; + if(has_right) { - const int r = y0 + 2 * py_base + dy; // sensor row - const int my = 2 * O + dy; // model-output row - const size_t row_off = (size_t)my * tile_out_w; - for(int dx = 0; dx < core_sw; dx++) + v_strip_right_sx0 = sensor_px_end - sensor_O; + v_strip_right_sy0 = sensor_py_base + (has_top ? sensor_O : 0); + const int v_y_end = sensor_py_end - (has_bot ? sensor_O : 0); + v_strip_right_h = v_y_end - v_strip_right_sy0; + if(v_strip_right_h > 0) { - const int c = x0 + 2 * px_base + dx; // sensor col - const int mx = 2 * O + dx; + v_strip_right = g_try_malloc0((size_t)(2 * sensor_O) + * v_strip_right_h * sizeof(float)); + if(!v_strip_right) { res = 1; break; } + } + } - const int ch = FC(r, c, filters); // 0=R, 1=G, 2=B - const float model_val - = tile_out[(size_t)ch * tile_out_plane + row_off + mx]; + // extended extent = core ± seam where a neighbor exists; matches model-output validity + const int ext_y0 = has_top ? sensor_py_base - sensor_O : sensor_py_base; + const int ext_y1 = has_bot ? sensor_py_end + sensor_O : sensor_py_end; + const int ext_x0 = has_left ? 
sensor_px_base - sensor_O : sensor_px_base; + const int ext_x1 = has_right? sensor_px_end + sensor_O : sensor_px_end; - // reverse WB + normalisation → raw ADC + for(int sr = ext_y0; sr < ext_y1; sr++) + { + const int my = 2 * O + (sr - sensor_py_base); + const float ay = _seam_ay(sr, sensor_py_base, sensor_py_end, + sensor_O, has_top, has_bot); + const gboolean in_horiz_seam = (ay < 1.0f); + const size_t mo_row = (size_t)my * tile_out_w; + + float *h_strip = NULL; + int h_strip_sy0 = 0; + if(in_horiz_seam) + { + if(has_top && sr < sensor_py_base + sensor_O) + { + h_strip = h_strip_top; + h_strip_sy0 = h_strip_top_sy0; + } + else if(has_bot && sr >= sensor_py_end - sensor_O) + { + h_strip = h_strip_bot; + h_strip_sy0 = h_strip_bot_sy0; + } + } + const size_t h_strip_row_off = h_strip + ? (size_t)(sr - h_strip_sy0) * width : 0; + + for(int sc = ext_x0; sc < ext_x1; sc++) + { + const int mx = 2 * O + (sc - sensor_px_base); + const float ax = _seam_ax(sc, sensor_px_base, sensor_px_end, + sensor_O, has_left, has_right); + const gboolean in_vert_seam = (ax < 1.0f); + + const int ch = FC(sr, sc, filters); // 0=R, 1=G, 2=B + const float model_val + = tile_out[(size_t)ch * tile_out_plane + mo_row + mx]; const float raw_val - = _bayer_remosaic_raw(r, c, ch, model_val, &prep); + = _bayer_remosaic_raw(sr, sc, ch, model_val, &prep); - // strength blend: α=1 → denoised, α=0 → source CFA - const size_t pidx = (size_t)r * width + c; + const size_t pidx = (size_t)sr * width + sc; const float blended = alpha * raw_val + inv_alpha * cfa_in[pidx]; - const float clipped - = blended < 0.0f ? 0.0f - : (blended > clip_max ? 
clip_max : blended); - cfa_out[pidx] = (uint16_t)(clipped + 0.5f); + if(in_horiz_seam) + { + // h-strip owns corners too; weight ax·ay (other 3 tiles complete the sum) + if(h_strip) + h_strip[h_strip_row_off + sc] += ax * ay * blended; + } + else if(in_vert_seam) + { + float *v_strip = NULL; + int v_sx0 = 0, v_sy0 = 0; + if(has_left && sc < sensor_px_base + sensor_O) + { + v_strip = v_strip_left; + v_sx0 = v_strip_left_sx0; v_sy0 = v_strip_left_sy0; + } + else if(has_right && sc >= sensor_px_end - sensor_O) + { + v_strip = v_strip_right; + v_sx0 = v_strip_right_sx0; v_sy0 = v_strip_right_sy0; + } + if(v_strip) + { + const size_t vidx + = (size_t)(sr - v_sy0) * (2 * sensor_O) + (sc - v_sx0); + v_strip[vidx] += ax * blended; + } + } + else + { + const float clipped + = blended < 0.0f ? 0.0f + : (blended > clip_max ? clip_max : blended); + cfa_out[pidx] = (uint16_t)(clipped + 0.5f); + } } } + // tx-1 + tx ramps now sum to 1; strip = final value, flush + free + if(v_strip_left) + { + for(int sr = v_strip_left_sy0; + sr < v_strip_left_sy0 + v_strip_left_h; sr++) + { + const size_t vrow = (size_t)(sr - v_strip_left_sy0) * (2 * sensor_O); + for(int dxs = 0; dxs < 2 * sensor_O; dxs++) + { + const int sc = v_strip_left_sx0 + dxs; + const float v = v_strip_left[vrow + dxs]; + const float clipped + = v < 0.0f ? 0.0f : (v > clip_max ? clip_max : v); + cfa_out[(size_t)sr * width + sc] = (uint16_t)(clipped + 0.5f); + } + } + g_free(v_strip_left); + } + v_strip_left = v_strip_right; + v_strip_left_sx0 = v_strip_right_sx0; + v_strip_left_sy0 = v_strip_right_sy0; + v_strip_left_h = v_strip_right_h; + tile_count++; if(control_job) dt_control_job_set_progress(control_job, (double)tile_count / total_tiles); } + + // defensive: should be NULL after last col, free in case of mid-row break + g_free(v_strip_left); + v_strip_left = NULL; + + // ramps sum to 1, flush. 
clamp sc to working columns — outside cells + // were never written and would overwrite the cfa_in margin copy + if(h_strip_top) + { + for(int sr = h_strip_top_sy0; sr < h_strip_top_sy0 + hstrip_h; sr++) + { + const size_t hrow = (size_t)(sr - h_strip_top_sy0) * width; + for(int sc = x0; sc < x0 + 2 * Wh; sc++) + { + const float v = h_strip_top[hrow + sc]; + const float clipped + = v < 0.0f ? 0.0f : (v > clip_max ? clip_max : v); + cfa_out[(size_t)sr * width + sc] = (uint16_t)(clipped + 0.5f); + } + } + g_free(h_strip_top); + } + h_strip_top = h_strip_bot; + h_strip_top_sy0 = h_strip_bot_sy0; } + // last row never allocates a bottom strip — defensive free + g_free(h_strip_top); + g_free(tile_in); g_free(tile_out); diff --git a/src/common/ai/restore_raw_linear.c b/src/common/ai/restore_raw_linear.c index 23a63ea378ef..5a6d1b8ece04 100644 --- a/src/common/ai/restore_raw_linear.c +++ b/src/common/ai/restore_raw_linear.c @@ -548,8 +548,31 @@ retry:; int res = 0; int tile_count = 0; + // overlap blending — see restore_raw_bayer.c for the scheme. + // sensor_O = O (1:1 with input), strips are 3-ch planar matching rgb_out + const int sensor_O = O; + const int hstrip_h = 2 * sensor_O; + const size_t hstrip_chan = (size_t)w * hstrip_h; // floats per channel + + float *h_strip_top = NULL; + int h_strip_top_sy0 = 0; + for(int ty = 0; ty < rows && res == 0; ty++) { + const gboolean has_top = ty > 0; + const gboolean has_bot = ty < rows - 1; + + float *h_strip_bot = NULL; + int h_strip_bot_sy0 = 0; + if(has_bot) + { + h_strip_bot = g_try_malloc0(hstrip_chan * 3 * sizeof(float)); + if(!h_strip_bot) { res = 1; break; } + } + + float *v_strip_left = NULL; + int v_strip_left_sx0 = 0, v_strip_left_sy0 = 0, v_strip_left_h = 0; + for(int tx = 0; tx < cols && res == 0; tx++) { if(control_job @@ -564,8 +587,6 @@ retry:; const int x_base = tx * step; const int y_end = (y_base + step > h) ? h : y_base + step; const int x_end = (x_base + step > w) ? 
w : x_base + step; - const int core_h = y_end - y_base; - const int core_w = x_end - x_base; // extract T x T tile with mirror-pad at boundaries, planar for(int dy = 0; dy < T; dy++) @@ -596,6 +617,9 @@ retry:; T, next_T); g_free(tile_in); g_free(tile_out); + g_free(h_strip_top); + g_free(h_strip_bot); + g_free(v_strip_left); T = next_T; goto retry; } @@ -621,38 +645,184 @@ retry:; gain_ch[0], gain_ch[1], gain_ch[2]); } - // blend: write (α·denoised + (1-α)·source) per channel into - // the core-valid region. rgb_out was pre-filled with rgb_src - // so overlap gaps stay as source - for(int dy = 0; dy < core_h; dy++) + const gboolean has_left = tx > 0; + const gboolean has_right = tx < cols - 1; + const int sensor_py_base = y_base; + const int sensor_py_end = y_end; + const int sensor_px_base = x_base; + const int sensor_px_end = x_end; + + if(has_bot && tx == 0) h_strip_bot_sy0 = sensor_py_end - sensor_O; + + float *v_strip_right = NULL; + int v_strip_right_sx0 = 0, v_strip_right_sy0 = 0, v_strip_right_h = 0; + if(has_right) + { + v_strip_right_sx0 = sensor_px_end - sensor_O; + v_strip_right_sy0 = sensor_py_base + (has_top ? sensor_O : 0); + const int v_y_end = sensor_py_end - (has_bot ? sensor_O : 0); + v_strip_right_h = v_y_end - v_strip_right_sy0; + if(v_strip_right_h > 0) + { + v_strip_right = g_try_malloc0((size_t)(2 * sensor_O) + * v_strip_right_h * 3 * sizeof(float)); + if(!v_strip_right) { res = 1; break; } + } + } + + const int ext_y0 = has_top ? sensor_py_base - sensor_O : sensor_py_base; + const int ext_y1 = has_bot ? sensor_py_end + sensor_O : sensor_py_end; + const int ext_x0 = has_left ? sensor_px_base - sensor_O : sensor_px_base; + const int ext_x1 = has_right? 
sensor_px_end + sensor_O : sensor_px_end; + + for(int sr = ext_y0; sr < ext_y1; sr++) { - const int y = y_base + dy; - const int my = O + dy; - for(int dx = 0; dx < core_w; dx++) + const int my = O + (sr - sensor_py_base); + const float ay = _seam_ay(sr, sensor_py_base, sensor_py_end, + sensor_O, has_top, has_bot); + const gboolean in_horiz_seam = (ay < 1.0f); + + float *h_strip = NULL; + int h_strip_sy0 = 0; + if(in_horiz_seam) + { + if(has_top && sr < sensor_py_base + sensor_O) + { + h_strip = h_strip_top; + h_strip_sy0 = h_strip_top_sy0; + } + else if(has_bot && sr >= sensor_py_end - sensor_O) + { + h_strip = h_strip_bot; + h_strip_sy0 = h_strip_bot_sy0; + } + } + const size_t h_strip_row_off = h_strip + ? (size_t)(sr - h_strip_sy0) * w : 0; + + for(int sc = ext_x0; sc < ext_x1; sc++) { - const int x = x_base + dx; - const int mx = O + dx; + const int mx = O + (sc - sensor_px_base); + const float ax = _seam_ax(sc, sensor_px_base, sensor_px_end, + sensor_O, has_left, has_right); + const gboolean in_vert_seam = (ax < 1.0f); + const size_t tloc = (size_t)my * T + mx; - const size_t dst = (size_t)y * w + x; + const size_t dst = (size_t)sr * w + sc; - for(int k = 0; k < 3; k++) + if(in_horiz_seam) { - const float model_v - = tile_out[tloc + (size_t)k * per_ch]; - const float src_v = rgb_src[dst + (size_t)k * plane]; - rgb_out[dst + (size_t)k * plane] - = alpha * model_v + inv_alpha * src_v; + if(h_strip) + { + const float wgt = ax * ay; + for(int k = 0; k < 3; k++) + { + const float model_v = tile_out[tloc + (size_t)k * per_ch]; + const float src_v = rgb_src[dst + (size_t)k * plane]; + const float blended = alpha * model_v + inv_alpha * src_v; + h_strip[h_strip_row_off + sc + (size_t)k * hstrip_chan] + += wgt * blended; + } + } + } + else if(in_vert_seam) + { + float *v_strip = NULL; + int v_sx0 = 0, v_sy0 = 0, v_h = 0; + if(has_left && sc < sensor_px_base + sensor_O) + { + v_strip = v_strip_left; + v_sx0 = v_strip_left_sx0; v_sy0 = v_strip_left_sy0; + v_h = 
v_strip_left_h; + } + else if(has_right && sc >= sensor_px_end - sensor_O) + { + v_strip = v_strip_right; + v_sx0 = v_strip_right_sx0; v_sy0 = v_strip_right_sy0; + v_h = v_strip_right_h; + } + if(v_strip) + { + const size_t vchan = (size_t)(2 * sensor_O) * v_h; + const size_t vidx + = (size_t)(sr - v_sy0) * (2 * sensor_O) + (sc - v_sx0); + for(int k = 0; k < 3; k++) + { + const float model_v = tile_out[tloc + (size_t)k * per_ch]; + const float src_v = rgb_src[dst + (size_t)k * plane]; + const float blended = alpha * model_v + inv_alpha * src_v; + v_strip[vidx + (size_t)k * vchan] += ax * blended; + } + } + } + else + { + for(int k = 0; k < 3; k++) + { + const float model_v = tile_out[tloc + (size_t)k * per_ch]; + const float src_v = rgb_src[dst + (size_t)k * plane]; + rgb_out[dst + (size_t)k * plane] + = alpha * model_v + inv_alpha * src_v; + } + } + } + } + + // tx-1 + tx ramps sum to 1; flush + free + if(v_strip_left) + { + const size_t vchan = (size_t)(2 * sensor_O) * v_strip_left_h; + for(int sr = v_strip_left_sy0; + sr < v_strip_left_sy0 + v_strip_left_h; sr++) + { + const size_t vrow = (size_t)(sr - v_strip_left_sy0) * (2 * sensor_O); + for(int dxs = 0; dxs < 2 * sensor_O; dxs++) + { + const int sc = v_strip_left_sx0 + dxs; + const size_t dst = (size_t)sr * w + sc; + for(int k = 0; k < 3; k++) + rgb_out[dst + (size_t)k * plane] + = v_strip_left[vrow + dxs + (size_t)k * vchan]; } } + g_free(v_strip_left); } + v_strip_left = v_strip_right; + v_strip_left_sx0 = v_strip_right_sx0; + v_strip_left_sy0 = v_strip_right_sy0; + v_strip_left_h = v_strip_right_h; tile_count++; if(control_job) dt_control_job_set_progress(control_job, (double)tile_count / total_tiles); } + + g_free(v_strip_left); + v_strip_left = NULL; + + // ramps sum to 1, flush. 
no column clamp needed (no working-region offset) + if(h_strip_top) + { + for(int sr = h_strip_top_sy0; sr < h_strip_top_sy0 + hstrip_h; sr++) + { + const size_t hrow = (size_t)(sr - h_strip_top_sy0) * w; + for(int sc = 0; sc < w; sc++) + { + const size_t dst = (size_t)sr * w + sc; + for(int k = 0; k < 3; k++) + rgb_out[dst + (size_t)k * plane] + = h_strip_top[hrow + sc + (size_t)k * hstrip_chan]; + } + } + g_free(h_strip_top); + } + h_strip_top = h_strip_bot; + h_strip_top_sy0 = h_strip_bot_sy0; } + g_free(h_strip_top); + g_free(tile_in); g_free(tile_out); From 7408dba74c08f77d24b2ed9759e0f101604001e5 Mon Sep 17 00:00:00 2001 From: Andrii Ryzhkov Date: Mon, 27 Apr 2026 13:25:59 +0200 Subject: [PATCH 5/9] AI raw denoise: scalar match_gain on linear path --- src/common/ai/restore_raw_linear.c | 63 +++++++++++++----------------- 1 file changed, 28 insertions(+), 35 deletions(-) diff --git a/src/common/ai/restore_raw_linear.c b/src/common/ai/restore_raw_linear.c index 5a6d1b8ece04..1d62e7be0a3d 100644 --- a/src/common/ai/restore_raw_linear.c +++ b/src/common/ai/restore_raw_linear.c @@ -85,33 +85,29 @@ static void _linear_build_M_boosted(const float input_to_cam[9], M[k * 3 + i] = input_to_cam[k * 3 + i] * inv_boost / wb_norm[k]; } -// per-channel scalar match_gain: tile_out[c] *= in_mean[c]/out_mean[c]. -// RawNIND linear output is arbitrary-scale camRGB-in-lin_rec2020 and -// match_gain() is the canonical post-step that puts it back on the -// input's scale. applied in place. out_gain[3] optional (batch uses it -// for a tile0 diagnostic) -static void _linear_gain_match_3ch(const float *tile_in, - float *tile_out, - size_t per_ch, - float out_gain[3]) +// scalar match_gain: tile_out *= in_mean / out_mean, where both means +// are taken over all 3 channels and all spatial positions. mirrors the +// upstream Python rawproc.match_gain (mean over (-1, -2, -3) dims), +// which the model was trained against. applied in place. 
out_gain +// optional (batch uses it for a tile0 diagnostic) +static void _linear_gain_match(const float *tile_in, + float *tile_out, + size_t per_ch, + float *out_gain) { - for(int k = 0; k < 3; k++) + const size_t total = per_ch * 3; + double in_sum = 0.0, out_sum = 0.0; + for(size_t i = 0; i < total; i++) { - const float *pi = tile_in + (size_t)k * per_ch; - float *po = tile_out + (size_t)k * per_ch; - double in_sum = 0.0, out_sum = 0.0; - for(size_t i = 0; i < per_ch; i++) - { - in_sum += pi[i]; - out_sum += po[i]; - } - const double im = in_sum / (double)per_ch; - const double om = out_sum / (double)per_ch; - const float g = (fabs(om) > 1e-8) ? (float)(im / om) : 1.0f; - if(g != 1.0f) - for(size_t i = 0; i < per_ch; i++) po[i] *= g; - if(out_gain) out_gain[k] = g; + in_sum += tile_in[i]; + out_sum += tile_out[i]; } + const double im = in_sum / (double)total; + const double om = out_sum / (double)total; + const float g = (fabs(om) > 1e-8) ? (float)(im / om) : 1.0f; + if(g != 1.0f) + for(size_t i = 0; i < total; i++) tile_out[i] *= g; + if(out_gain) *out_gain = g; } // derive + apply an exposure boost to a planar 3ch lin_rec2020 buffer. @@ -630,20 +626,17 @@ retry:; break; } - // scalar match_gain per channel: tile_out *= in_mean / out_mean - // (applied in place by the helper). skipped for ABSOLUTE-scale - // models whose output is already calibrated + // scalar match_gain: tile_out *= in_mean / out_mean (applied in + // place by the helper). 
skipped for ABSOLUTE-scale models whose + // output is already calibrated const size_t per_ch = tile_plane; - float gain_ch[3] = { 1.0f, 1.0f, 1.0f }; + float gain = 1.0f; if(ctx->output_scale == DT_RESTORE_OUT_MATCH_GAIN) - _linear_gain_match_3ch(tile_in, tile_out, per_ch, gain_ch); + _linear_gain_match(tile_in, tile_out, per_ch, &gain); if(tx == 0 && ty == 0) - { dt_print(DT_DEBUG_AI, - "[restore_raw_linear] tile0 match_gain " - "R=%.3e G=%.3e B=%.3e", - gain_ch[0], gain_ch[1], gain_ch[2]); - } + "[restore_raw_linear] tile0 match_gain=%.3e", + (double)gain); const gboolean has_left = tx > 0; const gboolean has_right = tx < cols - 1; @@ -1046,7 +1039,7 @@ int dt_restore_raw_linear_preview_piped(dt_restore_context_t *ctx, } if(ctx->output_scale == DT_RESTORE_OUT_MATCH_GAIN) - _linear_gain_match_3ch(tile_in, tile_out, tile_plane, NULL); + _linear_gain_match(tile_in, tile_out, tile_plane, NULL); g_free(tile_in); // build matrix to reverse matrix + WB + boost + normalise From edd9ac1ba8a6e6faac88ce72566efe19a9e36183 Mon Sep 17 00:00:00 2001 From: Andrii Ryzhkov Date: Mon, 27 Apr 2026 16:51:52 +0200 Subject: [PATCH 6/9] DNG writer: consolidate into imageio/imageio_dng.{c,h} --- src/CMakeLists.txt | 2 +- src/common/ai/restore_raw_bayer.h | 2 +- src/common/ai/restore_raw_linear.h | 2 +- src/common/dng_writer.h | 105 ------ src/control/jobs/control_jobs.c | 22 +- .../dng_writer.c => imageio/imageio_dng.c} | 274 +++++++++++++- src/imageio/imageio_dng.h | 343 ++++++------------ src/libs/neural_restore.c | 16 +- 8 files changed, 386 insertions(+), 380 deletions(-) delete mode 100644 src/common/dng_writer.h rename src/{common/dng_writer.c => imageio/imageio_dng.c} (58%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 035104ed95d4..18916433608e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -160,6 +160,7 @@ FILE(GLOB SOURCE_FILES "gui/welcome.c" "gui/styles_dialog.c" "imageio/imageio.c" + "imageio/imageio_dng.c" "imageio/imageio_jpeg.c" 
"imageio/imageio_module.c" "imageio/imageio_pfm.c" @@ -465,7 +466,6 @@ if(USE_AI) "common/ai/restore_rgb.c" "common/ai/restore_raw_bayer.c" "common/ai/restore_raw_linear.c" - "common/dng_writer.c" "develop/masks/object.c" "gui/preferences_ai.c" ) diff --git a/src/common/ai/restore_raw_bayer.h b/src/common/ai/restore_raw_bayer.h index bd25ff48ae57..43ffdabb2b4f 100644 --- a/src/common/ai/restore_raw_bayer.h +++ b/src/common/ai/restore_raw_bayer.h @@ -23,7 +23,7 @@ // WB, 2x2 pack), tiled inference with overlap blending, postprocessing // (un-WB, un-normalize), and re-mosaic back to the original CFA // pattern. produces a uint16 sensor-sized mosaic that is written to -// DNG by dt_dng_write_cfa_bayer(). +// DNG by dt_imageio_dng_write_cfa_bayer(). // // this is kept separate from the RGB denoise/upscale path in restore.c // because: diff --git a/src/common/ai/restore_raw_linear.h b/src/common/ai/restore_raw_linear.h index 53f3eaeec980..183db6d98bb7 100644 --- a/src/common/ai/restore_raw_linear.h +++ b/src/common/ai/restore_raw_linear.h @@ -32,7 +32,7 @@ // output is a 3ch float RGB buffer at full sensor resolution, in the // same camRGB + raw ADC range as the source. the neural_restore batch // path re-mosaics nothing (this sensor type can't be round-tripped -// through a CFA DNG) and writes a LinearRaw DNG via dng_writer. +// through a CFA DNG) and writes a LinearRaw DNG via imageio_dng. #pragma once diff --git a/src/common/dng_writer.h b/src/common/dng_writer.h deleted file mode 100644 index e039cc848a09..000000000000 --- a/src/common/dng_writer.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - This file is part of darktable, - Copyright (C) 2026 darktable developers. - - darktable is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. 
- - darktable is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with darktable. If not, see . -*/ - -// dng_writer — minimal DNG CFA writer -// -// writes a single-plane uint16 Bayer mosaic plus enough DNG metadata -// to let a raw processor (darktable, adobe, etc.) re-import the file -// and run the normal raw pipeline: black/white level, CFA pattern, -// AsShotNeutral (white balance), ColorMatrix1 (camRGB -> CIE XYZ), -// camera make/model, and a pass-through of the source EXIF blob. -// -// this writer is intentionally narrow in scope: -// - bayer only (no X-Trans, no LinearRaw demosaiced DNG) -// - uncompressed strip layout -// - single IFD (no embedded JPEG preview or thumbnails) -// - 16-bit integer data only -// -// the consumer of the DNG (darktable itself) does not need more than -// this for the neural restore round-trip. - -#pragma once - -#include -#include - -struct dt_image_t; - -// @brief Write a Bayer CFA mosaic as a DNG file. -// -// The output file contains a single IFD with PhotometricInterpretation=CFA. 
-// All DNG metadata required for darktable re-import is sourced from @p img: -// - BlackLevel[4] from img->raw_black_level_separate -// - WhiteLevel from img->raw_white_point -// - CFAPattern / CFARepeatDim from img->buf_dsc.filters (dcraw format) -// - AsShotNeutral from img->wb_coeffs (inverted) -// - ColorMatrix1 from img->adobe_XYZ_to_CAM -// - Make / Model / UniqueModel from img->camera_maker / camera_model -// -// @param filename output path (UTF-8) -// @param cfa Bayer mosaic (uint16, width * height samples, row-major) -// @param width image width in pixels (CFA samples per row) -// @param height image height in rows -// @param img source image, for DNG metadata -// @param exif_blob optional Exif blob to embed (NULL = skip) -// @param exif_len size of exif_blob in bytes -// @return 0 on success, non-zero on failure (file is removed on failure) -int dt_dng_write_cfa_bayer(const char *filename, - const uint16_t *cfa, - int width, - int height, - const struct dt_image_t *img, - const void *exif_blob, - int exif_len); - -// @brief Write a demosaicked 3-channel linear DNG. -// -// Used for sensors the bayer DNG round-trip can't handle (X-Trans, -// Foveon-like, pre-demosaicked raws). The output file has -// PhotometricInterpretation=LinearRaw, SamplesPerPixel=3, and carries -// the camera's ColorMatrix1 / AsShotNeutral / BlackLevel / WhiteLevel -// so darktable re-imports it as a raw-origin image and skips its own -// demosaic stage. -// -// Pixel data is interpreted as float-normalized camRGB in [0, ~1+] -// (1.0 = source sensor white point after black subtract). The writer -// scales that to uint16 using black = img->raw_black_level, -// white = img->raw_white_point, so the encoding matches what the -// corresponding raw CFA data would be in ADC units. 
-// -// @param filename output path (UTF-8) -// @param rgb interleaved 3ch float RGB, width*height*3 samples -// @param width image width in pixels -// @param height image height in pixels -// @param img source image, for DNG metadata + encoding range -// @param exif_blob optional Exif blob to embed (NULL = skip) -// @param exif_len size of exif_blob in bytes -// @return 0 on success, non-zero on failure (file removed on failure) -int dt_dng_write_linear(const char *filename, - const float *rgb, - int width, - int height, - const struct dt_image_t *img, - const void *exif_blob, - int exif_len); - -// clang-format off -// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py -// vim: shiftwidth=2 expandtab tabstop=2 cindent -// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified; -// clang-format on diff --git a/src/control/jobs/control_jobs.c b/src/control/jobs/control_jobs.c index db18726fb9f2..9c8fc02e4a63 100644 --- a/src/control/jobs/control_jobs.c +++ b/src/control/jobs/control_jobs.c @@ -693,17 +693,17 @@ static int32_t _control_merge_hdr_job_run(dt_job_t *job) char *c = pathname + strlen(pathname); while(*c != '.' 
&& c > pathname) c--; g_strlcpy(c, "-hdr.dng", sizeof(pathname) - (c - pathname)); - dt_imageio_write_dng(pathname, - d.pixels, - d.wd, - d.ht, - exif, - exif_len, - d.first_filter, - (const uint8_t (*)[6])d.first_xtrans, - 1.0f, - (const float (*))d.wb_coeffs, - d.adobe_XYZ_to_CAM); + dt_imageio_dng_write_float(pathname, + d.pixels, + d.wd, + d.ht, + exif, + exif_len, + d.first_filter, + (const uint8_t (*)[6])d.first_xtrans, + 1.0f, + (const float (*))d.wb_coeffs, + d.adobe_XYZ_to_CAM); free(exif); dt_control_job_set_progress(job, 1.0); diff --git a/src/common/dng_writer.c b/src/imageio/imageio_dng.c similarity index 58% rename from src/common/dng_writer.c rename to src/imageio/imageio_dng.c index d26c6d27181e..255b377f9539 100644 --- a/src/common/dng_writer.c +++ b/src/imageio/imageio_dng.c @@ -1,6 +1,6 @@ /* This file is part of darktable, - Copyright (C) 2026 darktable developers. + Copyright (C) 2011-2026 darktable developers. darktable is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,7 +16,7 @@ along with darktable. If not, see . 
*/ -#include "common/dng_writer.h" +#include "imageio/imageio_dng.h" #include "common/darktable.h" #include "common/exif.h" #include "common/image.h" @@ -24,6 +24,10 @@ #include #include +#include +#include +#include +#include #include #include @@ -45,13 +49,13 @@ static void _cfa_bytes_from_filters(uint32_t filters, uint8_t out[4]) out[3] = FC(1, 1, filters); } -int dt_dng_write_cfa_bayer(const char *filename, - const uint16_t *cfa, - int width, - int height, - const dt_image_t *img, - const void *exif_blob, - int exif_len) +int dt_imageio_dng_write_cfa_bayer(const char *filename, + const uint16_t *cfa, + int width, + int height, + const dt_image_t *img, + const void *exif_blob, + int exif_len) { if(!filename || !cfa || !img || width <= 0 || height <= 0) return 1; @@ -221,13 +225,13 @@ int dt_dng_write_cfa_bayer(const char *filename, return res; } -int dt_dng_write_linear(const char *filename, - const float *rgb, - int width, - int height, - const dt_image_t *img, - const void *exif_blob, - int exif_len) +int dt_imageio_dng_write_linear(const char *filename, + const float *rgb, + int width, + int height, + const dt_image_t *img, + const void *exif_blob, + int exif_len) { if(!filename || !rgb || !img || width <= 0 || height <= 0) return 1; @@ -381,6 +385,244 @@ int dt_dng_write_linear(const char *filename, return res; } +// ============================================================================= +// dt_imageio_dng_write_float — hand-rolled byte-level TIFF/DNG writer +// +// Used by HDR merge. Writes a 32-bit float CFA DNG (Bayer or X-Trans). 
+// The helpers and macros below are private to this writer and are not +// shared with the libtiff-based uint16 writers above +// ============================================================================= + +// TIFF type codes (libtiff knows these natively, so the uint16 writers +// above don't need them) +#define BYTE 1 +#define ASCII 2 +#define SHORT 3 +#define LONG 4 +#define RATIONAL 5 +#define SRATIONAL 10 + +#define HEADBUFFSIZE 1024 + +static inline void _imageio_dng_write_buf(uint8_t *buf, const uint32_t d, const int val) +{ + if(d + 4 >= HEADBUFFSIZE) return; + buf[d] = val & 0xff; + buf[d + 1] = (val >> 8) & 0xff; + buf[d + 2] = (val >> 16) & 0xff; + buf[d + 3] = val >> 24; +} + +static inline int _imageio_dng_make_tag( + const uint16_t tag, + const uint16_t type, + const uint32_t lng, + const uint32_t fld, + uint8_t *buf, + const uint32_t b, + uint8_t *cnt) +{ + if(b + 12 < HEADBUFFSIZE) + { + _imageio_dng_write_buf(buf, b, (type << 16) | tag); + _imageio_dng_write_buf(buf, b+4, lng); + _imageio_dng_write_buf(buf, b+8, fld); + *cnt = *cnt + 1; + } + return b + 12; +} + +static inline void _imageio_dng_write_tiff_header( + FILE *fp, + uint32_t xs, + uint32_t ys, + float Tv, + float Av, + float f, + float iso, + uint32_t filter, + const uint8_t xtrans[6][6], + const float whitelevel, + const dt_aligned_pixel_t wb_coeffs, + const float adobe_XYZ_to_CAM[4][3]) +{ + const uint32_t channels = 1; + uint8_t buf[HEADBUFFSIZE]; + uint8_t cnt = 0; + + // this matrix is generic for XYZ->sRGB / D65 + int m[9] = { 3240454, -1537138, -498531, -969266, 1876010, 41556, 55643, -204025, 1057225 }; + int den = 1000000; + + memset(buf, 0, sizeof(buf)); + /* TIFF file header, little-endian */ + buf[0] = 0x49; + buf[1] = 0x49; + buf[2] = 0x2a; + buf[4] = 8; + + // If you want to add other tags written to a dng file include the the ID in the enum to + // keep track of written tags so we don't a) have leaks or b) overwrite anything in data section + const int first_tag = 
__LINE__ + 3; + enum write_tags + { + EXIF_TAG_SUBFILE = 254, /* New subfile type. */ + EXIF_TAG_IMGWIDTH = 256, /* Image width. */ + EXIF_TAG_IMGLENGTH = 257, /* Image length. */ + EXIF_TAG_BPS = 258, /* Bits per sample: 32-bit float */ + EXIF_TAG_COMPRESS = 259, /* Compression. */ + EXIF_TAG_PHOTOMINTREP = 262, /* Photo interp: CFA */ + EXIF_TAG_STRIP_OFFSET = 273, /* Strip offset. */ + EXIF_TAG_ORIENTATION = 274, /* Orientation. */ + EXIF_TAG_SAMPLES_PER_PIXEL = 277, /* Samples per pixel. */ + EXIF_TAG_ROWS_PER_STRIP = 278, /* Rows per strip. */ + EXIF_TAG_STRIP_BCOUNT = 279, /* Strip byte count. */ + EXIF_TAG_PLANAR_CONFIG = 284, /* Planar configuration. */ + EXIF_TAG_SAMPLE_FORMAT = 339, /* SampleFormat = 3 => ieee floating point */ + EXIF_TAG_REPEAT_PATTERN = 33421, /* pattern repeat */ + EXIF_TAG_SENS_PATTERN = 33422, /* sensor pattern */ + EXIF_TAG_VERSION = 50706, /* DNG Version */ + EXIF_TAG_WHITE_LEVEL = 50717, /* White level */ + EXIF_TAG_COLOR_MATRIX1 = 50721, /* ColorMatrix1 (XYZ->native cam) */ + EXIF_TAG_SHOT_NEUTRAL = 50728, /* AsShotNeutral for rawspeed Dngdecoder camera white balance */ + EXIF_TAG_ILLUMINANT1 = 50778, /* CalibrationIlluminant1 */ + }; + buf[8] = (uint8_t)(__LINE__ - first_tag - 1); /* number of entries */ + + uint32_t b = 10; + uint32_t data = 10 + buf[8] * 12 + 4; // takes care of the header, entries, and termination + + b = _imageio_dng_make_tag(EXIF_TAG_SUBFILE, LONG, 1, 0, buf, b, &cnt); + b = _imageio_dng_make_tag(EXIF_TAG_IMGWIDTH, LONG, 1, xs, buf, b, &cnt); + b = _imageio_dng_make_tag(EXIF_TAG_IMGLENGTH, LONG, 1, ys, buf, b, &cnt); + b = _imageio_dng_make_tag(EXIF_TAG_BPS, SHORT, 1, 32, buf, b, &cnt); + b = _imageio_dng_make_tag(EXIF_TAG_COMPRESS, SHORT, 1, 1, buf, b, &cnt); + b = _imageio_dng_make_tag(EXIF_TAG_PHOTOMINTREP, SHORT, 1, 32803, buf, b, &cnt); + b = _imageio_dng_make_tag(EXIF_TAG_STRIP_OFFSET, LONG, 1, 0, buf, b, &cnt); + uint32_t ofst = b - 4; /* remember buffer address for updating strip offset later */ + b 
= _imageio_dng_make_tag(EXIF_TAG_ORIENTATION, SHORT, 1, 1, buf, b, &cnt); + b = _imageio_dng_make_tag(EXIF_TAG_SAMPLES_PER_PIXEL, SHORT, 1, channels, buf, b, &cnt); + b = _imageio_dng_make_tag(EXIF_TAG_ROWS_PER_STRIP, LONG, 1, ys, buf, b, &cnt); + b = _imageio_dng_make_tag(EXIF_TAG_STRIP_BCOUNT, LONG, 1, (ys * xs * channels*4), buf, b, &cnt); + b = _imageio_dng_make_tag(EXIF_TAG_PLANAR_CONFIG, SHORT, 1, 1, buf, b, &cnt); + b = _imageio_dng_make_tag(EXIF_TAG_SAMPLE_FORMAT, SHORT, 1, 3, buf, b, &cnt); + + if(filter == 9u) // xtrans + b = _imageio_dng_make_tag(EXIF_TAG_REPEAT_PATTERN, SHORT, 2, (6 << 16) | 6, buf, b, &cnt); + else + b = _imageio_dng_make_tag(EXIF_TAG_REPEAT_PATTERN, SHORT, 2, (2 << 16) | 2, buf, b, &cnt); + + uint32_t cfapattern = 0; + switch(filter) + { + case 0x94949494: + cfapattern = (2 << 24) | (1 << 16) | (1 << 8) | 0; // rggb + break; + case 0x49494949: + cfapattern = (1 << 24) | (0 << 16) | (2 << 8) | 1; // gbrg + break; + case 0x61616161: + cfapattern = (1 << 24) | (2 << 16) | (0 << 8) | 1; // grbg + break; + default: // case 0x16161616: + cfapattern = (0 << 24) | (1 << 16) | (1 << 8) | 2; // bggr + break; + } + + if(filter == 9u) // xtrans + { + b = _imageio_dng_make_tag(EXIF_TAG_SENS_PATTERN, BYTE, 36, data, buf, b, &cnt); /* xtrans PATTERN */ + // apparently this doesn't need byteswap: + memcpy(buf + data, xtrans, sizeof(uint8_t)*36); + data += 36; + } + else // bayer + b = _imageio_dng_make_tag(EXIF_TAG_SENS_PATTERN, BYTE, 4, cfapattern, buf, b, &cnt); /* bayer PATTERN */ + + b = _imageio_dng_make_tag(EXIF_TAG_VERSION, BYTE, 4, 1 | (4 << 8), buf, b, &cnt); + + // WhiteLevel is straight integer even for float DNGs + b = _imageio_dng_make_tag(EXIF_TAG_WHITE_LEVEL, LONG, 1, (uint32_t)roundf(whitelevel), buf, b, &cnt); + + // ColorMatrix1 try to get camera matrix else m[k] like before + if(dt_is_valid_colormatrix(adobe_XYZ_to_CAM[0][0])) + { + den = 10000; + for(int k= 0; k < 3; k++) + for(int i= 0; i < 3; i++) + m[k*3+i] = 
roundf(adobe_XYZ_to_CAM[k][i] * den); + } + b = _imageio_dng_make_tag(EXIF_TAG_COLOR_MATRIX1, SRATIONAL, 9, data, buf, b, &cnt); /* ColorMatrix1 (XYZ->native cam) */ + for(int k = 0; k < 9; k++) + { + _imageio_dng_write_buf(buf, data + k*8, m[k]); + _imageio_dng_write_buf(buf, data+4 + k*8, den); + } + data += 9 * 8; + + b = _imageio_dng_make_tag(EXIF_TAG_SHOT_NEUTRAL, RATIONAL, 3, data, buf, b, &cnt); + den = 1000000; + for(int k = 0; k < 3; k++) + { + const float coeff = roundf(((float)den * wb_coeffs[1]) / wb_coeffs[k]); + _imageio_dng_write_buf(buf, data + k*8, (int)coeff); + _imageio_dng_write_buf(buf, data+4 + k*8, den); + } + data += 3 * 8; + + b = _imageio_dng_make_tag(EXIF_TAG_ILLUMINANT1, SHORT, 1, DT_LS_D65, buf, b, &cnt); + + // We have all tags using data now written so we can finally use strip offset + _imageio_dng_write_buf(buf, ofst, data); + + /* Termination is implicit: next IFD already 0 when buf initialized */ + + if(buf[8] != cnt) + { + dt_print(DT_DEBUG_ALWAYS, "[dng_write_header] can't write valid header, unexpected number of entries!"); + return; + } + + if(data >= HEADBUFFSIZE) + { + dt_print(DT_DEBUG_ALWAYS, "[dng_write_header] can't write valid header as it exceeds buffer size!"); + return; + } + + // exif is written later, by exiv2: + const int written = fwrite(buf, 1, data, fp); + if(written != data) dt_print(DT_DEBUG_ALWAYS, "[dng_write_header] failed to write image header!"); +} + + +void dt_imageio_dng_write_float( + const char *filename, const float *const pixel, const int wd, + const int ht, void *exif, const int exif_len, const uint32_t filter, + const uint8_t xtrans[6][6], + const float whitelevel, + const dt_aligned_pixel_t wb_coeffs, + const float adobe_XYZ_to_CAM[4][3]) +{ + FILE *f = g_fopen(filename, "wb"); + if(f) + { + _imageio_dng_write_tiff_header(f, wd, ht, 1.0f / 100.0f, 1.0f / 4.0f, 50.0f, 100.0f, + filter, xtrans, whitelevel, wb_coeffs, adobe_XYZ_to_CAM); + const int k = fwrite(pixel, sizeof(float), (size_t)wd * ht, 
f); + if(k != wd * ht) dt_print(DT_DEBUG_ALWAYS, "[dng_write] Error writing image data to %s", filename); + fclose(f); + if(exif) dt_exif_write_blob(exif, exif_len, filename, 0); + } +} + +#undef BYTE +#undef ASCII +#undef SHORT +#undef LONG +#undef RATIONAL +#undef SRATIONAL +#undef HEADBUFFSIZE + // clang-format off // modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py // vim: shiftwidth=2 expandtab tabstop=2 cindent diff --git a/src/imageio/imageio_dng.h b/src/imageio/imageio_dng.h index a4068e549069..e72ca801ede8 100644 --- a/src/imageio/imageio_dng.h +++ b/src/imageio/imageio_dng.h @@ -1,6 +1,6 @@ /* This file is part of darktable, - Copyright (C) 2011-2023 darktable developers. + Copyright (C) 2011-2026 darktable developers. darktable is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,244 +16,113 @@ along with darktable. If not, see . */ -#pragma once - -// writes buffers as digital negative (dng) raw images - -#include -#include -#include -#include - -#include "common/darktable.h" -#include "common/exif.h" - -#define BYTE 1 -#define ASCII 2 -#define SHORT 3 -#define LONG 4 -#define RATIONAL 5 -#define SRATIONAL 10 - -#define HEADBUFFSIZE 1024 - -static inline void _imageio_dng_write_buf(uint8_t *buf, const uint32_t d, const int val) -{ - if(d + 4 >= HEADBUFFSIZE) return; - buf[d] = val & 0xff; - buf[d + 1] = (val >> 8) & 0xff; - buf[d + 2] = (val >> 16) & 0xff; - buf[d + 3] = val >> 24; -} - -static inline int _imageio_dng_make_tag( - const uint16_t tag, - const uint16_t type, - const uint32_t lng, - const uint32_t fld, - uint8_t *buf, - const uint32_t b, - uint8_t *cnt) -{ - if(b + 12 < HEADBUFFSIZE) - { - _imageio_dng_write_buf(buf, b, (type << 16) | tag); - _imageio_dng_write_buf(buf, b+4, lng); - _imageio_dng_write_buf(buf, b+8, fld); - *cnt = *cnt + 1; - } - return b + 12; -} - -static inline void 
_imageio_dng_write_tiff_header( - FILE *fp, - uint32_t xs, - uint32_t ys, - float Tv, - float Av, - float f, - float iso, - uint32_t filter, - const uint8_t xtrans[6][6], - const float whitelevel, - const dt_aligned_pixel_t wb_coeffs, - const float adobe_XYZ_to_CAM[4][3]) -{ - const uint32_t channels = 1; - uint8_t buf[HEADBUFFSIZE]; - uint8_t cnt = 0; - - // this matrix is generic for XYZ->sRGB / D65 - int m[9] = { 3240454, -1537138, -498531, -969266, 1876010, 41556, 55643, -204025, 1057225 }; - int den = 1000000; - - memset(buf, 0, sizeof(buf)); - /* TIFF file header, little-endian */ - buf[0] = 0x49; - buf[1] = 0x49; - buf[2] = 0x2a; - buf[4] = 8; - - // If you want to add other tags written to a dng file include the the ID in the enum to - // keep track of written tags so we don't a) have leaks or b) overwrite anything in data section - const int first_tag = __LINE__ + 3; - enum write_tags - { - EXIF_TAG_SUBFILE = 254, /* New subfile type. */ - EXIF_TAG_IMGWIDTH = 256, /* Image width. */ - EXIF_TAG_IMGLENGTH = 257, /* Image length. */ - EXIF_TAG_BPS = 258, /* Bits per sample: 32-bit float */ - EXIF_TAG_COMPRESS = 259, /* Compression. */ - EXIF_TAG_PHOTOMINTREP = 262, /* Photo interp: CFA */ - EXIF_TAG_STRIP_OFFSET = 273, /* Strip offset. */ - EXIF_TAG_ORIENTATION = 274, /* Orientation. */ - EXIF_TAG_SAMPLES_PER_PIXEL = 277, /* Samples per pixel. */ - EXIF_TAG_ROWS_PER_STRIP = 278, /* Rows per strip. */ - EXIF_TAG_STRIP_BCOUNT = 279, /* Strip byte count. */ - EXIF_TAG_PLANAR_CONFIG = 284, /* Planar configuration. 
*/ - EXIF_TAG_SAMPLE_FORMAT = 339, /* SampleFormat = 3 => ieee floating point */ - EXIF_TAG_REPEAT_PATTERN = 33421, /* pattern repeat */ - EXIF_TAG_SENS_PATTERN = 33422, /* sensor pattern */ - EXIF_TAG_VERSION = 50706, /* DNG Version */ - EXIF_TAG_WHITE_LEVEL = 50717, /* White level */ - EXIF_TAG_COLOR_MATRIX1 = 50721, /* ColorMatrix1 (XYZ->native cam) */ - EXIF_TAG_SHOT_NEUTRAL = 50728, /* AsShotNeutral for rawspeed Dngdecoder camera white balance */ - EXIF_TAG_ILLUMINANT1 = 50778, /* CalibrationIlluminant1 */ - }; - buf[8] = (uint8_t)(__LINE__ - first_tag - 1); /* number of entries */ - - uint32_t b = 10; - uint32_t data = 10 + buf[8] * 12 + 4; // takes care of the header, entries, and termination - - b = _imageio_dng_make_tag(EXIF_TAG_SUBFILE, LONG, 1, 0, buf, b, &cnt); - b = _imageio_dng_make_tag(EXIF_TAG_IMGWIDTH, LONG, 1, xs, buf, b, &cnt); - b = _imageio_dng_make_tag(EXIF_TAG_IMGLENGTH, LONG, 1, ys, buf, b, &cnt); - b = _imageio_dng_make_tag(EXIF_TAG_BPS, SHORT, 1, 32, buf, b, &cnt); - b = _imageio_dng_make_tag(EXIF_TAG_COMPRESS, SHORT, 1, 1, buf, b, &cnt); - b = _imageio_dng_make_tag(EXIF_TAG_PHOTOMINTREP, SHORT, 1, 32803, buf, b, &cnt); - b = _imageio_dng_make_tag(EXIF_TAG_STRIP_OFFSET, LONG, 1, 0, buf, b, &cnt); - uint32_t ofst = b - 4; /* remember buffer address for updating strip offset later */ - b = _imageio_dng_make_tag(EXIF_TAG_ORIENTATION, SHORT, 1, 1, buf, b, &cnt); - b = _imageio_dng_make_tag(EXIF_TAG_SAMPLES_PER_PIXEL, SHORT, 1, channels, buf, b, &cnt); - b = _imageio_dng_make_tag(EXIF_TAG_ROWS_PER_STRIP, LONG, 1, ys, buf, b, &cnt); - b = _imageio_dng_make_tag(EXIF_TAG_STRIP_BCOUNT, LONG, 1, (ys * xs * channels*4), buf, b, &cnt); - b = _imageio_dng_make_tag(EXIF_TAG_PLANAR_CONFIG, SHORT, 1, 1, buf, b, &cnt); - b = _imageio_dng_make_tag(EXIF_TAG_SAMPLE_FORMAT, SHORT, 1, 3, buf, b, &cnt); - - if(filter == 9u) // xtrans - b = _imageio_dng_make_tag(EXIF_TAG_REPEAT_PATTERN, SHORT, 2, (6 << 16) | 6, buf, b, &cnt); - else - b = 
_imageio_dng_make_tag(EXIF_TAG_REPEAT_PATTERN, SHORT, 2, (2 << 16) | 2, buf, b, &cnt); +// imageio_dng — DNG writers +// +// Three entry points for three different DNG flavors: +// - dt_imageio_dng_write_float 32-bit float CFA (HDR merge) +// - dt_imageio_dng_write_cfa_bayer 16-bit uint Bayer CFA (raw round-trip) +// - dt_imageio_dng_write_linear 16-bit uint LinearRaw 3ch (demosaicked) +// +// The float writer is hand-rolled byte assembly into a small TIFF +// header buffer; the two uint16 writers use libtiff. - uint32_t cfapattern = 0; - switch(filter) - { - case 0x94949494: - cfapattern = (2 << 24) | (1 << 16) | (1 << 8) | 0; // rggb - break; - case 0x49494949: - cfapattern = (1 << 24) | (0 << 16) | (2 << 8) | 1; // gbrg - break; - case 0x61616161: - cfapattern = (1 << 24) | (2 << 16) | (0 << 8) | 1; // grbg - break; - default: // case 0x16161616: - cfapattern = (0 << 24) | (1 << 16) | (1 << 8) | 2; // bggr - break; - } - - if(filter == 9u) // xtrans - { - b = _imageio_dng_make_tag(EXIF_TAG_SENS_PATTERN, BYTE, 36, data, buf, b, &cnt); /* xtrans PATTERN */ - // apparently this doesn't need byteswap: - memcpy(buf + data, xtrans, sizeof(uint8_t)*36); - data += 36; - } - else // bayer - b = _imageio_dng_make_tag(EXIF_TAG_SENS_PATTERN, BYTE, 4, cfapattern, buf, b, &cnt); /* bayer PATTERN */ - - b = _imageio_dng_make_tag(EXIF_TAG_VERSION, BYTE, 4, 1 | (4 << 8), buf, b, &cnt); - - // WhiteLevel is straight integer even for float DNGs - b = _imageio_dng_make_tag(EXIF_TAG_WHITE_LEVEL, LONG, 1, (uint32_t)roundf(whitelevel), buf, b, &cnt); - - // ColorMatrix1 try to get camera matrix else m[k] like before - if(dt_is_valid_colormatrix(adobe_XYZ_to_CAM[0][0])) - { - den = 10000; - for(int k= 0; k < 3; k++) - for(int i= 0; i < 3; i++) - m[k*3+i] = roundf(adobe_XYZ_to_CAM[k][i] * den); - } - b = _imageio_dng_make_tag(EXIF_TAG_COLOR_MATRIX1, SRATIONAL, 9, data, buf, b, &cnt); /* ColorMatrix1 (XYZ->native cam) */ - for(int k = 0; k < 9; k++) - { - _imageio_dng_write_buf(buf, data 
+ k*8, m[k]); - _imageio_dng_write_buf(buf, data+4 + k*8, den); - } - data += 9 * 8; - - b = _imageio_dng_make_tag(EXIF_TAG_SHOT_NEUTRAL, RATIONAL, 3, data, buf, b, &cnt); - den = 1000000; - for(int k = 0; k < 3; k++) - { - const float coeff = roundf(((float)den * wb_coeffs[1]) / wb_coeffs[k]); - _imageio_dng_write_buf(buf, data + k*8, (int)coeff); - _imageio_dng_write_buf(buf, data+4 + k*8, den); - } - data += 3 * 8; - - b = _imageio_dng_make_tag(EXIF_TAG_ILLUMINANT1, SHORT, 1, DT_LS_D65, buf, b, &cnt); - - // We have all tags using data now written so we can finally use strip offset - _imageio_dng_write_buf(buf, ofst, data); - - /* Termination is implicit: next IFD already 0 when buf initialized */ - - if(buf[8] != cnt) - { - dt_print(DT_DEBUG_ALWAYS, "[dng_write_header] can't write valid header, unexpected number of entries!"); - return; - } - - if(data >= HEADBUFFSIZE) - { - dt_print(DT_DEBUG_ALWAYS, "[dng_write_header] can't write valid header as it exceeds buffer size!"); - return; - } - - // exif is written later, by exiv2: - const int written = fwrite(buf, 1, data, fp); - if(written != data) dt_print(DT_DEBUG_ALWAYS, "[dng_write_header] failed to write image header!"); -} - - -static inline void dt_imageio_write_dng( - const char *filename, const float *const pixel, const int wd, - const int ht, void *exif, const int exif_len, const uint32_t filter, - const uint8_t xtrans[6][6], - const float whitelevel, - const dt_aligned_pixel_t wb_coeffs, - const float adobe_XYZ_to_CAM[4][3]) -{ - FILE *f = g_fopen(filename, "wb"); - if(f) - { - _imageio_dng_write_tiff_header(f, wd, ht, 1.0f / 100.0f, 1.0f / 4.0f, 50.0f, 100.0f, - filter, xtrans, whitelevel, wb_coeffs, adobe_XYZ_to_CAM); - const int k = fwrite(pixel, sizeof(float), (size_t)wd * ht, f); - if(k != wd * ht) dt_print(DT_DEBUG_ALWAYS, "[dng_write] Error writing image data to %s", filename); - fclose(f); - if(exif) dt_exif_write_blob(exif, exif_len, filename, 0); - } -} +#pragma once -#undef BYTE -#undef ASCII 
-#undef SHORT -#undef LONG -#undef RATIONAL -#undef SRATIONAL +#include +#include + +#include "common/dttypes.h" // for dt_aligned_pixel_t + +struct dt_image_t; + +// @brief Write a 32-bit float CFA DNG (Bayer or X-Trans). +// +// Used by HDR merge: pixel data is float pre-normalized to +// [0, whitelevel], so values may exceed any single sensor's white +// point. The writer doesn't emit BlackLevel or ACTIVEAREA — the +// importer assumes black=0 and the buffer is at the dimensions you +// want displayed. +// +// @param filename output path (UTF-8) +// @param pixel float CFA, wd*ht samples, row-major +// @param wd image width in pixels +// @param ht image height in pixels +// @param exif optional Exif blob to embed (NULL = skip) +// @param exif_len size of @p exif in bytes +// @param filter dcraw 2x2 CFA filters word, or 9u for X-Trans +// @param xtrans X-Trans 6x6 pattern (used iff filter == 9u) +// @param whitelevel pre-normalized white level (typically 1.0f for HDR) +// @param wb_coeffs camera-RGB raw-to-white multipliers +// @param adobe_XYZ_to_CAM XYZ->cameraRGB matrix (4x3, only first 3 rows used) +void dt_imageio_dng_write_float(const char *filename, + const float *pixel, + int wd, + int ht, + void *exif, + int exif_len, + uint32_t filter, + const uint8_t xtrans[6][6], + float whitelevel, + const dt_aligned_pixel_t wb_coeffs, + const float adobe_XYZ_to_CAM[4][3]); + +// @brief Write a Bayer CFA mosaic as a DNG file. +// +// The output file contains a single IFD with PhotometricInterpretation=CFA. 
+// All DNG metadata required for darktable re-import is sourced from @p img: +// - BlackLevel[4] from img->raw_black_level_separate +// - WhiteLevel from img->raw_white_point +// - CFAPattern / CFARepeatDim from img->buf_dsc.filters (dcraw format) +// - AsShotNeutral from img->wb_coeffs (inverted) +// - ColorMatrix1 from img->adobe_XYZ_to_CAM +// - Make / Model / UniqueModel from img->camera_maker / camera_model +// +// @param filename output path (UTF-8) +// @param cfa Bayer mosaic (uint16, width * height samples, row-major) +// @param width image width in pixels (CFA samples per row) +// @param height image height in rows +// @param img source image, for DNG metadata +// @param exif_blob optional Exif blob to embed (NULL = skip) +// @param exif_len size of exif_blob in bytes +// @return 0 on success, non-zero on failure (file is removed on failure) +int dt_imageio_dng_write_cfa_bayer(const char *filename, + const uint16_t *cfa, + int width, + int height, + const struct dt_image_t *img, + const void *exif_blob, + int exif_len); + +// @brief Write a demosaicked 3-channel linear DNG. +// +// Used for sensors the bayer DNG round-trip can't handle (X-Trans, +// Foveon-like, pre-demosaicked raws). The output file has +// PhotometricInterpretation=LinearRaw, SamplesPerPixel=3, and carries +// the camera's ColorMatrix1 / AsShotNeutral / BlackLevel / WhiteLevel +// so darktable re-imports it as a raw-origin image and skips its own +// demosaic stage. +// +// Pixel data is interpreted as float-normalized camRGB in [0, ~1+] +// (1.0 = source sensor white point after black subtract). The writer +// scales that to uint16 using black = img->raw_black_level, +// white = img->raw_white_point, so the encoding matches what the +// corresponding raw CFA data would be in ADC units. 
+// +// @param filename output path (UTF-8) +// @param rgb interleaved 3ch float RGB, width*height*3 samples +// @param width image width in pixels +// @param height image height in pixels +// @param img source image, for DNG metadata + encoding range +// @param exif_blob optional Exif blob to embed (NULL = skip) +// @param exif_len size of exif_blob in bytes +// @return 0 on success, non-zero on failure (file removed on failure) +int dt_imageio_dng_write_linear(const char *filename, + const float *rgb, + int width, + int height, + const struct dt_image_t *img, + const void *exif_blob, + int exif_len); // clang-format off // modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py diff --git a/src/libs/neural_restore.c b/src/libs/neural_restore.c index 17eb52dedf51..5d342090ed9b 100644 --- a/src/libs/neural_restore.c +++ b/src/libs/neural_restore.c @@ -94,11 +94,11 @@ // - raw denoise (Bayer): pre-process the sensor CFA (black/WB/ // pack), run tiled inference via dt_restore_raw_bayer(), un- // process and re-mosaic, write a CFA DNG via -// dt_dng_write_cfa_bayer(). +// dt_imageio_dng_write_cfa_bayer(). // - raw denoise (X-Trans / linear): demosaic via the darktable // pipe (rawprepare + highlights + demosaic only), run // dt_restore_raw_linear(), write a LinearRaw DNG via -// dt_dng_write_linear(). +// dt_imageio_dng_write_linear(). // - denoise / upscale (RGB): export via the darktable pipeline // with a custom format module that intercepts the pixel // buffer in _ai_write_image(). 
when strength < 100 (so the @@ -195,7 +195,7 @@ #include "common/collection.h" #include "common/variables.h" #include "common/colorspaces.h" -#include "common/dng_writer.h" +#include "imageio/imageio_dng.h" #include "common/exif.h" #include "common/film.h" #include "common/grouping.h" @@ -1046,9 +1046,9 @@ static int _process_raw_denoise_bayer(dt_neural_job_t *j, uint8_t *exif_blob = NULL; const int exif_len = dt_exif_read_blob(&exif_blob, src_path, imgid, FALSE, width, height, TRUE); - res = dt_dng_write_cfa_bayer(out_filename, cfa_out, - width, height, img_meta, - exif_blob, exif_len); + res = dt_imageio_dng_write_cfa_bayer(out_filename, cfa_out, + width, height, img_meta, + exif_blob, exif_len); g_free(exif_blob); g_free(cfa_out); return res; @@ -1075,8 +1075,8 @@ static int _process_raw_denoise_linear(dt_neural_job_t *j, uint8_t *exif_blob = NULL; const int exif_len = dt_exif_read_blob(&exif_blob, src_path, imgid, FALSE, w, h, TRUE); - res = dt_dng_write_linear(out_filename, rgb, w, h, img_meta, - exif_blob, exif_len); + res = dt_imageio_dng_write_linear(out_filename, rgb, w, h, img_meta, + exif_blob, exif_len); g_free(exif_blob); dt_free_align(rgb); return res; From ee21d811e101145c60f89bc38a27c8601c4a7db6 Mon Sep 17 00:00:00 2001 From: Andrii Ryzhkov Date: Mon, 27 Apr 2026 17:50:22 +0200 Subject: [PATCH 7/9] Surface preview init failures in widget placeholder --- src/libs/neural_restore.c | 61 ++++++++++++++++++++++++++++++++------- 1 file changed, 51 insertions(+), 10 deletions(-) diff --git a/src/libs/neural_restore.c b/src/libs/neural_restore.c index 5d342090ed9b..844299be0b7d 100644 --- a/src/libs/neural_restore.c +++ b/src/libs/neural_restore.c @@ -249,6 +249,14 @@ typedef enum dt_neural_bpp_t NEURAL_BPP_32 = 2, } dt_neural_bpp_t; +// preview-area placeholder state when no rendered preview exists +typedef enum dt_nr_preview_err_t +{ + DT_NR_PREVIEW_ERR_NONE = 0, + DT_NR_PREVIEW_ERR_UNSUPPORTED, // sensor class not handled by task + 
DT_NR_PREVIEW_ERR_INIT_FAILED, // mipmap / model / cache load bailed +} dt_nr_preview_err_t; + typedef struct dt_lib_neural_restore_t { GtkNotebook *notebook; @@ -280,6 +288,7 @@ typedef struct dt_lib_neural_restore_t gboolean preview_requested; gboolean dragging_split; gboolean preview_generating; + dt_nr_preview_err_t preview_error; gboolean recovery_changing; GThread *preview_thread; gint preview_sequence; @@ -1507,6 +1516,8 @@ static void _update_info_label(dt_lib_neural_restore_t *d) static void _trigger_preview(dt_lib_module_t *self); static void _cancel_preview(dt_lib_module_t *self); +static void _schedule_preview_failed(dt_lib_module_t *self, + dt_nr_preview_err_t err); static void _task_changed(dt_lib_neural_restore_t *d) { @@ -1763,6 +1774,9 @@ static gpointer _preview_thread(gpointer data) dt_neural_preview_data_t *pd = (dt_neural_preview_data_t *)data; dt_lib_neural_restore_t *d = (dt_lib_neural_restore_t *)pd->self->data; + // bail reason if we hit cleanup; stale-sequence bails are dropped + dt_nr_preview_err_t err = DT_NR_PREVIEW_ERR_INIT_FAILED; + // reuse borrowed export if available (re-pick), otherwise export. // pixels points to either the borrowed buffer (not owned) or // cap.pixels (owned, must be freed on error or passed to result) @@ -2018,8 +2032,13 @@ static gpointer _preview_thread(gpointer data) result->patch_center[0] = pd->patch_center[0]; result->patch_center[1] = pd->patch_center[1]; g_idle_add(_preview_result_idle, result); + g_free(pd); + return NULL; cleanup: + // bail: clear preview_generating on UI thread (stale-sequence bails dropped) + if(pd->sequence == g_atomic_int_get(&d->preview_sequence)) + _schedule_preview_failed(pd->self, err); g_free(pd); return NULL; } @@ -2256,18 +2275,35 @@ static void _schedule_raw_strength_reblend(dt_lib_module_t *self) _strength_blend_timer_cb, self); } -// fired when the raw worker bails before producing a result, so the UI -// doesn't get stuck with preview_generating == TRUE forever. 
-static gboolean _preview_raw_failed_idle(gpointer data) +// fired when a preview worker bails: clears preview_generating and +// records the bail reason so the placeholder shows it +typedef struct { - dt_lib_module_t *self = (dt_lib_module_t *)data; - dt_lib_neural_restore_t *d = (dt_lib_neural_restore_t *)self->data; + dt_lib_module_t *self; + dt_nr_preview_err_t err; +} _preview_failed_data_t; + +static gboolean _preview_failed_idle(gpointer data) +{ + _preview_failed_data_t *fd = (_preview_failed_data_t *)data; + dt_lib_neural_restore_t *d = (dt_lib_neural_restore_t *)fd->self->data; d->preview_generating = FALSE; + d->preview_error = fd->err; _update_button_sensitivity(d); gtk_widget_queue_draw(d->preview_area); + g_free(fd); return G_SOURCE_REMOVE; } +static void _schedule_preview_failed(dt_lib_module_t *self, + dt_nr_preview_err_t err) +{ + _preview_failed_data_t *fd = g_new0(_preview_failed_data_t, 1); + fd->self = self; + fd->err = err; + g_idle_add(_preview_failed_idle, fd); +} + static gboolean _preview_raw_result_idle(gpointer data) { dt_neural_preview_result_raw_t *res @@ -2370,6 +2406,8 @@ static gpointer _preview_thread_raw(gpointer data) dt_neural_preview_data_t *pd = (dt_neural_preview_data_t *)data; dt_lib_neural_restore_t *d = (dt_lib_neural_restore_t *)pd->self->data; + // bail reason for cleanup path; unsupported-sensor branch overrides + dt_nr_preview_err_t bail_err = DT_NR_PREVIEW_ERR_INIT_FAILED; // 1. load source image metadata to determine sensor type. // on a fresh session dt_image_cache_get returns img_meta with a @@ -2406,6 +2444,7 @@ static gpointer _preview_thread_raw(gpointer data) "[neural_restore] raw preview: imgid %d is not bayer/xtrans " "(filters=0x%x class=%d)", pd->imgid, filters, cls); + bail_err = DT_NR_PREVIEW_ERR_UNSUPPORTED; goto cleanup; } dt_print(DT_DEBUG_AI, @@ -2747,12 +2786,9 @@ static gpointer _preview_thread_raw(gpointer data) return NULL; cleanup: - // worker bailed before producing a result. 
clear preview_generating - // on the UI thread so the user can re-trigger and the button state - // reflects reality. only schedule when the sequence is still current - // (a stale bail means a newer trigger is already in flight). + // bail: clear preview_generating on UI thread (stale-sequence bails dropped) if(pd->sequence == g_atomic_int_get(&d->preview_sequence)) - g_idle_add(_preview_raw_failed_idle, pd->self); + _schedule_preview_failed(pd->self, bail_err); g_free(pd); return NULL; } @@ -2822,6 +2858,7 @@ static void _trigger_preview(dt_lib_module_t *self) // invalidate current preview and bump sequence so running thread exits early d->preview_ready = FALSE; + d->preview_error = DT_NR_PREVIEW_ERR_NONE; g_atomic_int_inc(&d->preview_sequence); gtk_widget_queue_draw(d->preview_area); @@ -3251,6 +3288,10 @@ static gboolean _preview_draw(GtkWidget *widget, cairo_t *cr, dt_lib_module_t *s ? _("generating preview...") : !d->preview_requested ? _("click to generate preview") + : d->preview_error == DT_NR_PREVIEW_ERR_UNSUPPORTED + ? _("image not supported by this task") + : d->preview_error == DT_NR_PREVIEW_ERR_INIT_FAILED + ? 
_("preview initialization failed") : _("select an image to preview"); cairo_text_extents(cr, text, &ext); cairo_move_to(cr, (w - ext.width) / 2.0, (h + ext.height) / 2.0); From 27b3bdec846c689d759415b60b545f0958eca6fb Mon Sep 17 00:00:00 2001 From: Andrii Ryzhkov Date: Tue, 28 Apr 2026 09:41:18 +0200 Subject: [PATCH 8/9] Fix magenta X-Trans preview from WB mismatch in linear prepare --- src/common/ai/restore_raw_linear.c | 19 +++++++++---------- src/common/ai/restore_raw_linear.h | 10 +++++++--- src/libs/neural_restore.c | 2 +- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/common/ai/restore_raw_linear.c b/src/common/ai/restore_raw_linear.c index 1d62e7be0a3d..338d832f3810 100644 --- a/src/common/ai/restore_raw_linear.c +++ b/src/common/ai/restore_raw_linear.c @@ -884,7 +884,8 @@ retry:; // WB + camRGB->lin_rec2020 once and returns a 3ch interleaved buffer at // sensor resolution; neural_restore.c caches it across previews of the // same image -int dt_restore_raw_linear_prepare(const dt_imgid_t imgid, +int dt_restore_raw_linear_prepare(const dt_restore_context_t *ctx, + const dt_imgid_t imgid, float **out_rgb, int *out_w, int *out_h) @@ -904,19 +905,17 @@ int dt_restore_raw_linear_prepare(const dt_imgid_t imgid, dt_image_t img_meta = *cached; dt_image_cache_read_release(cached); - // this prepare path has no ctx; use the default (AS_SHOT) WB. the - // cached lin_rec2020 buffer assumes this mode, so a future model - // that needs a different WB would require keying the cache on ctx - // too (or adding ctx to this API) + // WB + matrix derived from ctx so the cached buffer matches what the + // inference + undo paths will assume. without this, a NONE-mode model + // would see a buffer with WB baked in by the prepare default and the + // undo step (which honours ctx) would not strip it back out — magenta + // cast on re-mosaic float wb_norm[3]; - _resolve_linear_wb(NULL, &img_meta, wb_norm); + _resolve_linear_wb(ctx, &img_meta, wb_norm); - // 3. 
camRGB -> input-space matrix. this prepare path has no ctx - // so we use the default (LIN_REC2020); a second model expecting a - // different input space would need the cache keyed on it too float cam_to_input[9]; float input_to_cam[9]; - if(!_build_cam_matrices(NULL, &img_meta, cam_to_input, input_to_cam)) + if(!_build_cam_matrices(ctx, &img_meta, cam_to_input, input_to_cam)) { for(int i = 0; i < 9; i++) cam_to_input[i] = (i % 4 == 0) ? 1.0f : 0.0f; diff --git a/src/common/ai/restore_raw_linear.h b/src/common/ai/restore_raw_linear.h index 183db6d98bb7..ca6dcad1b602 100644 --- a/src/common/ai/restore_raw_linear.h +++ b/src/common/ai/restore_raw_linear.h @@ -88,14 +88,18 @@ int dt_restore_raw_linear(dt_restore_context_t *ctx, // Slow (full-image demosaic via darktable's pipeline). neural_restore // caches the result across multiple preview refreshes of the same image. // +// @param ctx loaded linear context (selects WB mode / colorspace +// to match the model the inference + undo paths will +// use). may be NULL only if the caller knows defaults +// match the downstream consumer. // @param imgid image id // @param out_rgb caller-frees with dt_free_align. 3ch interleaved -// (sensor_w * sensor_h * 3 floats), in lin_rec2020 with -// as-shot WB applied. +// (sensor_w * sensor_h * 3 floats), in lin_rec2020. 
// @param out_w out: sensor width // @param out_h out: sensor height // @return 0 on success -int dt_restore_raw_linear_prepare(const dt_imgid_t imgid, +int dt_restore_raw_linear_prepare(const dt_restore_context_t *ctx, + const dt_imgid_t imgid, float **out_rgb, int *out_w, int *out_h); diff --git a/src/libs/neural_restore.c b/src/libs/neural_restore.c index 844299be0b7d..925793c30620 100644 --- a/src/libs/neural_restore.c +++ b/src/libs/neural_restore.c @@ -2533,7 +2533,7 @@ static gpointer _preview_thread_raw(gpointer data) } else if(is_xtrans) { - if(dt_restore_raw_linear_prepare(pd->imgid, &take_full_lin, + if(dt_restore_raw_linear_prepare(ctx, pd->imgid, &take_full_lin, &full_w, &full_h) != 0 || !take_full_lin) { From 4d5e1c971135767c1885f432396013c1ce0a53b6 Mon Sep 17 00:00:00 2001 From: Andrii Ryzhkov Date: Tue, 28 Apr 2026 10:33:37 +0200 Subject: [PATCH 9/9] Embed source JPEG preview in denoised DNGs --- src/imageio/imageio_dng.c | 271 ++++++++++++++++++++------------------ src/imageio/imageio_dng.h | 19 ++- src/libs/neural_restore.c | 107 ++++++++++++++- 3 files changed, 268 insertions(+), 129 deletions(-) diff --git a/src/imageio/imageio_dng.c b/src/imageio/imageio_dng.c index 255b377f9539..8d1c65addf70 100644 --- a/src/imageio/imageio_dng.c +++ b/src/imageio/imageio_dng.c @@ -49,13 +49,113 @@ static void _cfa_bytes_from_filters(uint32_t filters, uint8_t out[4]) out[3] = FC(1, 1, filters); } +// shared DNG metadata block: written on whichever IFD readers consult +// first for camera/colour information. 
for single-IFD layouts that's +// the raw IFD; for the canonical preview-leading layout (IFD0 = JPEG +// preview, SubIFD0 = raw) it's IFD0 +static void _set_dng_shared_metadata(TIFF *tif, const dt_image_t *img) +{ + TIFFSetField(tif, TIFFTAG_XRESOLUTION, 300.0); + TIFFSetField(tif, TIFFTAG_YRESOLUTION, 300.0); + TIFFSetField(tif, TIFFTAG_RESOLUTIONUNIT, RESUNIT_INCH); + + gchar *software = g_strdup_printf("darktable %s", darktable_package_version); + TIFFSetField(tif, TIFFTAG_SOFTWARE, software); + g_free(software); + + if(img->camera_maker[0]) + TIFFSetField(tif, TIFFTAG_MAKE, img->camera_maker); + if(img->camera_model[0]) + TIFFSetField(tif, TIFFTAG_MODEL, img->camera_model); + if(img->camera_makermodel[0]) + TIFFSetField(tif, TIFFTAG_UNIQUECAMERAMODEL, img->camera_makermodel); + + const uint8_t dng_version[4] = { 1, 4, 0, 0 }; + const uint8_t dng_backward[4] = { 1, 2, 0, 0 }; + TIFFSetField(tif, TIFFTAG_DNGVERSION, dng_version); + TIFFSetField(tif, TIFFTAG_DNGBACKWARDVERSION, dng_backward); + + // AsShotNeutral: inverse of wb_coeffs, normalized so max=1. fallback + // to neutral [1,1,1] when wb_coeffs missing so the tag is always set + float neutral[3] = { 1.0f, 1.0f, 1.0f }; + if(img->wb_coeffs[0] > 0.0f + && img->wb_coeffs[1] > 0.0f + && img->wb_coeffs[2] > 0.0f) + { + for(int i = 0; i < 3; i++) neutral[i] = 1.0f / img->wb_coeffs[i]; + const float m = fmaxf(neutral[0], fmaxf(neutral[1], neutral[2])); + if(m > 0.0f) for(int i = 0; i < 3; i++) neutral[i] /= m; + } + TIFFSetField(tif, TIFFTAG_ASSHOTNEUTRAL, 3, neutral); + + // ColorMatrix1 (XYZ D50 -> cameraRGB, 3x3). 
row-major [camRGB][XYZ] + // matches darktable's adobe_XYZ_to_CAM layout exactly + float non_zero = 0.0f; + for(int k = 0; k < 3; k++) + for(int i = 0; i < 3; i++) + non_zero += fabsf(img->adobe_XYZ_to_CAM[k][i]); + if(non_zero > 0.0f) + { + float color_matrix[9]; + for(int k = 0; k < 3; k++) + for(int i = 0; i < 3; i++) + color_matrix[k * 3 + i] = img->adobe_XYZ_to_CAM[k][i]; + TIFFSetField(tif, TIFFTAG_COLORMATRIX1, 9, color_matrix); + } +} + +// write IFD0 as the canonical Adobe-layout JPEG preview: small YCbCr +// thumbnail + shared DNG metadata + SubIFD pointer to the raw payload +// that the caller will write next. caller must follow with +// TIFFCreateDirectory + raw-IFD population + TIFFWriteDirectory +static int _write_jpeg_preview_ifd(TIFF *tif, + const dt_image_t *img, + const dt_imageio_dng_preview_t *p) +{ + TIFFSetField(tif, TIFFTAG_SUBFILETYPE, FILETYPE_REDUCEDIMAGE); + TIFFSetField(tif, TIFFTAG_IMAGEWIDTH, (uint32_t)p->width); + TIFFSetField(tif, TIFFTAG_IMAGELENGTH, (uint32_t)p->height); + TIFFSetField(tif, TIFFTAG_BITSPERSAMPLE, (uint16_t)8); + TIFFSetField(tif, TIFFTAG_SAMPLESPERPIXEL, (uint16_t)3); + TIFFSetField(tif, TIFFTAG_PLANARCONFIG, PLANARCONFIG_CONTIG); + TIFFSetField(tif, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_YCBCR); + TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_JPEG); + TIFFSetField(tif, TIFFTAG_ORIENTATION, ORIENTATION_TOPLEFT); + TIFFSetField(tif, TIFFTAG_ROWSPERSTRIP, (uint32_t)p->height); + + _set_dng_shared_metadata(tif, img); + + // SubIFD pointer with one slot. libtiff fills the actual offset + // when the SubIFD is later written via TIFFCreateDirectory + ... 
+ toff_t sub_offsets[1] = { 0 }; + TIFFSetField(tif, TIFFTAG_SUBIFD, (uint16_t)1, sub_offsets); + + // pre-encoded JPEG written as a single raw strip (libtiff does not + // re-encode when COMPRESSION_JPEG is paired with TIFFWriteRawStrip) + if(TIFFWriteRawStrip(tif, 0, (void *)p->data, (tmsize_t)p->len) < 0) + { + dt_print(DT_DEBUG_ALWAYS, + "[imageio_dng] TIFFWriteRawStrip failed for JPEG preview " + "(%d bytes, %dx%d)", p->len, p->width, p->height); + return 1; + } + if(!TIFFWriteDirectory(tif)) + { + dt_print(DT_DEBUG_ALWAYS, + "[imageio_dng] TIFFWriteDirectory failed for JPEG preview IFD0"); + return 1; + } + return 0; +} + int dt_imageio_dng_write_cfa_bayer(const char *filename, const uint16_t *cfa, int width, int height, const dt_image_t *img, const void *exif_blob, - int exif_len) + int exif_len, + const dt_imageio_dng_preview_t *preview) { if(!filename || !cfa || !img || width <= 0 || height <= 0) return 1; @@ -69,7 +169,28 @@ int dt_imageio_dng_write_cfa_bayer(const char *filename, #endif if(!tif) return 1; - // required baseline TIFF tags for a single-plane raw image + // canonical Adobe layout when a preview is provided: IFD0 holds the + // JPEG thumbnail + DNG identification metadata, the raw payload + // moves into SubIFD0 + const gboolean canonical = (preview && preview->data && preview->len > 0 + && preview->width > 0 && preview->height > 0); + if(canonical) + { + if(_write_jpeg_preview_ifd(tif, img, preview) != 0) + { + dt_print(DT_DEBUG_ALWAYS, + "[imageio_dng] write_cfa_bayer: preview IFD0 failed, aborting"); + TIFFClose(tif); + g_unlink(filename); + return 1; + } + // libtiff entered INSUBIFD mode when the IFD0 carrying TIFFTAG_SUBIFD + // was written; subsequent TIFFSetField + scanline writes populate + // the SubIFD without an explicit TIFFCreateDirectory call (whose + // return-value convention changed between libtiff versions) + } + + // raw payload IFD: single IFD when no preview, otherwise SubIFD0 TIFFSetField(tif, TIFFTAG_SUBFILETYPE, 0); 
TIFFSetField(tif, TIFFTAG_IMAGEWIDTH, (uint32_t)width); TIFFSetField(tif, TIFFTAG_IMAGELENGTH, (uint32_t)height); @@ -81,29 +202,9 @@ int dt_imageio_dng_write_cfa_bayer(const char *filename, TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_NONE); TIFFSetField(tif, TIFFTAG_ORIENTATION, ORIENTATION_TOPLEFT); TIFFSetField(tif, TIFFTAG_ROWSPERSTRIP, TIFFDefaultStripSize(tif, 0)); - TIFFSetField(tif, TIFFTAG_XRESOLUTION, 300.0); - TIFFSetField(tif, TIFFTAG_YRESOLUTION, 300.0); - TIFFSetField(tif, TIFFTAG_RESOLUTIONUNIT, RESUNIT_INCH); - { - gchar *software = g_strdup_printf("darktable %s", - darktable_package_version); - TIFFSetField(tif, TIFFTAG_SOFTWARE, software); - g_free(software); - } - - // camera identification - if(img->camera_maker[0]) - TIFFSetField(tif, TIFFTAG_MAKE, img->camera_maker); - if(img->camera_model[0]) - TIFFSetField(tif, TIFFTAG_MODEL, img->camera_model); - if(img->camera_makermodel[0]) - TIFFSetField(tif, TIFFTAG_UNIQUECAMERAMODEL, img->camera_makermodel); - - // DNG identification - const uint8_t dng_version[4] = { 1, 4, 0, 0 }; - const uint8_t dng_backward[4] = { 1, 2, 0, 0 }; - TIFFSetField(tif, TIFFTAG_DNGVERSION, dng_version); - TIFFSetField(tif, TIFFTAG_DNGBACKWARDVERSION, dng_backward); + // shared metadata only on single-IFD layout — canonical has it on IFD0 + if(!canonical) + _set_dng_shared_metadata(tif, img); // CFA description const uint16_t cfa_repeat_dim[2] = { 2, 2 }; @@ -142,45 +243,6 @@ int dt_imageio_dng_write_cfa_bayer(const char *filename, ? img->raw_white_point : 65535u; TIFFSetField(tif, TIFFTAG_WHITELEVEL, 1, &white); - // AsShotNeutral (derived from wb_coeffs) - // DNG AsShotNeutral encodes the neutral white balance as a - // cameraRGB triple where smaller values mean more amplification. 
- // darktable's wb_coeffs are raw-to-white multipliers; AsShotNeutral - // is their inverse, normalized so the maximum element is 1 - if(img->wb_coeffs[0] > 0.0f - && img->wb_coeffs[1] > 0.0f - && img->wb_coeffs[2] > 0.0f) - { - float inv[3]; - for(int i = 0; i < 3; i++) - inv[i] = 1.0f / img->wb_coeffs[i]; - const float m = fmaxf(inv[0], fmaxf(inv[1], inv[2])); - if(m > 0.0f) - for(int i = 0; i < 3; i++) inv[i] /= m; - TIFFSetField(tif, TIFFTAG_ASSHOTNEUTRAL, 3, inv); - } - - // ColorMatrix1 (XYZ D50 -> cameraRGB, 3x3 for trichromatic) - // darktable's adobe_XYZ_to_CAM is populated from the rawspeed - // cameras.xml matrix in row-major [camRGB][XYZ] layout, which - // matches the DNG ColorMatrix1 layout exactly (row = camera axis, - // column = XYZ axis) - { - float non_zero = 0.0f; - for(int k = 0; k < 3; k++) - for(int i = 0; i < 3; i++) - non_zero += fabsf(img->adobe_XYZ_to_CAM[k][i]); - - if(non_zero > 0.0f) - { - float color_matrix[9]; - for(int k = 0; k < 3; k++) - for(int i = 0; i < 3; i++) - color_matrix[k * 3 + i] = img->adobe_XYZ_to_CAM[k][i]; - TIFFSetField(tif, TIFFTAG_COLORMATRIX1, 9, color_matrix); - } - } - // advertise the visible region inside the full raw buffer; without // these tags the importer renders the optical-black margins too const int crop_x = (img->crop_x > 0) ? 
img->crop_x : 0; @@ -231,7 +293,8 @@ int dt_imageio_dng_write_linear(const char *filename, int height, const dt_image_t *img, const void *exif_blob, - int exif_len) + int exif_len, + const dt_imageio_dng_preview_t *preview) { if(!filename || !rgb || !img || width <= 0 || height <= 0) return 1; @@ -245,6 +308,22 @@ int dt_imageio_dng_write_linear(const char *filename, #endif if(!tif) return 1; + // canonical layout when a preview is provided (see write_cfa_bayer) + const gboolean canonical = (preview && preview->data && preview->len > 0 + && preview->width > 0 && preview->height > 0); + if(canonical) + { + if(_write_jpeg_preview_ifd(tif, img, preview) != 0) + { + dt_print(DT_DEBUG_ALWAYS, + "[imageio_dng] write_linear: preview IFD0 failed, aborting"); + TIFFClose(tif); + g_unlink(filename); + return 1; + } + // libtiff is in INSUBIFD mode after IFD0 was written with TIFFTAG_SUBIFD + } + // baseline TIFF tags, 3 samples per pixel (demosaicked) TIFFSetField(tif, TIFFTAG_SUBFILETYPE, 0); TIFFSetField(tif, TIFFTAG_IMAGEWIDTH, (uint32_t)width); @@ -257,29 +336,8 @@ int dt_imageio_dng_write_linear(const char *filename, TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_NONE); TIFFSetField(tif, TIFFTAG_ORIENTATION, ORIENTATION_TOPLEFT); TIFFSetField(tif, TIFFTAG_ROWSPERSTRIP, TIFFDefaultStripSize(tif, 0)); - TIFFSetField(tif, TIFFTAG_XRESOLUTION, 300.0); - TIFFSetField(tif, TIFFTAG_YRESOLUTION, 300.0); - TIFFSetField(tif, TIFFTAG_RESOLUTIONUNIT, RESUNIT_INCH); - { - gchar *software = g_strdup_printf("darktable %s", - darktable_package_version); - TIFFSetField(tif, TIFFTAG_SOFTWARE, software); - g_free(software); - } - - // camera identification - if(img->camera_maker[0]) - TIFFSetField(tif, TIFFTAG_MAKE, img->camera_maker); - if(img->camera_model[0]) - TIFFSetField(tif, TIFFTAG_MODEL, img->camera_model); - if(img->camera_makermodel[0]) - TIFFSetField(tif, TIFFTAG_UNIQUECAMERAMODEL, img->camera_makermodel); - - // DNG identification - const uint8_t dng_version[4] = { 1, 4, 
0, 0 }; - const uint8_t dng_backward[4] = { 1, 2, 0, 0 }; - TIFFSetField(tif, TIFFTAG_DNGVERSION, dng_version); - TIFFSetField(tif, TIFFTAG_DNGBACKWARDVERSION, dng_backward); + if(!canonical) + _set_dng_shared_metadata(tif, img); // NO CFA tags: this is demosaicked data. // encode as normalized: BlackLevel=0, WhiteLevel=65535. the @@ -293,43 +351,6 @@ int dt_imageio_dng_write_linear(const char *filename, TIFFSetField(tif, TIFFTAG_BLACKLEVEL, 3, black3); TIFFSetField(tif, TIFFTAG_WHITELEVEL, 1, &white_norm); - // AsShotNeutral = inverse of WB multipliers, normalized so max=1. - // on re-import, darktable reads this and derives WB coeffs via - // wb[c] = 1/AsShotNeutral[c] / wb[G-normalized]. the temperature - // iop then applies this WB to our un-WB'd data, giving the standard - // raw-pipeline result - if(img->wb_coeffs[0] > 0.0f - && img->wb_coeffs[1] > 0.0f - && img->wb_coeffs[2] > 0.0f) - { - float inv[3]; - for(int i = 0; i < 3; i++) inv[i] = 1.0f / img->wb_coeffs[i]; - const float m = fmaxf(inv[0], fmaxf(inv[1], inv[2])); - if(m > 0.0f) for(int i = 0; i < 3; i++) inv[i] /= m; - TIFFSetField(tif, TIFFTAG_ASSHOTNEUTRAL, 3, inv); - } - else - { - const float neutral[3] = { 1.0f, 1.0f, 1.0f }; - TIFFSetField(tif, TIFFTAG_ASSHOTNEUTRAL, 3, neutral); - } - - // ColorMatrix1 from camera's XYZ->CAM (3x3 portion) - { - float non_zero = 0.0f; - for(int k = 0; k < 3; k++) - for(int i = 0; i < 3; i++) - non_zero += fabsf(img->adobe_XYZ_to_CAM[k][i]); - if(non_zero > 0.0f) - { - float color_matrix[9]; - for(int k = 0; k < 3; k++) - for(int i = 0; i < 3; i++) - color_matrix[k * 3 + i] = img->adobe_XYZ_to_CAM[k][i]; - TIFFSetField(tif, TIFFTAG_COLORMATRIX1, 9, color_matrix); - } - } - // linear DNG: buffer is already at visible dims (post-demosaic); // ACTIVEAREA covers the full buffer, no margin to crop const uint32_t active_area[4] = { diff --git a/src/imageio/imageio_dng.h b/src/imageio/imageio_dng.h index e72ca801ede8..1ea2ef3e97cf 100644 --- a/src/imageio/imageio_dng.h 
+++ b/src/imageio/imageio_dng.h @@ -35,6 +35,19 @@ struct dt_image_t; +// optional embedded JPEG preview for the uint16 DNG writers. when +// non-NULL, the writer uses the canonical Adobe layout (IFD0 = JPEG +// preview, SubIFD0 = raw payload) so library browsers (Finder, +// Photomator, Lightroom) can render thumbnails without decoding the +// raw. when NULL, falls back to the historical single-IFD layout +typedef struct dt_imageio_dng_preview_t +{ + const uint8_t *data; // pre-encoded JPEG bytes, 8-bit YCbCr + int len; // length of @p data in bytes + int width; // declared image width + int height; // declared image height +} dt_imageio_dng_preview_t; + // @brief Write a 32-bit float CFA DNG (Bayer or X-Trans). // // Used by HDR merge: pixel data is float pre-normalized to @@ -91,7 +104,8 @@ int dt_imageio_dng_write_cfa_bayer(const char *filename, int height, const struct dt_image_t *img, const void *exif_blob, - int exif_len); + int exif_len, + const dt_imageio_dng_preview_t *preview); // @brief Write a demosaicked 3-channel linear DNG. // @@ -122,7 +136,8 @@ int dt_imageio_dng_write_linear(const char *filename, int height, const struct dt_image_t *img, const void *exif_blob, - int exif_len); + int exif_len, + const dt_imageio_dng_preview_t *preview); // clang-format off // modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py diff --git a/src/libs/neural_restore.c b/src/libs/neural_restore.c index 925793c30620..7447f7896394 100644 --- a/src/libs/neural_restore.c +++ b/src/libs/neural_restore.c @@ -196,6 +196,7 @@ #include "common/variables.h" #include "common/colorspaces.h" #include "imageio/imageio_dng.h" +#include "imageio/imageio_jpeg.h" #include "common/exif.h" #include "common/film.h" #include "common/grouping.h" @@ -501,6 +502,62 @@ static inline float _linear_to_srgb(float v) return (v <= 0.0031308f) ? 
12.92f * v : 1.055f * powf(v, 1.0f / 2.4f) - 0.055f; } +// pull the camera's embedded JPEG preview from the source raw, to embed +// as the DNG thumbnail; output buffer is g_malloc'd (caller frees) +static int _extract_source_jpeg_preview(const char *src_path, + uint8_t **out_jpeg, + int *out_jpeg_len, + int *out_w, + int *out_h) +{ + *out_jpeg = NULL; + *out_jpeg_len = 0; + *out_w = 0; + *out_h = 0; + + uint8_t *raw_buf = NULL; + size_t raw_size = 0; + char *mime = NULL; + // dt_exif_get_thumbnail returns FALSE on success, allocates via malloc/strdup + if(dt_exif_get_thumbnail(src_path, &raw_buf, &raw_size, &mime) || !raw_buf) + { + free(raw_buf); + free(mime); + return 1; + } + const gboolean is_jpeg = mime && (strcmp(mime, "image/jpeg") == 0); + free(mime); + if(!is_jpeg || raw_size == 0 || raw_size > (size_t)INT_MAX) + { + free(raw_buf); + return 1; + } + + dt_imageio_jpeg_t jpg; + if(dt_imageio_jpeg_decompress_header(raw_buf, raw_size, &jpg) != 0 + || jpg.width <= 0 || jpg.height <= 0) + { + free(raw_buf); + return 1; + } + + // re-allocate via glib so caller can g_free uniformly + uint8_t *jpeg = g_try_malloc(raw_size); + if(!jpeg) + { + free(raw_buf); + return 1; + } + memcpy(jpeg, raw_buf, raw_size); + free(raw_buf); + + *out_jpeg = jpeg; + *out_jpeg_len = (int)raw_size; + *out_w = jpg.width; + *out_h = jpg.height; + return 0; +} + // convert float RGB (3ch interleaved, linear) to cairo RGB24 surface data static void _float_rgb_to_cairo(const float *const restrict src, unsigned char *const restrict dst, @@ -1055,9 +1112,32 @@ static int _process_raw_denoise_bayer(dt_neural_job_t *j, uint8_t *exif_blob = NULL; const int exif_len = dt_exif_read_blob(&exif_blob, src_path, imgid, FALSE, width, height, TRUE); + uint8_t *jpeg_buf = NULL; + int jpeg_len = 0, jpeg_w = 0, jpeg_h = 0; + dt_imageio_dng_preview_t preview = {0}; + const int prev_rc = _extract_source_jpeg_preview(src_path, &jpeg_buf, + &jpeg_len, &jpeg_w, &jpeg_h); + if(prev_rc == 0) + { + preview.data = 
jpeg_buf; + preview.len = jpeg_len; + preview.width = jpeg_w; + preview.height = jpeg_h; + dt_print(DT_DEBUG_AI, + "[neural_restore] embedded JPEG preview from source %dx%d (%d bytes)", + jpeg_w, jpeg_h, jpeg_len); + } + else + { + dt_print(DT_DEBUG_AI, + "[neural_restore] no embedded preview in source (rc=%d) — " + "writing DNG without thumbnail", prev_rc); + } res = dt_imageio_dng_write_cfa_bayer(out_filename, cfa_out, width, height, img_meta, - exif_blob, exif_len); + exif_blob, exif_len, + jpeg_buf ? &preview : NULL); + g_free(jpeg_buf); g_free(exif_blob); g_free(cfa_out); return res; @@ -1084,8 +1164,31 @@ static int _process_raw_denoise_linear(dt_neural_job_t *j, uint8_t *exif_blob = NULL; const int exif_len = dt_exif_read_blob(&exif_blob, src_path, imgid, FALSE, w, h, TRUE); + uint8_t *jpeg_buf = NULL; + int jpeg_len = 0, jpeg_w = 0, jpeg_h = 0; + dt_imageio_dng_preview_t preview = {0}; + const int prev_rc = _extract_source_jpeg_preview(src_path, &jpeg_buf, + &jpeg_len, &jpeg_w, &jpeg_h); + if(prev_rc == 0) + { + preview.data = jpeg_buf; + preview.len = jpeg_len; + preview.width = jpeg_w; + preview.height = jpeg_h; + dt_print(DT_DEBUG_AI, + "[neural_restore] embedded JPEG preview from source %dx%d (%d bytes)", + jpeg_w, jpeg_h, jpeg_len); + } + else + { + dt_print(DT_DEBUG_AI, + "[neural_restore] no embedded preview in source (rc=%d) — " + "writing DNG without thumbnail", prev_rc); + } res = dt_imageio_dng_write_linear(out_filename, rgb, w, h, img_meta, - exif_blob, exif_len); + exif_blob, exif_len, + jpeg_buf ? &preview : NULL); + g_free(jpeg_buf); g_free(exif_blob); dt_free_align(rgb); return res;