Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 60 additions & 11 deletions data/kernels/blurs.cl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
This file is part of darktable,
copyright (c) 2021 darktable developers.
copyright (c) 2021-2026 darktable developers.

darktable is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
Expand All @@ -18,13 +18,18 @@

#include "common.h"

kernel void
convolve(read_only image2d_t in, read_only image2d_t kern, write_only image2d_t out,
const int width, const int height, const int radius)
// Dense 2D convolution for lens/motion blurs.
// kern is a flat float buffer (read-only; small enough for GPU L2 cache at typical radii).
kernel void convolve(read_only image2d_t in,
__global const float *kern,
write_only image2d_t out,
const int width,
const int height,
const int radius,
const int kernel_width)
{
const int x = get_global_id(0);
const int y = get_global_id(1);

if(x >= width || y >= height) return;

float4 pix_in = read_imagef(in, samplerA, (int2)(x, y));
Expand All @@ -35,16 +40,60 @@ convolve(read_only image2d_t in, read_only image2d_t kern, write_only image2d_t
{
const int ii = clamp(y + l, 0, height - 1);
const int jj = clamp(x + m, 0, width - 1);
const float4 pix = read_imagef(in, samplerA, (int2)(jj, ii));

const int ik = l + radius;
const int jk = m + radius;
const float k = read_imagef(kern, samplerA, (int2)(jk, ik)).x;

acc += k * pix;
acc += kern[ik * kernel_width + jk] * read_imagef(in, samplerA, (int2)(jj, ii));
}

// copy alpha
acc.w = pix_in.w;
write_imagef(out, (int2)(x, y), acc);
}

// Sparse 2D convolution: instead of walking a dense (2r+1)^2 kernel, only a
// list of n_entries taps is applied to every output pixel.
//   offsets_x / offsets_y : per-tap pixel coordinate deltas relative to (x, y)
//   values                : per-tap kernel weight
// Taps that would land outside the image are clamped to the nearest edge pixel.
// The alpha channel is not convolved; it is copied from the input pixel.
// Intended as a large speed-up for motion blur (thin arc kernel, ~1-5% fill at
// large radii) and a moderate one for lens blur (circular aperture, ~78% fill).
kernel void convolve_sparse(read_only image2d_t in,
                            __global const int *offsets_x,
                            __global const int *offsets_y,
                            __global const float *values,
                            write_only image2d_t out,
                            const int width,
                            const int height,
                            const int n_entries)
{
  const int x = get_global_id(0);
  const int y = get_global_id(1);

  // work items outside the image have nothing to do
  if(x >= width || y >= height) return;

  // last valid coordinate along each axis, used to clamp the taps
  const int last_col = width - 1;
  const int last_row = height - 1;

  float4 sum = 0.f;

  for(int tap = 0; tap < n_entries; tap++)
  {
    const int src_x = clamp(x + offsets_x[tap], 0, last_col);
    const int src_y = clamp(y + offsets_y[tap], 0, last_row);
    sum += values[tap] * read_imagef(in, samplerA, (int2)(src_x, src_y));
  }

  // alpha is passed through from the unfiltered input pixel
  sum.w = read_imagef(in, samplerA, (int2)(x, y)).w;
  write_imagef(out, (int2)(x, y), sum);
}

// Writes to `out` a pixel whose x/y/z channels come from `blurred` and whose
// w (alpha) channel comes from `original`, for every pixel inside
// width x height.
// Used after dt_gaussian_blur_cl to restore the pipeline mask channel.
kernel void restore_alpha(read_only image2d_t original,
                          read_only image2d_t blurred,
                          write_only image2d_t out,
                          const int width,
                          const int height)
{
  const int x = get_global_id(0);
  const int y = get_global_id(1);

  // work items outside the image have nothing to do
  if(x >= width || y >= height) return;

  const int2 pos = (int2)(x, y);

  // start from the blurred pixel, then overwrite only its alpha
  float4 result = read_imagef(blurred, samplerA, pos);
  result.w = read_imagef(original, samplerA, pos).w;

  write_imagef(out, pos, result);
}
Loading