diff --git a/data/CMakeLists.txt b/data/CMakeLists.txt
index 08aa89aad91b..d99e79ab5e8e 100644
--- a/data/CMakeLists.txt
+++ b/data/CMakeLists.txt
@@ -5,6 +5,10 @@ if(USE_OPENCL)
add_subdirectory(kernels ${DARKTABLE_DATADIR}/kernels)
endif(USE_OPENCL)
+if(APPLE)
+ add_subdirectory(metal ${DARKTABLE_DATADIR}/metal)
+endif(APPLE)
+
FILE(GLOB THEME_FILES "themes/*.css")
FILE(COPY ${THEME_FILES} DESTINATION "${DARKTABLE_DATADIR}/themes")
install(FILES ${THEME_FILES} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/darktable/themes COMPONENT DTApplication)
diff --git a/data/metal/CMakeLists.txt b/data/metal/CMakeLists.txt
new file mode 100644
index 000000000000..8b67a5aa7a8f
--- /dev/null
+++ b/data/metal/CMakeLists.txt
@@ -0,0 +1,73 @@
+#
+# compile macOS metal kernel source files
+#
+
+FILE(GLOB DT_METAL_KERNEL_SOURCES "*.metal")
+
+set(DT_METALLIB "darktable.metallib")
+set(DT_METALLIB_TOUCH "${DT_METALLIB}.touch")
+
+set(KERNELS "")
+set(KERNEL_TOUCHES "")
+
+add_custom_target(metal_kernels ALL)
+add_custom_target(dt_metallib ALL)
+
+# compile_metal_kernel(<path to .metal>): compile one shader to <name>.air (relative to the
+# command's working dir) and append <name>.air / its stamp to KERNELS / KERNEL_TOUCHES.
+macro (compile_metal_kernel IN)
+  get_filename_component(KERNAME ${IN} NAME)
+  get_filename_component(KERNAME_WLE ${IN} NAME_WLE)
+  set(KERNAME_OUT "${KERNAME_WLE}.air")
+  set(TOUCH "${CMAKE_CURRENT_BINARY_DIR}/${KERNAME}.touch")
+
+  add_custom_command(
+    OUTPUT ${TOUCH}
+    COMMAND xcrun -sdk macosx metal -c ${IN} -o ${KERNAME_OUT}
+    COMMAND ${CMAKE_COMMAND} -E touch ${TOUCH} # empty stamp; only reached when the compile above succeeded
+    DEPENDS ${IN}
+    COMMENT "Compiling metal kernel ${IN}"
+    VERBATIM
+  )
+
+  add_custom_target(
+    ${KERNAME_OUT}
+    DEPENDS ${TOUCH} # the (empty) stamp file is the tracked output of the compile rule
+    DEPENDS ${IN}
+  )
+
+  add_dependencies(metal_kernels ${KERNAME_OUT})
+  add_dependencies(dt_metallib ${KERNAME_OUT})
+
+  list(APPEND KERNELS ${KERNAME_OUT})
+  list(APPEND KERNEL_TOUCHES ${TOUCH})
+endmacro (compile_metal_kernel)
+
+foreach(KERNEL IN ITEMS ${DT_METAL_KERNEL_SOURCES})
+ compile_metal_kernel(${KERNEL})
+endforeach()
+
+add_custom_command(
+ OUTPUT ${DT_METALLIB_TOUCH}
+ DEPENDS ${KERNEL_TOUCHES}
+ COMMAND xcrun -sdk macosx metallib -o ${DT_METALLIB} ${KERNELS}
+ COMMENT "Building ${DT_METALLIB}"
+ COMMAND ${CMAKE_COMMAND} -E touch ${DT_METALLIB_TOUCH}
+)
+
+add_custom_target(
+ dt_metallib_touch ALL
+ DEPENDS ${DT_METALLIB_TOUCH}
+ DEPENDS metal_kernels
+)
+
+add_dependencies(dt_metallib dt_metallib_touch)
+
+set(METAL_INSTALL_DIR "${CMAKE_INSTALL_DATAROOTDIR}/darktable/metal")
+set(METAL_INSTALL_FILES "")
+foreach(K IN LISTS KERNELS)
+  list(APPEND METAL_INSTALL_FILES "${CMAKE_CURRENT_BINARY_DIR}/${K}")
+endforeach()
+list(APPEND METAL_INSTALL_FILES "${CMAKE_CURRENT_BINARY_DIR}/${DT_METALLIB}")
+# install under the DTApplication component, matching the theme files in data/CMakeLists.txt
+install(FILES ${METAL_INSTALL_FILES} DESTINATION ${METAL_INSTALL_DIR} COMPONENT DTApplication)
diff --git a/data/metal/diffuse.metal b/data/metal/diffuse.metal
new file mode 100644
index 000000000000..4d84c1aa6670
--- /dev/null
+++ b/data/metal/diffuse.metal
@@ -0,0 +1,436 @@
+/*
+ This file is part of darktable,
+ Copyright (C) 2026 darktable developers.
+
+ darktable is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ darktable is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with darktable. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <metal_stdlib>
+using namespace metal;
+
+/* ── B-spline wavelet kernels ────────────────────────────────────── */
+
+// B-spline filter: 5-tap {1/16, 4/16, 6/16, 4/16, 1/16}
+#define FSIZE 5
+#define FSTART 2
+
+constant float4 bspline_filter[FSIZE] = {
+ float4(1.0f / 16.0f),
+ float4(4.0f / 16.0f),
+ float4(6.0f / 16.0f),
+ float4(4.0f / 16.0f),
+ float4(1.0f / 16.0f)
+};
+
+kernel void
+blur_2D_Bspline_vertical(texture2d<float, access::read> input [[texture(0)]],
+                         texture2d<float, access::write> output [[texture(1)]],
+                         constant int &width [[buffer(0)]],
+                         constant int &height [[buffer(1)]],
+                         constant int &mult [[buffer(2)]],
+                         uint2 gid [[thread_position_in_grid]])
+{
+  const int x = gid.x;
+  const int y = gid.y;
+  if(x >= width || y >= height) return;
+  // column pass: FSIZE taps spaced `mult` rows apart, row index clamped to the image
+  float4 acc = float4(0.0f);
+  for(int jj = 0; jj < FSIZE; ++jj)
+  {
+    const int yy = clamp(mult * (jj - FSTART) + y, 0, height - 1);
+    acc += bspline_filter[jj] * input.read(uint2(x, yy));
+  }
+  // negative results are floored at 0 before being stored
+  output.write(max(acc, float4(0.0f)), uint2(x, y));
+}
+
+kernel void
+blur_2D_Bspline_horizontal(texture2d<float, access::read> input [[texture(0)]],
+                           texture2d<float, access::write> output [[texture(1)]],
+                           constant int &width [[buffer(0)]],
+                           constant int &height [[buffer(1)]],
+                           constant int &mult [[buffer(2)]],
+                           uint2 gid [[thread_position_in_grid]])
+{
+  const int x = gid.x;
+  const int y = gid.y;
+  if(x >= width || y >= height) return;
+  // row pass: FSIZE taps spaced `mult` columns apart, column index clamped to the image
+  float4 acc = float4(0.0f);
+  for(int ii = 0; ii < FSIZE; ++ii)
+  {
+    const int xx = clamp(mult * (ii - FSTART) + x, 0, width - 1);
+    acc += bspline_filter[ii] * input.read(uint2(xx, y));
+  }
+  // negative results are floored at 0 before being stored
+  output.write(max(acc, float4(0.0f)), uint2(x, y));
+}
+
+kernel void
+wavelets_detail_level(texture2d<float, access::read> detail [[texture(0)]],
+                      texture2d<float, access::read> LF [[texture(1)]],
+                      texture2d<float, access::write> HF [[texture(2)]],
+                      constant int &width [[buffer(0)]],
+                      constant int &height [[buffer(1)]],
+                      uint2 gid [[thread_position_in_grid]])
+{
+  const int x = gid.x;
+  const int y = gid.y;
+  if(x >= width || y >= height) return;
+  // high-frequency layer = detail - low-frequency layer, per pixel
+  const uint2 pos = uint2(x, y);
+  HF.write(detail.read(pos) - LF.read(pos), pos);
+}
+
+
+/* ── Diffuse/sharpen kernels ─────────────────────────────────────── */
+
+// Discretization parameters for the PDE solver
+#define H_STEP 1
+#define KAPPA 0.25f
+
+// Normalization scaling of the wavelet to approximate a laplacian
+#define B_SPLINE_TO_LAPLACIAN 3.182727439285017f
+#define B_SPLINE_TO_LAPLACIAN_2 10.129753952777762f
+
+// Isotropy types matching dt_isotropy_t
+#define DT_ISOTROPY_ISOTROPE 0
+#define DT_ISOTROPY_ISOPHOTE 1
+#define DT_ISOTROPY_GRADIENT 2
+
+static inline float4 sqf(float4 v) { return v * v; }
+
+static inline void find_gradient(thread const float4 pixels[9], thread float4 xy[2])
+{
+ xy[0] = (pixels[7] - pixels[1]) / 2.0f;
+ xy[1] = (pixels[5] - pixels[3]) / 2.0f;
+}
+
+static inline void find_laplacian(thread const float4 pixels[9], thread float4 xy[2])
+{
+ xy[0] = (pixels[7] + pixels[1]) - 2.0f * pixels[4];
+ xy[1] = (pixels[5] + pixels[3]) - 2.0f * pixels[4];
+}
+
+static inline void rotation_matrix_isophote(float4 c2,
+ float4 cos_theta_sin_theta,
+ float4 cos_theta2, float4 sin_theta2,
+ thread float4 a[2][2])
+{
+ a[0][0] = cos_theta2 + c2 * sin_theta2;
+ a[1][1] = c2 * cos_theta2 + sin_theta2;
+ a[0][1] = a[1][0] = (c2 - 1.0f) * cos_theta_sin_theta;
+}
+
+static inline void rotation_matrix_gradient(float4 c2,
+ float4 cos_theta_sin_theta,
+ float4 cos_theta2, float4 sin_theta2,
+ thread float4 a[2][2])
+{
+ a[0][0] = c2 * cos_theta2 + sin_theta2;
+ a[1][1] = cos_theta2 + c2 * sin_theta2;
+ a[0][1] = a[1][0] = (1.0f - c2) * cos_theta_sin_theta;
+}
+
+static inline void build_matrix(thread const float4 a[2][2], thread float4 kern[9])
+{
+ const float4 b11 = a[0][1] / 2.0f;
+ const float4 b13 = -b11;
+ const float4 b22 = -2.0f * (a[0][0] + a[1][1]);
+
+ kern[0] = b11;
+ kern[1] = a[1][1];
+ kern[2] = b13;
+ kern[3] = a[0][0];
+ kern[4] = b22;
+ kern[5] = a[0][0];
+ kern[6] = b13;
+ kern[7] = a[1][1];
+ kern[8] = b11;
+}
+
+static inline void isotrope_laplacian(thread float4 kern[9])
+{
+ kern[0] = 0.25f;
+ kern[1] = 0.5f;
+ kern[2] = 0.25f;
+ kern[3] = 0.5f;
+ kern[4] = -3.0f;
+ kern[5] = 0.5f;
+ kern[6] = 0.25f;
+ kern[7] = 0.5f;
+ kern[8] = 0.25f;
+}
+
+static inline void compute_kern(float4 c2,
+ float4 cos_theta_sin_theta,
+ float4 cos_theta2, float4 sin_theta2,
+ int isotropy_type,
+ thread float4 kern[9])
+{
+ switch(isotropy_type)
+ {
+ case DT_ISOTROPY_ISOPHOTE:
+ {
+ float4 a[2][2] = { { float4(0.0f) } };
+ rotation_matrix_isophote(c2, cos_theta_sin_theta, cos_theta2, sin_theta2, a);
+ build_matrix(a, kern);
+ break;
+ }
+ case DT_ISOTROPY_GRADIENT:
+ {
+ float4 a[2][2] = { { float4(0.0f) } };
+ rotation_matrix_gradient(c2, cos_theta_sin_theta, cos_theta2, sin_theta2, a);
+ build_matrix(a, kern);
+ break;
+ }
+ case DT_ISOTROPY_ISOTROPE:
+ default:
+ {
+ isotrope_laplacian(kern);
+ break;
+ }
+ }
+}
+
+
+kernel void
+diffuse_pde(texture2d<float, access::read> HF_tex [[texture(0)]],
+            texture2d<float, access::read> LF_tex [[texture(1)]],
+            texture2d<uint, access::read> mask_tex [[texture(2)]],
+            texture2d<float, access::write> output [[texture(3)]],
+            constant int &has_mask [[buffer(0)]],
+            constant int &width [[buffer(1)]],
+            constant int &height [[buffer(2)]],
+            constant float4 &anisotropy [[buffer(3)]],
+            constant int4 &isotropy_type [[buffer(4)]],
+            constant float &regularization [[buffer(5)]],
+            constant float &variance_threshold [[buffer(6)]],
+            constant float &current_radius_square [[buffer(7)]],
+            constant int &mult [[buffer(8)]],
+            constant float4 &ABCD [[buffer(9)]],
+            constant float &strength [[buffer(10)]],
+            uint2 gid [[thread_position_in_grid]])
+{
+  const int x = gid.x;
+  const int y = gid.y;
+  if(x >= width || y >= height) return;
+  // per pixel: out = max(LF + HF * strength + sum(ABCD[i] * derivatives[i]) / variance, 0)
+  const uint2 pos = uint2(x, y);
+  const uchar opacity = has_mask ? (uchar)mask_tex.read(pos).r : 1;
+
+  const float4 regularization_factor = regularization * current_radius_square / 9.0f;
+
+  float4 out;
+
+  if(opacity)
+  {
+    // non-local neighbour coordinates
+    const int j_neighbours[3] = {
+      clamp(x - mult * H_STEP, 0, width - 1),
+      x,
+      clamp(x + mult * H_STEP, 0, width - 1)
+    };
+    const int i_neighbours[3] = {
+      clamp(y - mult * H_STEP, 0, height - 1),
+      y,
+      clamp(y + mult * H_STEP, 0, height - 1)
+    };
+
+    // fetch non-local pixels via texture reads (hardware 2D cache)
+    float4 neighbour_pixel_HF[9];
+    float4 neighbour_pixel_LF[9];
+
+    for(int ii = 0; ii < 3; ii++)
+      for(int jj = 0; jj < 3; jj++)
+      {
+        const uint2 npos = uint2(j_neighbours[ii], i_neighbours[jj]);
+        neighbour_pixel_HF[3 * ii + jj] = HF_tex.read(npos);
+        neighbour_pixel_LF[3 * ii + jj] = LF_tex.read(npos);
+      }
+
+    // build local anisotropic convolution filters
+    float4 gradient[2], laplacian[2];
+    find_gradient(neighbour_pixel_LF, gradient);
+    find_gradient(neighbour_pixel_HF, laplacian); // "laplacian" direction = gradient of the HF layer
+
+    const float4 magnitude_grad = sqrt(sqf(gradient[0]) + sqf(gradient[1]));
+    gradient[0] = select(gradient[0] / magnitude_grad, float4(1.0f), magnitude_grad == 0.0f);
+    gradient[1] = select(gradient[1] / magnitude_grad, float4(0.0f), magnitude_grad == 0.0f);
+
+    const float4 magnitude_lapl = sqrt(sqf(laplacian[0]) + sqf(laplacian[1]));
+    laplacian[0] = select(laplacian[0] / magnitude_lapl, float4(1.0f), magnitude_lapl == 0.0f);
+    laplacian[1] = select(laplacian[1] / magnitude_lapl, float4(0.0f), magnitude_lapl == 0.0f);
+
+    const float4 cos_theta_grad_sq = sqf(gradient[0]);
+    const float4 sin_theta_grad_sq = sqf(gradient[1]);
+    const float4 cos_theta_sin_theta_grad = gradient[0] * gradient[1];
+    const float4 cos_theta_lapl_sq = sqf(laplacian[0]);
+    const float4 sin_theta_lapl_sq = sqf(laplacian[1]);
+    const float4 cos_theta_sin_theta_lapl = laplacian[0] * laplacian[1];
+
+    // c² anisotropy coefficients
+    const float4 c2[4] = {
+      fast::exp(-magnitude_grad * anisotropy.x),
+      fast::exp(-magnitude_lapl * anisotropy.y),
+      fast::exp(-magnitude_grad * anisotropy.z),
+      fast::exp(-magnitude_lapl * anisotropy.w)
+    };
+
+    float4 kern_first[9], kern_second[9], kern_third[9], kern_fourth[9];
+    compute_kern(c2[0], cos_theta_sin_theta_grad, cos_theta_grad_sq, sin_theta_grad_sq, isotropy_type.x, kern_first);
+    compute_kern(c2[1], cos_theta_sin_theta_lapl, cos_theta_lapl_sq, sin_theta_lapl_sq, isotropy_type.y, kern_second);
+    compute_kern(c2[2], cos_theta_sin_theta_grad, cos_theta_grad_sq, sin_theta_grad_sq, isotropy_type.z, kern_third);
+    compute_kern(c2[3], cos_theta_sin_theta_lapl, cos_theta_lapl_sq, sin_theta_lapl_sq, isotropy_type.w, kern_fourth);
+
+    // convolve filters and compute variance + regularization
+    float4 derivatives[4] = { float4(0.0f) };
+    float4 variance = float4(0.0f);
+
+    [[unroll]] for(int k = 0; k < 9; k++)
+    {
+      derivatives[0] += kern_first[k] * neighbour_pixel_LF[k];
+      derivatives[1] += kern_second[k] * neighbour_pixel_LF[k];
+      derivatives[2] += kern_third[k] * neighbour_pixel_HF[k];
+      derivatives[3] += kern_fourth[k] * neighbour_pixel_HF[k];
+      variance += sqf(neighbour_pixel_HF[k]);
+    }
+
+    variance = variance_threshold + variance * regularization_factor;
+
+    // compute update
+    float4 acc = float4(0.0f);
+    acc += derivatives[0] * ABCD.x;
+    acc += derivatives[1] * ABCD.y;
+    acc += derivatives[2] * ABCD.z;
+    acc += derivatives[3] * ABCD.w;
+
+    float4 hf = HF_tex.read(pos);
+    acc = hf * strength + acc / variance;
+
+    float4 lf = LF_tex.read(pos);
+    out = max(acc + lf, float4(0.0f));
+  }
+  else
+  {
+    out = HF_tex.read(pos) + LF_tex.read(pos); // mask value 0: recombine the two layers unchanged
+  }
+
+  output.write(out, pos);
+}
+
+
+/* ── Mask kernels ────────────────────────────────────────────────── */
+
+kernel void
+build_mask(texture2d<float, access::read> input [[texture(0)]],
+           texture2d<uint, access::write> mask [[texture(1)]],
+           constant float &threshold [[buffer(0)]],
+           constant int &width [[buffer(1)]],
+           constant int &height [[buffer(2)]],
+           uint2 gid [[thread_position_in_grid]])
+{
+  const int x = gid.x;
+  const int y = gid.y;
+  if(x >= width || y >= height) return;
+  // mask is 1 where any of the x/y/z channels exceeds `threshold`, else 0 (w is ignored)
+  const uint2 pos = uint2(x, y);
+  const float4 pix = input.read(pos);
+  const uint val = (pix.x > threshold || pix.y > threshold || pix.z > threshold) ? 1u : 0u;
+  mask.write(uint4(val, 0, 0, 0), pos);
+}
+
+
+/* ── Noise generator helpers ─────────────────────────────────────── */
+
+static inline uint splitmix32(uint seed)
+{
+ ulong result = ((ulong)seed ^ ((ulong)seed >> 33)) * 0x62a9d9ed799705f5UL;
+ result = (result ^ (result >> 28)) * 0xcb24d0a5c88c35b3UL;
+ return (uint)(result >> 32);
+}
+
+static inline uint rol32(uint x, int k)
+{
+ return (x << k) | (x >> (32 - k));
+}
+
+static inline float xoshiro128plus(thread uint state[4])
+{
+ const uint result = state[0] + state[3];
+ const uint t = state[1] << 9;
+
+ state[2] ^= state[0];
+ state[3] ^= state[1];
+ state[1] ^= state[2];
+ state[0] ^= state[3];
+
+ state[2] ^= t;
+ state[3] = rol32(state[3], 11);
+
+ return (float)(result >> 8) * 0x1.0p-24f;
+}
+
+static inline float4 gaussian_noise_simd(float4 mu, float4 sigma, thread uint state[4])
+{
+  float4 u1 = float4(1.0f), u2 = float4(0.0f);
+  // .w is never drawn below: it must still be defined, since 0 * NaN would put NaN in lane w
+  u1.x = xoshiro128plus(state);
+  u1.y = xoshiro128plus(state);
+  u1.z = xoshiro128plus(state);
+
+  u2.x = xoshiro128plus(state);
+  u2.y = xoshiro128plus(state);
+  u2.z = xoshiro128plus(state);
+  // keep log(u1) finite
+  u1 = max(u1, float4(FLT_MIN));
+  // Box-Muller: lanes x and z take the cosine branch, lane y the sine branch, lane w gets no noise
+  const float4 flip = float4(1.0f, 0.0f, 1.0f, 0.0f);
+  const float4 flip_comp = float4(0.0f, 1.0f, 0.0f, 0.0f);
+
+  const float4 noise = flip * sqrt(-2.0f * log(u1)) * cos(2.0f * M_PI_F * u2) +
+                       flip_comp * sqrt(-2.0f * log(u1)) * sin(2.0f * M_PI_F * u2);
+  return noise * sigma + mu;
+}
+
+
+kernel void
+inpaint_mask(texture2d<float, access::write> inpainted [[texture(0)]],
+             texture2d<float, access::read> original [[texture(1)]],
+             texture2d<uint, access::read> mask [[texture(2)]],
+             constant int &width [[buffer(0)]],
+             constant int &height [[buffer(1)]],
+             uint2 gid [[thread_position_in_grid]])
+{
+  const int x = gid.x;
+  const int y = gid.y;
+  if(x >= width || y >= height) return;
+  // masked pixels are replaced by |gaussian noise| with mean and sigma both equal to the pixel
+  const uint2 pos = uint2(x, y);
+  const float4 pix_in = original.read(pos);
+  const uint m = mask.read(pos).r;
+  float4 pix_out = pix_in;
+
+  if(m)
+  {
+    uint state[4] = { splitmix32(x + 1), splitmix32((x + 1) * (y + 3)), splitmix32(1337), splitmix32(666) };
+    xoshiro128plus(state); // four draws are discarded before the generator is used
+    xoshiro128plus(state);
+    xoshiro128plus(state);
+    xoshiro128plus(state);
+    pix_out = abs(gaussian_noise_simd(pix_in, pix_in, state));
+  }
+
+  inpainted.write(pix_out, pos);
+}
diff --git a/data/metal/exposure.metal b/data/metal/exposure.metal
new file mode 100644
index 000000000000..a528a675455b
--- /dev/null
+++ b/data/metal/exposure.metal
@@ -0,0 +1,43 @@
+/*
+ This file is part of darktable,
+ Copyright (C) 2026 darktable developers.
+
+ darktable is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ darktable is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with darktable. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <metal_stdlib>
+using namespace metal;
+
+/* kernel for the exposure plugin.
+ Equivalent of the OpenCL exposure() kernel in basic.cl.
+ Operates on float4 pixel buffers (RGBA).
+ Applies: pixel.rgb = (pixel.rgb - black) * scale
+ Alpha channel is passed through unchanged. */
+
+kernel void
+exposure(device const float4 *input [[buffer(0)]],
+         device float4 *output [[buffer(1)]],
+         constant int &width [[buffer(2)]],
+         constant int &height [[buffer(3)]],
+         constant float &black [[buffer(4)]],
+         constant float &scale [[buffer(5)]],
+         uint2 gid [[thread_position_in_grid]])
+{
+  // threads outside the width x height grid do nothing
+  const bool outside = gid.x >= (uint)width || gid.y >= (uint)height;
+  if(outside) return;
+  const int offset = gid.y * width + gid.x; // row-major index into both buffers
+  const float4 src = input[offset];
+  output[offset] = float4((src.xyz - black) * scale, src.w); // w is copied through untouched
+}
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index e2d15b5b16cc..af5f5b3bd4b8 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -180,6 +180,8 @@ if(APPLE)
list(APPEND SOURCE_FILES "common/pwstorage/backend_apple_keychain.c")
list(APPEND HEADER_FILES "common/pwstorage/backend_apple_keychain.h")
list(APPEND SOURCE_FILES "main.c")
+ list(APPEND SOURCE_FILES "osx/dt_metal.cc")
+ list(APPEND HEADER_FILES "osx/dt_metal.h")
include(CheckLanguage)
check_language(OBJCXX)
if(CMAKE_OBJCXX_COMPILER)
@@ -1118,7 +1120,7 @@ endif(BUILD_TESTING)
add_executable(darktable ${SUBSYSTEM_MACOSX} ${SUBSYSTEM_WIN32} main.c ${RESOURCE_OBJECT})
set_target_properties(darktable PROPERTIES LINKER_LANGUAGE C)
if(APPLE)
- set_target_properties(lib_darktable PROPERTIES LINK_FLAGS "-framework Carbon -framework AppKit -framework Security")
+ set_target_properties(lib_darktable PROPERTIES LINK_FLAGS "-framework Carbon -framework AppKit -framework Metal -framework Security")
endif(APPLE)
target_link_libraries(darktable lib_darktable)
diff --git a/src/common/darktable.c b/src/common/darktable.c
index 12fba89b069b..ffcfa33ba2da 100644
--- a/src/common/darktable.c
+++ b/src/common/darktable.c
@@ -24,6 +24,9 @@
#ifdef __APPLE__
#include
#endif
+#if defined(__APPLE__) && defined(__aarch64__)
+#include "osx/dt_metal.h"
+#endif
#include "common/collection.h"
#include "common/colorspaces.h"
@@ -264,7 +267,7 @@ static int usage(const char *argv0)
" Enable debug output to the terminal. Valid signals are:\n\n"
" act_on, cache, camctl, camsupport, control, dev, expose,\n"
" imageio, input, ioporder, lighttable, lua, masks, memory,\n"
- " nan, opencl, params, perf, pipe, print, pwstorage, signal,\n"
+ " nan, opencl, metal, params, perf, pipe, print, pwstorage, signal,\n"
" sql, tiling, picker, undo\n"
"\n"
" all -> to debug all signals\n"
@@ -1154,6 +1157,7 @@ int dt_init(int argc,
!strcmp(darg, "expose") ? DT_DEBUG_EXPOSE :
!strcmp(darg, "picker") ? DT_DEBUG_PICKER :
!strcmp(darg, "ai") ? DT_DEBUG_AI : // AI related stuff.
+ !strcmp(darg, "metal") ? DT_DEBUG_METAL : // macOS metal
0;
if(dadd)
darktable.unmuted |= dadd;
@@ -1880,6 +1884,11 @@ int dt_init(int argc,
else
dt_opencl_init(darktable.opencl, exclude_opencl, print_statistics);
+#if defined(__APPLE__) && defined(__aarch64__)
+ darktable.metal = (dt_metal_t *)calloc(1, sizeof(dt_metal_t));
+ dt_metal_init(darktable.metal);
+#endif
+
darktable.points = (dt_points_t *)calloc(1, sizeof(dt_points_t));
dt_points_init(darktable.points, dt_get_num_threads());
@@ -2237,6 +2246,14 @@ void dt_cleanup()
dt_opencl_cleanup(darktable.opencl);
free(darktable.opencl);
darktable.opencl = NULL;
+#if defined(__APPLE__) && defined(__aarch64__)
+ if(darktable.metal)
+ {
+ dt_metal_cleanup(darktable.metal);
+ free(darktable.metal);
+ darktable.metal = NULL;
+ }
+#endif
#ifdef HAVE_GPHOTO2
dt_camctl_destroy((dt_camctl_t *)darktable.camctl);
darktable.camctl = NULL;
diff --git a/src/common/darktable.h b/src/common/darktable.h
index 8b7d8d437957..c2f73da3c32d 100644
--- a/src/common/darktable.h
+++ b/src/common/darktable.h
@@ -1,6 +1,6 @@
/*
This file is part of darktable,
- Copyright (C) 2009-2024 darktable developers.
+ Copyright (C) 2009-2026 darktable developers.
darktable is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -67,6 +67,9 @@
#include
#include
#endif
+#if defined(__APPLE__) && defined(__aarch64__)
+#include
+#endif
#if defined(__DragonFly__) || defined(__FreeBSD__)
typedef unsigned int u_int;
@@ -364,6 +367,7 @@ typedef enum dt_debug_thread_t
DT_DEBUG_EXPOSE = 1 << 26,
DT_DEBUG_PICKER = 1 << 27,
DT_DEBUG_AI = 1 << 28,
+ DT_DEBUG_METAL = 1 << 29,
DT_DEBUG_ALL = 0xffffffff & ~DT_DEBUG_VERBOSE,
DT_DEBUG_COMMON = DT_DEBUG_OPENCL | DT_DEBUG_DEV | DT_DEBUG_MASKS | DT_DEBUG_PARAMS | DT_DEBUG_IMAGEIO | DT_DEBUG_PIPE,
DT_DEBUG_RESTRICT = DT_DEBUG_VERBOSE | DT_DEBUG_PERF,
@@ -438,6 +442,9 @@ typedef struct darktable_t
struct dt_points_t *points;
struct dt_imageio_t *imageio;
struct dt_opencl_t *opencl;
+
+ struct dt_metal_t *metal;
+
struct dt_dbus_t *dbus;
struct dt_undo_t *undo;
struct dt_colorspaces_t *color_profiles;
diff --git a/src/develop/pixelpipe_hb.c b/src/develop/pixelpipe_hb.c
index a8ebe3df1b02..9d5831601491 100644
--- a/src/develop/pixelpipe_hb.c
+++ b/src/develop/pixelpipe_hb.c
@@ -37,6 +37,10 @@
#include "gui/color_picker_proxy.h"
#include "imageio/imageio_rawspeed.h" // for dt_rawspeed_crop_dcraw_filters
+#if defined(__APPLE__) && defined(__aarch64__)
+#include "osx/dt_metal.h"
+#endif
+
#include
#include
#include
@@ -1261,71 +1265,66 @@ static inline gboolean _piece_wants_blending(const dt_dev_pixelpipe_iop_t *piece
return TRUE;
}
-static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
- dt_develop_t *dev,
- float *input,
- dt_iop_buffer_dsc_t *input_format,
- const dt_iop_roi_t *roi_in,
- void **output,
- dt_iop_buffer_dsc_t **out_format,
- const dt_iop_roi_t *roi_out,
- dt_iop_module_t *module,
- dt_dev_pixelpipe_iop_t *piece,
- dt_develop_tiling_t *tiling,
- dt_pixelpipe_flow_t *pixelpipe_flow,
- const int position)
+/* Shared context between pre/post processing helpers; every field is filled by _pixelpipe_pre_process() */
+typedef struct _pixelpipe_process_ctx_t
+{
+ const dt_iop_order_iccprofile_info_t *work_profile; // pipe work profile, NULL when the input cst is IOP_CS_RAW
+ int cst_from, cst_to, cst_out; // input_format->cst on entry, module input colorspace, module output colorspace
+ size_t in_bpp, bpp, nfloats; // bytes per pixel of input / output format; floats in the roi_out-sized output
+ size_t m_bpp, m_width, m_height; // MAX of the input and output values, used for the host-memory fit check
+ gboolean relevant, bcaching, pfm_dump; // _piece_fast_blend() result; blend cache matches phash; PFM dumps enabled
+ dt_hash_t phash; // _piece_process_hash() when relevant, otherwise DT_INVALID_HASH
+} _pixelpipe_process_ctx_t;
+
+/* Shared preamble: colorspace transform, histogram, cache setup.
+ Returns TRUE if pipeline shutdown was requested. */
+static gboolean _pixelpipe_pre_process(dt_dev_pixelpipe_t *pipe,
+ dt_develop_t *dev,
+ float *input,
+ dt_iop_buffer_dsc_t *input_format,
+ const dt_iop_roi_t *roi_in,
+ void **output,
+ dt_iop_buffer_dsc_t **out_format,
+ const dt_iop_roi_t *roi_out,
+ dt_iop_module_t *module,
+ dt_dev_pixelpipe_iop_t *piece,
+ dt_pixelpipe_flow_t *pixelpipe_flow,
+ const int position,
+ _pixelpipe_process_ctx_t *ctx)
{
- if(dt_pipe_shutdown(pipe))
- return TRUE;
-
- // the data buffers must always have an alignment to DT_CACHELINE_BYTES
- if(!dt_check_aligned(input) || !dt_check_aligned(*output))
- {
- dt_print_pipe(DT_DEBUG_ALWAYS,
- "fatal process alignment",
- pipe, module, DT_DEVICE_NONE, roi_in, roi_out,
- "non-aligned buffers IN=%p OUT=%p",
- input, *output);
-
- dt_control_log(_("fatal pixelpipe abort due to non-aligned buffers\n"
- "in module '%s'\nplease report on GitHub"),
- module->op);
- // this is a fundamental problem with severe problems ahead so good to finish
- // the pipe as if good to avoid reprocessing in an endless loop.
- return FALSE;
- }
-
// Fetch RGB working profile
// if input is RAW, we can't color convert because RAW is not in a color space
// so we send NULL to by-pass
- const dt_iop_order_iccprofile_info_t *const work_profile =
+ ctx->work_profile =
(input_format->cst != IOP_CS_RAW)
? dt_ioppr_get_pipe_work_profile_info(pipe)
: NULL;
- const int cst_from = input_format->cst;
- const int cst_to = module->input_colorspace(module, pipe, piece);
- const int cst_out = module->output_colorspace(module, pipe, piece);
+ ctx->cst_from = input_format->cst;
+ ctx->cst_to = module->input_colorspace(module, pipe, piece);
+ ctx->cst_out = module->output_colorspace(module, pipe, piece);
dt_dev_prepare_piece_cfa(piece, roi_in);
- if(cst_from != cst_to)
+ if(ctx->cst_from != ctx->cst_to)
{
dt_print_pipe(DT_DEBUG_PIPE,
"transform colorspace",
pipe, module, DT_DEVICE_CPU, roi_in, NULL, "%s -> %s `%s'",
- dt_iop_colorspace_to_name(cst_from),
- dt_iop_colorspace_to_name(cst_to),
- work_profile
- ? dt_colorspaces_get_name(work_profile->type, work_profile->filename)
+ dt_iop_colorspace_to_name(ctx->cst_from),
+ dt_iop_colorspace_to_name(ctx->cst_to),
+ ctx->work_profile
+ ? dt_colorspaces_get_name(ctx->work_profile->type,
+ ctx->work_profile->filename)
: "no work profile");
}
// transform to module input colorspace
dt_ioppr_transform_image_colorspace(module, input, input,
roi_in->width, roi_in->height,
- cst_from, cst_to, &input_format->cst,
- work_profile);
+ ctx->cst_from, ctx->cst_to,
+ &input_format->cst,
+ ctx->work_profile);
if(dt_pipe_shutdown(pipe))
return TRUE;
@@ -1335,57 +1334,200 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
if(dt_pipe_shutdown(pipe))
return TRUE;
- const size_t in_bpp = dt_iop_buffer_dsc_to_bpp(input_format);
- const size_t bpp = dt_iop_buffer_dsc_to_bpp(*out_format);
- const size_t m_bpp = MAX(in_bpp, bpp);
- const size_t m_width = MAX(roi_in->width, roi_out->width);
- const size_t m_height = MAX(roi_in->height, roi_out->height);
-
- const gboolean fitting = dt_tiling_piece_fits_host_memory(piece, m_width, m_height, m_bpp,
- tiling->factor,
- tiling->overhead);
- /* process module on cpu. use tiling if needed and possible. */
+ ctx->in_bpp = dt_iop_buffer_dsc_to_bpp(input_format);
+ ctx->bpp = dt_iop_buffer_dsc_to_bpp(*out_format);
+ ctx->m_bpp = MAX(ctx->in_bpp, ctx->bpp);
+ ctx->m_width = MAX(roi_in->width, roi_out->width);
+ ctx->m_height = MAX(roi_in->height, roi_out->height);
- const gboolean pfm_dump = darktable.dump_pfm_pipe
+ ctx->pfm_dump = darktable.dump_pfm_pipe
&& (pipe->type & (DT_DEV_PIXELPIPE_FULL | DT_DEV_PIXELPIPE_EXPORT));
- if(pfm_dump)
+ if(ctx->pfm_dump)
dt_dump_pipe_pfm(module->op, input,
- roi_in->width, roi_in->height, in_bpp,
+ roi_in->width, roi_in->height, ctx->in_bpp,
TRUE, dt_dev_pixelpipe_type_to_str(pipe->type));
- const size_t nfloats = bpp * roi_out->width * roi_out->height / sizeof(float);
- const gboolean relevant = _piece_fast_blend(piece, module);
- const dt_hash_t phash = relevant
+ ctx->nfloats = ctx->bpp * roi_out->width * roi_out->height / sizeof(float);
+ ctx->relevant = _piece_fast_blend(piece, module);
+ ctx->phash = ctx->relevant
? _piece_process_hash(piece, roi_out, module, position)
: DT_INVALID_HASH;
- const gboolean bcaching = relevant
- ? pipe->bcache_data && phash == pipe->bcache_hash && phash != DT_INVALID_HASH
+ ctx->bcaching = ctx->relevant
+ ? pipe->bcache_data && ctx->phash == pipe->bcache_hash
+ && ctx->phash != DT_INVALID_HASH
: FALSE;
+ return FALSE;
+}
+
+/* Shared epilogue, run once the module has written *output: optional PFM dump, storing the module's output colorspace in pipe->dsc.cst, color picking, conversion of input and output to the blend colorspace, and CPU blending.
+ Returns TRUE if _module_pipe_stop() reports a stop or pipeline shutdown was requested at any checkpoint. */
+static gboolean _pixelpipe_post_process(dt_dev_pixelpipe_t *pipe,
+ dt_develop_t *dev,
+ float *input,
+ dt_iop_buffer_dsc_t *input_format,
+ const dt_iop_roi_t *roi_in,
+ void **output,
+ dt_iop_buffer_dsc_t **out_format, // not referenced in this helper
+ const dt_iop_roi_t *roi_out,
+ dt_iop_module_t *module,
+ dt_dev_pixelpipe_iop_t *piece,
+ dt_pixelpipe_flow_t *pixelpipe_flow,
+ const _pixelpipe_process_ctx_t *ctx)
+{
+ if(_module_pipe_stop(pipe, input))
+ return TRUE;
+
+ if(ctx->pfm_dump)
+ {
+ dt_dump_pipe_pfm(module->op, *output,
+ roi_out->width, roi_out->height, ctx->bpp,
+ FALSE, dt_dev_pixelpipe_type_to_str(pipe->type));
+ _dump_pipe_pfm_diff(module->op, input, roi_in, ctx->in_bpp, *output, roi_out, ctx->bpp,
+ dt_dev_pixelpipe_type_to_str(pipe->type));
+ }
+
+ // record the colorspace the module produced as the pipe's current colorspace
+ pipe->dsc.cst = module->output_colorspace(module, pipe, piece);
+
+ if(dt_pipe_shutdown(pipe))
+ return TRUE;
+
+ dt_iop_colorspace_type_t blend_cst = dt_develop_blend_colorspace(piece, pipe->dsc.cst);
+ const gboolean blend_picking = _request_color_pick(pipe, dev, module)
+ && _transform_for_blend(module, piece)
+ && blend_cst != ctx->cst_to;
+ // color picking for module; when blend_picking is set it is deferred until after the blend transform below
+ if(_request_color_pick(pipe, dev, module) && !blend_picking)
+ {
+ _pixelpipe_picker(module, piece, &piece->dsc_in, (float *)input, roi_in,
+ module->picked_color,
+ module->picked_color_min,
+ module->picked_color_max,
+ input_format->cst, PIXELPIPE_PICKER_INPUT);
+
+ _pixelpipe_picker(module, piece, &pipe->dsc, (float *)(*output), roi_out,
+ module->picked_output_color,
+ module->picked_output_color_min,
+ module->picked_output_color_max,
+ pipe->dsc.cst, PIXELPIPE_PICKER_OUTPUT);
+
+ DT_CONTROL_SIGNAL_RAISE(DT_SIGNAL_CONTROL_PICKERDATA_READY, module, pipe);
+ }
+
+ if(dt_pipe_shutdown(pipe))
+ return TRUE;
+
+ // blend needs input/output images with default colorspace; both buffers are converted in place to blend_cst
+ if(_transform_for_blend(module, piece))
+ {
+ dt_ioppr_transform_image_colorspace(module, input, input,
+ roi_in->width, roi_in->height,
+ input_format->cst, blend_cst, &input_format->cst,
+ ctx->work_profile);
+ dt_ioppr_transform_image_colorspace(module, *output, *output,
+ roi_out->width, roi_out->height,
+ pipe->dsc.cst, blend_cst, &pipe->dsc.cst,
+ ctx->work_profile);
+ if(blend_picking)
+ {
+ _pixelpipe_picker(module, piece, &piece->dsc_in, (float *)input, roi_in,
+ module->picked_color,
+ module->picked_color_min,
+ module->picked_color_max,
+ blend_cst, PIXELPIPE_PICKER_INPUT);
+
+ _pixelpipe_picker(module, piece, &pipe->dsc, (float *)(*output), roi_out,
+ module->picked_output_color,
+ module->picked_output_color_min,
+ module->picked_output_color_max,
+ blend_cst, PIXELPIPE_PICKER_OUTPUT);
+ DT_CONTROL_SIGNAL_RAISE(DT_SIGNAL_CONTROL_PICKERDATA_READY, module, pipe);
+ }
+ }
+
+ if(dt_pipe_shutdown(pipe))
+ return TRUE;
+
+ /* process blending on CPU and record that in the flow flags */
+ if(_piece_wants_blending(piece))
+ {
+ dt_develop_blend_process(module, piece, input, *output, roi_in, roi_out);
+ *pixelpipe_flow |= PIXELPIPE_FLOW_BLENDED_ON_CPU;
+ *pixelpipe_flow &= ~PIXELPIPE_FLOW_BLENDED_ON_GPU;
+ }
+
+ return dt_pipe_shutdown(pipe);
+}
+
+static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
+ dt_develop_t *dev,
+ float *input,
+ dt_iop_buffer_dsc_t *input_format,
+ const dt_iop_roi_t *roi_in,
+ void **output,
+ dt_iop_buffer_dsc_t **out_format,
+ const dt_iop_roi_t *roi_out,
+ dt_iop_module_t *module,
+ dt_dev_pixelpipe_iop_t *piece,
+ dt_develop_tiling_t *tiling,
+ dt_pixelpipe_flow_t *pixelpipe_flow,
+ const int position)
+{
+ if(dt_pipe_shutdown(pipe))
+ return TRUE;
+
+ // the data buffers must always have an alignment to DT_CACHELINE_BYTES
+ if(!dt_check_aligned(input) || !dt_check_aligned(*output))
+ {
+ dt_print_pipe(DT_DEBUG_ALWAYS,
+ "fatal process alignment",
+ pipe, module, DT_DEVICE_NONE, roi_in, roi_out,
+ "non-aligned buffers IN=%p OUT=%p",
+ input, *output);
+
+ dt_control_log(_("fatal pixelpipe abort due to non-aligned buffers\n"
+ "in module '%s'\nplease report on GitHub"),
+ module->op);
+ // this is a fundamental problem with severe problems ahead so good to finish
+ // the pipe as if good to avoid reprocessing in an endless loop.
+ return FALSE;
+ }
+
+ _pixelpipe_process_ctx_t ctx;
+ if(_pixelpipe_pre_process(pipe, dev, input, input_format, roi_in, output, out_format,
+ roi_out, module, piece, pixelpipe_flow, position, &ctx))
+ return TRUE;
+
+ const gboolean fitting = dt_tiling_piece_fits_host_memory(piece, ctx.m_width, ctx.m_height, ctx.m_bpp,
+ tiling->factor,
+ tiling->overhead);
+ /* process module on cpu. use tiling if needed and possible. */
+
if(!fitting && _piece_may_tile(piece))
{
dt_print_pipe(DT_DEBUG_PIPE,
- bcaching ? "from blend cache tile" : "process tiles",
+ ctx.bcaching ? "from blend cache tile" : "process tiles",
pipe, module, DT_DEVICE_CPU, roi_in, roi_out, "%s%s%s",
- dt_iop_colorspace_to_name(cst_to),
- cst_to != cst_out ? " -> " : "",
- cst_to != cst_out ? dt_iop_colorspace_to_name(cst_out) : "");
+ dt_iop_colorspace_to_name(ctx.cst_to),
+ ctx.cst_to != ctx.cst_out ? " -> " : "",
+ ctx.cst_to != ctx.cst_out ? dt_iop_colorspace_to_name(ctx.cst_out) : "");
- if(bcaching)
+ if(ctx.bcaching)
{
- dt_iop_image_copy(*output, pipe->bcache_data, nfloats);
+ dt_iop_image_copy(*output, pipe->bcache_data, ctx.nfloats);
}
else
{
- module->process_tiling(module, piece, input, *output, roi_in, roi_out, in_bpp);
- if(relevant)
+ module->process_tiling(module, piece, input, *output, roi_in, roi_out, ctx.in_bpp);
+ if(ctx.relevant)
{
if(pipe->mask_display == DT_DEV_PIXELPIPE_DISPLAY_NONE
&& !dt_pipe_shutdown(pipe))
{
- float *cache = _get_fast_blendcache(nfloats, phash, pipe);
- if(cache) dt_iop_image_copy(cache, *output, nfloats);
+ float *cache = _get_fast_blendcache(ctx.nfloats, ctx.phash, pipe);
+ if(cache) dt_iop_image_copy(cache, *output, ctx.nfloats);
}
else
pipe->bcache_hash = DT_INVALID_HASH;
@@ -1398,15 +1540,15 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
else
{
dt_print_pipe(DT_DEBUG_PIPE,
- bcaching ? "from blend cache" : "process",
+ ctx.bcaching ? "from blend cache" : "process",
pipe, module, DT_DEVICE_CPU, roi_in, roi_out, "%s%s%s%s %.fMB",
- dt_iop_colorspace_to_name(cst_to),
- cst_to != cst_out ? " -> " : "",
- cst_to != cst_out ? dt_iop_colorspace_to_name(cst_out) : "",
+ dt_iop_colorspace_to_name(ctx.cst_to),
+ ctx.cst_to != ctx.cst_out ? " -> " : "",
+ ctx.cst_to != ctx.cst_out ? dt_iop_colorspace_to_name(ctx.cst_out) : "",
(fitting)
? ""
: " Warning: processed without tiling even if memory requirements are not met",
- 1e-6 * (tiling->factor * (m_width * m_height * m_bpp) + tiling->overhead));
+ 1e-6 * (tiling->factor * (ctx.m_width * ctx.m_height * ctx.m_bpp) + tiling->overhead));
// this code section is for simplistic benchmarking via --bench-module
if((pipe->type & (DT_DEV_PIXELPIPE_FULL | DT_DEV_PIXELPIPE_EXPORT))
@@ -1438,20 +1580,20 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
}
}
- if(bcaching)
+ if(ctx.bcaching)
{
- dt_iop_image_copy(*output, pipe->bcache_data, nfloats);
+ dt_iop_image_copy(*output, pipe->bcache_data, ctx.nfloats);
}
else
{
module->process(module, piece, input, *output, roi_in, roi_out);
- if(relevant)
+ if(ctx.relevant)
{
if(pipe->mask_display == DT_DEV_PIXELPIPE_DISPLAY_NONE
&& !dt_pipe_shutdown(pipe))
{
- float *cache = _get_fast_blendcache(nfloats, phash, pipe);
- if(cache) dt_iop_image_copy(cache, *output, nfloats);
+ float *cache = _get_fast_blendcache(ctx.nfloats, ctx.phash, pipe);
+ if(cache) dt_iop_image_copy(cache, *output, ctx.nfloats);
}
else
pipe->bcache_hash = DT_INVALID_HASH;
@@ -1460,93 +1602,173 @@ static gboolean _pixelpipe_process_on_CPU(dt_dev_pixelpipe_t *pipe,
*pixelpipe_flow |= (PIXELPIPE_FLOW_PROCESSED_ON_CPU);
*pixelpipe_flow &= ~(PIXELPIPE_FLOW_PROCESSED_ON_GPU
- | PIXELPIPE_FLOW_PROCESSED_WITH_TILING);
+ | PIXELPIPE_FLOW_PROCESSED_WITH_TILING);
}
- if(_module_pipe_stop(pipe, input))
- return TRUE;
-
- if(pfm_dump)
- {
- dt_dump_pipe_pfm(module->op, *output,
- roi_out->width, roi_out->height, bpp,
- FALSE, dt_dev_pixelpipe_type_to_str(pipe->type));
- _dump_pipe_pfm_diff(module->op, input, roi_in, in_bpp, *output, roi_out, bpp,
- dt_dev_pixelpipe_type_to_str(pipe->type));
- }
+ return _pixelpipe_post_process(pipe, dev, input, input_format, roi_in, output, out_format,
+ roi_out, module, piece, pixelpipe_flow, &ctx);
+}
- // and save the output colorspace
- pipe->dsc.cst = module->output_colorspace(module, pipe, piece);
+#if defined(__APPLE__) && defined(__aarch64__)
+/* Try to process the module using Metal compute (whole image, no tiling).
+ Returns:
+ 1 = Metal processing succeeded (output is valid, all post-processing done)
+ 0 = Metal failed or not applicable (caller should try OpenCL/CPU)
+ -1 = Pipeline shutdown, or _pixelpipe_pre_process() / _pixelpipe_post_process()
+ returned TRUE (caller should return TRUE)
+ Note: On failure, input colorspace conversion may have been performed,
+ which is fine since subsequent OpenCL/CPU paths handle already-converted input. */
+static int _pixelpipe_try_metal(dt_dev_pixelpipe_t *pipe,
+ dt_develop_t *dev,
+ float *input,
+ dt_iop_buffer_dsc_t *input_format,
+ const dt_iop_roi_t *roi_in,
+ void **output,
+ dt_iop_buffer_dsc_t **out_format,
+ const dt_iop_roi_t *roi_out,
+ dt_iop_module_t *module,
+ dt_dev_pixelpipe_iop_t *piece,
+ dt_pixelpipe_flow_t *pixelpipe_flow,
+ const int position)
+{
+ /* not applicable: the module has no process_metal() or there is no usable Metal context */
+ if(!module->process_metal
+ || !darktable.metal
+ || !dt_metal_is_available(darktable.metal))
+ return 0;
if(dt_pipe_shutdown(pipe))
- return TRUE;
+ return -1;
- dt_iop_colorspace_type_t blend_cst = dt_develop_blend_colorspace(piece, pipe->dsc.cst);
- const gboolean blend_picking = _request_color_pick(pipe, dev, module)
- && _transform_for_blend(module, piece)
- && blend_cst != cst_to;
- // color picking for module
- if(_request_color_pick(pipe, dev, module) && !blend_picking)
- {
- _pixelpipe_picker(module, piece, &piece->dsc_in, (float *)input, roi_in,
- module->picked_color,
- module->picked_color_min,
- module->picked_color_max,
- input_format->cst, PIXELPIPE_PICKER_INPUT);
+ /* ctx is filled in by _pixelpipe_pre_process(); a TRUE result is reported as -1 */
+ _pixelpipe_process_ctx_t ctx;
+ if(_pixelpipe_pre_process(pipe, dev, input, input_format, roi_in, output, out_format,
+ roi_out, module, piece, pixelpipe_flow, position, &ctx))
+ return -1;
- _pixelpipe_picker(module, piece, &pipe->dsc, (float *)(*output), roi_out,
- module->picked_output_color,
- module->picked_output_color_min,
- module->picked_output_color_max,
- pipe->dsc.cst, PIXELPIPE_PICKER_OUTPUT);
+ /* NOTE(review): the log line passes DT_DEVICE_CPU as the device although this is
+ the Metal path - confirm this is intended */
+ dt_print_pipe(DT_DEBUG_PIPE | DT_DEBUG_METAL,
+ ctx.bcaching ? "from blend cache (Metal)" : "process (Metal)",
+ pipe, module, DT_DEVICE_CPU, roi_in, roi_out, "%s%s%s %.fMB",
+ dt_iop_colorspace_to_name(ctx.cst_to),
+ ctx.cst_to != ctx.cst_out ? " -> " : "",
+ ctx.cst_to != ctx.cst_out ? dt_iop_colorspace_to_name(ctx.cst_out) : "",
+ 1e-6 * (2.0f * (ctx.m_width * ctx.m_height * ctx.m_bpp)));
- DT_CONTROL_SIGNAL_RAISE(DT_SIGNAL_CONTROL_PICKERDATA_READY, module, pipe);
+ if(ctx.bcaching)
+ {
+ /* take the output from pipe->bcache_data instead of running the module */
+ dt_iop_image_copy(*output, pipe->bcache_data, ctx.nfloats);
}
-
- if(dt_pipe_shutdown(pipe))
- return TRUE;
-
- // blend needs input/output images with default colorspace
- if(_transform_for_blend(module, piece))
+ else
{
- dt_ioppr_transform_image_colorspace(module, input, input,
- roi_in->width, roi_in->height,
- input_format->cst, blend_cst, &input_format->cst,
- work_profile);
- dt_ioppr_transform_image_colorspace(module, *output, *output,
- roi_out->width, roi_out->height,
- pipe->dsc.cst, blend_cst, &pipe->dsc.cst,
- work_profile);
- if(blend_picking)
+ // Try Metal processing; any non-zero result is treated as failure
+ if(module->process_metal(module, piece, input, *output, roi_in, roi_out) != 0)
{
- _pixelpipe_picker(module, piece, &piece->dsc_in, (float *)input, roi_in,
- module->picked_color,
- module->picked_color_min,
- module->picked_color_max,
- blend_cst, PIXELPIPE_PICKER_INPUT);
+ dt_print(DT_DEBUG_METAL,
+ "[pixelpipe] Metal failed for `%s', falling back to OpenCL/CPU",
+ module->op);
+ return 0; // Metal failed, try other paths
+ }
- _pixelpipe_picker(module, piece, &pipe->dsc, (float *)(*output), roi_out,
- module->picked_output_color,
- module->picked_output_color_min,
- module->picked_output_color_max,
- blend_cst, PIXELPIPE_PICKER_OUTPUT);
- DT_CONTROL_SIGNAL_RAISE(DT_SIGNAL_CONTROL_PICKERDATA_READY, module, pipe);
+ dt_print(DT_DEBUG_METAL,
+ "[pixelpipe] `%s' processed with Metal", module->op);
+
+ /* for relevant pieces keep a copy of the result in the fast blend cache; when a mask
+ is displayed or the pipe is shutting down the cached hash is invalidated instead */
+ if(ctx.relevant)
+ {
+ if(pipe->mask_display == DT_DEV_PIXELPIPE_DISPLAY_NONE
+ && !dt_pipe_shutdown(pipe))
+ {
+ float *cache = _get_fast_blendcache(ctx.nfloats, ctx.phash, pipe);
+ if(cache) dt_iop_image_copy(cache, *output, ctx.nfloats);
+ }
+ else
+ pipe->bcache_hash = DT_INVALID_HASH;
}
}
+ /* record the module as processed on GPU, without tiling */
+ *pixelpipe_flow |= PIXELPIPE_FLOW_PROCESSED_ON_GPU;
+ *pixelpipe_flow &= ~(PIXELPIPE_FLOW_PROCESSED_ON_CPU
+ | PIXELPIPE_FLOW_PROCESSED_WITH_TILING);
+
+ /* a TRUE result from post-processing maps to -1, otherwise report success */
+ return _pixelpipe_post_process(pipe, dev, input, input_format, roi_in, output, out_format,
+ roi_out, module, piece, pixelpipe_flow, &ctx)
+ ? -1 : 1;
+}
+
+/* Try to process the module using Metal compute with tiling.
+ Returns:
+ 1 = Metal tiling succeeded and post-processing finished
+ 0 = not applicable (process_metal, process_tiling_metal or a usable Metal
+ context is missing) or process_tiling_metal() reported an error
+ -1 = pipeline shutdown, or pre-/post-processing returned TRUE */
+static int _pixelpipe_try_metal_tiling(dt_dev_pixelpipe_t *pipe,
+ dt_develop_t *dev,
+ float *input,
+ dt_iop_buffer_dsc_t *input_format,
+ const dt_iop_roi_t *roi_in,
+ void **output,
+ dt_iop_buffer_dsc_t **out_format,
+ const dt_iop_roi_t *roi_out,
+ dt_iop_module_t *module,
+ dt_dev_pixelpipe_iop_t *piece,
+ dt_pixelpipe_flow_t *pixelpipe_flow,
+ const int position,
+ const int in_bpp)
+{
+ /* both the plain and the tiling Metal entry points must be present */
+ if(!module->process_metal
+ || !module->process_tiling_metal
+ || !darktable.metal
+ || !dt_metal_is_available(darktable.metal))
+ return 0;
+
if(dt_pipe_shutdown(pipe))
- return TRUE;
+ return -1;
- /* process blending on CPU */
- if(_piece_wants_blending(piece))
+ /* ctx is filled in by _pixelpipe_pre_process(); a TRUE result is reported as -1 */
+ _pixelpipe_process_ctx_t ctx;
+ if(_pixelpipe_pre_process(pipe, dev, input, input_format, roi_in, output, out_format,
+ roi_out, module, piece, pixelpipe_flow, position, &ctx))
+ return -1;
+
+ /* NOTE(review): the log line passes DT_DEVICE_CPU as the device although this is
+ the Metal path - confirm this is intended */
+ dt_print_pipe(DT_DEBUG_PIPE | DT_DEBUG_METAL | DT_DEBUG_TILING,
+ ctx.bcaching ? "from blend cache (Metal tile)" : "process tiles (Metal)",
+ pipe, module, DT_DEVICE_CPU, roi_in, roi_out, "%s%s%s %.fMB",
+ dt_iop_colorspace_to_name(ctx.cst_to),
+ ctx.cst_to != ctx.cst_out ? " -> " : "",
+ ctx.cst_to != ctx.cst_out ? dt_iop_colorspace_to_name(ctx.cst_out) : "",
+ 1e-6 * (2.0f * (ctx.m_width * ctx.m_height * ctx.m_bpp)));
+
+ if(ctx.bcaching)
{
- dt_develop_blend_process(module, piece, input, *output, roi_in, roi_out);
- *pixelpipe_flow |= PIXELPIPE_FLOW_BLENDED_ON_CPU;
- *pixelpipe_flow &= ~PIXELPIPE_FLOW_BLENDED_ON_GPU;
+ /* take the output from pipe->bcache_data instead of running the module */
+ dt_iop_image_copy(*output, pipe->bcache_data, ctx.nfloats);
}
+ else
+ {
+ /* any non-zero result from the tiling entry point is treated as failure */
+ const int err = module->process_tiling_metal(module, piece, input, *output,
+ roi_in, roi_out, in_bpp);
+ if(err != 0)
+ {
+ dt_print(DT_DEBUG_METAL | DT_DEBUG_TILING,
+ "[pixelpipe] Metal tiling failed for `%s', falling back",
+ module->op);
+ return 0; // Metal tiling failed, try other paths
+ }
- return dt_pipe_shutdown(pipe);
+ dt_print(DT_DEBUG_METAL | DT_DEBUG_TILING,
+ "[pixelpipe] `%s' processed with Metal tiling", module->op);
+
+ /* for relevant pieces keep a copy of the result in the fast blend cache; when a mask
+ is displayed or the pipe is shutting down the cached hash is invalidated instead */
+ if(ctx.relevant)
+ {
+ if(pipe->mask_display == DT_DEV_PIXELPIPE_DISPLAY_NONE
+ && !dt_pipe_shutdown(pipe))
+ {
+ float *cache = _get_fast_blendcache(ctx.nfloats, ctx.phash, pipe);
+ if(cache) dt_iop_image_copy(cache, *output, ctx.nfloats);
+ }
+ else
+ pipe->bcache_hash = DT_INVALID_HASH;
+ }
+ }
+
+ /* record the module as processed on GPU with tiling */
+ *pixelpipe_flow |= (PIXELPIPE_FLOW_PROCESSED_ON_GPU | PIXELPIPE_FLOW_PROCESSED_WITH_TILING);
+ *pixelpipe_flow &= ~PIXELPIPE_FLOW_PROCESSED_ON_CPU;
+
+ /* a TRUE result from post-processing maps to -1, otherwise report success */
+ return _pixelpipe_post_process(pipe, dev, input, input_format, roi_in, output, out_format,
+ roi_out, module, piece, pixelpipe_flow, &ctx)
+ ? -1 : 1;
}
+#endif
#ifdef HAVE_OPENCL
static inline gboolean _opencl_pipe_isok(dt_dev_pixelpipe_t *pipe)
@@ -1998,6 +2220,98 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
piece->module->position = pos;
+#if defined(__APPLE__) && defined(__aarch64__)
+  /* On Apple Silicon, prefer Metal compute over OpenCL.
+     Fallback chain: Metal -> OpenCL -> CPU.
+     Metal operates on CPU-side buffers via unified memory.
+     If the module implements process_metal() and Metal is available,
+     try Metal first. If Metal fails, fall through to OpenCL, then CPU.
+     On Metal success control jumps to the _metal_done label, skipping the
+     OpenCL and CPU processing paths. */
+  {
+    gboolean try_metal_path = module->process_metal
+                              && darktable.metal
+                              && dt_metal_is_available(darktable.metal);
+
+#ifdef HAVE_OPENCL
+    /* If input data is on GPU from a previous OpenCL module,
+       copy it back to CPU before trying Metal. If the copy fails the
+       Metal attempt is skipped and cl_mem_input is left untouched. */
+    if(cl_mem_input != NULL && try_metal_path)
+    {
+      if(dt_opencl_copy_device_to_host(pipe->devid, input, cl_mem_input,
+                                       roi_in.width, roi_in.height,
+                                       in_bpp) != CL_SUCCESS)
+      {
+        /* no trailing newline here, matching every other dt_print() call */
+        dt_print(DT_DEBUG_METAL | DT_DEBUG_OPENCL,
+                 "[pixelpipe] couldn't copy GPU input to host for Metal `%s',"
+                 " falling through to OpenCL/CPU", module->op);
+        try_metal_path = FALSE;
+      }
+    }
+#endif
+
+    if(try_metal_path)
+    {
+      /* Check if the full image fits in available memory for Metal processing.
+         On unified memory, use system RAM as the constraint.
+         The OpenCL tiling factor is preferred when the module reports one. */
+      const uint32_t metal_m_bpp = MAX(in_bpp, bpp);
+      const size_t metal_m_width = MAX(roi_in.width, roi_out->width);
+      const size_t metal_m_height = MAX(roi_in.height, roi_out->height);
+      const float metal_factor = tiling.factor_cl > 0 ? tiling.factor_cl : tiling.factor;
+      const gboolean metal_fits = dt_tiling_piece_fits_host_memory(
+          piece, metal_m_width, metal_m_height, metal_m_bpp, metal_factor, tiling.overhead);
+
+      if(metal_fits)
+      {
+        /* Image fits - try direct Metal processing */
+        const int metal_result = _pixelpipe_try_metal(pipe, dev, input, input_format, &roi_in,
+                                                      output, out_format, roi_out,
+                                                      module, piece, &pixelpipe_flow, pos);
+        if(metal_result < 0)
+          return TRUE; // pipeline shutdown
+        if(metal_result > 0)
+        {
+#ifdef HAVE_OPENCL
+          /* the device-side input is no longer needed once Metal produced the output */
+          dt_opencl_release_mem_object(cl_mem_input);
+          cl_mem_input = NULL;
+#endif
+          goto _metal_done;
+        }
+        /* metal_result == 0: Metal failed, fall through to OpenCL/CPU */
+      }
+      else if(piece->process_tiling_ready)
+      {
+        /* Image too large - try Metal tiling */
+        dt_print(DT_DEBUG_TILING | DT_DEBUG_METAL,
+                 "[pixelpipe] image doesn't fit for Metal `%s', trying tiled Metal processing",
+                 module->op);
+        const int tiling_result = _pixelpipe_try_metal_tiling(
+            pipe, dev, input, input_format, &roi_in,
+            output, out_format, roi_out,
+            module, piece, &pixelpipe_flow, pos, in_bpp);
+        if(tiling_result < 0)
+          return TRUE; // pipeline shutdown
+        if(tiling_result > 0)
+        {
+#ifdef HAVE_OPENCL
+          /* the device-side input is no longer needed once Metal produced the output */
+          dt_opencl_release_mem_object(cl_mem_input);
+          cl_mem_input = NULL;
+#endif
+          goto _metal_done;
+        }
+        /* tiling_result == 0: Metal tiling failed or was not applicable,
+           fall through to OpenCL/CPU */
+        dt_print(DT_DEBUG_METAL | DT_DEBUG_TILING,
+                 "[pixelpipe] Metal tiling failed for `%s', falling back to OpenCL/CPU",
+                 module->op);
+      }
+      else
+      {
+        dt_print(DT_DEBUG_METAL,
+                 "[pixelpipe] image doesn't fit for Metal `%s' and tiling not ready, falling back",
+                 module->op);
+      }
+    }
+  }
+#endif
+
#ifdef HAVE_OPENCL
// Fetch RGB working profile
@@ -2847,6 +3161,10 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
return TRUE;
#endif // HAVE_OPENCL
+#if defined(__APPLE__) && defined(__aarch64__)
+_metal_done: ;
+#endif
+
if(pipe->mask_display != DT_DEV_PIXELPIPE_DISPLAY_NONE)
dt_dev_pixelpipe_invalidate_cacheline(pipe, *output);
diff --git a/src/develop/tiling.c b/src/develop/tiling.c
index ca63aa06f43d..7d4519467673 100644
--- a/src/develop/tiling.c
+++ b/src/develop/tiling.c
@@ -42,7 +42,6 @@
Needs to be increased if tiling fails due to insufficient buffer sizes. */
#define RESERVE 5
-#ifdef HAVE_OPENCL
/* greatest common divisor */
static unsigned _gcd(unsigned a, unsigned b)
{
@@ -61,7 +60,6 @@ static unsigned _lcm(const unsigned a, const unsigned b)
{
return (((unsigned long)a * b) / _gcd(a, b));
}
-#endif
static inline int _align_up(const int n, const int a)
{
@@ -2166,6 +2164,262 @@ int default_process_tiling_cl(dt_iop_module_t *self,
#endif
+#if defined(__APPLE__) && defined(__aarch64__)
+/* Metal tiling for Apple Silicon unified memory (point-to-point variant).
+   Simplified version of _default_process_tiling_cl_ptp(): no host<->device copies needed
+   since Metal operates on CPU-accessible buffers via unified memory.
+   For each tile: extract tile region (with overlap) into temp buffer, call process_metal(),
+   composite "good" part (minus overlap) back to output.
+
+   ivoid/ovoid are the full input/output images described by roi_in/roi_out,
+   in_bpp is the number of bytes per input pixel.
+   Returns 0 on success and 1 on any failure (invalid alignment, too many tiles,
+   allocation failure, or process_metal() reporting an error). On failure the
+   pipe's processed_maximum is restored to its value on entry. */
+static int _default_process_tiling_metal_ptp(dt_iop_module_t *self,
+                                             dt_dev_pixelpipe_iop_t *piece,
+                                             const void *const ivoid,
+                                             void *const ovoid,
+                                             const dt_iop_roi_t *const roi_in,
+                                             const dt_iop_roi_t *const roi_out,
+                                             const int in_bpp)
+{
+  void *tile_ibuf = NULL;
+  void *tile_obuf = NULL;
+
+  dt_iop_buffer_dsc_t dsc;
+  self->output_format(self, piece->pipe, piece, &dsc);
+  const int out_bpp = dt_iop_buffer_dsc_to_bpp(&dsc);
+
+  /* row strides (in bytes) of the full input and output images */
+  const int ipitch = roi_in->width * in_bpp;
+  const int opitch = roi_out->width * out_bpp;
+  const int max_bpp = MAX(in_bpp, out_bpp);
+
+  /* get tiling requirements of module */
+  dt_develop_tiling_t tiling = { 0 };
+  tiling.factor_cl = tiling.maxbuf_cl = -1;
+  self->tiling_callback(self, piece, roi_in, roi_out, &tiling);
+  /* Metal reuses factor_cl since it's GPU processing */
+  if(tiling.factor_cl < 0) tiling.factor_cl = tiling.factor;
+  if(tiling.maxbuf_cl < 0) tiling.maxbuf_cl = tiling.maxbuf;
+
+  /* The alignment is used as a divisor below. assert() is compiled out in
+     release builds, so reject a zero alignment explicitly and let the
+     caller fall back instead of dividing by zero. */
+  if(tiling.align == 0)
+  {
+    dt_print(DT_DEBUG_TILING,
+             "[default_process_tiling_metal_ptp] aborted tiling for module '%s%s'. "
+             "invalid alignment 0",
+             self->op, dt_iop_get_instance_id(self));
+    return 1;
+  }
+
+  /* On Apple Silicon, GPU and CPU share the same memory pool.
+     Use available pipe memory (system RAM aware) as the constraint. */
+  float available = dt_get_available_pipe_mem(piece->pipe);
+  /* subtract input and output buffers which are already allocated */
+  available = fmaxf(available - ((float)roi_out->width * roi_out->height * out_bpp)
+                    - ((float)roi_in->width * roi_in->height * in_bpp) - tiling.overhead,
+                    0);
+
+  const float factor = fmaxf(tiling.factor_cl, 1.0f);
+  const float singlebuffer = fmaxf(available / factor, 0.0f);
+  const float maxbuf = fmaxf(tiling.maxbuf_cl, 1.0f);
+
+  int width = roi_in->width;
+  int height = roi_in->height;
+
+  /* shrink tile size in case it would exceed singlebuffer size */
+  if((float)width * height * max_bpp * maxbuf > singlebuffer)
+  {
+    const float scale = singlebuffer / ((float)width * height * max_bpp * maxbuf);
+
+    /* for a moderate reduction shrink only the longer side, otherwise shrink both */
+    if(width < height && scale >= 0.333f)
+    {
+      height = floorf(height * scale);
+    }
+    else if(height <= width && scale >= 0.333f)
+    {
+      width = floorf(width * scale);
+    }
+    else
+    {
+      width = floorf(width * sqrtf(scale));
+      height = floorf(height * sqrtf(scale));
+    }
+    dt_print(DT_DEBUG_TILING | DT_DEBUG_VERBOSE,
+             "[default_process_tiling_metal_ptp] buffer exceeds singlebuffer, corrected to %dx%d",
+             width, height);
+  }
+
+  /* make sure we have a reasonably effective tile dimension. if not try square tiles */
+  if(3 * tiling.overlap > width || 3 * tiling.overlap > height)
+  {
+    width = height = floorf(sqrtf((float)width * height));
+    dt_print(DT_DEBUG_TILING | DT_DEBUG_VERBOSE,
+             "[default_process_tiling_metal_ptp] use squares because of overlap, corrected to %dx%d",
+             width, height);
+  }
+
+  /* alignment */
+  const unsigned int walign = _lcm(tiling.align, 1); /* no CL_ALIGNMENT needed for Metal */
+  const unsigned int halign = tiling.align;
+  assert(walign != 0 && halign != 0);
+
+  /* tiles spanning the full image dimension are left unaligned */
+  if(width < roi_in->width) width = (width / walign) * walign;
+  if(height < roi_in->height) height = (height / halign) * halign;
+
+  /* align overlap (rounded up to the next multiple of the alignment) */
+  const int overlap = tiling.overlap % tiling.align != 0
+                          ? (tiling.overlap / tiling.align + 1) * tiling.align
+                          : tiling.overlap;
+
+  /* effective tile size */
+  const int tile_wd = width - 2 * overlap > 0 ? width - 2 * overlap : 1;
+  const int tile_ht = height - 2 * overlap > 0 ? height - 2 * overlap : 1;
+
+  /* number of tiles */
+  const int tiles_x = width < roi_in->width ? ceilf(roi_in->width / (float)tile_wd) : 1;
+  const int tiles_y = height < roi_in->height ? ceilf(roi_in->height / (float)tile_ht) : 1;
+
+  /* sanity check */
+  const int max_tiles = (darktable.dtresources.level == 3) ? 0x40000000 : 10000;
+  if(tiles_x * tiles_y > max_tiles)
+  {
+    dt_print(DT_DEBUG_TILING,
+             "[default_process_tiling_metal_ptp] aborted tiling for module '%s%s'. "
+             "too many tiles: %d x %d",
+             self->op, dt_iop_get_instance_id(self), tiles_x, tiles_y);
+    return 1;
+  }
+
+  dt_print(DT_DEBUG_TILING,
+           "[default_process_tiling_metal_ptp] processing %dx%d tiles, size=%dx%d, overlap=%d for '%s%s'",
+           tiles_x, tiles_y, tile_wd, tile_ht, overlap, self->op, dt_iop_get_instance_id(self));
+
+  /* store processed_maximum to be re-used and aggregated */
+  dt_aligned_pixel_t processed_maximum_saved;
+  dt_aligned_pixel_t processed_maximum_new = { 1.0f };
+  for_four_channels(k) processed_maximum_saved[k] = piece->pipe->dsc.processed_maximum[k];
+
+  /* iterate over tiles */
+  for(int tx = 0; tx < tiles_x; tx++)
+  {
+    for(int ty = 0; ty < tiles_y; ty++)
+    {
+      piece->pipe->tiling = TRUE;
+
+      /* clip the tile against the right/bottom image border */
+      const int wd = tx * tile_wd + width > roi_in->width ? roi_in->width - tx * tile_wd : width;
+      const int ht = ty * tile_ht + height > roi_in->height ? roi_in->height - ty * tile_ht : height;
+
+      /* skip degenerate end-tiles */
+      if((wd <= 2 * overlap && tx > 0) || (ht <= 2 * overlap && ty > 0)) continue;
+
+      /* roi for this tile */
+      dt_iop_roi_t iroi = { roi_in->x + tx * tile_wd, roi_in->y + ty * tile_ht, wd, ht, roi_in->scale };
+      dt_iop_roi_t oroi = { roi_out->x + tx * tile_wd, roi_out->y + ty * tile_ht, wd, ht, roi_out->scale };
+
+      /* allocate tile buffers */
+      tile_ibuf = dt_alloc_aligned((size_t)wd * ht * in_bpp);
+      tile_obuf = dt_alloc_aligned((size_t)wd * ht * out_bpp);
+      if(tile_ibuf == NULL || tile_obuf == NULL)
+      {
+        dt_print(DT_DEBUG_TILING,
+                 "[default_process_tiling_metal_ptp] could not alloc tile buffers for '%s%s'",
+                 self->op, dt_iop_get_instance_id(self));
+        goto error;
+      }
+
+      /* copy tile region from input image (with overlap border) */
+      const size_t ioffs = (size_t)(ty * tile_ht) * ipitch + (size_t)(tx * tile_wd) * in_bpp;
+      DT_OMP_FOR()
+      for(int j = 0; j < ht; j++)
+        memcpy((char *)tile_ibuf + (size_t)j * wd * in_bpp,
+               (const char *)ivoid + ioffs + (size_t)j * ipitch,
+               (size_t)wd * in_bpp);
+
+      /* restore processed_maximum for this tile */
+      for(int k = 0; k < 4; k++) piece->pipe->dsc.processed_maximum[k] = processed_maximum_saved[k];
+      dt_dev_prepare_piece_cfa(piece, &iroi);
+
+      /* call process_metal on the tile */
+      dt_print(DT_DEBUG_TILING | DT_DEBUG_VERBOSE,
+               "[default_process_tiling_metal_ptp] tile (%d,%d) size %dx%d at origin [%d,%d]",
+               tx, ty, wd, ht, tx * tile_wd, ty * tile_ht);
+
+      const int err = self->process_metal(self, piece, tile_ibuf, tile_obuf, &iroi, &oroi);
+      if(err != 0)
+      {
+        dt_print(DT_DEBUG_TILING,
+                 "[default_process_tiling_metal_ptp] process_metal() failed for '%s%s' on tile (%d,%d)",
+                 self->op, dt_iop_get_instance_id(self), tx, ty);
+        goto error;
+      }
+
+      /* aggregate processed_maximum; a mismatch between tiles is only logged */
+      for(int k = 0; k < 4; k++)
+      {
+        if(tx + ty > 0 && fabsf(processed_maximum_new[k] - piece->pipe->dsc.processed_maximum[k]) > 1.0e-6f)
+          dt_print(DT_DEBUG_TILING,
+                   "[default_process_tiling_metal_ptp] processed_maximum[%d] differs between tiles in module '%s%s'",
+                   k, self->op, dt_iop_get_instance_id(self));
+        processed_maximum_new[k] = piece->pipe->dsc.processed_maximum[k];
+      }
+
+      /* copy "good" part of tile output (minus overlap on non-first tiles) to output image */
+      int origin_x = 0, origin_y = 0;
+      int region_w = wd, region_h = ht;
+      size_t ooffs = (size_t)(ty * tile_ht) * opitch + (size_t)(tx * tile_wd) * out_bpp;
+
+      if(tx > 0)
+      {
+        origin_x += overlap;
+        region_w -= overlap;
+        ooffs += (size_t)overlap * out_bpp;
+      }
+      if(ty > 0)
+      {
+        origin_y += overlap;
+        region_h -= overlap;
+        ooffs += (size_t)overlap * opitch;
+      }
+
+      DT_OMP_FOR()
+      for(int j = 0; j < region_h; j++)
+        memcpy((char *)ovoid + ooffs + (size_t)j * opitch,
+               (const char *)tile_obuf + (size_t)((j + origin_y) * wd + origin_x) * out_bpp,
+               (size_t)region_w * out_bpp);
+
+      /* free tile buffers for this iteration */
+      dt_free_align(tile_ibuf);
+      tile_ibuf = NULL;
+      dt_free_align(tile_obuf);
+      tile_obuf = NULL;
+    }
+  }
+
+  /* copy back final processed_maximum */
+  for(int k = 0; k < 4; k++) piece->pipe->dsc.processed_maximum[k] = processed_maximum_new[k];
+
+  piece->pipe->tiling = FALSE;
+  return 0;
+
+error:
+  /* undo the per-tile changes and release whatever tile buffers are still held */
+  for(int k = 0; k < 4; k++) piece->pipe->dsc.processed_maximum[k] = processed_maximum_saved[k];
+  dt_free_align(tile_ibuf);
+  dt_free_align(tile_obuf);
+  piece->pipe->tiling = FALSE;
+  return 1;
+}
+
+
+/* Default entry point for tiled Metal processing.
+   Only the point-to-point scheme is available: it is used when roi_in and
+   roi_out are bytewise identical and the module does not set
+   IOP_FLAGS_TILING_FULL_ROI. In every other case the roi variant would be
+   required, which is not implemented, so 1 is returned and the caller is
+   expected to fall back to OpenCL/CPU. Otherwise the result of the
+   point-to-point implementation is passed through. */
+int default_process_tiling_metal(dt_iop_module_t *self,
+                                 dt_dev_pixelpipe_iop_t *piece,
+                                 const void *const ivoid,
+                                 void *const ovoid,
+                                 const dt_iop_roi_t *const roi_in,
+                                 const dt_iop_roi_t *const roi_out,
+                                 const int in_bpp)
+{
+  /* flags() is only queried when both regions match, as before */
+  const gboolean same_roi = memcmp(roi_in, roi_out, sizeof(struct dt_iop_roi_t)) == 0;
+  const gboolean ptp_possible = same_roi && !(self->flags() & IOP_FLAGS_TILING_FULL_ROI);
+
+  if(ptp_possible)
+    return _default_process_tiling_metal_ptp(self, piece, ivoid, ovoid, roi_in, roi_out, in_bpp);
+
+  dt_print(DT_DEBUG_TILING,
+           "[default_process_tiling_metal] roi tiling not yet implemented for module '%s%s'",
+           self->op, dt_iop_get_instance_id(self));
+  return 1; /* fall back to OpenCL/CPU */
+}
+#endif /* __APPLE__ && __aarch64__ */
+
+
/* If a module does not implement tiling_callback() by itself, this function is called instead.
Default is an image size factor of 2 (i.e. input + output buffer needed), no overhead (1),
no overlap between tiles, and an pixel alignment of 1 in x and y direction, i.e. no special
diff --git a/src/develop/tiling.h b/src/develop/tiling.h
index 936ac43c6458..65a8dcbd78fc 100644
--- a/src/develop/tiling.h
+++ b/src/develop/tiling.h
@@ -77,8 +77,18 @@ float dt_tiling_estimate_cpumem(struct dt_develop_tiling_t *tiling, struct dt_de
#ifdef HAVE_OPENCL
float dt_tiling_estimate_clmem(struct dt_develop_tiling_t *tiling, struct dt_dev_pixelpipe_iop_t *piece,
- const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out,
- const int max_bpp);
+ const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out,
+ const int max_bpp);
+#endif
+
+#if defined(__APPLE__) && defined(__aarch64__)
+/* Default tiled Metal processing for a module: ivoid/ovoid are the full input
+   and output images for roi_in/roi_out, bpp is the number of bytes per input
+   pixel. Returns 0 on success and 1 on failure. */
+int default_process_tiling_metal(struct dt_iop_module_t *self, struct dt_dev_pixelpipe_iop_t *piece,
+ const void *const ivoid, void *const ovoid, const dt_iop_roi_t *const roi_in,
+ const dt_iop_roi_t *const roi_out, const int bpp);
+
+/* Tiled Metal processing entry point with the same parameter list.
+   NOTE(review): presumably the per-module override of the default above, with
+   non-zero meaning failure - confirm against the module API. */
+int process_tiling_metal(struct dt_iop_module_t *self, struct dt_dev_pixelpipe_iop_t *piece,
+ const void *const ivoid, void *const ovoid, const dt_iop_roi_t *const roi_in,
+ const dt_iop_roi_t *const roi_out, const int bpp);
#endif
// clang-format off
// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
diff --git a/src/iop/diffuse.c b/src/iop/diffuse.c
index 27885e7b966a..c12546e32294 100644
--- a/src/iop/diffuse.c
+++ b/src/iop/diffuse.c
@@ -43,6 +43,10 @@
#include "gui/presets.h"
#include "iop/iop_api.h"
+#if defined(__APPLE__) && defined(__aarch64__)
+#include "osx/dt_metal.h"
+#endif
+
DT_MODULE_INTROSPECTION(2, dt_iop_diffuse_params_t)
#define MAX_NUM_SCALES 10
@@ -90,6 +94,15 @@ typedef struct dt_iop_diffuse_global_data_t
int kernel_diffuse_build_mask;
int kernel_diffuse_inpaint_mask;
int kernel_diffuse_pde;
+
+#if defined(__APPLE__) && defined(__aarch64__)
+ int metal_kernel_bspline_vertical;
+ int metal_kernel_bspline_horizontal;
+ int metal_kernel_wavelets_detail;
+ int metal_kernel_build_mask;
+ int metal_kernel_inpaint_mask;
+ int metal_kernel_diffuse_pde;
+#endif
} dt_iop_diffuse_global_data_t;
@@ -1737,6 +1750,27 @@ void init_global(dt_iop_module_so_t *self)
dt_opencl_create_kernel(wavelets, "blur_2D_Bspline_vertical");
gd->kernel_filmic_wavelets_detail =
dt_opencl_create_kernel(wavelets, "wavelets_detail_level");
+
+#if defined(__APPLE__) && defined(__aarch64__)
+ if(darktable.metal && dt_metal_is_available(darktable.metal))
+ {
+ gd->metal_kernel_bspline_vertical = dt_metal_create_kernel(darktable.metal, "blur_2D_Bspline_vertical");
+ gd->metal_kernel_bspline_horizontal = dt_metal_create_kernel(darktable.metal, "blur_2D_Bspline_horizontal");
+ gd->metal_kernel_wavelets_detail = dt_metal_create_kernel(darktable.metal, "wavelets_detail_level");
+ gd->metal_kernel_build_mask = dt_metal_create_kernel(darktable.metal, "build_mask");
+ gd->metal_kernel_inpaint_mask = dt_metal_create_kernel(darktable.metal, "inpaint_mask");
+ gd->metal_kernel_diffuse_pde = dt_metal_create_kernel(darktable.metal, "diffuse_pde");
+ }
+ else
+ {
+ gd->metal_kernel_bspline_vertical = -1;
+ gd->metal_kernel_bspline_horizontal = -1;
+ gd->metal_kernel_wavelets_detail = -1;
+ gd->metal_kernel_build_mask = -1;
+ gd->metal_kernel_inpaint_mask = -1;
+ gd->metal_kernel_diffuse_pde = -1;
+ }
+#endif
}
@@ -1750,12 +1784,363 @@ void cleanup_global(dt_iop_module_so_t *self)
dt_opencl_free_kernel(gd->kernel_filmic_bspline_vertical);
dt_opencl_free_kernel(gd->kernel_filmic_bspline_horizontal);
dt_opencl_free_kernel(gd->kernel_filmic_wavelets_detail);
+
+#if defined(__APPLE__) && defined(__aarch64__)
+ if(darktable.metal)
+ {
+ dt_metal_free_kernel(darktable.metal, gd->metal_kernel_bspline_vertical);
+ dt_metal_free_kernel(darktable.metal, gd->metal_kernel_bspline_horizontal);
+ dt_metal_free_kernel(darktable.metal, gd->metal_kernel_wavelets_detail);
+ dt_metal_free_kernel(darktable.metal, gd->metal_kernel_build_mask);
+ dt_metal_free_kernel(darktable.metal, gd->metal_kernel_inpaint_mask);
+ dt_metal_free_kernel(darktable.metal, gd->metal_kernel_diffuse_pde);
+ }
+#endif
+
free(self->data);
self->data = NULL;
}
#endif
+#if defined(__APPLE__) && defined(__aarch64__)
+
+/* Enqueue a 2D Metal kernel through the flex-argument API with a
+ * caller-chosen threadgroup shape (threadW x threadH).
+ * The status of dt_metal_enqueue_kernel_2d_flex_with_tgs() is returned as-is. */
+static inline int _metal_dispatch_tgs(dt_metal_t *metal, int kernel_id,
+                                      int width, int height,
+                                      int num_args, const dt_metal_arg_t *args,
+                                      int threadW, int threadH)
+{
+  const int status = dt_metal_enqueue_kernel_2d_flex_with_tgs(
+      metal, kernel_id, width, height, num_args, args, threadW, threadH);
+  return status;
+}
+
+/* Enqueue a 2D Metal kernel through the flex-argument API with the default
+ * threadgroup shape (16x16 according to the original note; the value itself
+ * is chosen inside dt_metal_enqueue_kernel_2d_flex(), not here).
+ * The status of that call is returned as-is. */
+static inline int _metal_dispatch(dt_metal_t *metal, int kernel_id,
+                                  int width, int height,
+                                  int num_args, const dt_metal_arg_t *args)
+{
+  const int status
+      = dt_metal_enqueue_kernel_2d_flex(metal, kernel_id, width, height, num_args, args);
+  return status;
+}
+
+/** Metal implementation of process() for the diffuse module.
+ *
+ * Uploads ivoid into a texture, optionally builds and inpaints a mask
+ * (when data->threshold > 0), then runs `iterations` passes of wavelet
+ * decomposition followed by a coarse-to-fine PDE reconstruction, and
+ * finally reads the last pass back into ovoid.
+ *
+ * Image dimensions are taken from roi_in; roi_out is not used here.
+ *
+ * In DT_DEV_PIXELPIPE_FAST mode the input is copied to the output
+ * unchanged and 0 is returned.
+ *
+ * Returns 0 on success. Returns DT_METAL_DEFAULT_ERROR when Metal or one
+ * of the kernels is unavailable (ovoid untouched) or when a texture
+ * allocation fails (ovoid then holds a copy of ivoid). Any other failure
+ * returns the non-zero code of the failing dt_metal_* call.
+ */
+int process_metal(dt_iop_module_t *self,
+                  dt_dev_pixelpipe_iop_t *piece,
+                  const void *const ivoid,
+                  void *const ovoid,
+                  const dt_iop_roi_t *const roi_in,
+                  const dt_iop_roi_t *const roi_out)
+{
+  const gboolean fastmode = piece->pipe->type & DT_DEV_PIXELPIPE_FAST;
+  const dt_iop_diffuse_data_t *const data = piece->data;
+  dt_iop_diffuse_global_data_t *const gd = self->global_data;
+
+  dt_metal_t *metal = darktable.metal;
+  if(!metal || !dt_metal_is_available(metal))
+    return DT_METAL_DEFAULT_ERROR;
+
+  // check all kernels are valid
+  if(gd->metal_kernel_bspline_vertical < 0 || gd->metal_kernel_bspline_horizontal < 0
+     || gd->metal_kernel_wavelets_detail < 0 || gd->metal_kernel_build_mask < 0
+     || gd->metal_kernel_inpaint_mask < 0 || gd->metal_kernel_diffuse_pde < 0)
+    return DT_METAL_DEFAULT_ERROR;
+
+  const int width = roi_in->width;
+  const int height = roi_in->height;
+  const size_t bytes_per_row_f4 = (size_t)width * 4 * sizeof(float);
+
+  int err = DT_METAL_DEFAULT_ERROR;
+
+  // fast mode: just copy
+  if(fastmode)
+  {
+    dt_iop_image_copy_by_size(ovoid, ivoid, width, height, 4);
+    return 0;
+  }
+
+  const float scale = fmaxf(piece->iscale / roi_in->scale, 1.f);
+  const float final_radius = (data->radius + data->radius_center) * 2.f / scale;
+  const int iterations = MAX(ceilf((float)data->iterations), 1);
+  const int diffusion_scales = num_steps_to_reach_equivalent_sigma(B_SPLINE_SIGMA, final_radius);
+  const int scales = CLAMP(diffusion_scales, 1, MAX_NUM_SCALES);
+
+  // Allocate textures — these leverage the GPU's hardware 2D texture cache
+  dt_metal_texture_t temp1 = dt_metal_alloc_texture_rgba_f32(metal, width, height);
+  dt_metal_texture_t temp2 = dt_metal_alloc_texture_rgba_f32(metal, width, height);
+  dt_metal_texture_t mask = dt_metal_alloc_texture_r8(metal, width, height);
+  dt_metal_texture_t LF_odd = dt_metal_alloc_texture_rgba_f32(metal, width, height);
+  dt_metal_texture_t LF_even = dt_metal_alloc_texture_rgba_f32(metal, width, height);
+  dt_metal_texture_t out_tex = dt_metal_alloc_texture_rgba_f32(metal, width, height);
+
+  // One high-frequency texture per wavelet scale; unused slots stay NULL
+  gboolean out_of_memory = FALSE;
+  dt_metal_texture_t HF[MAX_NUM_SCALES] = { NULL };
+  for(int s = 0; s < scales; s++)
+  {
+    HF[s] = dt_metal_alloc_texture_rgba_f32(metal, width, height);
+    if(!HF[s]) out_of_memory = TRUE;
+  }
+
+  if(!temp1 || !temp2 || !mask || !LF_odd || !LF_even || !out_tex || out_of_memory)
+  {
+    dt_print(DT_DEBUG_METAL, "[diffuse process_metal] out of memory");
+    dt_iop_image_copy_by_size(ovoid, ivoid, width, height, 4);
+    err = DT_METAL_DEFAULT_ERROR;
+    goto error;
+  }
+
+  // Upload input image to texture
+  dt_metal_copy_to_texture(temp1, ivoid, bytes_per_row_f4);
+
+  dt_metal_texture_t in_tex = temp1;
+
+  const gboolean has_mask = (data->threshold > 0.f);
+  if(has_mask)
+  {
+    // Batch mask build + inpaint together (2 kernels, 1 GPU round-trip)
+    err = dt_metal_begin_batch(metal);
+    if(err != 0) goto error;
+
+    // build_mask: textures=[input, mask], buffers=[threshold, width, height]
+    {
+      const float threshold = data->threshold;
+      const dt_metal_arg_t args[] = {
+        { DT_METAL_ARG_TEXTURE, in_tex, 0 },
+        { DT_METAL_ARG_TEXTURE, mask, 0 },
+        { DT_METAL_ARG_BYTES, &threshold, sizeof(float) },
+        { DT_METAL_ARG_BYTES, &width, sizeof(int) },
+        { DT_METAL_ARG_BYTES, &height, sizeof(int) },
+      };
+      err = _metal_dispatch(metal, gd->metal_kernel_build_mask, width, height, 5, args);
+      if(err != 0) { dt_metal_end_batch(metal); goto error; }
+    }
+
+    // inpaint_mask: textures=[inpainted, original, mask], buffers=[width, height]
+    {
+      const dt_metal_arg_t args[] = {
+        { DT_METAL_ARG_TEXTURE, temp2, 0 },
+        { DT_METAL_ARG_TEXTURE, in_tex, 0 },
+        { DT_METAL_ARG_TEXTURE, mask, 0 },
+        { DT_METAL_ARG_BYTES, &width, sizeof(int) },
+        { DT_METAL_ARG_BYTES, &height, sizeof(int) },
+      };
+      err = _metal_dispatch(metal, gd->metal_kernel_inpaint_mask, width, height, 5, args);
+      if(err != 0) { dt_metal_end_batch(metal); goto error; }
+    }
+
+    err = dt_metal_end_batch(metal);
+    if(err != 0) goto error;
+
+    // the inpainted image now lives in temp2
+    in_tex = temp2;
+  }
+
+  // The working texture that does NOT hold the first iteration's input.
+  // Iterations ping-pong between in_tex and spare_tex, so the pairing must
+  // follow in_tex (temp1 without a mask, temp2 with one) rather than being
+  // hard-wired to temp1/temp2; otherwise pass 1 would re-read pass 0's input.
+  dt_metal_texture_t spare_tex = (in_tex == temp1) ? temp2 : temp1;
+
+  // Precompute anisotropy parameters (constant across iterations)
+  const dt_aligned_pixel_t anisotropy
+      = { compute_anisotropy_factor(data->anisotropy_first),
+          compute_anisotropy_factor(data->anisotropy_second),
+          compute_anisotropy_factor(data->anisotropy_third),
+          compute_anisotropy_factor(data->anisotropy_fourth) };
+
+  const dt_isotropy_t DT_ALIGNED_PIXEL isotropy_type_arr[4]
+      = { check_isotropy_mode(data->anisotropy_first),
+          check_isotropy_mode(data->anisotropy_second),
+          check_isotropy_mode(data->anisotropy_third),
+          check_isotropy_mode(data->anisotropy_fourth) };
+
+  // the kernel receives the isotropy modes as plain ints
+  const int isotropy_type[4] = { (int)isotropy_type_arr[0], (int)isotropy_type_arr[1],
+                                 (int)isotropy_type_arr[2], (int)isotropy_type_arr[3] };
+
+  const float regularization = powf(10.f, data->regularization) - 1.f;
+  const float variance_threshold = powf(10.f, data->variance_threshold);
+  const int has_mask_int = has_mask ? 1 : 0;
+
+  for(int it = 0; it < iterations; it++)
+  {
+    // Even passes read in_tex and write spare_tex, odd passes the reverse,
+    // so every pass consumes the output of the previous one.
+    const gboolean even_pass = (it % 2 == 0);
+    dt_metal_texture_t iter_in = even_pass ? in_tex : spare_tex;
+    dt_metal_texture_t iter_out = even_pass ? spare_tex : in_tex;
+
+    // the last pass writes straight into the texture that is read back
+    if(it == iterations - 1)
+      iter_out = out_tex;
+
+    // ── Batch the entire iteration: wavelet decompose + PDE reconstruct ──
+    err = dt_metal_begin_batch(metal);
+    if(err != 0) goto error;
+
+    // ── Wavelet decompose ──
+    dt_metal_texture_t residual = NULL;
+    for(int s = 0; s < scales; ++s)
+    {
+      const int mult = 1 << s;
+
+      dt_metal_texture_t buffer_in;
+      dt_metal_texture_t buffer_out;
+
+      if(s == 0)
+      {
+        buffer_in = iter_in;
+        buffer_out = LF_odd;
+      }
+      else if(s % 2 != 0)
+      {
+        buffer_in = LF_odd;
+        buffer_out = LF_even;
+      }
+      else
+      {
+        buffer_in = LF_even;
+        buffer_out = LF_odd;
+      }
+
+      // bspline horizontal: textures=[input, output], buffers=[width, height, mult]
+      // Use wider threadgroups (32x8) for horizontal memory access pattern
+      {
+        const dt_metal_arg_t args[] = {
+          { DT_METAL_ARG_TEXTURE, buffer_in, 0 },
+          { DT_METAL_ARG_TEXTURE, HF[s], 0 },
+          { DT_METAL_ARG_BYTES, &width, sizeof(int) },
+          { DT_METAL_ARG_BYTES, &height, sizeof(int) },
+          { DT_METAL_ARG_BYTES, &mult, sizeof(int) },
+        };
+        err = _metal_dispatch_tgs(metal, gd->metal_kernel_bspline_horizontal,
+                                  width, height, 5, args, 32, 8);
+        if(err != 0) { dt_metal_end_batch(metal); goto error; }
+      }
+
+      // bspline vertical: textures=[input, output], buffers=[width, height, mult]
+      // Use taller threadgroups (8x32) for vertical memory access pattern
+      {
+        const dt_metal_arg_t args[] = {
+          { DT_METAL_ARG_TEXTURE, HF[s], 0 },
+          { DT_METAL_ARG_TEXTURE, buffer_out, 0 },
+          { DT_METAL_ARG_BYTES, &width, sizeof(int) },
+          { DT_METAL_ARG_BYTES, &height, sizeof(int) },
+          { DT_METAL_ARG_BYTES, &mult, sizeof(int) },
+        };
+        err = _metal_dispatch_tgs(metal, gd->metal_kernel_bspline_vertical,
+                                  width, height, 5, args, 8, 32);
+        if(err != 0) { dt_metal_end_batch(metal); goto error; }
+      }
+
+      // wavelets detail: textures=[detail, LF, HF], buffers=[width, height]
+      {
+        const dt_metal_arg_t args[] = {
+          { DT_METAL_ARG_TEXTURE, buffer_in, 0 },
+          { DT_METAL_ARG_TEXTURE, buffer_out, 0 },
+          { DT_METAL_ARG_TEXTURE, HF[s], 0 },
+          { DT_METAL_ARG_BYTES, &width, sizeof(int) },
+          { DT_METAL_ARG_BYTES, &height, sizeof(int) },
+        };
+        err = _metal_dispatch(metal, gd->metal_kernel_wavelets_detail, width, height, 5, args);
+        if(err != 0) { dt_metal_end_batch(metal); goto error; }
+      }
+
+      residual = buffer_out;
+    }
+
+    // ── PDE reconstruct (coarse to fine) ──
+    // temp_pde is whichever LF texture does not hold the residual
+    dt_metal_texture_t temp_pde = (residual == LF_even) ? LF_odd : LF_even;
+
+    int count = 0;
+    for(int s = scales - 1; s > -1; --s)
+    {
+      const int mult = 1 << s;
+      const float current_radius = equivalent_sigma_at_step(B_SPLINE_SIGMA, s);
+      const float real_radius = current_radius * scale;
+      const float current_radius_square = sqf(current_radius);
+
+      const float norm =
+          expf(-sqf(real_radius - (float)data->radius_center) / sqf(data->radius));
+      const dt_aligned_pixel_t ABCD = { data->first * KAPPA * norm,
+                                        data->second * KAPPA * norm,
+                                        data->third * KAPPA * norm,
+                                        data->fourth * KAPPA * norm };
+      const float strength = data->sharpness * norm + 1.f;
+
+      dt_metal_texture_t pde_in;
+      dt_metal_texture_t pde_out;
+
+      if(count == 0)
+      {
+        pde_in = residual;
+        pde_out = temp_pde;
+      }
+      else if(count % 2 != 0)
+      {
+        pde_in = temp_pde;
+        pde_out = residual;
+      }
+      else
+      {
+        pde_in = residual;
+        pde_out = temp_pde;
+      }
+
+      // the finest scale writes the iteration's result
+      if(s == 0) pde_out = iter_out;
+
+      // diffuse_pde: textures=[HF, LF, mask, output], buffers=[has_mask, width, height, ...]
+      {
+        const dt_metal_arg_t args[] = {
+          { DT_METAL_ARG_TEXTURE, HF[s], 0 },
+          { DT_METAL_ARG_TEXTURE, pde_in, 0 },
+          { DT_METAL_ARG_TEXTURE, mask, 0 },
+          { DT_METAL_ARG_TEXTURE, pde_out, 0 },
+          { DT_METAL_ARG_BYTES, &has_mask_int, sizeof(int) },
+          { DT_METAL_ARG_BYTES, &width, sizeof(int) },
+          { DT_METAL_ARG_BYTES, &height, sizeof(int) },
+          { DT_METAL_ARG_BYTES, anisotropy, sizeof(float) * 4 },
+          { DT_METAL_ARG_BYTES, isotropy_type, sizeof(int) * 4 },
+          { DT_METAL_ARG_BYTES, &regularization, sizeof(float) },
+          { DT_METAL_ARG_BYTES, &variance_threshold, sizeof(float) },
+          { DT_METAL_ARG_BYTES, &current_radius_square, sizeof(float) },
+          { DT_METAL_ARG_BYTES, &mult, sizeof(int) },
+          { DT_METAL_ARG_BYTES, ABCD, sizeof(float) * 4 },
+          { DT_METAL_ARG_BYTES, &strength, sizeof(float) },
+        };
+        err = _metal_dispatch(metal, gd->metal_kernel_diffuse_pde, width, height, 15, args);
+        if(err != 0) { dt_metal_end_batch(metal); goto error; }
+      }
+
+      count++;
+    }
+
+    // End iteration batch — submit all kernels to GPU and wait
+    err = dt_metal_end_batch(metal);
+    if(err != 0) goto error;
+  }
+
+  // Copy output texture back to host
+  dt_metal_copy_from_texture(out_tex, ovoid, bytes_per_row_f4);
+  err = 0;
+
+error:
+  // Shared cleanup for success and failure.
+  // NOTE(review): on the allocation-failure path some handles are NULL, so this
+  // relies on dt_metal_free_texture() accepting NULL — confirm.
+  dt_metal_free_texture(temp1);
+  dt_metal_free_texture(temp2);
+  dt_metal_free_texture(mask);
+  dt_metal_free_texture(LF_odd);
+  dt_metal_free_texture(LF_even);
+  dt_metal_free_texture(out_tex);
+  for(int s = 0; s < scales; s++)
+    dt_metal_free_texture(HF[s]);
+
+  return err;
+}
+#endif /* __APPLE__ && __aarch64__ */
+
+
void gui_init(dt_iop_module_t *self)
{
dt_iop_diffuse_gui_data_t *g = IOP_GUI_ALLOC(diffuse);
diff --git a/src/iop/exposure.c b/src/iop/exposure.c
index 5276cb91228f..b8f598157968 100644
--- a/src/iop/exposure.c
+++ b/src/iop/exposure.c
@@ -1,6 +1,6 @@
/*
This file is part of darktable,
- Copyright (C) 2009-2024 darktable developers.
+ Copyright (C) 2009-2026 darktable developers.
darktable is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -41,6 +41,10 @@
#include "gui/color_picker_proxy.h"
#include "iop/iop_api.h"
+#if defined(__APPLE__) && defined(__aarch64__)
+#include "osx/dt_metal.h"
+#endif
+
#define exposure2white(x) exp2f(-(x))
#define white2exposure(x) -dt_log2f(fmaxf(1e-20f, x))
@@ -112,6 +116,9 @@ typedef struct dt_iop_exposure_data_t
typedef struct dt_iop_exposure_global_data_t
{
int kernel_exposure;
+#if defined(__APPLE__) && defined(__aarch64__)
+ int kernel_exposure_metal;
+#endif
} dt_iop_exposure_global_data_t;
#define EXPOSURE_CORRECTION_UNDEFINED (-FLT_MAX)
@@ -508,6 +515,45 @@ static void _process_common_setup(dt_iop_module_t *self,
d->scale = 1.0 / (white - d->black);
}
+#if defined(__APPLE__) && defined(__aarch64__)
+/* Metal implementation of process() for the exposure module.
+ * Runs the "exposure" Metal kernel over the roi_in-sized image with the
+ * black level and scale as extra kernel arguments, then rescales the
+ * pipe's processed_maximum for the first three channels.
+ * Returns 0 on success, DT_METAL_DEFAULT_ERROR when the kernel handle is
+ * invalid, or the non-zero code reported by dt_metal_enqueue_kernel_2d(). */
+int process_metal(dt_iop_module_t *self,
+                  dt_dev_pixelpipe_iop_t *piece,
+                  const void *const i,
+                  void *const o,
+                  const dt_iop_roi_t *const roi_in,
+                  const dt_iop_roi_t *const roi_out)
+{
+  dt_iop_exposure_global_data_t *gd = self->global_data;
+  dt_iop_exposure_data_t *d = piece->data;
+
+  // must run before d->black / d->scale are read below
+  _process_common_setup(self, piece);
+
+  if(gd->kernel_exposure_metal < 0) return DT_METAL_DEFAULT_ERROR;
+
+  // by-value kernel parameters that follow the input/output buffers
+  const float black = d->black;
+  const float scale = d->scale;
+  const void *extra_args[] = { &black, &scale };
+  const size_t extra_arg_sizes[] = { sizeof(float), sizeof(float) };
+
+  const int err = dt_metal_enqueue_kernel_2d(darktable.metal, gd->kernel_exposure_metal,
+                                             roi_in->width, roi_in->height,
+                                             (const float *)i, (float *)o,
+                                             2, extra_args, extra_arg_sizes);
+  if(err != 0) return err;
+
+  // the output was multiplied by the scale, so the recorded maxima follow
+  for(int k = 0; k < 3; k++) piece->pipe->dsc.processed_maximum[k] *= d->scale;
+
+  return 0;
+}
+#endif
+
#ifdef HAVE_OPENCL
int process_cl(dt_iop_module_t *self,
dt_dev_pixelpipe_iop_t *piece,
@@ -750,12 +796,22 @@ void init_global(dt_iop_module_so_t *self)
dt_iop_exposure_global_data_t *gd = calloc(1,sizeof(dt_iop_exposure_global_data_t));
self->data = gd;
gd->kernel_exposure = dt_opencl_create_kernel(program, "exposure");
+#if defined(__APPLE__) && defined(__aarch64__)
+ if(darktable.metal && dt_metal_is_available(darktable.metal))
+ gd->kernel_exposure_metal = dt_metal_create_kernel(darktable.metal, "exposure");
+ else
+ gd->kernel_exposure_metal = -1;
+#endif
}
void cleanup_global(dt_iop_module_so_t *self)
{
dt_iop_exposure_global_data_t *gd = self->data;
dt_opencl_free_kernel(gd->kernel_exposure);
+#if defined(__APPLE__) && defined(__aarch64__)
+ // Release the Metal kernel handle kept in gd; skipped when no Metal context exists.
+ if(darktable.metal)
+ dt_metal_free_kernel(darktable.metal, gd->kernel_exposure_metal);
+#endif
free(self->data);
self->data = NULL;
}
diff --git a/src/iop/iop_api.h b/src/iop/iop_api.h
index 3ca7923f2819..c5cd749c7c0e 100644
--- a/src/iop/iop_api.h
+++ b/src/iop/iop_api.h
@@ -1,6 +1,6 @@
/*
This file is part of darktable,
- Copyright (C) 2016-2023 darktable developers.
+ Copyright (C) 2016-2026 darktable developers.
darktable is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
@@ -269,6 +269,27 @@ DEFAULT(void, process_tiling, struct dt_iop_module_t *self,
const struct dt_iop_roi_t *const roi_out,
const int bpp);
+#if defined(__APPLE__) && defined(__aarch64__)
+/** the Metal compute equivalent of process().
+ * Uses CPU-side buffers (Apple Silicon unified memory).
+ * Returns 0 on success, non-zero on error.
+ * Only declared when building for Apple on aarch64 (see the enclosing #if).
+ */
+OPTIONAL(int, process_metal, struct dt_iop_module_t *self,
+ struct dt_dev_pixelpipe_iop_t *piece,
+ const void *const i,
+ void *const o,
+ const struct dt_iop_roi_t *const roi_in,
+ const struct dt_iop_roi_t *const roi_out);
+/** a tiling variant of process_metal().
+ * Takes the same arguments plus bpp (presumably bytes per pixel, mirroring
+ * process_tiling() — TODO confirm).
+ */
+DEFAULT(int, process_tiling_metal, struct dt_iop_module_t *self,
+ struct dt_dev_pixelpipe_iop_t *piece,
+ const void *const i,
+ void *const o,
+ const struct dt_iop_roi_t *const roi_in,
+ const struct dt_iop_roi_t *const roi_out,
+ const int bpp);
+#endif
+
#ifdef HAVE_OPENCL
/** the opencl equivalent of process().
* Both process_xx_cl() functions return a CL error code with CL_SUCCESS signalling ok.
diff --git a/src/osx/Metal.hpp b/src/osx/Metal.hpp
new file mode 100644
index 000000000000..b699787b3baa
--- /dev/null
+++ b/src/osx/Metal.hpp
@@ -0,0 +1,19644 @@
+//
+// Metal.hpp
+//
+// Autogenerated on August 09, 2023.
+//
+// Copyright 2020-2023 Apple Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#pragma once
+
+// Attribute shorthands used throughout the NS wrappers.
+#define _NS_WEAK_IMPORT __attribute__((weak_import))
+// Symbol visibility is "hidden" when METALCPP_SYMBOL_VISIBILITY_HIDDEN is defined, "default" otherwise.
+#ifdef METALCPP_SYMBOL_VISIBILITY_HIDDEN
+#define _NS_EXPORT __attribute__((visibility("hidden")))
+#else
+#define _NS_EXPORT __attribute__((visibility("default")))
+#endif // METALCPP_SYMBOL_VISIBILITY_HIDDEN
+#define _NS_EXTERN extern "C" _NS_EXPORT
+#define _NS_INLINE inline __attribute__((always_inline))
+#define _NS_PACKED __attribute__((packed))
+
+// Declaration helpers for constants, typed enums and option sets.
+#define _NS_CONST(type, name) _NS_EXTERN type const name
+#define _NS_ENUM(type, name) enum name : type
+#define _NS_OPTIONS(type, name) \
+    using name = type;          \
+    enum : name
+
+// Compile-time checks that an ns:: wrapper matches the like-named ns-prefixed symbol.
+// The cast target type was lost in extraction; NS::UInteger is what upstream metal-cpp uses.
+#define _NS_CAST_TO_UINT(value) static_cast<NS::UInteger>(value)
+#define _NS_VALIDATE_SIZE(ns, name) static_assert(sizeof(ns::name) == sizeof(ns##name), "size mismatch " #ns "::" #name)
+#define _NS_VALIDATE_ENUM(ns, name) static_assert(_NS_CAST_TO_UINT(ns::name) == _NS_CAST_TO_UINT(ns##name), "value mismatch " #ns "::" #name)
+
+// Objective-C runtime API (objc_lookUpClass, objc_getProtocol, sel_registerName, SEL).
+#include <objc/runtime.h>
+
+// Accessors for the cached class / selector handles declared in
+// NS::Private::Class and NS::Private::Selector.
+#define _NS_PRIVATE_CLS(symbol) (Private::Class::s_k##symbol)
+#define _NS_PRIVATE_SEL(accessor) (Private::Selector::s_k##accessor)
+
+// With NS_PRIVATE_IMPLEMENTATION defined, the _NS_PRIVATE_DEF_* macros DEFINE the
+// s_k... variables and initialise them through the Objective-C runtime; without it
+// (the #else branch) they only emit extern declarations.
+#if defined(NS_PRIVATE_IMPLEMENTATION)
+
+#ifdef METALCPP_SYMBOL_VISIBILITY_HIDDEN
+#define _NS_PRIVATE_VISIBILITY __attribute__((visibility("hidden")))
+#else
+#define _NS_PRIVATE_VISIBILITY __attribute__((visibility("default")))
+#endif // METALCPP_SYMBOL_VISIBILITY_HIDDEN
+
+#define _NS_PRIVATE_IMPORT __attribute__((weak_import))
+
+// Under Objective-C(++) the runtime lookups are cast to void* with __bridge.
+#ifdef __OBJC__
+#define _NS_PRIVATE_OBJC_LOOKUP_CLASS(symbol) ((__bridge void*)objc_lookUpClass(#symbol))
+#define _NS_PRIVATE_OBJC_GET_PROTOCOL(symbol) ((__bridge void*)objc_getProtocol(#symbol))
+#else
+#define _NS_PRIVATE_OBJC_LOOKUP_CLASS(symbol) objc_lookUpClass(#symbol)
+#define _NS_PRIVATE_OBJC_GET_PROTOCOL(symbol) objc_getProtocol(#symbol)
+#endif // __OBJC__
+
+// Definitions: class and protocol handles by name lookup, selectors by sel_registerName().
+#define _NS_PRIVATE_DEF_CLS(symbol) void* s_k##symbol _NS_PRIVATE_VISIBILITY = _NS_PRIVATE_OBJC_LOOKUP_CLASS(symbol)
+#define _NS_PRIVATE_DEF_PRO(symbol) void* s_k##symbol _NS_PRIVATE_VISIBILITY = _NS_PRIVATE_OBJC_GET_PROTOCOL(symbol)
+#define _NS_PRIVATE_DEF_SEL(accessor, symbol) SEL s_k##accessor _NS_PRIVATE_VISIBILITY = sel_registerName(symbol)
+// Weak-imported constant: NS::symbol takes the value of NS<symbol> when its address
+// is non-null, nullptr otherwise.
+#define _NS_PRIVATE_DEF_CONST(type, symbol) \
+ _NS_EXTERN type const NS##symbol _NS_PRIVATE_IMPORT; \
+ type const NS::symbol = (nullptr != &NS##symbol) ? NS##symbol : nullptr
+
+#else
+
+// Declaration-only variants for translation units that do not define NS_PRIVATE_IMPLEMENTATION.
+#define _NS_PRIVATE_DEF_CLS(symbol) extern void* s_k##symbol
+#define _NS_PRIVATE_DEF_PRO(symbol) extern void* s_k##symbol
+#define _NS_PRIVATE_DEF_SEL(accessor, symbol) extern SEL s_k##accessor
+#define _NS_PRIVATE_DEF_CONST(type, symbol) extern type const NS::symbol
+
+#endif // NS_PRIVATE_IMPLEMENTATION
+
+// Cached class handles (s_k<ClassName>), one per Foundation class wrapped by this header.
+// Each entry is declared or defined by _NS_PRIVATE_DEF_CLS.
+namespace NS
+{
+namespace Private
+{
+ namespace Class
+ {
+
+ _NS_PRIVATE_DEF_CLS(NSArray);
+ _NS_PRIVATE_DEF_CLS(NSAutoreleasePool);
+ _NS_PRIVATE_DEF_CLS(NSBundle);
+ _NS_PRIVATE_DEF_CLS(NSCondition);
+ _NS_PRIVATE_DEF_CLS(NSDate);
+ _NS_PRIVATE_DEF_CLS(NSDictionary);
+ _NS_PRIVATE_DEF_CLS(NSError);
+ _NS_PRIVATE_DEF_CLS(NSNotificationCenter);
+ _NS_PRIVATE_DEF_CLS(NSNumber);
+ _NS_PRIVATE_DEF_CLS(NSObject);
+ _NS_PRIVATE_DEF_CLS(NSProcessInfo);
+ _NS_PRIVATE_DEF_CLS(NSSet);
+ _NS_PRIVATE_DEF_CLS(NSString);
+ _NS_PRIVATE_DEF_CLS(NSURL);
+ _NS_PRIVATE_DEF_CLS(NSValue);
+
+ } // Class
+} // Private
+} // NS
+
+// Counterpart of NS::Private::Class for protocol handles.
+namespace NS
+{
+namespace Private
+{
+ namespace Protocol
+ {
+ // No protocol handles are declared in this section.
+
+ } // Protocol
+} // Private
+} // NS
+
+namespace NS
+{
+namespace Private
+{
+ namespace Selector
+ {
+
+ _NS_PRIVATE_DEF_SEL(addObject_,
+ "addObject:");
+ _NS_PRIVATE_DEF_SEL(addObserverName_object_queue_block_,
+ "addObserverForName:object:queue:usingBlock:");
+ _NS_PRIVATE_DEF_SEL(activeProcessorCount,
+ "activeProcessorCount");
+ _NS_PRIVATE_DEF_SEL(allBundles,
+ "allBundles");
+ _NS_PRIVATE_DEF_SEL(allFrameworks,
+ "allFrameworks");
+ _NS_PRIVATE_DEF_SEL(allObjects,
+ "allObjects");
+ _NS_PRIVATE_DEF_SEL(alloc,
+ "alloc");
+ _NS_PRIVATE_DEF_SEL(appStoreReceiptURL,
+ "appStoreReceiptURL");
+ _NS_PRIVATE_DEF_SEL(arguments,
+ "arguments");
+ _NS_PRIVATE_DEF_SEL(array,
+ "array");
+ _NS_PRIVATE_DEF_SEL(arrayWithObject_,
+ "arrayWithObject:");
+ _NS_PRIVATE_DEF_SEL(arrayWithObjects_count_,
+ "arrayWithObjects:count:");
+ _NS_PRIVATE_DEF_SEL(automaticTerminationSupportEnabled,
+ "automaticTerminationSupportEnabled");
+ _NS_PRIVATE_DEF_SEL(autorelease,
+ "autorelease");
+ _NS_PRIVATE_DEF_SEL(beginActivityWithOptions_reason_,
+ "beginActivityWithOptions:reason:");
+ _NS_PRIVATE_DEF_SEL(boolValue,
+ "boolValue");
+ _NS_PRIVATE_DEF_SEL(broadcast,
+ "broadcast");
+ _NS_PRIVATE_DEF_SEL(builtInPlugInsPath,
+ "builtInPlugInsPath");
+ _NS_PRIVATE_DEF_SEL(builtInPlugInsURL,
+ "builtInPlugInsURL");
+ _NS_PRIVATE_DEF_SEL(bundleIdentifier,
+ "bundleIdentifier");
+ _NS_PRIVATE_DEF_SEL(bundlePath,
+ "bundlePath");
+ _NS_PRIVATE_DEF_SEL(bundleURL,
+ "bundleURL");
+ _NS_PRIVATE_DEF_SEL(bundleWithPath_,
+ "bundleWithPath:");
+ _NS_PRIVATE_DEF_SEL(bundleWithURL_,
+ "bundleWithURL:");
+ _NS_PRIVATE_DEF_SEL(caseInsensitiveCompare_,
+ "caseInsensitiveCompare:");
+ _NS_PRIVATE_DEF_SEL(characterAtIndex_,
+ "characterAtIndex:");
+ _NS_PRIVATE_DEF_SEL(charValue,
+ "charValue");
+ _NS_PRIVATE_DEF_SEL(countByEnumeratingWithState_objects_count_,
+ "countByEnumeratingWithState:objects:count:");
+ _NS_PRIVATE_DEF_SEL(cStringUsingEncoding_,
+ "cStringUsingEncoding:");
+ _NS_PRIVATE_DEF_SEL(code,
+ "code");
+ _NS_PRIVATE_DEF_SEL(compare_,
+ "compare:");
+ _NS_PRIVATE_DEF_SEL(copy,
+ "copy");
+ _NS_PRIVATE_DEF_SEL(count,
+ "count");
+ _NS_PRIVATE_DEF_SEL(dateWithTimeIntervalSinceNow_,
+ "dateWithTimeIntervalSinceNow:");
+ _NS_PRIVATE_DEF_SEL(defaultCenter,
+ "defaultCenter");
+ _NS_PRIVATE_DEF_SEL(descriptionWithLocale_,
+ "descriptionWithLocale:");
+ _NS_PRIVATE_DEF_SEL(disableAutomaticTermination_,
+ "disableAutomaticTermination:");
+ _NS_PRIVATE_DEF_SEL(disableSuddenTermination,
+ "disableSuddenTermination");
+ _NS_PRIVATE_DEF_SEL(debugDescription,
+ "debugDescription");
+ _NS_PRIVATE_DEF_SEL(description,
+ "description");
+ _NS_PRIVATE_DEF_SEL(dictionary,
+ "dictionary");
+ _NS_PRIVATE_DEF_SEL(dictionaryWithObject_forKey_,
+ "dictionaryWithObject:forKey:");
+ _NS_PRIVATE_DEF_SEL(dictionaryWithObjects_forKeys_count_,
+ "dictionaryWithObjects:forKeys:count:");
+ _NS_PRIVATE_DEF_SEL(domain,
+ "domain");
+ _NS_PRIVATE_DEF_SEL(doubleValue,
+ "doubleValue");
+ _NS_PRIVATE_DEF_SEL(drain,
+ "drain");
+ _NS_PRIVATE_DEF_SEL(enableAutomaticTermination_,
+ "enableAutomaticTermination:");
+ _NS_PRIVATE_DEF_SEL(enableSuddenTermination,
+ "enableSuddenTermination");
+ _NS_PRIVATE_DEF_SEL(endActivity_,
+ "endActivity:");
+ _NS_PRIVATE_DEF_SEL(environment,
+ "environment");
+ _NS_PRIVATE_DEF_SEL(errorWithDomain_code_userInfo_,
+ "errorWithDomain:code:userInfo:");
+ _NS_PRIVATE_DEF_SEL(executablePath,
+ "executablePath");
+ _NS_PRIVATE_DEF_SEL(executableURL,
+ "executableURL");
+ _NS_PRIVATE_DEF_SEL(fileSystemRepresentation,
+ "fileSystemRepresentation");
+ _NS_PRIVATE_DEF_SEL(fileURLWithPath_,
+ "fileURLWithPath:");
+ _NS_PRIVATE_DEF_SEL(floatValue,
+ "floatValue");
+ _NS_PRIVATE_DEF_SEL(fullUserName,
+ "fullUserName");
+ _NS_PRIVATE_DEF_SEL(getValue_size_,
+ "getValue:size:");
+ _NS_PRIVATE_DEF_SEL(globallyUniqueString,
+ "globallyUniqueString");
+ _NS_PRIVATE_DEF_SEL(hash,
+ "hash");
+ _NS_PRIVATE_DEF_SEL(hostName,
+ "hostName");
+ _NS_PRIVATE_DEF_SEL(infoDictionary,
+ "infoDictionary");
+ _NS_PRIVATE_DEF_SEL(init,
+ "init");
+ _NS_PRIVATE_DEF_SEL(initFileURLWithPath_,
+ "initFileURLWithPath:");
+ _NS_PRIVATE_DEF_SEL(initWithBool_,
+ "initWithBool:");
+ _NS_PRIVATE_DEF_SEL(initWithBytes_objCType_,
+ "initWithBytes:objCType:");
+ _NS_PRIVATE_DEF_SEL(initWithBytesNoCopy_length_encoding_freeWhenDone_,
+ "initWithBytesNoCopy:length:encoding:freeWhenDone:");
+ _NS_PRIVATE_DEF_SEL(initWithChar_,
+ "initWithChar:");
+ _NS_PRIVATE_DEF_SEL(initWithCoder_,
+ "initWithCoder:");
+ _NS_PRIVATE_DEF_SEL(initWithCString_encoding_,
+ "initWithCString:encoding:");
+ _NS_PRIVATE_DEF_SEL(initWithDomain_code_userInfo_,
+ "initWithDomain:code:userInfo:");
+ _NS_PRIVATE_DEF_SEL(initWithDouble_,
+ "initWithDouble:");
+ _NS_PRIVATE_DEF_SEL(initWithFloat_,
+ "initWithFloat:");
+ _NS_PRIVATE_DEF_SEL(initWithInt_,
+ "initWithInt:");
+ _NS_PRIVATE_DEF_SEL(initWithLong_,
+ "initWithLong:");
+ _NS_PRIVATE_DEF_SEL(initWithLongLong_,
+ "initWithLongLong:");
+ _NS_PRIVATE_DEF_SEL(initWithObjects_count_,
+ "initWithObjects:count:");
+ _NS_PRIVATE_DEF_SEL(initWithObjects_forKeys_count_,
+ "initWithObjects:forKeys:count:");
+ _NS_PRIVATE_DEF_SEL(initWithPath_,
+ "initWithPath:");
+ _NS_PRIVATE_DEF_SEL(initWithShort_,
+ "initWithShort:");
+ _NS_PRIVATE_DEF_SEL(initWithString_,
+ "initWithString:");
+ _NS_PRIVATE_DEF_SEL(initWithUnsignedChar_,
+ "initWithUnsignedChar:");
+ _NS_PRIVATE_DEF_SEL(initWithUnsignedInt_,
+ "initWithUnsignedInt:");
+ _NS_PRIVATE_DEF_SEL(initWithUnsignedLong_,
+ "initWithUnsignedLong:");
+ _NS_PRIVATE_DEF_SEL(initWithUnsignedLongLong_,
+ "initWithUnsignedLongLong:");
+ _NS_PRIVATE_DEF_SEL(initWithUnsignedShort_,
+ "initWithUnsignedShort:");
+ _NS_PRIVATE_DEF_SEL(initWithURL_,
+ "initWithURL:");
+ _NS_PRIVATE_DEF_SEL(integerValue,
+ "integerValue");
+ _NS_PRIVATE_DEF_SEL(intValue,
+ "intValue");
+ _NS_PRIVATE_DEF_SEL(isEqual_,
+ "isEqual:");
+ _NS_PRIVATE_DEF_SEL(isEqualToNumber_,
+ "isEqualToNumber:");
+ _NS_PRIVATE_DEF_SEL(isEqualToString_,
+ "isEqualToString:");
+ _NS_PRIVATE_DEF_SEL(isEqualToValue_,
+ "isEqualToValue:");
+ _NS_PRIVATE_DEF_SEL(isiOSAppOnMac,
+ "isiOSAppOnMac");
+ _NS_PRIVATE_DEF_SEL(isLoaded,
+ "isLoaded");
+ _NS_PRIVATE_DEF_SEL(isLowPowerModeEnabled,
+ "isLowPowerModeEnabled");
+ _NS_PRIVATE_DEF_SEL(isMacCatalystApp,
+ "isMacCatalystApp");
+ _NS_PRIVATE_DEF_SEL(isOperatingSystemAtLeastVersion_,
+ "isOperatingSystemAtLeastVersion:");
+ _NS_PRIVATE_DEF_SEL(keyEnumerator,
+ "keyEnumerator");
+ _NS_PRIVATE_DEF_SEL(length,
+ "length");
+ _NS_PRIVATE_DEF_SEL(lengthOfBytesUsingEncoding_,
+ "lengthOfBytesUsingEncoding:");
+ _NS_PRIVATE_DEF_SEL(load,
+ "load");
+ _NS_PRIVATE_DEF_SEL(loadAndReturnError_,
+ "loadAndReturnError:");
+ _NS_PRIVATE_DEF_SEL(localizedDescription,
+ "localizedDescription");
+ _NS_PRIVATE_DEF_SEL(localizedFailureReason,
+ "localizedFailureReason");
+ _NS_PRIVATE_DEF_SEL(localizedInfoDictionary,
+ "localizedInfoDictionary");
+ _NS_PRIVATE_DEF_SEL(localizedRecoveryOptions,
+ "localizedRecoveryOptions");
+ _NS_PRIVATE_DEF_SEL(localizedRecoverySuggestion,
+ "localizedRecoverySuggestion");
+ _NS_PRIVATE_DEF_SEL(localizedStringForKey_value_table_,
+ "localizedStringForKey:value:table:");
+ _NS_PRIVATE_DEF_SEL(lock,
+ "lock");
+ _NS_PRIVATE_DEF_SEL(longValue,
+ "longValue");
+ _NS_PRIVATE_DEF_SEL(longLongValue,
+ "longLongValue");
+ _NS_PRIVATE_DEF_SEL(mainBundle,
+ "mainBundle");
+ _NS_PRIVATE_DEF_SEL(maximumLengthOfBytesUsingEncoding_,
+ "maximumLengthOfBytesUsingEncoding:");
+ _NS_PRIVATE_DEF_SEL(methodSignatureForSelector_,
+ "methodSignatureForSelector:");
+ _NS_PRIVATE_DEF_SEL(mutableBytes,
+ "mutableBytes");
+ _NS_PRIVATE_DEF_SEL(name,
+ "name");
+ _NS_PRIVATE_DEF_SEL(nextObject,
+ "nextObject");
+ _NS_PRIVATE_DEF_SEL(numberWithBool_,
+ "numberWithBool:");
+ _NS_PRIVATE_DEF_SEL(numberWithChar_,
+ "numberWithChar:");
+ _NS_PRIVATE_DEF_SEL(numberWithDouble_,
+ "numberWithDouble:");
+ _NS_PRIVATE_DEF_SEL(numberWithFloat_,
+ "numberWithFloat:");
+ _NS_PRIVATE_DEF_SEL(numberWithInt_,
+ "numberWithInt:");
+ _NS_PRIVATE_DEF_SEL(numberWithLong_,
+ "numberWithLong:");
+ _NS_PRIVATE_DEF_SEL(numberWithLongLong_,
+ "numberWithLongLong:");
+ _NS_PRIVATE_DEF_SEL(numberWithShort_,
+ "numberWithShort:");
+ _NS_PRIVATE_DEF_SEL(numberWithUnsignedChar_,
+ "numberWithUnsignedChar:");
+ _NS_PRIVATE_DEF_SEL(numberWithUnsignedInt_,
+ "numberWithUnsignedInt:");
+ _NS_PRIVATE_DEF_SEL(numberWithUnsignedLong_,
+ "numberWithUnsignedLong:");
+ _NS_PRIVATE_DEF_SEL(numberWithUnsignedLongLong_,
+ "numberWithUnsignedLongLong:");
+ _NS_PRIVATE_DEF_SEL(numberWithUnsignedShort_,
+ "numberWithUnsignedShort:");
+ _NS_PRIVATE_DEF_SEL(objCType,
+ "objCType");
+ _NS_PRIVATE_DEF_SEL(object,
+ "object");
+ _NS_PRIVATE_DEF_SEL(objectAtIndex_,
+ "objectAtIndex:");
+ _NS_PRIVATE_DEF_SEL(objectEnumerator,
+ "objectEnumerator");
+ _NS_PRIVATE_DEF_SEL(objectForInfoDictionaryKey_,
+ "objectForInfoDictionaryKey:");
+ _NS_PRIVATE_DEF_SEL(objectForKey_,
+ "objectForKey:");
+ _NS_PRIVATE_DEF_SEL(operatingSystem,
+ "operatingSystem");
+ _NS_PRIVATE_DEF_SEL(operatingSystemVersion,
+ "operatingSystemVersion");
+ _NS_PRIVATE_DEF_SEL(operatingSystemVersionString,
+ "operatingSystemVersionString");
+ _NS_PRIVATE_DEF_SEL(pathForAuxiliaryExecutable_,
+ "pathForAuxiliaryExecutable:");
+ _NS_PRIVATE_DEF_SEL(performActivityWithOptions_reason_usingBlock_,
+ "performActivityWithOptions:reason:usingBlock:");
+ _NS_PRIVATE_DEF_SEL(performExpiringActivityWithReason_usingBlock_,
+ "performExpiringActivityWithReason:usingBlock:");
+ _NS_PRIVATE_DEF_SEL(physicalMemory,
+ "physicalMemory");
+ _NS_PRIVATE_DEF_SEL(pointerValue,
+ "pointerValue");
+ _NS_PRIVATE_DEF_SEL(preflightAndReturnError_,
+ "preflightAndReturnError:");
+ _NS_PRIVATE_DEF_SEL(privateFrameworksPath,
+ "privateFrameworksPath");
+ _NS_PRIVATE_DEF_SEL(privateFrameworksURL,
+ "privateFrameworksURL");
+ _NS_PRIVATE_DEF_SEL(processIdentifier,
+ "processIdentifier");
+ _NS_PRIVATE_DEF_SEL(processInfo,
+ "processInfo");
+ _NS_PRIVATE_DEF_SEL(processName,
+ "processName");
+ _NS_PRIVATE_DEF_SEL(processorCount,
+ "processorCount");
+ _NS_PRIVATE_DEF_SEL(rangeOfString_options_,
+ "rangeOfString:options:");
+ _NS_PRIVATE_DEF_SEL(release,
+ "release");
+ _NS_PRIVATE_DEF_SEL(removeObserver_,
+ "removeObserver:");
+ _NS_PRIVATE_DEF_SEL(resourcePath,
+ "resourcePath");
+ _NS_PRIVATE_DEF_SEL(resourceURL,
+ "resourceURL");
+ _NS_PRIVATE_DEF_SEL(respondsToSelector_,
+ "respondsToSelector:");
+ _NS_PRIVATE_DEF_SEL(retain,
+ "retain");
+ _NS_PRIVATE_DEF_SEL(retainCount,
+ "retainCount");
+ _NS_PRIVATE_DEF_SEL(setAutomaticTerminationSupportEnabled_,
+ "setAutomaticTerminationSupportEnabled:");
+ _NS_PRIVATE_DEF_SEL(setProcessName_,
+ "setProcessName:");
+ _NS_PRIVATE_DEF_SEL(sharedFrameworksPath,
+ "sharedFrameworksPath");
+ _NS_PRIVATE_DEF_SEL(sharedFrameworksURL,
+ "sharedFrameworksURL");
+ _NS_PRIVATE_DEF_SEL(sharedSupportPath,
+ "sharedSupportPath");
+ _NS_PRIVATE_DEF_SEL(sharedSupportURL,
+ "sharedSupportURL");
+ _NS_PRIVATE_DEF_SEL(shortValue,
+ "shortValue");
+ _NS_PRIVATE_DEF_SEL(showPools,
+ "showPools");
+ _NS_PRIVATE_DEF_SEL(signal,
+ "signal");
+ _NS_PRIVATE_DEF_SEL(string,
+ "string");
+ _NS_PRIVATE_DEF_SEL(stringValue,
+ "stringValue");
+ _NS_PRIVATE_DEF_SEL(stringWithString_,
+ "stringWithString:");
+ _NS_PRIVATE_DEF_SEL(stringWithCString_encoding_,
+ "stringWithCString:encoding:");
+ _NS_PRIVATE_DEF_SEL(stringByAppendingString_,
+ "stringByAppendingString:");
+ _NS_PRIVATE_DEF_SEL(systemUptime,
+ "systemUptime");
+ _NS_PRIVATE_DEF_SEL(thermalState,
+ "thermalState");
+ _NS_PRIVATE_DEF_SEL(unload,
+ "unload");
+ _NS_PRIVATE_DEF_SEL(unlock,
+ "unlock");
+ _NS_PRIVATE_DEF_SEL(unsignedCharValue,
+ "unsignedCharValue");
+ _NS_PRIVATE_DEF_SEL(unsignedIntegerValue,
+ "unsignedIntegerValue");
+ _NS_PRIVATE_DEF_SEL(unsignedIntValue,
+ "unsignedIntValue");
+ _NS_PRIVATE_DEF_SEL(unsignedLongValue,
+ "unsignedLongValue");
+ _NS_PRIVATE_DEF_SEL(unsignedLongLongValue,
+ "unsignedLongLongValue");
+ _NS_PRIVATE_DEF_SEL(unsignedShortValue,
+ "unsignedShortValue");
+ _NS_PRIVATE_DEF_SEL(URLForAuxiliaryExecutable_,
+ "URLForAuxiliaryExecutable:");
+ _NS_PRIVATE_DEF_SEL(userInfo,
+ "userInfo");
+ _NS_PRIVATE_DEF_SEL(userName,
+ "userName");
+ _NS_PRIVATE_DEF_SEL(UTF8String,
+ "UTF8String");
+ _NS_PRIVATE_DEF_SEL(valueWithBytes_objCType_,
+ "valueWithBytes:objCType:");
+ _NS_PRIVATE_DEF_SEL(valueWithPointer_,
+ "valueWithPointer:");
+ _NS_PRIVATE_DEF_SEL(wait,
+ "wait");
+ _NS_PRIVATE_DEF_SEL(waitUntilDate_,
+ "waitUntilDate:");
+ } // Class
+} // Private
+} // MTL
+
+// The header names were lost in extraction. <cstdint> is required by
+// std::intptr_t / INTPTR_MAX below; CoreFoundation matches upstream metal-cpp.
+#include <CoreFoundation/CoreFoundation.h>
+#include <cstdint>
+
+// Basic scalar types shared by the NS wrappers.
+namespace NS
+{
+using TimeInterval = double;
+
+// Pointer-sized signed / unsigned integers.
+using Integer = std::intptr_t;
+using UInteger = std::uintptr_t;
+
+const Integer IntegerMax = INTPTR_MAX;
+const Integer IntegerMin = INTPTR_MIN;
+const UInteger UIntegerMax = UINTPTR_MAX;
+
+// Three-part version number; declared packed via _NS_PACKED.
+struct OperatingSystemVersion
+{
+    Integer majorVersion;
+    Integer minorVersion;
+    Integer patchVersion;
+} _NS_PACKED;
+}
+
+// Header names restored after extraction stripped them (as in upstream metal-cpp NSObject.hpp).
+#include <objc/message.h>
+#include <objc/runtime.h>
+
+#include <type_traits>
+
+namespace NS
+{
+// Mixin adding reference-counting style members to _Class on top of _Base.
+// The template parameter list was lost in extraction and is restored here;
+// _Base defaults to NS::Object.
+template <class _Class, class _Base = class Object>
+class _NS_EXPORT Referencing : public _Base
+{
+public:
+    _Class* retain();
+    void release();
+
+    _Class* autorelease();
+
+    UInteger retainCount() const;
+};
+
+// Referencing plus a copy() member returning a _Class pointer.
+// The template parameter list was lost in extraction and is restored here;
+// _Base defaults to NS::Object.
+template <class _Class, class _Base = class Object>
+class Copying : public Referencing<_Class, _Base>
+{
+public:
+    _Class* copy() const;
+};
+
+// Tag type layered on Referencing; it adds no members of its own.
+// The template parameter list was lost in extraction and is restored here;
+// _Base defaults to NS::Object.
+template <class _Class, class _Base = class Object>
+class SecureCoding : public Referencing<_Class, _Base>
+{
+};
+
+class Object : public Referencing