From 94e346207515ab690e8f7f2a43e1e4aa286153ab Mon Sep 17 00:00:00 2001 From: maxchisto Date: Wed, 25 Mar 2026 21:22:36 -0700 Subject: [PATCH 1/6] working attempt at rewriting sigmoid gpu stays at 100% --- mojo/Makefile | 20 + mojo/bench/sigmoid_bench.mojo | 148 +++++ mojo/build_sigmoid_iop.sh | 51 ++ mojo/darktable-mojo.sh | 7 + mojo/iop/__init__.mojo | 1 + mojo/iop/sigmoid/__init__.mojo | 1 + mojo/iop/sigmoid/kernels.mojo | 281 +++++++++ mojo/iop/sigmoid/lib.mojo | 370 ++++++++++++ mojo/pixi.lock | 843 +++++++++++++++++++++++++++ mojo/pixi.toml | 14 + mojo/reproduce_fault/README.md | 28 + mojo/reproduce_fault/build.sh | 10 + mojo/reproduce_fault/lib_fault.mojo | 69 +++ mojo/reproduce_fault/lib_fault.so | Bin 0 -> 57088 bytes mojo/reproduce_fault/main.c | 26 + mojo/reproduce_fault/reproduce_crash | Bin 0 -> 16192 bytes mojo/sigmoid_build_instructions.md | 61 ++ mojo/validate_sigmoid | Bin 0 -> 16344 bytes mojo/validate_sigmoid.c | 62 ++ src/iop/sigmoid.c | 177 +++--- src/iop/sigmoid_mojo.h | 37 ++ 21 files changed, 2134 insertions(+), 72 deletions(-) create mode 100644 mojo/Makefile create mode 100644 mojo/bench/sigmoid_bench.mojo create mode 100755 mojo/build_sigmoid_iop.sh create mode 100755 mojo/darktable-mojo.sh create mode 100644 mojo/iop/__init__.mojo create mode 100644 mojo/iop/sigmoid/__init__.mojo create mode 100644 mojo/iop/sigmoid/kernels.mojo create mode 100644 mojo/iop/sigmoid/lib.mojo create mode 100644 mojo/pixi.lock create mode 100644 mojo/pixi.toml create mode 100644 mojo/reproduce_fault/README.md create mode 100755 mojo/reproduce_fault/build.sh create mode 100644 mojo/reproduce_fault/lib_fault.mojo create mode 100755 mojo/reproduce_fault/lib_fault.so create mode 100644 mojo/reproduce_fault/main.c create mode 100755 mojo/reproduce_fault/reproduce_crash create mode 100644 mojo/sigmoid_build_instructions.md create mode 100755 mojo/validate_sigmoid create mode 100644 mojo/validate_sigmoid.c create mode 100644 src/iop/sigmoid_mojo.h diff --git 
a/mojo/Makefile b/mojo/Makefile new file mode 100644 index 0000000000..3db8bd8b44 --- /dev/null +++ b/mojo/Makefile @@ -0,0 +1,20 @@ +MOJO = pixi run mojo +PLUGIN_DIR = /usr/lib/darktable/plugins + +.PHONY: all build install bench clean + +all: build + +build: libsigmoid_mojo.so + +libsigmoid_mojo.so: iop/sigmoid/lib.mojo iop/sigmoid/kernels.mojo + $(MOJO) build -I . iop/sigmoid/lib.mojo --emit shared-lib -o libsigmoid_mojo.so + +install: libsigmoid_mojo.so + sudo cp libsigmoid_mojo.so $(PLUGIN_DIR)/ + +bench: # -I . puts the iop package on the import path, as the build rule does + $(MOJO) run -I . bench/sigmoid_bench.mojo + +clean: + rm -f libsigmoid_mojo.so diff --git a/mojo/bench/sigmoid_bench.mojo b/mojo/bench/sigmoid_bench.mojo new file mode 100644 index 0000000000..ced674b102 --- /dev/null +++ b/mojo/bench/sigmoid_bench.mojo @@ -0,0 +1,148 @@ +from std.gpu.host import DeviceContext +from layout import Layout, LayoutTensor +from std.utils import Index, IndexList +from std.math import sqrt, pow, max, min +from std.benchmark import Bench, BenchConfig, Bencher, BenchId +from std.algorithm.functional import elementwise +from iop.sigmoid.kernels import apply_sigmoid_rgb_ratio, apply_sigmoid_per_channel + +# Configuration +comptime WIDTH = 6016 +comptime HEIGHT = 4016 +comptime CHANNELS = 4 +comptime IMAGE_LAYOUT = Layout.row_major(HEIGHT, WIDTH, CHANNELS) +comptime DTYPE = DType.float32 + +fn run_sigmoid_rgb_ratio( + ctx: DeviceContext, + output: LayoutTensor[DTYPE, IMAGE_LAYOUT, MutAnyOrigin], + input: LayoutTensor[DTYPE, IMAGE_LAYOUT, ImmutAnyOrigin], + white_target: Float32, + black_target: Float32, + paper_exp: Float32, + film_fog: Float32, + film_power: Float32, + paper_power: Float32, + num_pixels: Int, +) raises: + @parameter + @always_inline + fn rgb_ratio_closure[ + width: Int, rank: Int, alignment: Int + ](indices: IndexList[rank]) capturing -> None: + var px_idx = indices[0] + var y = px_idx // WIDTH + var x = px_idx % WIDTH + var pix = input.load[width=4](Index(y, x, 0)) + var res = apply_sigmoid_rgb_ratio( + pix[0], pix[1], 
pix[2], pix[3], + white_target, black_target, paper_exp, film_fog, film_power, paper_power + ) + output.store[width=4](Index(y, x, 0), res) + + elementwise[rgb_ratio_closure, 1, target="gpu"](num_pixels, ctx) + + +fn run_sigmoid_per_channel( + ctx: DeviceContext, + output: LayoutTensor[DTYPE, IMAGE_LAYOUT, MutAnyOrigin], + input: LayoutTensor[DTYPE, IMAGE_LAYOUT, ImmutAnyOrigin], + white_target: Float32, + paper_exp: Float32, + film_fog: Float32, + contrast_power: Float32, + skew_power: Float32, + hue_preservation: Float32, + pipe_to_base: SIMD[DType.float32, 16], + base_to_rendering: SIMD[DType.float32, 16], + rendering_to_pipe: SIMD[DType.float32, 16], + num_pixels: Int, +) raises: + @parameter + @always_inline + fn per_channel_closure[ + width: Int, rank: Int, alignment: Int + ](indices: IndexList[rank]) capturing -> None: + var px_idx = indices[0] + var y = px_idx // WIDTH + var x = px_idx % WIDTH + var pix = input.load[width=4](Index(y, x, 0)) + var res = apply_sigmoid_per_channel( + pix[0], pix[1], pix[2], pix[3], + white_target, paper_exp, film_fog, contrast_power, skew_power, hue_preservation, + pipe_to_base, base_to_rendering, rendering_to_pipe + ) + output.store[width=4](Index(y, x, 0), res) + + elementwise[per_channel_closure, 1, target="gpu"](num_pixels, ctx) + + +fn main() raises: + print("Mojo Sigmoid Benchmark - New Structure") + var total_floats = HEIGHT * WIDTH * CHANNELS + var ctx = DeviceContext() + print("Using GPU API:", ctx.api()) + + var input_buffer_host = ctx.enqueue_create_host_buffer[DTYPE](total_floats) + var input_buffer_device = ctx.enqueue_create_buffer[DTYPE](total_floats) + var output_buffer_device = ctx.enqueue_create_buffer[DTYPE](total_floats) + + var input_image_host = LayoutTensor[DTYPE, IMAGE_LAYOUT, MutAnyOrigin]( + input_buffer_host + ) + var input_image_device = LayoutTensor[DTYPE, IMAGE_LAYOUT, ImmutAnyOrigin]( + input_buffer_device + ) + var output_image_device = LayoutTensor[DTYPE, IMAGE_LAYOUT, MutAnyOrigin]( + 
output_buffer_device + ) + + # Initialize input data (simplified ramp for testing) + for y in range(HEIGHT): + for x in range(WIDTH): + var r = Float32(x) / WIDTH + var g = Float32(y) / HEIGHT + var b = Float32(0.5) + input_image_host.store[width=4]( + Index(y, x, 0), SIMD[DType.float32, 4](r, g, b, 1.0) + ) + + ctx.enqueue_copy(input_buffer_device, input_buffer_host) + + var white_target = Float32(1.0) + var black_target = Float32(0.000152) + var paper_exp = Float32(0.5) + var film_fog = Float32(0.0) + var contrast_power = Float32(2.5) + var skew_power = Float32(1.0) + var hue_preservation = Float32(1.0) + + var identity = SIMD[DType.float32, 16](0) + identity[0] = 1; identity[5] = 1; identity[10] = 1; identity[15] = 1 + + # Warmup + run_sigmoid_rgb_ratio(ctx, output_image_device, input_image_device, white_target, black_target, paper_exp, film_fog, contrast_power, skew_power, WIDTH * HEIGHT) + ctx.synchronize() + + # Benchmarking + var bench = Bench(BenchConfig(max_iters=100, num_warmup_iters=10)) + + @parameter + fn bench_rgb(mut b: Bencher) raises: + @parameter + fn run(ctx: DeviceContext) raises: + run_sigmoid_rgb_ratio(ctx, output_image_device, input_image_device, white_target, black_target, paper_exp, film_fog, contrast_power, skew_power, WIDTH * HEIGHT) + b.iter_custom[run](ctx) + ctx.synchronize() + + @parameter + fn bench_per(mut b: Bencher) raises: + @parameter + fn run(ctx: DeviceContext) raises: + run_sigmoid_per_channel(ctx, output_image_device, input_image_device, white_target, paper_exp, film_fog, contrast_power, skew_power, hue_preservation, identity, identity, identity, WIDTH * HEIGHT) + b.iter_custom[run](ctx) + ctx.synchronize() + + bench.bench_function[bench_rgb](BenchId("Mojo-Sigmoid-RGB-Ratio-New")) + bench.bench_function[bench_per](BenchId("Mojo-Sigmoid-Per-Channel-New")) + print(bench) diff --git a/mojo/build_sigmoid_iop.sh b/mojo/build_sigmoid_iop.sh new file mode 100755 index 0000000000..0c06170049 --- /dev/null +++ 
b/mojo/build_sigmoid_iop.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# Build libsigmoid.so from the darktable source tree. +# This mimics what CMake does (run introspection, then compile + link). SRC and BUILD default to this checkout and /tmp/sigmoid_build; override them from the environment. + +set -e + +SRC="${SRC:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}" +BUILD="${BUILD:-/tmp/sigmoid_build}" +DEST="$BUILD/libsigmoid.so" + +mkdir -p "$BUILD" + +# Step 1: Run introspection (generates introspection_sigmoid.c) +echo "=== Step 1: Introspection ===" +perl "$SRC/tools/introspection/parser.pl" \ + "$SRC/src/" \ + "$SRC/src/iop/sigmoid.c" \ + "$BUILD/introspection_sigmoid.c" +echo " -> $BUILD/introspection_sigmoid.c ($(wc -l < "$BUILD/introspection_sigmoid.c") lines)" + +# Step 2: Collect flags +GTK_FLAGS=$(pkg-config --cflags gtk+-3.0 glib-2.0 librsvg-2.0 lcms2 json-glib-1.0) +DT_INCLUDE="-I$SRC/src -I$SRC/src/iop -I$SRC/build/bin -I/usr/lib/darktable" +DEFINES="-DHAVE_CONFIG_H -DHAVE_OPENCL -D_GNU_SOURCE -include common/module_api.h -include iop/iop_api.h" +CFLAGS="-O3 -march=native -fPIC -fvisibility=hidden -fopenmp $GTK_FLAGS $DT_INCLUDE $DEFINES" +LDFLAGS="-L/usr/lib/darktable -ldarktable -lm -lgomp -Wl,-rpath,/usr/lib/darktable" + +echo "" +echo "=== Step 2: Compile ===" +gcc $CFLAGS \ + -Wno-unused-function \ + -Wno-deprecated-declarations \ + -c "$BUILD/introspection_sigmoid.c" \ + -o "$BUILD/introspection_sigmoid.o" 2>&1 +echo " -> $BUILD/introspection_sigmoid.o" + +echo "" +echo "=== Step 3: Link ===" +gcc -shared -fPIC -fopenmp \ + "$BUILD/introspection_sigmoid.o" \ + $LDFLAGS \ + -o "$DEST" +echo " -> $DEST" + +echo "" +echo "=== Step 4: Verify ===" +nm -D "$DEST" | grep " T " | awk '{print $3}' + +echo "" +echo "Build complete: $DEST" +echo "Size: $(du -sh "$DEST" | cut -f1)" diff --git a/mojo/darktable-mojo.sh b/mojo/darktable-mojo.sh new file mode 100755 index 0000000000..0e10c48be9 --- /dev/null +++ b/mojo/darktable-mojo.sh @@ -0,0 +1,7 @@ +#!/bin/bash +MOJO_LIB_DIR="${MOJO_LIB_DIR:-$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.pixi/envs/default/lib}" +# We need both the Mojo libs and the directory where libsigmoid_mojo.so lives 
(usually /usr/lib) +export LD_LIBRARY_PATH="$MOJO_LIB_DIR:/usr/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" # no trailing ':' when unset; an empty entry would make the loader search the current directory + +echo "--- Launching Darktable with Mojo GPU Support ---" +/usr/bin/darktable -d opencl "$@" diff --git a/mojo/iop/__init__.mojo b/mojo/iop/__init__.mojo new file mode 100644 index 0000000000..0c393fc1b4 --- /dev/null +++ b/mojo/iop/__init__.mojo @@ -0,0 +1 @@ +# iop package diff --git a/mojo/iop/sigmoid/__init__.mojo b/mojo/iop/sigmoid/__init__.mojo new file mode 100644 index 0000000000..256cd8163f --- /dev/null +++ b/mojo/iop/sigmoid/__init__.mojo @@ -0,0 +1 @@ +# iop.sigmoid package diff --git a/mojo/iop/sigmoid/kernels.mojo b/mojo/iop/sigmoid/kernels.mojo new file mode 100644 index 0000000000..c5b1a3d220 --- /dev/null +++ b/mojo/iop/sigmoid/kernels.mojo @@ -0,0 +1,281 @@ +from std.math import isnan, sqrt, pow, max, min + +@always_inline +fn generalized_loglogistic_sigmoid_scalar( + value: Float32, + magnitude: Float32, + paper_exp: Float32, + film_fog: Float32, + film_power: Float32, + paper_power: Float32, +) -> Float32: + var clamped_value = max(value, Float32(0.0)) + var film_response = pow(film_fog + clamped_value, film_power) + var paper_response = magnitude * pow( + film_response / (paper_exp + film_response), paper_power + ) + if isnan(paper_response): + return magnitude + return paper_response + +@always_inline +fn apply_sigmoid_rgb_ratio( + in_r: Float32, + in_g: Float32, + in_b: Float32, + in_a: Float32, + white_target: Float32, + black_target: Float32, + paper_exp: Float32, + film_fog: Float32, + film_power: Float32, + paper_power: Float32, +) -> SIMD[DType.float32, 4]: + # Desaturate negative values + var avg = max((in_r + in_g + in_b) / 3.0, Float32(0.0)) + var min_v = min(min(in_r, in_g), in_b) + var sat = Float32(1.0) + if min_v < 0.0: + sat = -avg / (min_v - avg) + + var p_r = avg + sat * (in_r - avg) + var p_g = avg + sat * (in_g - avg) + var p_b = avg + sat * (in_b - avg) + + var luma = (p_r + p_g + p_b) / 3.0 + var mapped_luma = 
generalized_loglogistic_sigmoid_scalar( + luma, white_target, paper_exp, film_fog, film_power, paper_power + ) + + if luma > 1e-9: + var scale = mapped_luma / luma + p_r *= scale + p_g *= scale + p_b *= scale + else: + p_r = mapped_luma + p_g = mapped_luma + p_b = mapped_luma + + var p_min = min(min(p_r, p_g), p_b) + var p_max = max(max(p_r, p_g), p_b) + var eps = Float32(1e-6) + var d_white = (white_target - mapped_luma) / (p_max - mapped_luma + eps) + var d_black = (black_target - mapped_luma) / (p_min - mapped_luma - eps) + var db_vs_chroma = min(d_white, d_black) + var cvm_border = (mapped_luma - p_min) / (mapped_luma + eps) + var p_chr_adj = 1.0 / (cvm_border * db_vs_chroma + eps) + var h_chr = ( + 2.0 * cvm_border / (1.0 - cvm_border * cvm_border + eps) + ) * p_chr_adj + var h_z = sqrt(h_chr * h_chr + 1.0) + var chroma_f = h_chr / (1.0 + h_z) * db_vs_chroma + + return SIMD[DType.float32, 4]( + mapped_luma + chroma_f * (p_r - mapped_luma), + mapped_luma + chroma_f * (p_g - mapped_luma), + mapped_luma + chroma_f * (p_b - mapped_luma), + in_a, + ) + +@always_inline +fn apply_sigmoid_per_channel( + in_r: Float32, + in_g: Float32, + in_b: Float32, + in_a: Float32, + white_target: Float32, + paper_exp: Float32, + film_fog: Float32, + contrast_power: Float32, + skew_power: Float32, + hue_preservation: Float32, + pipe_to_base: SIMD[DType.float32, 16], + base_to_rendering: SIMD[DType.float32, 16], + rendering_to_pipe: SIMD[DType.float32, 16], +) -> SIMD[DType.float32, 4]: + # 1. Transform to base space + var i_r = ( + pipe_to_base[0] * in_r + pipe_to_base[1] * in_g + pipe_to_base[2] * in_b + ) + var i_g = ( + pipe_to_base[4] * in_r + pipe_to_base[5] * in_g + pipe_to_base[6] * in_b + ) + var i_b = ( + pipe_to_base[8] * in_r + + pipe_to_base[9] * in_g + + pipe_to_base[10] * in_b + ) + + # 2. 
Desaturate negative + var avg = max((i_r + i_g + i_b) / 3.0, Float32(0.0)) + var min_v = min(min(i_r, i_g), i_b) + var sat = Float32(1.0) + if min_v < 0.0: + sat = -avg / (min_v - avg) + i_r = avg + sat * (i_r - avg) + i_g = avg + sat * (i_g - avg) + i_b = avg + sat * (i_b - avg) + + # 3. Transform to rendering space + var r_r = ( + base_to_rendering[0] * i_r + + base_to_rendering[1] * i_g + + base_to_rendering[2] * i_b + ) + var r_g = ( + base_to_rendering[4] * i_r + + base_to_rendering[5] * i_g + + base_to_rendering[6] * i_b + ) + var r_b = ( + base_to_rendering[8] * i_r + + base_to_rendering[9] * i_g + + base_to_rendering[10] * i_b + ) + + # 4. Per-channel sigmoid curves + var pc_r = generalized_loglogistic_sigmoid_scalar( + r_r, white_target, paper_exp, film_fog, contrast_power, skew_power + ) + var pc_g = generalized_loglogistic_sigmoid_scalar( + r_g, white_target, paper_exp, film_fog, contrast_power, skew_power + ) + var pc_b = generalized_loglogistic_sigmoid_scalar( + r_b, white_target, paper_exp, film_fog, contrast_power, skew_power + ) + + # 5. 
Preserve hue & energy + var p_min: Float32 + var p_mid: Float32 + var p_max: Float32 + var pc_min: Float32 + var pc_mid: Float32 + var pc_max: Float32 + + if r_r >= r_g: + if r_g >= r_b: # R G B + p_max = r_r + p_mid = r_g + p_min = r_b + pc_max = pc_r + pc_mid = pc_g + pc_min = pc_b + elif r_b >= r_r: # B R G + p_max = r_b + p_mid = r_r + p_min = r_g + pc_max = pc_b + pc_mid = pc_r + pc_min = pc_g + else: # R B G + p_max = r_r + p_mid = r_b + p_min = r_g + pc_max = pc_r + pc_mid = pc_b + pc_min = pc_g + else: + if r_r >= r_b: # G R B + p_max = r_g + p_mid = r_r + p_min = r_b + pc_max = pc_g + pc_mid = pc_r + pc_min = pc_b + elif r_b >= r_g: # B G R + p_max = r_b + p_mid = r_g + p_min = r_r + pc_max = pc_b + pc_mid = pc_g + pc_min = pc_r + else: # G B R + p_max = r_g + p_mid = r_b + p_min = r_r + pc_max = pc_g + pc_mid = pc_b + pc_min = pc_r + + var chroma = p_max - p_min + var midscale = Float32(0.0) + if chroma != 0.0: + midscale = (p_mid - p_min) / chroma + + var f_hc = pc_min + (pc_max - pc_min) * midscale + var n_mid = (1.0 - hue_preservation) * pc_mid + hue_preservation * f_hc + + var blend = 2.0 * p_min / (p_min + p_mid + 1e-9) + var target = blend * (pc_r + pc_g + pc_b) + (1.0 - blend) * ( + pc_min + n_mid + pc_max + ) + + var res_min: Float32 + var res_mid: Float32 + var res_max: Float32 + if n_mid <= pc_mid: + res_mid = ( + (1.0 - hue_preservation) * pc_mid + + hue_preservation + * (midscale * pc_max + (1.0 - midscale) * (target - pc_max)) + ) / (1.0 + hue_preservation * (1.0 - midscale)) + res_min = target - pc_max - res_mid + res_max = pc_max + else: + res_mid = ( + (1.0 - hue_preservation) * pc_mid + + hue_preservation + * (pc_min * (1.0 - midscale) + midscale * (target - pc_min)) + ) / (1.0 + hue_preservation * midscale) + res_min = pc_min + res_max = target - pc_min - res_mid + + var res_r: Float32 + var res_g: Float32 + var res_b: Float32 + if r_r >= r_g: + if r_g >= r_b: # R G B + res_r = res_max + res_g = res_mid + res_b = res_min + elif r_b >= 
r_r: # B R G + res_b = res_max + res_r = res_mid + res_g = res_min + else: # R B G + res_r = res_max + res_b = res_mid + res_g = res_min + else: + if r_r >= r_b: # G R B + res_g = res_max + res_r = res_mid + res_b = res_min + elif r_b >= r_g: # B G R + res_b = res_max + res_g = res_mid + res_r = res_min + else: # G B R + res_g = res_max + res_b = res_mid + res_r = res_min + + # 6. Transform to pipe space + var out_r = ( + rendering_to_pipe[0] * res_r + + rendering_to_pipe[1] * res_g + + rendering_to_pipe[2] * res_b + ) + var out_g = ( + rendering_to_pipe[4] * res_r + + rendering_to_pipe[5] * res_g + + rendering_to_pipe[6] * res_b + ) + var out_b = ( + rendering_to_pipe[8] * res_r + + rendering_to_pipe[9] * res_g + + rendering_to_pipe[10] * res_b + ) + + return SIMD[DType.float32, 4](out_r, out_g, out_b, in_a) diff --git a/mojo/iop/sigmoid/lib.mojo b/mojo/iop/sigmoid/lib.mojo new file mode 100644 index 0000000000..e0bcb90375 --- /dev/null +++ b/mojo/iop/sigmoid/lib.mojo @@ -0,0 +1,370 @@ +from std.gpu.host import DeviceContext +from layout import Layout, LayoutTensor, UNKNOWN_VALUE +from layout.runtime_layout import RuntimeLayout +from std.utils import Index, IndexList +from std.algorithm.functional import elementwise +from std.gpu.host.compile import get_gpu_target +from std.memory.unsafe_pointer import alloc, UnsafePointer +from std.sys import simd_width_of +from iop.sigmoid.kernels import ( + apply_sigmoid_rgb_ratio, + apply_sigmoid_per_channel, +) + +comptime CHANNELS = 4 +comptime IMAGE_LAYOUT = Layout.row_major(UNKNOWN_VALUE, UNKNOWN_VALUE, CHANNELS) +comptime DTYPE = DType.float32 +comptime SIMD_WIDTH = simd_width_of[DTYPE, target=get_gpu_target()]() + + +struct MojoCtx: + var use_gpu: Int + var dctx_addr: Int + + fn __init__(out self, use_gpu: Int, dctx_addr: Int): + self.use_gpu = use_gpu + self.dctx_addr = dctx_addr + + +struct CParamsView: + var p: UnsafePointer[Float32, MutAnyOrigin] + + fn __init__(out self, p: Int): + self.p = UnsafePointer[Float32, 
MutAnyOrigin](unsafe_from_address=p) + + # In SigmoidMojoParams, each scalar is a float[4] array + fn white_target(self) -> Float32: + return self.p[0] + + fn black_target(self) -> Float32: + return self.p[4] + + fn paper_exposure(self) -> Float32: + return self.p[8] + + fn film_fog(self) -> Float32: + return self.p[12] + + fn film_power(self) -> Float32: + return self.p[16] + + fn paper_power(self) -> Float32: + return self.p[20] + + fn hue_preservation(self) -> Float32: + return self.p[32] # hue_preservation[4] at index 32 + + fn pipe_to_base(self) -> SIMD[DType.float32, 16]: + var m = SIMD[DType.float32, 16]() + for i in range(16): + m[i] = self.p[36 + i] + return m + + fn base_to_rendering(self) -> SIMD[DType.float32, 16]: + var m = SIMD[DType.float32, 16]() + for i in range(16): + m[i] = self.p[52 + i] + return m + + fn rendering_to_pipe(self) -> SIMD[DType.float32, 16]: + var m = SIMD[DType.float32, 16]() + for i in range(16): + m[i] = self.p[68 + i] + return m + + +# ========================================================================= +# INTERNAL GPU LAUNCHERS (ZERO-POINTER PARAMETERS) +# ========================================================================= + + +fn _launch_rgb_ratio_gpu( + dctx: DeviceContext, + dev_in_ptr: Int, + dev_out_ptr: Int, + wt: Float32, + bt: Float32, + pe: Float32, + ff: Float32, + fp: Float32, + pp: Float32, + num_pixels: Int, +) raises: + @parameter + @always_inline + fn gpu_kernel[ + sw: Int, rank: Int, align: Int + ](indices: IndexList[rank]) capturing -> None: + var idx = indices[0] * 4 + var pin = UnsafePointer[Float32, ImmutAnyOrigin]( + unsafe_from_address=dev_in_ptr + ) + var pout = UnsafePointer[Float32, MutAnyOrigin]( + unsafe_from_address=dev_out_ptr + ) + + var r = apply_sigmoid_rgb_ratio( + pin[idx], + pin[idx + 1], + pin[idx + 2], + pin[idx + 3], + wt, + bt, + pe, + ff, + fp, + pp, + ) + pout[idx] = r[0] + pout[idx + 1] = r[1] + pout[idx + 2] = r[2] + pout[idx + 3] = r[3] + + elementwise[gpu_kernel, 
1, target="gpu"](num_pixels, dctx)  # width 1: gpu_kernel ignores sw and handles exactly one RGBA pixel per index + dctx.synchronize() + + +fn _launch_per_channel_gpu( + dctx: DeviceContext, + dev_in_ptr: Int, + dev_out_ptr: Int, + wt: Float32, + pe: Float32, + ff: Float32, + fp: Float32, + pp: Float32, + hp: Float32, + kptb: SIMD[DType.float32, 16], + kbtr: SIMD[DType.float32, 16], + krtp: SIMD[DType.float32, 16], + num_pixels: Int, +) raises: + @parameter + @always_inline + fn gpu_kernel[ + sw: Int, rank: Int, align: Int + ](indices: IndexList[rank]) capturing -> None: + var idx = indices[0] * 4 + var pin = UnsafePointer[Float32, ImmutAnyOrigin]( + unsafe_from_address=dev_in_ptr + ) + var pout = UnsafePointer[Float32, MutAnyOrigin]( + unsafe_from_address=dev_out_ptr + ) + + var r = apply_sigmoid_per_channel( + pin[idx], + pin[idx + 1], + pin[idx + 2], + pin[idx + 3], + wt, + pe, + ff, + fp, + pp, + hp, + kptb, + kbtr, + krtp, + ) + pout[idx] = r[0] + pout[idx + 1] = r[1] + pout[idx + 2] = r[2] + pout[idx + 3] = r[3] + + elementwise[gpu_kernel, 1, target="gpu"](num_pixels, dctx)  # width 1: a wider launch would skip every pixel the kernel does not explicitly cover + dctx.synchronize() + + +# ========================================================================= +# EXPORTED INTERFACE +# ========================================================================= + + +@export +fn sigmoid_mojo_init(ctx_out: UnsafePointer[Int, MutAnyOrigin], use_gpu: Int32): + var gpu = use_gpu != 0 + var dctx_addr = 0 + if gpu: + try: + var d_ptr = alloc[DeviceContext](1) + d_ptr.init_pointee_move(DeviceContext()) + dctx_addr = Int(d_ptr) + print("Mojo: GPU Context Initialized Successfully") + except e: + print("Mojo: GPU Init Error (falling back to CPU):", String(e)) + gpu = False + else: + print("Mojo: CPU Context Initialized") + var p = alloc[MojoCtx](1) + p[0].use_gpu = 1 if gpu else 0 + p[0].dctx_addr = dctx_addr + ctx_out[0] = Int(p) + + +@export +fn sigmoid_mojo_destroy(ctx_addr: Int): + var p = UnsafePointer[MojoCtx, MutAnyOrigin](unsafe_from_address=ctx_addr) + if p[0].dctx_addr != 0: + var dctx_ptr = 
UnsafePointer[DeviceContext, MutAnyOrigin]( + unsafe_from_address=p[0].dctx_addr + ) + dctx_ptr.destroy_pointee() + dctx_ptr.free() + p.free() + + +@export +fn sigmoid_mojo_rgb_ratio( + ctx_addr: Int, + in_addr: Int, + out_addr: Int, + width: Int32, + height: Int32, + p_addr: Int, +): + var ctx_p = UnsafePointer[MojoCtx, MutAnyOrigin]( + unsafe_from_address=ctx_addr + ) + var use_gpu = ctx_p[0].use_gpu != 0 + var dctx_addr = ctx_p[0].dctx_addr + var params = CParamsView(p_addr) + var in_p = UnsafePointer[Float32, MutAnyOrigin](unsafe_from_address=in_addr) + var out_p = UnsafePointer[Float32, MutAnyOrigin]( + unsafe_from_address=out_addr + ) + var h = Int(height) + var w = Int(width) + var num_pixels = h * w + + if use_gpu and dctx_addr != 0: + try: + var dctx = UnsafePointer[DeviceContext, MutAnyOrigin]( + unsafe_from_address=dctx_addr + )[0] + var dev_in = dctx.enqueue_create_buffer[DTYPE](num_pixels * 4) + var dev_out = dctx.enqueue_create_buffer[DTYPE](num_pixels * 4) + dctx.enqueue_copy(dev_in, in_p) + + _launch_rgb_ratio_gpu( + dctx, + Int(dev_in.unsafe_ptr()), + Int(dev_out.unsafe_ptr()), + params.white_target(), + params.black_target(), + params.paper_exposure(), + params.film_fog(), + params.film_power(), + params.paper_power(), + num_pixels, + ) + + dctx.enqueue_copy(out_p, dev_out) + dctx.synchronize() + except e: + print("GPU Run Error (RGB Ratio):", String(e)) + else: + var rt = RuntimeLayout[IMAGE_LAYOUT].row_major( + IndexList[3](h, w, CHANNELS) + ) + var in_t = LayoutTensor[DTYPE, IMAGE_LAYOUT, MutAnyOrigin](in_p, rt) + var out_t = LayoutTensor[DTYPE, IMAGE_LAYOUT, MutAnyOrigin](out_p, rt) + var wt = params.white_target() + var bt = params.black_target() + var pe = params.paper_exposure() + var ff = params.film_fog() + var fp = params.film_power() + var pp = params.paper_power() + for i in range(num_pixels): + var px = in_t.load[width=4](Index(i // w, i % w, 0)) + var r = apply_sigmoid_rgb_ratio( + px[0], px[1], px[2], px[3], wt, bt, pe, ff, fp, pp + 
) + out_t.store[width=4](Index(i // w, i % w, 0), r) + + +@export +fn sigmoid_mojo_per_channel( + ctx_addr: Int, + in_addr: Int, + out_addr: Int, + width: Int32, + height: Int32, + p_addr: Int, +): + var ctx_p = UnsafePointer[MojoCtx, MutAnyOrigin]( + unsafe_from_address=ctx_addr + ) + var use_gpu = ctx_p[0].use_gpu != 0 + var dctx_addr = ctx_p[0].dctx_addr + var params = CParamsView(p_addr) + var in_p = UnsafePointer[Float32, MutAnyOrigin](unsafe_from_address=in_addr) + var out_p = UnsafePointer[Float32, MutAnyOrigin]( + unsafe_from_address=out_addr + ) + var h = Int(height) + var w = Int(width) + var num_pixels = h * w + + if use_gpu and dctx_addr != 0: + try: + var dctx = UnsafePointer[DeviceContext, MutAnyOrigin]( + unsafe_from_address=dctx_addr + )[0] + var dev_in = dctx.enqueue_create_buffer[DTYPE](num_pixels * 4) + var dev_out = dctx.enqueue_create_buffer[DTYPE](num_pixels * 4) + dctx.enqueue_copy(dev_in, in_p) + + _launch_per_channel_gpu( + dctx, + Int(dev_in.unsafe_ptr()), + Int(dev_out.unsafe_ptr()), + params.white_target(), + params.paper_exposure(), + params.film_fog(), + params.film_power(), + params.paper_power(), + params.hue_preservation(), + params.pipe_to_base(), + params.base_to_rendering(), + params.rendering_to_pipe(), + num_pixels, + ) + + dctx.enqueue_copy(out_p, dev_out) + dctx.synchronize() + except e: + print("GPU Error (Per Channel):", String(e)) + else: + var rt = RuntimeLayout[IMAGE_LAYOUT].row_major( + IndexList[3](h, w, CHANNELS) + ) + var in_t = LayoutTensor[DTYPE, IMAGE_LAYOUT, MutAnyOrigin](in_p, rt) + var out_t = LayoutTensor[DTYPE, IMAGE_LAYOUT, MutAnyOrigin](out_p, rt) + var wt = params.white_target() + var pe = params.paper_exposure() + var ff = params.film_fog() + var fp = params.film_power() + var pp = params.paper_power() + var hp = params.hue_preservation() + var ptb = params.pipe_to_base() + var btr = params.base_to_rendering() + var rtp = params.rendering_to_pipe() + for i in range(num_pixels): + var px = 
in_t.load[width=4](Index(i // w, i % w, 0)) + var r = apply_sigmoid_per_channel( + px[0], + px[1], + px[2], + px[3], + wt, + pe, + ff, + fp, + pp, + hp, + ptb, + btr, + rtp, + ) + out_t.store[width=4](Index(i // w, i % w, 0), r) diff --git a/mojo/pixi.lock b/mojo/pixi.lock new file mode 100644 index 0000000000..c8ff562623 --- /dev/null +++ b/mojo/pixi.lock @@ -0,0 +1,843 @@ +version: 6 +environments: + default: + channels: + - url: https://conda.modular.com/max-nightly/ + - url: https://conda.anaconda.org/conda-forge/ + options: + pypi-prerelease-mode: if-necessary-or-explicit + packages: + linux-64: + - conda: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-20_gnu.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/_python_abi3_support-1.0-hd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hda65f42_9.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2026.2.25-hbd8a1cb_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/click-8.3.1-pyh8f84b5b_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/cpython-3.14.3-py314hd8ed1ab_101.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/icu-78.2-h33c6efd_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-8.7.0-pyhe01879c_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_client-8.6.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_core-5.9.1-pyhc90fa1f_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.3-hb9d3cd8_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/krb5-1.22.2-ha1258a1_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.45.1-default_hbd61a6d_101.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libblas-3.11.0-5_h4a7cf45_openblas.conda + - conda: 
https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.11.0-5_h0358290_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20250104-pl5321h7949ede_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.4-hecca717_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.5.2-h3435931_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_18.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgfortran-15.2.0-h69a702a_18.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-15.2.0-h68bc16d_18.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-15.2.0-he0feb66_18.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.11.0-5_h47877c9_openblas.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.2-hb03c661_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libmpdec-4.0.0-hb03c661_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.30-pthreads_h94d23a6_4.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libsodium-1.0.21-h280c20c_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.52.0-hf4e2dac_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-15.2.0-h934c35e_18.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.41.3-h5347b49_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/markdown-it-py-4.0.0-pyhd8ed1ab_0.conda + - conda: https://conda.modular.com/max-nightly/linux-64/max-26.3.0.dev2026031105-3.14release.conda + - conda: https://conda.modular.com/max-nightly/linux-64/max-core-26.3.0.dev2026031105-release.conda + - conda: https://conda.modular.com/max-nightly/noarch/mblack-26.3.0.dev2026031105-release.conda + - 
conda: https://conda.anaconda.org/conda-forge/noarch/mdurl-0.1.2-pyhd8ed1ab_1.conda + - conda: https://conda.modular.com/max-nightly/linux-64/mojo-0.26.3.0.dev2026031105-release.conda + - conda: https://conda.modular.com/max-nightly/linux-64/mojo-compiler-0.26.3.0.dev2026031105-release.conda + - conda: https://conda.modular.com/max-nightly/noarch/mojo-python-0.26.3.0.dev2026031105-release.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mypy_extensions-1.1.0-pyha770c72_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/numpy-2.4.2-py314h2b28147_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.6.1-h35e630c_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-26.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pathspec-1.0.4-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/platformdirs-4.9.4-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pygments-2.19.2-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.14.3-h32b2ec7_101_cp314.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0.post0-pyhe01879c_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python-gil-3.14.3-h4df99d1_101.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.14-8_cp314.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/pyyaml-6.0.3-py314h67df5f8_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/pyzmq-27.1.0-py312hda471dd_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/readline-8.3-h853b02a_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/rich-14.3.3-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/six-1.17.0-pyhe01879c_1.conda + - conda: 
https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h366c992_103.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tomli-2.4.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/tornado-6.5.3-py314h5bd0f2a_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/traitlets-5.14.3-pyhd8ed1ab_1.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.15.0-h396c80c_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.15.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025c-hc9c84f9_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/yaml-0.2.5-h280c20c_3.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/zeromq-4.3.5-h41580af_10.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.23.0-pyhcf101f3_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb78ec9c_6.conda +packages: +- conda: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-20_gnu.conda + build_number: 20 + sha256: 1dd3fffd892081df9726d7eb7e0dea6198962ba775bd88842135a4ddb4deb3c9 + md5: a9f577daf3de00bca7c3c76c0ecbd1de + depends: + - __glibc >=2.17,<3.0.a0 + - libgomp >=7.5.0 + constrains: + - openmp_impl <0.0a0 + license: BSD-3-Clause + license_family: BSD + size: 28948 + timestamp: 1770939786096 +- conda: https://conda.anaconda.org/conda-forge/noarch/_python_abi3_support-1.0-hd8ed1ab_2.conda + sha256: a3967b937b9abf0f2a99f3173fa4630293979bd1644709d89580e7c62a544661 + md5: aaa2a381ccc56eac91d63b6c1240312f + depends: + - cpython + - python-gil + license: MIT + license_family: MIT + size: 8191 + timestamp: 1744137672556 +- conda: https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hda65f42_9.conda + sha256: 0b75d45f0bba3e95dc693336fa51f40ea28c980131fec438afb7ce6118ed05f6 + md5: d2ffd7602c02f2b316fd921d39876885 + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + 
license: bzip2-1.0.6 + license_family: BSD + size: 260182 + timestamp: 1771350215188 +- conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2026.2.25-hbd8a1cb_0.conda + sha256: 67cc7101b36421c5913a1687ef1b99f85b5d6868da3abbf6ec1a4181e79782fc + md5: 4492fd26db29495f0ba23f146cd5638d + depends: + - __unix + license: ISC + size: 147413 + timestamp: 1772006283803 +- conda: https://conda.anaconda.org/conda-forge/noarch/click-8.3.1-pyh8f84b5b_1.conda + sha256: 38cfe1ee75b21a8361c8824f5544c3866f303af1762693a178266d7f198e8715 + md5: ea8a6c3256897cc31263de9f455e25d9 + depends: + - python >=3.10 + - __unix + - python + license: BSD-3-Clause + license_family: BSD + size: 97676 + timestamp: 1764518652276 +- conda: https://conda.anaconda.org/conda-forge/noarch/cpython-3.14.3-py314hd8ed1ab_101.conda + noarch: generic + sha256: 91b06300879df746214f7363d6c27c2489c80732e46a369eb2afc234bcafb44c + md5: 3bb89e4f795e5414addaa531d6b1500a + depends: + - python >=3.14,<3.15.0a0 + - python_abi * *_cp314 + license: Python-2.0 + size: 50078 + timestamp: 1770674447292 +- conda: https://conda.anaconda.org/conda-forge/linux-64/icu-78.2-h33c6efd_0.conda + sha256: 142a722072fa96cf16ff98eaaf641f54ab84744af81754c292cb81e0881c0329 + md5: 186a18e3ba246eccfc7cff00cd19a870 + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + - libstdcxx >=14 + license: MIT + license_family: MIT + size: 12728445 + timestamp: 1767969922681 +- conda: https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-8.7.0-pyhe01879c_1.conda + sha256: c18ab120a0613ada4391b15981d86ff777b5690ca461ea7e9e49531e8f374745 + md5: 63ccfdc3a3ce25b027b8767eb722fca8 + depends: + - python >=3.9 + - zipp >=3.20 + - python + license: Apache-2.0 + license_family: APACHE + size: 34641 + timestamp: 1747934053147 +- conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_client-8.6.3-pyhd8ed1ab_1.conda + sha256: 19d8bd5bb2fde910ec59e081eeb59529491995ce0d653a5209366611023a0b3a + md5: 4ebae00eae9705b0c3d6d1018a81d047 + 
depends: + - importlib-metadata >=4.8.3 + - jupyter_core >=4.12,!=5.0.* + - python >=3.9 + - python-dateutil >=2.8.2 + - pyzmq >=23.0 + - tornado >=6.2 + - traitlets >=5.3 + license: BSD-3-Clause + license_family: BSD + size: 106342 + timestamp: 1733441040958 +- conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_core-5.9.1-pyhc90fa1f_0.conda + sha256: 1d34b80e5bfcd5323f104dbf99a2aafc0e5d823019d626d0dce5d3d356a2a52a + md5: b38fe4e78ee75def7e599843ef4c1ab0 + depends: + - __unix + - python + - platformdirs >=2.5 + - python >=3.10 + - traitlets >=5.3 + - python + constrains: + - pywin32 >=300 + license: BSD-3-Clause + license_family: BSD + size: 65503 + timestamp: 1760643864586 +- conda: https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.3-hb9d3cd8_0.conda + sha256: 0960d06048a7185d3542d850986d807c6e37ca2e644342dd0c72feefcf26c2a4 + md5: b38117a3c920364aff79f870c984b4a3 + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=13 + license: LGPL-2.1-or-later + size: 134088 + timestamp: 1754905959823 +- conda: https://conda.anaconda.org/conda-forge/linux-64/krb5-1.22.2-ha1258a1_0.conda + sha256: 3e307628ca3527448dd1cb14ad7bb9d04d1d28c7d4c5f97ba196ae984571dd25 + md5: fb53fb07ce46a575c5d004bbc96032c2 + depends: + - __glibc >=2.17,<3.0.a0 + - keyutils >=1.6.3,<2.0a0 + - libedit >=3.1.20250104,<3.2.0a0 + - libedit >=3.1.20250104,<4.0a0 + - libgcc >=14 + - libstdcxx >=14 + - openssl >=3.5.5,<4.0a0 + license: MIT + license_family: MIT + size: 1386730 + timestamp: 1769769569681 +- conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.45.1-default_hbd61a6d_101.conda + sha256: 565941ac1f8b0d2f2e8f02827cbca648f4d18cd461afc31f15604cd291b5c5f3 + md5: 12bd9a3f089ee6c9266a37dab82afabd + depends: + - __glibc >=2.17,<3.0.a0 + - zstd >=1.5.7,<1.6.0a0 + constrains: + - binutils_impl_linux-64 2.45.1 + license: GPL-3.0-only + license_family: GPL + size: 725507 + timestamp: 1770267139900 +- conda: 
https://conda.anaconda.org/conda-forge/linux-64/libblas-3.11.0-5_h4a7cf45_openblas.conda + build_number: 5 + sha256: 18c72545080b86739352482ba14ba2c4815e19e26a7417ca21a95b76ec8da24c + md5: c160954f7418d7b6e87eaf05a8913fa9 + depends: + - libopenblas >=0.3.30,<0.3.31.0a0 + - libopenblas >=0.3.30,<1.0a0 + constrains: + - mkl <2026 + - liblapack 3.11.0 5*_openblas + - libcblas 3.11.0 5*_openblas + - blas 2.305 openblas + - liblapacke 3.11.0 5*_openblas + license: BSD-3-Clause + license_family: BSD + size: 18213 + timestamp: 1765818813880 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.11.0-5_h0358290_openblas.conda + build_number: 5 + sha256: 0cbdcc67901e02dc17f1d19e1f9170610bd828100dc207de4d5b6b8ad1ae7ad8 + md5: 6636a2b6f1a87572df2970d3ebc87cc0 + depends: + - libblas 3.11.0 5_h4a7cf45_openblas + constrains: + - liblapacke 3.11.0 5*_openblas + - blas 2.305 openblas + - liblapack 3.11.0 5*_openblas + license: BSD-3-Clause + license_family: BSD + size: 18194 + timestamp: 1765818837135 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20250104-pl5321h7949ede_0.conda + sha256: d789471216e7aba3c184cd054ed61ce3f6dac6f87a50ec69291b9297f8c18724 + md5: c277e0a4d549b03ac1e9d6cbbe3d017b + depends: + - ncurses + - __glibc >=2.17,<3.0.a0 + - libgcc >=13 + - ncurses >=6.5,<7.0a0 + license: BSD-2-Clause + license_family: BSD + size: 134676 + timestamp: 1738479519902 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.4-hecca717_0.conda + sha256: d78f1d3bea8c031d2f032b760f36676d87929b18146351c4464c66b0869df3f5 + md5: e7f7ce06ec24cfcfb9e36d28cf82ba57 + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + constrains: + - expat 2.7.4.* + license: MIT + license_family: MIT + size: 76798 + timestamp: 1771259418166 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.5.2-h3435931_0.conda + sha256: 31f19b6a88ce40ebc0d5a992c131f57d919f73c0b92cd1617a5bec83f6e961e6 + md5: a360c33a5abe61c07959e449fa1453eb + depends: + - 
__glibc >=2.17,<3.0.a0 + - libgcc >=14 + license: MIT + license_family: MIT + size: 58592 + timestamp: 1769456073053 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_18.conda + sha256: faf7d2017b4d718951e3a59d081eb09759152f93038479b768e3d612688f83f5 + md5: 0aa00f03f9e39fb9876085dee11a85d4 + depends: + - __glibc >=2.17,<3.0.a0 + - _openmp_mutex >=4.5 + constrains: + - libgcc-ng ==15.2.0=*_18 + - libgomp 15.2.0 he0feb66_18 + license: GPL-3.0-only WITH GCC-exception-3.1 + license_family: GPL + size: 1041788 + timestamp: 1771378212382 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libgfortran-15.2.0-h69a702a_18.conda + sha256: d2c9fad338fd85e4487424865da8e74006ab2e2475bd788f624d7a39b2a72aee + md5: 9063115da5bc35fdc3e1002e69b9ef6e + depends: + - libgfortran5 15.2.0 h68bc16d_18 + constrains: + - libgfortran-ng ==15.2.0=*_18 + license: GPL-3.0-only WITH GCC-exception-3.1 + license_family: GPL + size: 27523 + timestamp: 1771378269450 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-15.2.0-h68bc16d_18.conda + sha256: 539b57cf50ec85509a94ba9949b7e30717839e4d694bc94f30d41c9d34de2d12 + md5: 646855f357199a12f02a87382d429b75 + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=15.2.0 + constrains: + - libgfortran 15.2.0 + license: GPL-3.0-only WITH GCC-exception-3.1 + license_family: GPL + size: 2482475 + timestamp: 1771378241063 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-15.2.0-he0feb66_18.conda + sha256: 21337ab58e5e0649d869ab168d4e609b033509de22521de1bfed0c031bfc5110 + md5: 239c5e9546c38a1e884d69effcf4c882 + depends: + - __glibc >=2.17,<3.0.a0 + license: GPL-3.0-only WITH GCC-exception-3.1 + license_family: GPL + size: 603262 + timestamp: 1771378117851 +- conda: https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.11.0-5_h47877c9_openblas.conda + build_number: 5 + sha256: c723b6599fcd4c6c75dee728359ef418307280fa3e2ee376e14e85e5bbdda053 + md5: b38076eb5c8e40d0106beda6f95d7609 + 
depends: + - libblas 3.11.0 5_h4a7cf45_openblas + constrains: + - blas 2.305 openblas + - liblapacke 3.11.0 5*_openblas + - libcblas 3.11.0 5*_openblas + license: BSD-3-Clause + license_family: BSD + size: 18200 + timestamp: 1765818857876 +- conda: https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.2-hb03c661_0.conda + sha256: 755c55ebab181d678c12e49cced893598f2bab22d582fbbf4d8b83c18be207eb + md5: c7c83eecbb72d88b940c249af56c8b17 + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + constrains: + - xz 5.8.2.* + license: 0BSD + size: 113207 + timestamp: 1768752626120 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libmpdec-4.0.0-hb03c661_1.conda + sha256: fe171ed5cf5959993d43ff72de7596e8ac2853e9021dec0344e583734f1e0843 + md5: 2c21e66f50753a083cbe6b80f38268fa + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + license: BSD-2-Clause + license_family: BSD + size: 92400 + timestamp: 1769482286018 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.30-pthreads_h94d23a6_4.conda + sha256: 199d79c237afb0d4780ccd2fbf829cea80743df60df4705202558675e07dd2c5 + md5: be43915efc66345cccb3c310b6ed0374 + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + - libgfortran + - libgfortran5 >=14.3.0 + constrains: + - openblas >=0.3.30,<0.3.31.0a0 + license: BSD-3-Clause + license_family: BSD + size: 5927939 + timestamp: 1763114673331 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libsodium-1.0.21-h280c20c_3.conda + sha256: 64e5c80cbce4680a2d25179949739a6def695d72c40ca28f010711764e372d97 + md5: 7af961ef4aa2c1136e11dd43ded245ab + depends: + - libgcc >=14 + - __glibc >=2.17,<3.0.a0 + license: ISC + size: 277661 + timestamp: 1772479381288 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.52.0-hf4e2dac_0.conda + sha256: d716847b7deca293d2e49ed1c8ab9e4b9e04b9d780aea49a97c26925b28a7993 + md5: fd893f6a3002a635b5e50ceb9dd2c0f4 + depends: + - __glibc >=2.17,<3.0.a0 + - icu >=78.2,<79.0a0 + - libgcc >=14 + - libzlib 
>=1.3.1,<2.0a0 + license: blessing + size: 951405 + timestamp: 1772818874251 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-15.2.0-h934c35e_18.conda + sha256: 78668020064fdaa27e9ab65cd2997e2c837b564ab26ce3bf0e58a2ce1a525c6e + md5: 1b08cd684f34175e4514474793d44bcb + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc 15.2.0 he0feb66_18 + constrains: + - libstdcxx-ng ==15.2.0=*_18 + license: GPL-3.0-only WITH GCC-exception-3.1 + license_family: GPL + size: 5852330 + timestamp: 1771378262446 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.41.3-h5347b49_0.conda + sha256: 1a7539cfa7df00714e8943e18de0b06cceef6778e420a5ee3a2a145773758aee + md5: db409b7c1720428638e7c0d509d3e1b5 + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + license: BSD-3-Clause + license_family: BSD + size: 40311 + timestamp: 1766271528534 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda + sha256: d4bfe88d7cb447768e31650f06257995601f89076080e76df55e3112d4e47dc4 + md5: edb0dca6bc32e4f4789199455a1dbeb8 + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=13 + constrains: + - zlib 1.3.1 *_2 + license: Zlib + license_family: Other + size: 60963 + timestamp: 1727963148474 +- conda: https://conda.anaconda.org/conda-forge/noarch/markdown-it-py-4.0.0-pyhd8ed1ab_0.conda + sha256: 7b1da4b5c40385791dbc3cc85ceea9fad5da680a27d5d3cb8bfaa185e304a89e + md5: 5b5203189eb668f042ac2b0826244964 + depends: + - mdurl >=0.1,<1 + - python >=3.10 + license: MIT + license_family: MIT + size: 64736 + timestamp: 1754951288511 +- conda: https://conda.modular.com/max-nightly/linux-64/max-26.3.0.dev2026031105-3.14release.conda + sha256: e6407c6222b5a0ccc4fd7270e55326f39f1026531bde13d3b12586861a2f4710 + md5: c3c4ea5254ba1a4962b1d7c7ba9e7461 + depends: + - numpy >=1.18 + - typing-extensions >=4.12.2 + - pyyaml >=6.0.1 + - rich >=13.0.1 + - python 3.14.* + - python-gil + - max-core ==26.3.0.dev2026031105 + license: LicenseRef-Modular-Proprietary + size: 
4231736 + timestamp: 1773207424284 +- conda: https://conda.modular.com/max-nightly/linux-64/max-core-26.3.0.dev2026031105-release.conda + sha256: 5fb9f5db6c0a463d2b25185f7bcf9fce9efbd67dbcd31a88ad13b65f2371d540 + md5: 689a17beb15fc6b8b14d188a750c2d85 + depends: + - mojo-compiler ==0.26.3.0.dev2026031105 + license: LicenseRef-Modular-Proprietary + size: 137553590 + timestamp: 1773207447229 +- conda: https://conda.modular.com/max-nightly/noarch/mblack-26.3.0.dev2026031105-release.conda + noarch: python + sha256: dd93df3438e366c447af4e2ae2fa3811b83a2a68fa69ce8f9fbbc33daf06ad80 + md5: 33c8f9a54a3122c0a02073f8f4138bb1 + depends: + - python >=3.10 + - click >=8.0.0 + - mypy_extensions >=0.4.3 + - packaging >=22.0 + - pathspec >=0.9.0 + - platformdirs >=2 + - tomli >=1.1.0 + license: LicenseRef-Modular-Proprietary + size: 133842 + timestamp: 1773207196055 +- conda: https://conda.anaconda.org/conda-forge/noarch/mdurl-0.1.2-pyhd8ed1ab_1.conda + sha256: 78c1bbe1723449c52b7a9df1af2ee5f005209f67e40b6e1d3c7619127c43b1c7 + md5: 592132998493b3ff25fd7479396e8351 + depends: + - python >=3.9 + license: MIT + license_family: MIT + size: 14465 + timestamp: 1733255681319 +- conda: https://conda.modular.com/max-nightly/linux-64/mojo-0.26.3.0.dev2026031105-release.conda + sha256: ecf51beda01f003878e078c9898c9fccf90354aa321a6964e2e1bf51fcc0eeea + md5: 923cb2a3993db48806d77c5cbc46587c + depends: + - python >=3.10 + - mojo-compiler ==0.26.3.0.dev2026031105 + - mblack ==26.3.0.dev2026031105 + - jupyter_client >=8.6.2,<8.7 + license: LicenseRef-Modular-Proprietary + size: 89318428 + timestamp: 1773207351958 +- conda: https://conda.modular.com/max-nightly/linux-64/mojo-compiler-0.26.3.0.dev2026031105-release.conda + sha256: 03eeb0e485c57def8067c0534658b185c984afbc27e77b57b55262dbc3fad8b8 + md5: 86ab7d0febcb6717e3c19018884cdbe4 + depends: + - mojo-python ==0.26.3.0.dev2026031105 + license: LicenseRef-Modular-Proprietary + size: 88636856 + timestamp: 1773207323233 +- conda: 
https://conda.modular.com/max-nightly/noarch/mojo-python-0.26.3.0.dev2026031105-release.conda + noarch: python + sha256: c6af0e06dc5fa3e1b0dbe3f9753c3da7653090e020720dcfba063f04911fc157 + md5: 647d60ca954e6f019a974171c1dfc830 + depends: + - python >=3.10 + license: LicenseRef-Modular-Proprietary + size: 22930 + timestamp: 1773207195039 +- conda: https://conda.anaconda.org/conda-forge/noarch/mypy_extensions-1.1.0-pyha770c72_0.conda + sha256: 6ed158e4e5dd8f6a10ad9e525631e35cee8557718f83de7a4e3966b1f772c4b1 + md5: e9c622e0d00fa24a6292279af3ab6d06 + depends: + - python >=3.9 + license: MIT + license_family: MIT + size: 11766 + timestamp: 1745776666688 +- conda: https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda + sha256: 3fde293232fa3fca98635e1167de6b7c7fda83caf24b9d6c91ec9eefb4f4d586 + md5: 47e340acb35de30501a76c7c799c41d7 + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=13 + license: X11 AND BSD-3-Clause + size: 891641 + timestamp: 1738195959188 +- conda: https://conda.anaconda.org/conda-forge/linux-64/numpy-2.4.2-py314h2b28147_1.conda + sha256: 1d8377c8001c15ed12c2713b723213474b435706ab9d34ede69795d64af9e94d + md5: 4ea6b620fdf24a1a0bc4f1c7134dfafb + depends: + - python + - libstdcxx >=14 + - libgcc >=14 + - __glibc >=2.17,<3.0.a0 + - libcblas >=3.9.0,<4.0a0 + - python_abi 3.14.* *_cp314 + - libblas >=3.9.0,<4.0a0 + - liblapack >=3.9.0,<4.0a0 + constrains: + - numpy-base <0a0 + license: BSD-3-Clause + license_family: BSD + size: 8926994 + timestamp: 1770098474394 +- conda: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.6.1-h35e630c_1.conda + sha256: 44c877f8af015332a5d12f5ff0fb20ca32f896526a7d0cdb30c769df1144fb5c + md5: f61eb8cd60ff9057122a3d338b99c00f + depends: + - __glibc >=2.17,<3.0.a0 + - ca-certificates + - libgcc >=14 + license: Apache-2.0 + license_family: Apache + size: 3164551 + timestamp: 1769555830639 +- conda: https://conda.anaconda.org/conda-forge/noarch/packaging-26.0-pyhcf101f3_0.conda + sha256: 
c1fc0f953048f743385d31c468b4a678b3ad20caffdeaa94bed85ba63049fd58 + md5: b76541e68fea4d511b1ac46a28dcd2c6 + depends: + - python >=3.8 + - python + license: Apache-2.0 + license_family: APACHE + size: 72010 + timestamp: 1769093650580 +- conda: https://conda.anaconda.org/conda-forge/noarch/pathspec-1.0.4-pyhd8ed1ab_0.conda + sha256: 29ea20d0faf20374fcd61c25f6d32fb8e9a2c786a7f1473a0c3ead359470fbe1 + md5: 2908273ac396d2cd210a8127f5f1c0d6 + depends: + - python >=3.10 + license: MPL-2.0 + license_family: MOZILLA + size: 53739 + timestamp: 1769677743677 +- conda: https://conda.anaconda.org/conda-forge/noarch/platformdirs-4.9.4-pyhcf101f3_0.conda + sha256: 0289f0a38337ee201d984f8f31f11f6ef076cfbbfd0ab9181d12d9d1d099bf46 + md5: 82c1787f2a65c0155ef9652466ee98d6 + depends: + - python >=3.10 + - python + license: MIT + license_family: MIT + size: 25646 + timestamp: 1773199142345 +- conda: https://conda.anaconda.org/conda-forge/noarch/pygments-2.19.2-pyhd8ed1ab_0.conda + sha256: 5577623b9f6685ece2697c6eb7511b4c9ac5fb607c9babc2646c811b428fd46a + md5: 6b6ece66ebcae2d5f326c77ef2c5a066 + depends: + - python >=3.9 + license: BSD-2-Clause + license_family: BSD + size: 889287 + timestamp: 1750615908735 +- conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.14.3-h32b2ec7_101_cp314.conda + build_number: 101 + sha256: cb0628c5f1732f889f53a877484da98f5a0e0f47326622671396fb4f2b0cd6bd + md5: c014ad06e60441661737121d3eae8a60 + depends: + - __glibc >=2.17,<3.0.a0 + - bzip2 >=1.0.8,<2.0a0 + - ld_impl_linux-64 >=2.36.1 + - libexpat >=2.7.3,<3.0a0 + - libffi >=3.5.2,<3.6.0a0 + - libgcc >=14 + - liblzma >=5.8.2,<6.0a0 + - libmpdec >=4.0.0,<5.0a0 + - libsqlite >=3.51.2,<4.0a0 + - libuuid >=2.41.3,<3.0a0 + - libzlib >=1.3.1,<2.0a0 + - ncurses >=6.5,<7.0a0 + - openssl >=3.5.5,<4.0a0 + - python_abi 3.14.* *_cp314 + - readline >=8.3,<9.0a0 + - tk >=8.6.13,<8.7.0a0 + - tzdata + - zstd >=1.5.7,<1.6.0a0 + license: Python-2.0 + size: 36702440 + timestamp: 1770675584356 + 
python_site_packages_path: lib/python3.14/site-packages +- conda: https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0.post0-pyhe01879c_2.conda + sha256: d6a17ece93bbd5139e02d2bd7dbfa80bee1a4261dced63f65f679121686bf664 + md5: 5b8d21249ff20967101ffa321cab24e8 + depends: + - python >=3.9 + - six >=1.5 + - python + license: Apache-2.0 + license_family: APACHE + size: 233310 + timestamp: 1751104122689 +- conda: https://conda.anaconda.org/conda-forge/noarch/python-gil-3.14.3-h4df99d1_101.conda + sha256: 233aebd94c704ac112afefbb29cf4170b7bc606e22958906f2672081bc50638a + md5: 235765e4ea0d0301c75965985163b5a1 + depends: + - cpython 3.14.3.* + - python_abi * *_cp314 + license: Python-2.0 + size: 50062 + timestamp: 1770674497152 +- conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.14-8_cp314.conda + build_number: 8 + sha256: ad6d2e9ac39751cc0529dd1566a26751a0bf2542adb0c232533d32e176e21db5 + md5: 0539938c55b6b1a59b560e843ad864a4 + constrains: + - python 3.14.* *_cp314 + license: BSD-3-Clause + license_family: BSD + size: 6989 + timestamp: 1752805904792 +- conda: https://conda.anaconda.org/conda-forge/linux-64/pyyaml-6.0.3-py314h67df5f8_1.conda + sha256: b318fb070c7a1f89980ef124b80a0b5ccf3928143708a85e0053cde0169c699d + md5: 2035f68f96be30dc60a5dfd7452c7941 + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + - python >=3.14,<3.15.0a0 + - python_abi 3.14.* *_cp314 + - yaml >=0.2.5,<0.3.0a0 + license: MIT + license_family: MIT + size: 202391 + timestamp: 1770223462836 +- conda: https://conda.anaconda.org/conda-forge/linux-64/pyzmq-27.1.0-py312hda471dd_2.conda + noarch: python + sha256: be66c1f85c3b48137200d62c12d918f4f8ad329423daef04fed292818efd3c28 + md5: 082985717303dab433c976986c674b35 + depends: + - python + - libgcc >=14 + - libstdcxx >=14 + - __glibc >=2.17,<3.0.a0 + - zeromq >=4.3.5,<4.4.0a0 + - _python_abi3_support 1.* + - cpython >=3.12 + license: BSD-3-Clause + license_family: BSD + size: 211567 + timestamp: 1771716961404 +- conda: 
https://conda.anaconda.org/conda-forge/linux-64/readline-8.3-h853b02a_0.conda + sha256: 12ffde5a6f958e285aa22c191ca01bbd3d6e710aa852e00618fa6ddc59149002 + md5: d7d95fc8287ea7bf33e0e7116d2b95ec + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + - ncurses >=6.5,<7.0a0 + license: GPL-3.0-only + license_family: GPL + size: 345073 + timestamp: 1765813471974 +- conda: https://conda.anaconda.org/conda-forge/noarch/rich-14.3.3-pyhcf101f3_0.conda + sha256: b06ce84d6a10c266811a7d3adbfa1c11f13393b91cc6f8a5b468277d90be9590 + md5: 7a6289c50631d620652f5045a63eb573 + depends: + - markdown-it-py >=2.2.0 + - pygments >=2.13.0,<3.0.0 + - python >=3.10 + - typing_extensions >=4.0.0,<5.0.0 + - python + license: MIT + license_family: MIT + size: 208472 + timestamp: 1771572730357 +- conda: https://conda.anaconda.org/conda-forge/noarch/six-1.17.0-pyhe01879c_1.conda + sha256: 458227f759d5e3fcec5d9b7acce54e10c9e1f4f4b7ec978f3bfd54ce4ee9853d + md5: 3339e3b65d58accf4ca4fb8748ab16b3 + depends: + - python >=3.9 + - python + license: MIT + license_family: MIT + size: 18455 + timestamp: 1753199211006 +- conda: https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h366c992_103.conda + sha256: cafeec44494f842ffeca27e9c8b0c27ed714f93ac77ddadc6aaf726b5554ebac + md5: cffd3bdd58090148f4cfcd831f4b26ab + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + - libzlib >=1.3.1,<2.0a0 + constrains: + - xorg-libx11 >=1.8.12,<2.0a0 + license: TCL + license_family: BSD + size: 3301196 + timestamp: 1769460227866 +- conda: https://conda.anaconda.org/conda-forge/noarch/tomli-2.4.0-pyhcf101f3_0.conda + sha256: 62940c563de45790ba0f076b9f2085a842a65662268b02dd136a8e9b1eaf47a8 + md5: 72e780e9aa2d0a3295f59b1874e3768b + depends: + - python >=3.10 + - python + license: MIT + license_family: MIT + size: 21453 + timestamp: 1768146676791 +- conda: https://conda.anaconda.org/conda-forge/linux-64/tornado-6.5.3-py314h5bd0f2a_0.conda + sha256: b8f9f9ae508d79c9c697eb01b6a8d2ed4bc1899370f44aa6497c8abbd15988ea + 
md5: e35f08043f54d26a1be93fdbf90d30c3 + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + - python >=3.14,<3.15.0a0 + - python_abi 3.14.* *_cp314 + license: Apache-2.0 + license_family: Apache + size: 905436 + timestamp: 1765458949518 +- conda: https://conda.anaconda.org/conda-forge/noarch/traitlets-5.14.3-pyhd8ed1ab_1.conda + sha256: f39a5620c6e8e9e98357507262a7869de2ae8cc07da8b7f84e517c9fd6c2b959 + md5: 019a7385be9af33791c989871317e1ed + depends: + - python >=3.9 + license: BSD-3-Clause + license_family: BSD + size: 110051 + timestamp: 1733367480074 +- conda: https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.15.0-h396c80c_0.conda + sha256: 7c2df5721c742c2a47b2c8f960e718c930031663ac1174da67c1ed5999f7938c + md5: edd329d7d3a4ab45dcf905899a7a6115 + depends: + - typing_extensions ==4.15.0 pyhcf101f3_0 + license: PSF-2.0 + license_family: PSF + size: 91383 + timestamp: 1756220668932 +- conda: https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.15.0-pyhcf101f3_0.conda + sha256: 032271135bca55aeb156cee361c81350c6f3fb203f57d024d7e5a1fc9ef18731 + md5: 0caa1af407ecff61170c9437a808404d + depends: + - python >=3.10 + - python + license: PSF-2.0 + license_family: PSF + size: 51692 + timestamp: 1756220668932 +- conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025c-hc9c84f9_1.conda + sha256: 1d30098909076af33a35017eed6f2953af1c769e273a0626a04722ac4acaba3c + md5: ad659d0a2b3e47e38d829aa8cad2d610 + license: LicenseRef-Public-Domain + size: 119135 + timestamp: 1767016325805 +- conda: https://conda.anaconda.org/conda-forge/linux-64/yaml-0.2.5-h280c20c_3.conda + sha256: 6d9ea2f731e284e9316d95fa61869fe7bbba33df7929f82693c121022810f4ad + md5: a77f85f77be52ff59391544bfe73390a + depends: + - libgcc >=14 + - __glibc >=2.17,<3.0.a0 + license: MIT + license_family: MIT + size: 85189 + timestamp: 1753484064210 +- conda: https://conda.anaconda.org/conda-forge/linux-64/zeromq-4.3.5-h41580af_10.conda + sha256: 
325d370b28e2b9cc1f765c5b4cdb394c91a5d958fbd15da1a14607a28fee09f6 + md5: 755b096086851e1193f3b10347415d7c + depends: + - libgcc >=14 + - __glibc >=2.17,<3.0.a0 + - libstdcxx >=14 + - krb5 >=1.22.2,<1.23.0a0 + - libsodium >=1.0.21,<1.0.22.0a0 + license: MPL-2.0 + license_family: MOZILLA + size: 311150 + timestamp: 1772476812121 +- conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.23.0-pyhcf101f3_1.conda + sha256: b4533f7d9efc976511a73ef7d4a2473406d7f4c750884be8e8620b0ce70f4dae + md5: 30cd29cb87d819caead4d55184c1d115 + depends: + - python >=3.10 + - python + license: MIT + license_family: MIT + size: 24194 + timestamp: 1764460141901 +- conda: https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb78ec9c_6.conda + sha256: 68f0206ca6e98fea941e5717cec780ed2873ffabc0e1ed34428c061e2c6268c7 + md5: 4a13eeac0b5c8e5b8ab496e6c4ddd829 + depends: + - __glibc >=2.17,<3.0.a0 + - libzlib >=1.3.1,<2.0a0 + license: BSD-3-Clause + license_family: BSD + size: 601375 + timestamp: 1764777111296 diff --git a/mojo/pixi.toml b/mojo/pixi.toml new file mode 100644 index 0000000000..37ff9cc53a --- /dev/null +++ b/mojo/pixi.toml @@ -0,0 +1,14 @@ +[workspace] +name = "darktable-mojo" +authors = ["maxchisto "] +channels = ["https://conda.modular.com/max-nightly", "conda-forge"] +platforms = ["linux-64"] +version = "0.1.0" + +[tasks] +build-lib = "mojo build -I . iop/sigmoid/lib.mojo --emit shared-lib -o libsigmoid_mojo.so" +bench = "mojo bench/sigmoid_bench.mojo" + +[dependencies] +mojo = ">=0.26.3.0.dev2026031105,<0.27" +max = ">=26.3.0.dev2026031105,<27" diff --git a/mojo/reproduce_fault/README.md b/mojo/reproduce_fault/README.md new file mode 100644 index 0000000000..3613ba0438 --- /dev/null +++ b/mojo/reproduce_fault/README.md @@ -0,0 +1,28 @@ +## The Problem +When a Mojo function is exported to C and launches a GPU kernel via `elementwise`, any variables "captured" by the kernel's closure are kept on the **Host (CPU) Stack**. + +- **Manual Loop (CPU):** Works fine. 
The Mojo function can iterate and access its own stack variables locally. +- **`elementwise` (CPU or GPU):** Fails via FFI. The utility seems to assume a Mojo-managed stack layout for its closure handling, which is violated when the entry point is a C function. +- **GPU Failure:** Specifically, the GPU attempts to reach back to the CPU's stack address, which is not mapped in the GPU's page tables, causing a `SIGSEGV`. + +## Evidence +- **Host Stack Address:** `0x7fff266bb2a4` +- **GPU Fault Address:** `0x7fff266ba000` +The fault address is page-aligned and lies just below the sampled host stack address, i.e. the GPU is attempting to read memory in the C program's stack region. + +## Files +- `lib_fault.mojo`: Mojo library exporting `run_working_case` and `run_failing_case`; the kernel closures capture raw buffer addresses (`p_src`, `p_dst`). +- `main.c`: C program that `dlopen`s the Mojo library. +- `build.sh`: Script to compile and run the reproduction. + +## Running the reproduction +```bash +./build.sh +``` +Expected output: +```text +C: Host Stack Address around 0x7fff... +C: Calling Mojo launch_gpu_kernel()... +Mojo: Launching GPU kernel capturing a BigParams struct... +Memory access fault by GPU node-1 ... on address 0x7fff... +``` diff --git a/mojo/reproduce_fault/build.sh b/mojo/reproduce_fault/build.sh new file mode 100755 index 0000000000..a71ee51d48 --- /dev/null +++ b/mojo/reproduce_fault/build.sh @@ -0,0 +1,10 @@ +#!/bin/bash +set -e + +echo "--- Building Mojo Library ---" +pixi run mojo build lib_fault.mojo --emit shared-lib -o lib_fault.so + +echo "--- Building C Driver ---" +clang main.c -Wl,-rpath=. -ldl -o reproduce_crash + +./reproduce_crash || echo "Exited with code $?" 
diff --git a/mojo/reproduce_fault/lib_fault.mojo b/mojo/reproduce_fault/lib_fault.mojo new file mode 100644 index 0000000000..8ee28de55a --- /dev/null +++ b/mojo/reproduce_fault/lib_fault.mojo @@ -0,0 +1,69 @@ +from std.gpu.host import DeviceContext +from std.algorithm.functional import elementwise +from std.utils import IndexList +from std.memory.unsafe_pointer import alloc, UnsafePointer + + +fn internal_gpu_launcher(dctx: DeviceContext, p_src: Int, p_dst: Int) raises: + @parameter + @always_inline + fn kernel[ + sw: Int, rank: Int, align: Int + ](indices: IndexList[rank]) capturing -> None: + UnsafePointer[Float32, MutAnyOrigin](unsafe_from_address=p_dst)[ + 0 + ] = UnsafePointer[Float32, ImmutAnyOrigin](unsafe_from_address=p_src)[0] + + elementwise[kernel, 1, target="cpu"](1, dctx) + dctx.synchronize() + + +@export +fn run_working_case(): + try: + print("Mojo: [WORKING] Launching via internal_gpu_launcher...") + var dctx = DeviceContext() + var dev_data = dctx.enqueue_create_buffer[DType.float32](1) + var dev_out = dctx.enqueue_create_buffer[DType.float32](1) + + var p_src = Int(dev_data.unsafe_ptr()) + var p_dst = Int(dev_out.unsafe_ptr()) + + internal_gpu_launcher(dctx, p_src, p_dst) + print("Mojo: [WORKING] Success.") + except e: + print("Mojo: [WORKING] Error:", String(e)) + + +@export +fn run_failing_case(): + try: + print( + "Mojo: [FAILING] Direct launch from @export (SIGSEGV/Fault" + " expected)..." 
+ ) + var dctx = DeviceContext() + var dev_data = dctx.enqueue_create_buffer[DType.float32](1) + var dev_out = dctx.enqueue_create_buffer[DType.float32](1) + + var p_src = Int(dev_data.unsafe_ptr()) + var p_dst = Int(dev_out.unsafe_ptr()) + + @parameter + @always_inline + fn kernel[ + sw: Int, rank: Int, align: Int + ](indices: IndexList[rank]) capturing -> None: + UnsafePointer[Float32, MutAnyOrigin](unsafe_from_address=p_dst)[ + 0 + ] = UnsafePointer[Float32, ImmutAnyOrigin]( + unsafe_from_address=p_src + )[ + 0 + ] + + elementwise[kernel, 1, target="cpu"](1, dctx) + dctx.synchronize() + print("Mojo: [FAILING] Success (Unexpected!)") + except e: + print("Mojo: [FAILING] Error:", String(e)) diff --git a/mojo/reproduce_fault/lib_fault.so b/mojo/reproduce_fault/lib_fault.so new file mode 100755 index 0000000000000000000000000000000000000000..ffd6179a977e2fbe51cbabeae2ddfb829df4404b GIT binary patch literal 57088 zcmeHw3w%`7o&TLAz@U(cR%?8;4mjG-hA@-oBm&NaKuJi+{{3f zzqzsSdtvS2gy(=;BjhW9NmhzG5mHd&nH8fPUTp8jXYg3gpE!Pm!ZDgcqARVkEb7c zC_Rkl@c3~N%5oH?az{kDBchxPkBh;Qp_y01a3nmPqCR726ZvGw`UKd5zeIy{zxazh zgE2fN%E?fcy9wn8&#_)|*cXcSvW<%oZ%G@}?nU_J;#WWaW@XWu`)+xoy}n|G`M>@+ z=eiv~%6aNbGYJ>!{$==4IgX#zthfpo>~g?f+5XM77vHsOCvG!d7op&V8Bz#;YX-Oj z4Q9$8$pUZ80{;o>&Q#B%S?~#DfdiZo&yQzmw>L|>-^v27%2H2N7JNck>RFMco^NNV z=l5CQ!7TMm$^!pG7JN=*f#02_-D2do;%B_(0Wj4FMEosQ&-qH@l|1pKNj$t6!G+5C zN{5*IxPU7@-?FxFyRS3qk3@Yw#phYv;0tOIZCOiaREsQbn9~|=*B1MmS~Z@QImH*~ z_WMFD?f%x5J2WNI)$Usrj;v^DU*-$=J2l1Kxw<{DaIvpWTiFuO=7ig$T6feJh-m(( zrrc0JcfM~_CT~H+@2+i*8b@(txJ=+>RKXNAetp_XEYMZ-oqdpD0fFomeMUj zLxODc4Be-BXpV&2@O{ka$ zKeeRFmFx;WFE1^(pS7);l3bckn60I#w)Lv@d7d87MTCk)uOi7~thVb%mZA*I) zD0YgJ0K$?=rQYkQo#QJlag---OG_%08O`A~ZAM#QMj#y2W(56_6;UGH8EvR&MoC9Y zcgqZ|eP!p2pce9XwMJ*4DZW04qticm5&xGLho;gfuL=L?GU5JVBuTk*&zHUeageL* z5U{SxS|(Ed$3<}|pQSu)6zg$N$X^LB5b&+rI2IBvtT*_`LQK*A6y@*$%SaopfgS?7#aB5p#b{$-f z1*JK4@bhG5gf1OisA?=#)xjkTq=W@J_+*xo|G7;E&)2~lb#RLg-mHTc=-?eX_ysz6 
zw+^nC%RM^yhjjTj>fofO$ZN9>ez621yjKVRunxXO2fsuIKX-d>1kR1Xxe@qBAA$d| zeDqDV_n2Aj8@C5GhPq)Onv)n*d-s_iXI)EFJOOax>Yw4SkxU4fU}2mnlvn zZm5IBKTmNIZ9|PL{uzps2p?L&;&)J-MBI>z#Um6a(Kcjf@#PdJkv3#y@g|CsC>ttZ z@qeN?iLfDs#TQbXMAy*qQvh6jJ;e(sew4*)DNdqlXpqHgC{7}3Xn@5_DNdqkXd8=v zoZ=*shPEKC_RpFIfZ9K9Dh1Ue(T`v%O1{ab;)29c8uU=8#B$|&6j7AN4RMqoqRjmj z&rvY#lo~s(?itBd_q=?aYC53)>QwX!RQM9y7c{p8RN?9BBYNnW^)IMP=_*ruXZ;uj zm7CSrU!s%MzF8{)Ph7keu=pH|#es3H0GO6Oo*h4`Z|GL!wOrYQnv|Q}x4Lg}-|Swz zNWJ^m5e$>(Cj`c2#Ta}uz`F9gprVkWt+;p4U-e*}&0I-0u@@bO=Q$A#Qe%H|JQ@GDw-X6( zf5q^fisFquVmpK-pdMUr8x&fnH};_I2*EH5chQRT_D{YMdDZ@1wr+y=-^23m90YB; zcMw6Tu>;GU@y}Bid-oOWKUuJz<+N;HZ@U*I>MYxL**b8u|KtUAw!)2dHmACQHxj*E z?c2prIIbpc9O;^*_8uJBKe8(RlanC14Xw83XrbD-#&(+_`I|^~zkbjZ{RCwyu&loT z3r+2tX=C{;8*(Trs)``*(5qOOG$bzS#vmqZCruceJB$jo_<(~D1nNhSqW0ZmBQ@L$ z?92$MD^$v|eUohjq3DU%=A3Ejk~n;ZwUNZ3lcEBLm)`;oO#o~&sj+)(M`_T2$YxZ< z5CIOB?Sq7d4QP1#zGFLtByZm)+W>2Iy^RP=MSL3z@3C!V;Ul&!EZk+g7a<0EJ*!iV zt+5rVv8c_e#unR(h$`$f?CzcsQ2G~?F;N0458Awq8~5r6Yd;JgzI z&KuigBd=E&4?ZIf{imZr^h~CqS|X*g(X$ zPNNd(Yn^3DEpzY0I2$h-oY?b>FB9t^!PtLs_YElDKiPr6+duB>EVjmGcJH)Dh(9-if$H67J=5Y}Cl>$0RaEOPn~HUiT>N)}mi3FjA8aqV_$eomVlzUv_+7Zk zu=uU%i~j<-S_XjOa4zxB>>S`5$U_{!vVmOKUMziDJ--FF+zQyU;G_FTZXIZPb&Yz# zdYc(haXSK3_C0skrQ;r`r()mWl-;+Utx0eHcWiMYJrKvJa< zDY%H5)_))4=!xxfyl%O&4wKt6vs{T55N-CJN^~7p4;}yoLk+n2hh3EKD3-e8a9o>0 zXulYL>Gecn|40GkpW2sa^ThrPASjOd(TJYyA3;Bw+0Cs3`$wkb+2a3-w)c+|<=KES zOYTN8b#o&^y5g4-cKws5F6D45;7r;ej}!N#8d3fQxThi{GgZ=$=!S+9B)`eV*hc^4 zw{IsrAG8$#PGCEQ{gbyxIoKt@DhEF!a^J?lUXmJE^se$-=cynnvrF} zaxb)n!nwGMe+He#(ptcT(-u@lOKSjO`ii2SadAprHpt8NzhfvNbE%&^6AgeNlSgFQ zn9S5YI+IOgqM_k6(U=)(O1wtRVXV0Hb{Ue7vg8MC9mzTxN7wOVQAa`al9Z%M>Ie5B z6}*~=xQQ|n4XuiQHFdj*Zu>|Kg42T0kQfAOwbwz$IOjSJQ}!Y_&X`ClNf$|lYJXJ9 zrG0RwP*;j98}?$thL~iMVn~fWPGU&i^Hx#}JwolK#E?31n2RAuxrVhIXB zHB72~m=(R+#}PUFH?+$*K0{LId_3(TO1pa>DdJ1qx4W0RKjro0kDc;ZwR_Mi)3%c#K@rP$nexch^PFjObgb;6!*a~oy zz8H@Hl2x~T_Em;4QQ!->b-#W95^Djp@?j`4&%~dk_XLaS%J(q+Zi%10;e=2 zz-Z?oOS-)iguU|R_zdz69f!tqdVA-`HX_=jz4PEZuy+Iwce6H<_D+PN0*6yC 
z0|!31s^P`>AVSngT<0iC#kU|`XZe^&ttZRpqv>KsSU!7UCk*{F%IT~d zC|zXToJxw7rKnOTR;1a|F!ALxS~);5O8xF_$Xs$BllULrsb%K+eZ_RN}GlZBiJ*; zGl< zvBsO_8c#QCl551;N!|HPK27h~)g8iDPnTz@?rcZ%Nm?|T-gM1paDJcNPKwQFVSZnod`D4gmy;dvyV)6ec;csym#*n7Xs&Y8vQ$1xbp? zsP6n>NV5FV>ds?G8?Ek;bwF&1?AOV|PBN>qnYh%G5o86?uwJnnac>4t!VBcR&Mu8p zKIV}`dq-JhJu_mdeX?*Vv5_Lw0$;j_P@NR%pcHC@BHa|BMxY<1WN#cR$V_E{Wzv!r zCNKA8yjzR*KjRp<-hD0@vdpM`6fKcOdm4N8xscAD{TFC{#L_>4dQ$c*^c#@XX^h!l zualEp&|vf=SzR`V{;@wZL`iJDid)y~`(gB45A3YX>n!Ul<0@!Ps(ZjnK_e|llOU9RF{ZGgZoo@7en>^b0;}7_Ox*^eh z6?{3^hk7yTqy8OXv0*^%%d=VjWiKex{J7&u>}1s$_JeMNOPEX#Cwb%tDFR307KEPI zYo7iNQ$y?*-q?OxO;&QnyVO26Y~O+I@!r1G=Hc^pvhACu<0nRl)$GL{ptr9TSy9p( zdnNt@0I1?^tYWzO5$@Y_M|hrj|C`|(;9%Qj+W`GvM=ZM;0d>KZ$ZypV&WuqUA!P9q(xuLIe$VWV`_ki3VIHC5wAod3in5p39&l5dF z(S&5JE}h~+S~OhfYiugXBo&>a%RBK+YS>IXn1M)(UO+v^)MA6>ukroZASH!`N~ry{ zCUuqxcWbOPNh~Au1vMT9jB6{h<^y#XsWobXH%0Cyl}O>c*d9J}ZBw$;2KCRHR%QHF zeSLSo$x!=fI%>ZrrBK&pF}B@{#kdaN%&GOc<0l5a7#~c@>!gN$gUabH@jjH17Nr#sMm>ETf0Yzi8tgiUAXRauokG6Du zRr?s8-8BNo;cL?oggfR>@gGqkbaX50C?mY?78OLN_EioqPcNWSfgeJJI75$oqYz22 zze?8cOt0S@zgl3!c>~8LBy%rI&#lHkc4kz5J=r)Wj|j%YE5AurzEEHJ?N}2KNMEJG zf*n!Ut6=fdSaIec7U}mG5wP(Z+B%vDxPSZf6fk2HlAHyqv~c2|#vbJv2K;4g7NpO7 z8j{`F2mBE#i7p+|O@H0M8a9;BNpd5)V>=F)Yu4}qPS4)TY%LICVm2X&d`&PXvqT=zY@To02soRJkPR& zAGsJgu~*&jq-yaT@R|-iu1n8e|G#0$j;g;Oz_j}5xXB`@-cU*Y#{;coL&CE0OSE$O zE8PEtAodJgHC5o}4I5s!tUm-{x5LGfCx3>#@sIudXZ$*VO)wmQ0x$mCzUPKt|7`;u zv-xYT+CTF`{C0b$jnrDU&osFi;zo5q!OZBWI{bEjNgK7;@9hJSGu?% zZr1NR{NWFn%l?W0RmOVvNtlE?T~x*HO=&7o6&pL*9wUldxyhDSOn|4X_XQEm%VP zFChQa`k=K%)36~CUDDmiX>+F}O47GuZxTKJ3N**)u{SYMjs0#_oaoL-a*r}Pw0H+O z-Tg|P_FHInE}R&9oc5zBFdbl&k|GSWpLOC?(+Ex6hJ?%VwS$;Hm-V2_vOS?r`(4)| zwcm~Q)2B_g|0_OsgSz%Za=(PHR#mrJf5hhUpXs=WW&L@uL9k!PG%m2uADfByfw5^M zs}A;3Cc5Mrehj|Nv^8AMxPxwd$FQEqzvJ~>PwTmlp{!G0aSktu?|+WtY&wbYfxHh* zmqikL)fg|Fp{IAEq^_r<7xoj24GWw4oaDFf&H4tNzt`(~f>_@VV0~k$s8}kGi=Epw z`jK6dH+p34YJv^4ETPjHnoV2rqkM z&v|0?1@So4GwA5x6l&}bkd#zSN>UCTChbjHP-^V&WI?HW-o_rp0kbC-E{Grb4LG*s 
z`!2op1f`lNoGx!{m#qNzyj^wTkS8|5bFdzxK{(%7AdHAa{PU0vNo?Y45F;gyEfgpV zyoyutaze_8>f@o!?064TyZJkLdIfp4+nNr^rbaTD-&! z|2n%XA5EBtC%Jdv2!b-4=Yb$1rg!%-HsI9*YYRO@NZ`kB#b3rOc?b#CYyF_8f*c6F ziBgX|FR4xaiWEYct2pBq*+xp_`cwRBV3~QOh`UZ8ekO_KQ)%*v>sNOlqe11m8l=nN z?rW%%UK6A>NMyPvcG$yY?z6cKOy(X^s}HUF*zk33f$)wKhv8pA=1bOHfQG0tCJKl8 z8Fh={@Wg%>f0`zZM&nIdZlDs$@@YUAk;k5=Zm%aILboYd9`Kk#v!BfKr??%J^G+u# zNoM#{95xiFo=fYRr5?pSNn{`k2=~B66=@G#plU9_1>?I`5QMqaSfR zDfIV!d)5@>U4mVNJSZT0t!niXkz?NKSEHARbl`B2W#dI=HL=H2{R^B|6I#_ry|I9K zxY*riiJ9l1!IQOU@T6?;xukXjjpxr?<2jmy=`k5)SS!gM5 zHNdx6*iijqCwwh)%xd*<)pFx;u;#)wQ-`}Lyel&FAco}n{=Byl;52E%5e*3z8y-_y48t%e{R8@TE`8@1OY+_0dzkzrj*_vcZ%X z{t#PC?%gHSX6%pqYDZ4|#an%f9qjd5Zan4bJqSJg6caxXJ#jy}Jf+%k~Dvy)hqFjrZ!roQ>XV6376;qya`6=yD=UBf7MXPvPm=u-~$Q2y^Hm zUTMLqVw5ynHtj)N?VAf}HU9d(x~oTSh}B&)l2)+GKYXXUBSol3qgOi)GlM0mzu;&^ z-DJ3J*Iba7ulDW%!S0C+L*PU&feKkisCmZ$?@3VF20YdKx?V|ZfS!f8mK#eK(wPcR zY(M!hzx#6%K5;tsOl0kim3bx}^v3SQlGtaUP~h*5jjsj%C+7hFlWEm=eS7%F4BeEz zm3rJE_TQe^2{J*2Qa97n_Z8whp1#M3ClLXD3sTK<@UxN|(PYK`AkvcyWoR|!;$>kz zT_#+kSLo`w6*(MF>T5FnP3?UvZ(WtT;VH}d`!Eq$*Aq78;BVcPiH&uaL6*OT$?fGS z^`?!rCYE*zX{W$FdfzHoSBSsnH8*?uCaAF-PhZ`YBUnL~jSS!9*(ImUGv#R<>xlgh ztP#jVKZ?)%84c}UJ=IS~r^SwVP0ux$UhF-3CGxP|?TufK48!^Ez6r4$H|jizDo=V$ z-=r}}c3Jm)N%rxq_4Iv~22uB%OB$|&QgQN2=BDr(C|rvD`a z`}`AU7}&kYaK?e%>=+<%o}$+j|AEt_S&hLD@Rr)Y2D${auLAcDh*cgPpSJDv;D__?iqIFHrXcWc= z50jFJJ30y&&D&-goTmVFDAF#4HTb|8Olzs+x*Ozd_F=_U2Iw3j4h>C7tfPs$)PXJ^!=m~Vk9=f4F zh3bZvy2pD+nNI1w`#t^9JnZ(Jh(AszyRh>|LifMuo|MZu%rGLuEnWr$;U=2|B-mCc zZL(Ym7&o2buhFsBK5UMWF82wdB1m2A;t!Wlv%VzWF17mSo%No+F!Yi5y87V-!j+5s z%#};k9>UpfgrrH|N7eoqoukX8Zn)iMw`@3!-VXg9f7CsH$wf75&u6(0#9v`2^=Xwr z(RE_*=psYVg?mm8@qeYAYACvtEJX|3i=sTVXq|DD&X((HpnlOx?8b7Chq#9=NyM!T zake_+v(8-mU)jm*6n?BtngIum`sp!y*jQGMIKxbgcnUKza(}E~Mf`~M z_4|2RVFq7q38me=kJg2b(dhXxNObiakSOupp)Z1Z4ElxIU!SM|3y88W*R@yRtCH@>O7Re>!zeWtaffw`i-HN^J>m+X^rseGq+Q99hqr4d3io9$` zocuTOuTwq1h_wz1A&k7S7f6|~Jf28E;#f>{$c3zcyZ2ESQ&Tteb2<`kgzD$V;9rU5 
z*{Ij%WUr6XX(+NeF;2|;L`NxPKazHSs{7OrD6B8WpJ%)ab_}}oxA<>}yP*@@EWd{} zAUbjVUUcGd6F(dST&aK51MDc2Wr^Gjp@6p>`L&R7zr}ZfQE8 zreHv!?Cjfn=)^B*ufyVr6U!{Z%@cxc$`hf(=k0s$5KiYp(t-&8n(T}#{@-W{v@*j* zcrTz@ZzHMInBnMT5&G%ffEGs|iJpukV$v!JA8adZvTdO*;0y=c56{xPuOQ+@VNvk{ zt_5+{^UDD_fZo1x5ZXa<0kADUdwkK;iNwZ5MG)>*s5)%7&rT+70c}|v$TdNprz=0} z(Qw*%o!()T4u4AR?}Ev2yUof>Q3!PI0i}m~ zV21tyNa}r_7%p>CuW~VMfErdO9$^#!3x@VCtvJ$5Pk_^;bH_&FD8wi9mp$-}a{U04 zVs^Y9{|_A8_w*+6*44x4wyggiDuZn^$;Nyx-kAv)uujSV`{xTh&1;ixmziYVI!}2` zu=^bf>qP-7e*}}+K0t431mZlmzm^uagNCBHw=9Z$lQJ~Z8yg+f1?>a@P0J0(OW1ixBGd@d_tV1okH5F zA=V+F$DA}X$w}ipDY1Kqth!a)s$1ge%SRVHecq3bVDJh@hHv#u`D4=jPOWE#e?H-F zCiB}{Jrp_Sss3ZMNcfA$ja9fx?e$OOrcncb-QBr1lBh~}46DbmH!`o5wR z|0Exfq9or*(JS9VfzF+~&W*si5jZyj=SJY%2%H;%b0ctW1kR1Xxe+)w0_R5HtVX~r zN?$3$&S&@*>0-W?wvJXG z^7xiTT7q~>HvVc$YHB>~(PFP>@r{v|Wi9QCv}kdtT|liokq1kQC>GSZy?C4U5}TtK z;JU@DJG7#f_Go4Kw57$?8Plu*e@C<{f_EHSr_Z*|hs(0Kre=|+p$;|j%w5Pmjmo0g zA_q$GHss$kF>rt-3KBk`}}-?MO$znrdI2R5jPz;O;6W+ z{OEfYX8g4%li4>4sH_V=x?JpUDjXmeA}$%q^+RCUCd**>Gaxd`kbLxXQ8}FGRPaF! 
zeqEX#_T=!m4E5!3=2hvziGI3zK9L@F@Tk%wkrM4ubz>q?4tq9!>b@dBy0=V~Ke_OD*(_g~ZWapQ@cz=mM zvaIuI#Jf>U)W6D)Hrd-xB zgeM=Y!PV$eOMt#R5?HYpYb4qHgdgyTzWMK$tn#naLilh+l#Q-+X9;~@gpW~2 zBplE>JHruQ8$SAAdPp!ue9&Z>7JU@4Wr6nT2= zlB^gkcGN|D|H52wVIjv(iX7J=ya1A?M+g*`fKz_?yNqCN{LG4#U|;y!0g_boSZ9^ z`*-Il1y78_eUGBNW=Glah)+=R^42Khj*;F#<8Lw*JfWC!FeL|fQ+@cV*6uYuH@>(I`i~4Y^e$8@g&tvB&alvEXXj{%Z#|Mq1+7u(b4Q0P|?)L$Y0nrFu?2YXR zg?g%Be%ZxzUEIA!iW)A^A-j4+`1ezZFx_qw>2g{8TEs=RWZ-RShgMf(^?K{9rDY}d z5)kd_E>0}3CX9I&Fu-W|R|VYI{GYPq=j(oSJ@maP1+UMRdO9nPKWhNb#4qVgS|)y} zl(dKOU7-@l1xOmDWLbMxNwXiH3le_`9*rm^k#Nu-^(!Ux0V$T7qzJ%TYb5OJ@CWHr zK}reNkV{CWK&ZB~b+vUWC4q1o32B8y8NLdHuo+eIJ06}T%hbtNt^=GE|1N3oYV-Ng zg|ozpsv+rmmepV~NNBT;ks}0R`~N|W_&`<{lfGwH*V&aOS?8$PIwAQejzM@*Nr4M0uGm z%L_15bEs~aF8P!Ut%mXve;Hnhbkc2PdC3=L*dxlzfcl`nK86UvWO=Dy$Yp2lgd;VwcIEURri*B> zVS-%&jXOQ*t91OXguYwHDhYRX&VYH-)#{HR#K-I)Tab-{8A(6n4B>~I0f+SrofkV* zO%fq1efdud!uQ-t9{{w{dedJYvxeKP{40c3f?HR?Z*JA7S1hkFztneR9nCFrg>VS) zJm-HW$A1Q2{{KGzJAJ@Y|7png#icGuhlTG*bt-LgR+l$5*-NXct16r!KWgv?o#nww zd$81A9&-Bqe$7$gFKenScLuepU|FattuaK#>R&%jf)&^MMf3N)OTiBo} ziVS5pNruIcW<@v#|N8oeOJHk6AVGYc9r!xxcb94HTBId#I=A~n%eYIkiG8Q`Of_V3 z$*0$l^faII%WG%!{ihte=lt?)6KJ&GKQ%<>{PN_?e~Xm05z+n_Xl$u{KtpYy~2|GW>ubX!dP79uEI!c8;Ezh5l)iL`fnSZ+m; z{{6TpFZIevQV|yUWbl6Z_oaCt+kLnG{iNP5Dl+=_CEu0)eIwuf2lVfcV!dbQ-yc=o zyYug7tmd8j_eWK8*8csBHNH##zOk;emznRke_z^{a@|ROAmhX)h{s6XLh>CMsv;xF zJJLZU89#f{`w#zhwD$>)+of@PD`d{YK%R-=Ied_eT7)`S<^FcK-c& zZjV>*-?tXs+^%(ZXo08}oH9*^lknFk^VhY2RYk4*fUy-PjoPd(6bwhA)}lq8`bBf= zZ<#UA-_;tm%Ic)NJUyQ8$MyrQzoS?zBM1hvqz=9c9vTHD&g9iNVL zM!Qz7>Rx?^zR2nP|K#m&4tGXpaDP3HY~jpwcQnoGuiJ`X)zX_W^!5Ao5!_o1af+HQ z&%HK|klyw7vz|mE50iRDZz55QaMI>Pf^q4>GeYnEiNqR& zn-J34nE4-x#6E-%K9ES9M7Zn0M4}PQW!FQA#5RP-9!?~lM)>si6A3F8;mjW-60H>e zFp>BU!by)L62}qV{-5|b2$Z*7+fgsVr~eD(5Kh9=^F?5?-d%}A6yX|#8xj5t;dc<8 zL^y!(ipLTOdg;^C2s$K|=1TT*NfQ-1%Ftsm8SB_g`t4Bp{T{aK0)#e|$M$JFUo86Iv?6m@koVoc8|bWg79j2R2&~lGoNuQ(=tADtkk|Y@yj>3Wsj+0ad9w|=v7C!a zaz3L2;8h+d<^<1r1a*8W^BB4FUHT!)oomWC5`3QMU9X40AK9TUKDT@<>QIXuieu^B 
zCLDI5+-C68J!8Y+a~W|!8#Etg@YsvMi|0{YNgTitz!!SQZyMLnM+OrF)N~VK&v_vo zgyK}+ZB!q4`i-ZruRgu-+8QD3*BKHIiL;_}RWt;^qHN*nN;jGg6my@t9* zP#4Lw4D@ZBy9={a#qc7T^fY7^y^lIWUG@1r>AavYH!~s7y5EGl1|je018{Va4X_2U zV}Oy2q56Asa@Yc=v|UKELM{^h^RyBAskOy39OC?gKQ6 z_MlR*t4Q2H{IYAKNSFHvxuhrHGVVJFuoC}1PZz}$CJzCUb}E@1bkU31>2e9jzl7f_ z5R`as<=K+Qilko=FG>tkQnSbi?UTjjI3E{r>F2|HR9U*jQ&juD1xLg@7#pk<^E*$3 zw}>z#!j&TYtO&m%!haXxb`c&B;crFwf(Tz1;dr5xeOQFoh_F(G^F(-y2ty)VDZ;JW zIOg?p=G0h=>gV4)&FZKqDJ^kWOYNnV_ELMbv1jRVnwLCf^m7?R*Eyi39?_00WfAZJ z1AMB0%XKF6J8@%DKCEmLcuG1Tz0-spcDW8@{uQ{PcICR2@Kr2-fg;zHgwq@KGvR+f z%RgDMujD!9fxUeK-a!@OlI!7lc7MLo(ZImU4|t$VK?W06d5_2s(}-OzzzNUIE*@t# z7H<1Qe%BW{T>kx@fDe9wL*%p^7WLTaKp-x;F8>(|g6c1r$KlLA!7X;i8GPUd4ww94 zr+{w}a5+7nMFy&8P{8YWF-60|Cb*prI^bgVCPMPxQ-8NKF|aa)2g)Y|yivf(e@)l* z0zN3<4&gX%0i5br1pab;-v9z)@N0Q_<_o~>_plzQU8@*p!LF6V0`3xUqD#8s0$w1- z8NoLz4UxTGtI|D_NpRL=r3 zj&U)&4B?XyJXFtSQO``4swl4tc!7YUT6Psc5K(@spwD^%p8+`4-zWx#+3`qQ1Hnl7 z72#!;bn>qPKCqfsAnD{$0k@0%KM-{Cdjand?aKa+2>50(UsFYgFZn2M_jo_A{~Cd3 zCE#ov#Jqe$z&|bETdFwVMFB6z1{CX;$^p#I#%;cp!#5M-!o_@C2;V>lDgRM1U(8)sP+$B0XOoedmy^7A_H|5U&?yE&ZsWN>>4 zDrEV^#GVrM4+!`+vCx^H1ZmGyaInP_gI}l^<$fpNn0;ljz*FY`tf)uIFA4uLgI|=?t0a6Y;5noffj{Z5 zhXKD3hdZ$@T`2WcfJNKl+-I1zw&7UY7;# z&jRns0{P+Oz}IGhe>DqyXBPO+vcUhE z1wH}%D--^A!jr#Yis%gOL#go%HHijFSYWOI+~n#ETgH)5o)RkmHKg*L!aE#q}d(*N`FOZ z8D5U$sGzeRc5#a;fUR&8m~$8GlFPOYN`ub{%~xZ0O3YHbP7 zJGtnEQ$_Rqt(_X)m9>bqF(-_thW+hP{+_O4>&LHKTFl=1_3ny>TDv-%(Jr-A9K!Qc zxSxlhPU{RrS~@V;wrRzSJ7+CfLa(%9uh^M+)e1ZsP|`wAFKeNhmrrHA3xr!+vE7Wf zPIT7PAO*chg}hf&XU{ISBB?77omtcoW>28aq+MHp={+soBA8}v)7rw3)i<`cuAaSg zsbO%-n#wDJ&Y(T0H912SO=Wy=$IyIDO;>xTKcx9OqIib7D~dO6alEeKZ`86)GqhA1 zv{yRq_KGrRu&lZ~P-;l73)+U+2HXq*h zg%`C2*?ZW4!_5q-1$;P+bM9#aqVD$9a<((Pqs*E18a2=Zy;aT4pX69X4|1Rlyfv)I zK8?L)jdvM6UKH+%1T=3r0Ll%wPxCFD+i>G8bL)KWh4p9bN!pvr@UFS`Pj_i}6H_3f z(aTu)(;)IyZ}f_?wETRurxgc@qGpB4Xk4*8I#+{mXiYOB^wnuiUCX?9L0M}}%_=Mu z%@+-$N?J2ogzD7gD7M+JnO4klMM@UqEoY8dXCl{7IMU|F9QnGUEv;zSw>TX2x5^b+ 
zyE>|MrjaY7&ve54AkZEInWv#W=2iAVbY|2Xu*t1u&k}*z1y&T})rOD@CBC-SmWYo> z8fV>UwcaIC`2DTfXZvcL8A!DC=c@Fqycdj#XD zso^(9{$#T+rL~yiqu@C7+Q>A)1>pvkH0tGw2F7J`KtaYp>cniB)1g6rmNx}T{Z*9} zc0+Qow5q%`5UMIG*QzTjOGy&wOFPTT9QJZ&Q*}j?qtqV~Qus9dnRBUmcpoEiMDXvf zwhkX2WJZ5(@kd%9bVbjoI}7~LX4b8DBe1QZ_9DYeGI@>kddmMV+e%3RIlfDXEa?5A zwo?4c@GHl!V(B#Y!c6uC%~{4ln!+>>qJ8mJO-9zqhsDpm16BV%)%Wxn?5Q(rlFW-# zx7k+AB)xu=Np&lG@u{_Dre3WRJZ*N-l#Xx*-s%XoeCEfC?8Oc!FrqFhMw~G1^|7f* zw_g4MzDCtLs>67bstnb9Mz@zJeWJGZl$9VrKhhfEvM#I@>W-tJ6T#a(n_7k!;2ypZ)5t(3P))Ten_&T zB>h1m*mn*6ncj|SQ1>F#=&qPv@0i}+Qd3pqtf|HTu($Cx!F*$HKV7?TiYe zw3yB6lBM+W<0VV)Dz<{NfheqYC@Qcvy78W&miAEChu88og*&xb_F^Vl{GDx_N*EC# zJ0}X3*4nxOL=bb`u>;Lp=WJl`Z~#EmYN8&pNnBSVW`KX zL#AnJnRKjaqYY8NKM?S1AxBwtHB7{S!=QcyouTSVtvpaxt=Y>fnwkvBp=--< z?~Y-dRs_Ab<9*KSBz^7gf!8U`AyXY}sxF7iCJ+h*g3fA#R^oRC%i*rJ2Ltf8SK3Kw z0gF7hYx3?~k{_H}WZbZE1ge_sWtvt|S&h$u1OtX?FLhSg{pDIk2-<9wHY(ZaEHA5| z?}$`am1D0(Z1ULFWx@m680ELd7uK5CvZxlO@fb+tWU*WkG|kU=?$IfH(!GE^DT zN&`lVrYwZ_-37~<%1ZGq8SK4s=3o?gY}a6M1Y9KP?Dn;Uu=z{dtUIaDf?!GnD$Y$`);?M#$sA!>r>k1-zcr+jD2{& z^nee3a*RA+Z2pAPt^RX+Qha0ogI3P&}ai_mLZ*C7XJ8Hq1NP zWyMj6R`|o{APNbmv`thT*APgA{)7W&pzicTWyYO;d$7vc1c$oaSz1;V43e`Pm5hn{eRNzw9AMMw z_x5c1J==9Ag`XXH)0HU8R)XzK^K%Y#xPR#}nyAExy*UKPY$l6`yBu1J;y0*@=eeT<2mUZ&n|D_zHH3FW~RglvQ-X z%T@37)Xwph;<2F$C4^(3o#~)pS4UcwbR_r@A{^al)u_hwM2S_FWZ41msZd1IFfPm5 z!uUoAc4wm)RcsRmI#ws!c9bhO)X$yoo5RnoMKBI-`X-tdoJWPzTb5O4#!II1Lv*4G zej3~?)reDfvOoNFKUwnfN|g{>jY;fja5JOW63mi7_%!3AW%NC`P%zw~wWrl)9E^G` znzG5VR2k8t_)K56adeZE%u+JQNY35S!YM2xeffNWZa=LBe5>w`bRyDyHb;ImC;9mt z`g9NbVh@SM;B<_K?)yFRi$3xTKJ;ZD@yRuwO+QuAtYeCDxs26`qSOffnLQS2zPXvuVcKuEv^xWtK z{N(c?PH`hcJQk96<)y_Gr37*P_gfl8c^S&*IdVlgv!Oga{&GHkvVHlysSLM>1ZtO_ z>5%2|SQEQk_)B%m^78pr8OrBXS!Vp9I`o%{2t7M0%gg6QWhfrcNxr5{=93{lZ>rF< zs4^~}N0p&nG-kwK;t<3QrOER0`Bxck6&a1=C;Q(n%GZi~@_AYr%IEy3{6+W~$FCbO z8k6z(u}eNrB0{1M{YALyuTDht<*lNe3>ystZm3>aYbYyhCxnNfts_O}48FYnsS zGs@5^Crp$#M3wIWo{yiT5Bc2cq5F6NV}2Qz;dW%R=*r9IWCzDselN-r?a21!^G-)Z 
z`Qu{#Wxr*+avXn(vXn)ZKlFV>a2@&<53M{%{*~orIEWN|dHMW#T$CS3mgG0GoD6?& zC@-Jm+PWn@KaZu>kBmQq8>&m +#include + +typedef void (*launch_fn)(); + +int main() { + void* handle = dlopen("./lib_fault.so", RTLD_NOW); + if (!handle) { + fprintf(stderr, "dlopen failed: %s\n", dlerror()); + return 1; + } + + launch_fn run_fail = (launch_fn)dlsym(handle, "run_failing_case"); + launch_fn run_work = (launch_fn)dlsym(handle, "run_working_case"); + + printf("--- Running WORKING Case (Indirection) ---\n"); + run_work(); + printf("C: WORKING case worked.\n\n"); + + printf("--- Running FAILING Case (Direct) ---\n"); + run_fail(); + printf("C: FAILING case worked (Unexpected).\n"); + + dlclose(handle); + return 0; +} diff --git a/mojo/reproduce_fault/reproduce_crash b/mojo/reproduce_fault/reproduce_crash new file mode 100755 index 0000000000000000000000000000000000000000..aab4c4f7fa32a70140f3c8489d58bcf811064e98 GIT binary patch literal 16192 zcmeHOZ)_Y#6`wmdiAj^#NgI;pPqPV0lmvNWJBdkJ(wyTzSqCREj?)&2us!?M@rm=D zd$&&Qv=NpMjR-~&-rP>P^(8c0LMhg2XDK@kxp=&V37qM#O9AHO&I z-g>>ctE%z=A$F|WH}CiU%-grK>zUn|&$V^7$73-irAj@Zh?}VOkOT`RKgJc1gxal^ z;CQcEtLB5hLt~yi;Sor^Qd>2bRx?g8*|{ta(>1IoR3aowc9lxk0#71T@p+KlJQ6V< zreBYHc0xt&w`X{A6{3@*fFd+#N4dFJr^V{x-nsM(+};FJu_Ko3uCm=#wi7zd?Fp59 zQcmdC#rX`=jgV0hO1e~t(r;`%ako=Lqsl++aC_(3E=5u zSLA}OSCcx>*|ERHZZsRsU4DJXPGu^)qvwzf$Igj#!FBRIhgveZtkaY1%}@*{26I`C z!S+GnAN8iV?om{yS{3pVSBJq#e;rR`sm%fT_A6F^xROI<1_wRX5 z(j-4$1n**ecS4&f!G*uL&%^1SqG+T{25^1^Y0=dH&W~n|Ukl*WN2Od3;CBiLdLw`@ z3g9Z?G31;2ij?YvS0h}W1%%Wl=%^Io`u~CElv9}-VP!>LKlKSw+b38yY-{yW6ekZkI_=D$fi z4aKG=z*9U8f9@E6^)uG^@2xX`nC|LnYnW{KnRRY2y`?BEuY&rU{btR&&%*=(pW8bM z#5y-GPh`(q?pmD6A5HI6mX>Z%(EYRemqR@4_h|cFjYPK2SmV>yrMIfAOKrd|#n#!qSHP8)KZ0Yi1t)uD-j7g? 
z9lu;TKJqv95OizSjlq+4h3i%Sa@#w^BK!xIDD@)cyEryeDp})g)7F{O(`vW@Vb9<# zf^}8_yk6W-(zBQ4NMZT6FG9#My#S%_#qlvUNmvbjR76fQbdpn{?^DD_CCWgQfhYq} z2BHi^8Hh3vWgyBxl!5<`49NODS+6Jafx~?IYnkSl`fKKR!Qf)XCwOt?JF(>xwjiL6J3e7g#Q9=6>Lbm8|t!$^3GrLfA|d zc^x2q@T4-~5q};>>-VUv@Q9k^h6OKjU=sIr){`}e(*D0n-uXbRRq?G-pWj8c#}`@6 zOWrZf)BHDB{x7fQGJ;tUIv_W@&!!&jYi1K1~I>lQXcP6e=UUtbX{Ut0W9mw4OX}d zw4DAr;BmD`O@`uK&vRgwfWM&q)T$9Wkx(h;=Vj=`)N&QR?$@+!je5

*RHLPE;nTY(#59H2cKU#aR70T9(T;I(R{pJ$PF0$&zitmOG5c%E=!0<@d` zg#FtU^dGH&KMC9j_?7BcfG>@&RMPJSe}-`Eh7luv+^Gw|YtetKglKy7Vc^TqPh@}1 zW)^>+^`*aw)g{)y!0WR0yCev&Xk4^r>92rOoMAt&12<;X{Mx$}{M6zByTR9*t$Y;t zvN)dJq5G==IEOf^OtqtZdAGsLVafzJ0-aqTZ@YswR_JG)0#s8u z`$Q(!o6OiLH9;zmm3_)IC!^b)|c0yW%_A5na?LjZ71vIM^%45Iq2A_;laUC zm;^W*n(iD`+6iFxQEW~&MUmNddw27pHoNU`tBoc4t)DvFe5j)ZFrIaGo5fPAwOiQ- zI*;sc?zE4zw;yfmv3r{LcedHGRUjc71h6UKwyg(qgzvRqz)225de!0$-_g)*dc6DTU18VD}aZ+=AXBq4z}iF7>Hk zLxbdoA}_mrf%ja@H4JX3@FhA*x+uIGV~#+X*pcBTdqLg2M+c;q&AECn{8dlLS?G>5Sv%R8AQDRONnjN8OV0u>rNioJ|4h3@7CX;?;8v6pe41&qdMVlU%Wp)x*&iZ0>! zyMWSIQ0!&=CsfAG62IsPJq~*sTM94ZOQH2%v-j;Folrv*nTG9u8nQpn_Cmw<;rM?6dyJm+JqY5D zlV#s!xIN*8(s-yQU@!C2NvwROB6=P-%L{%B0_wXmPm}So?7x+Lwmhx#{YgK16SlE{ zz3h*>xE&x$y+{0`v@4Xx*Fk%k-xk^4@-4lJ*a?lpIA|~9-+FFX;uJSx=kqWoOwK=% z@FWSg59?D~!Lpd!51*&lhjFnFQtA6aze8*|&4JPX0lGCM9(jJ#{7_;K+CM@QJCxRt Yb7|L;dSljd_TQp^Oeh}?8H8B%Z@In3VE_OC literal 0 HcmV?d00001 diff --git a/mojo/sigmoid_build_instructions.md b/mojo/sigmoid_build_instructions.md new file mode 100644 index 0000000000..72acae27c4 --- /dev/null +++ b/mojo/sigmoid_build_instructions.md @@ -0,0 +1,61 @@ +# Darktable Sigmoid Mojo Build Instructions + +This guide outlines how to compile the Mojo-based Sigmoid module and its C-bridge for integration with darktable. + +## 1. Compile the Mojo Module (`lib_sigmoid`) + +The Mojo code provides the core processing logic and kernels. + +```bash +cd ~/code/darktable/mojo +pixi run mojo build -I . iop/sigmoid/lib.mojo --emit shared-lib -o libsigmoid_mojo.so +``` + +The resulting `libsigmoid_mojo.so` contains the following exported symbols used by the C bridge: +- `sigmoid_mojo_init` +- `sigmoid_mojo_destroy` +- `sigmoid_mojo_rgb_ratio` +- `sigmoid_mojo_per_channel` + +## 2. Compile the C-Bridge Plugin (`sigmoid.c`) + +The C part (`src/iop/sigmoid.c`) handles the darktable user interface and parameter management. It loads the Mojo shared library at runtime. + +### Step-by-Step Build + +**A. 
Generate Introspection Code:** +Use darktable's introspection tool to generate boilerplate for the plugin parameters. +```bash +perl tools/introspection/parser.pl src/ src/iop/sigmoid.c /tmp/introspection_sigmoid.c +``` + +**B. Compile and Link:** +Compile the generated code and link it into a shared library. + +```bash +# Compilation +gcc -O3 -march=native -fPIC -fopenmp \ + $(pkg-config --cflags gtk+-3.0 glib-2.0 lcms2) \ + -Isrc -Isrc/iop -Ibuild/bin -include common/module_api.h -include iop/iop_api.h \ + -c /tmp/introspection_sigmoid.c -o /tmp/introspection_sigmoid.o + +# Linking +gcc -shared -fPIC -fopenmp /tmp/introspection_sigmoid.o \ + -L/usr/lib/darktable -ldarktable -lm -lgomp \ + -o libsigmoid.so +``` + +## 3. Deployment + +Both shared libraries must be placed in the darktable plugins directory so that darktable can find the plugin and the plugin can find the Mojo library. + +```bash +# Copy the Mojo library +sudo cp mojo/libsigmoid_mojo.so /usr/lib/darktable/plugins/ + +# Copy the C bridge plugin +sudo cp libsigmoid.so /usr/lib/darktable/plugins/ +``` + +> [!IMPORTANT] +> The C code in `src/iop/sigmoid.c` uses `dlopen("libsigmoid_mojo.so", RTLD_LAZY | RTLD_LOCAL)` in `init_global` to load the Mojo module. Because the library is opened by bare file name, the dynamic loader resolves it through its normal search path (`LD_LIBRARY_PATH`, the `ld.so` cache, the default library directories) rather than relative to the plugin, so make sure the plugins directory is on that path (for example via `LD_LIBRARY_PATH`); if the load fails, `process()` silently falls back to the built-in C implementation. 
diff --git a/mojo/validate_sigmoid b/mojo/validate_sigmoid new file mode 100755 index 0000000000000000000000000000000000000000..413771c9134ccd5e4f3fc8ed0b776def001e6d1c GIT binary patch literal 16344 zcmeHOeQX@X6`wm}a08AVh{1%9EKvdh>W%Frt_d|+$2n(B<6z<>O+R3>_O9)1&bRK} zUa)~EmO>*AV>M7IMf{cakBU;MqKH;01!OmEQfO3}R%(rC)d&^oTpHpQN)t+ReQ##o zdT+U-75`Liccj}l@Ap3D&D+_%+1;4~iLIUSSd3ArV0SR&rfLPENx|+`SplN4R<;<9 z*R$)`#lT<0F(KCk0jVgp6~(kl;s~RiC*@&!t<)n*5h6vqa%Ht_fGCswK)Xsbl6;uH zDdQkYa=$$VjiD&~=+kp?!4nPIVQyv`gnXv0m_97+j!H^)$fDh8X?I%M`4j^}xuld& z%nAN_WIw`mB^p5?in>^k;_rzS$bFb<9K~}kO1QmQX&0u7)F(>q^}gv@AE&(@X*Yj^ za2%#9WqU-ay}Q8<^ZD-<8Xntad*#kwBc3QIMCIw=xm*f3o`x;kcCz#9uG)9yOvl+~ zecST~)V}RsuK(c!Ym!c1%epm5t0C#6vZD>7tt|~L>(q>^HZycv13&b~;}(v2Ylz?W zmkGKgfctRyB0!ga7-q}M;70&gO6X&=rTkz_rSRLz;GJb~yq%Qt^DN*B{K7>8pp?E< z2A>98fnT^>1wbi3-GIxXTx8M&+!A&*+oK7X#y=a$dKqRI8PDwBYxEE8H3m#4$*g35 z(#_Z)fOFiB(zIs`n@);lJj+g}*}zEJNqGaHm>C;}qdQ`!K)Zj`GzOfMnRM>A0R>GL zbni<$p3Tq`4vrSKWoy@_4x?FJ=hvFm^?rTrI%ah3=r*7od(g>vc6vv5N77B%JIuZ$ zhBP?rresKlP%Wtl9gXvGSOKHC0%|yx7mnfv69aTF;B>t#b)0IvlVl~9UFLiOuYv=T zf9|m(Fm&Q<0(dNRKcsxlED`vuyx$Q%F7X~s$O$KYYNNn$T=I)Vus3@AKA&x&Pt2I$WjBR&3V{HSSyK^#tKRH6(-8Hh3v zWgyBxl!5=j47^iw{rmdCKUL`ymD6h&)4ww1#S1g~!Jkwe=e;ege+A&es#9>RS*}41 z`5~0eoi0FG^<(65sxvpk`R9?xso2~U=buF$r(ko>aQ1BPg?S0TG z)V;1U_U?dMv-}}op@WlcCOFV1D?g9qwsYQf(Dj!<3A?_ea2h)~u%LfD#Do0}+l1C?1+Gxn3dej0boNB$b5M=lbG&?f=x^?`po>1>#A>|)6nD_4d`(vcv*jf zdy#Ga5i~@b%hAVV<+PCIA4Aj0x>t~$0FSv}V*@|u4ikBhyihFpo{-E#(|MM?1)6jl z&|%wC+~M@|pM);HguTlj@!R|ku+ZlBxys4>b?D<{Vg^uv6EhHS9$GE@5+ay^5tCg4 z%EzSg{0D`?TiskTSy!!9_?@i0TFSnGGTzaX z`AUpqvQm+vLm=V_hj`9e`mIa(sr4%J`$6zBUb+5t?O(~*g{VXsh%yjmAj&|LfhYq} z2BHi^8Hh6Q|D1uC{A#i@V-DIIlvNq!(<7!gWOy!q!OA%B&B(Ff>(d?b<3U41gR%=g zQ(30xqz09qWOmR=aXXMKVx8?>TN9fV&sD^yEpUj!MXYycM@OQ!cZ0N0lCEjlmdc!z z;~9fVx6e$n{-kZDvLnRu(;^&hP&(Xf(o#~cr@&`2OM#DPeQr{@DPVpRkI-+Y)6Ifj zk%Fppm`V8LI;c1vxVleORT=OGCE!T09MMw7_7wQ;23<8(rQ7|It3XQ9W_k?hUOPFa z;3q%Z!ro$c6gLgMhR7tYVh8+qf(_WCc7N6bOB1^yT2R`QRU?ZSMOd5*j8qigU$KHh 
zE>#vhLv9KZH$0NGJzD`BT*R)axP2v1d?C~LcA>Bv=nPN|=-k;tVH_wH{DzGscK>!3 z8?B99bJ-}VIS?J93`7}-G7x1T%0QHXC<9Rjq73|(XMonx(YiUB z@8ViLEdJUyK5JiC<28GQkY6o`zdr98$O~3NG@8@c{!0t!ZnO z6`X@(IVR<_Chc_r`)kqaC0{2g5oyNqa$O#NXvQ)ti2vMzAB?f!dQU7p5+Hn3210R@ zJ*^3(_WxcG{Q(zT6+UP3{PCm%}UdHwOMUanj4#28k-w&q3}l(%;QDu z&xNoGo|hP`+@*Eqid4X1TS|X9;Bgr3^P%`|ka(k9S1R4`XIIpx>xS?lFHx+453sFaHTM`iGzazC}~Fn^jRFM^ix^DC}j z$HM3Tx7?(L(K>qabe8L1sM;v zMq29pt^ge459eP6T=A_JDtZ||UjTf?f|_63hx+kF;$@AGGWH-^%tnBdc7?=b&gTLJis_ql-NNs~&q4r~(cFMvi4Wn~=dw0S} zY};(WO8m{A+t%LQ)d4WvFO7sQrTXUW%-FK^u1)P*jk`KKdlNg19qpU8CJfpjpwWH+ zT=^f~a8M-th;0G5kHOc)?F5F6FFYCVJ-992;NWXwi>N2|6F@|`k-?WjKyssl4{_I_ zEeM8{ag8B(35wejy6ysBmXk8F8QX#=QgAT(G8x$d-$cP*ulg>r6~0BnM?@pA?SWE@ z9WS}bL2lIWEqMxqyD)qS_lf6MZs8~rFcseSdS)L`FD>X0)!=2dtq!KLYG2lYy*`e` zRM`4qr$-n^hs?|nQ?0QSZUo`fOAE<9JDqXdR1srsax-2OtcLfv3#0p>lJ zyHWczpC!6dDpR{Ok0c)&RB&Gf+0%TN=(ubUr^Q$$dzyFZfZ?2x>}mc?l;+Ez!b>>* z9zbz!O7=7#C8`KBKX0T*^d6AoT$gy7?-JFdeK>vzvm{h7H)K!qZK5>a4(E^hzfan4 zk`8J9PBdtbbNq1r#sR~e$Z7b(|BL+p#PJm@6bE4sKqY9eNIRlONl{WMhW~C+4gm?; z)BiTny3hjw*-3NuXvm)CIYeQ}VNnUk|G2cL{4+(WOZqkj7buiJf}Vs5s*pX+6N#=Q zMM=Z$KNYgCUm+BUJ{YnOxBo22gV+BP(w?aHFYIxi9JHU6_C%GCeK>xcH^c0VUxTPh z`_*VaXgFTtiT)mJY6AB3oG^P7;1+no-vR-ST^c4dfA{wV%kziV9~QEsar7== zs6zI%Pi|@rYBRQ45dTQ_M9+gHXiv}Ed1=4Lw-gn!BZ}*PgZ4DP)*FRB#Yt|+&ga1x zF^YeWCM3Jd*egQpAFE~iQr*YNK79RStXP6HE})|0sC3vW2MoUdz +#include +#include +#include + +// Mock the darktable module structure minimally +typedef struct dt_iop_module_so_t { + void *data; +} dt_iop_module_so_t; + +typedef void (*init_global_fn)(dt_iop_module_so_t *); +typedef void (*cleanup_global_fn)(dt_iop_module_so_t *); + +int main(int argc, char **argv) { + if (argc < 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } + + const char *plugin_path = argv[1]; + printf("--- Validating Plugin: %s ---\n", plugin_path); + + void *handle = dlopen(plugin_path, RTLD_NOW | RTLD_GLOBAL); + if (!handle) { + fprintf(stderr, "FAILED to dlopen %s: %s\n", plugin_path, dlerror()); + return 
1; + } + printf("SUCCESS: Plugin loaded.\n"); + + init_global_fn init_global = (init_global_fn)dlsym(handle, "init_global"); + cleanup_global_fn cleanup_global = (cleanup_global_fn)dlsym(handle, "cleanup_global"); + + if (!init_global) { + fprintf(stderr, "FAILED: Could not find symbol 'init_global'\n"); + return 1; + } + + printf("SUCCESS: Found 'init_global'. Calling it now...\n"); + + dt_iop_module_so_t so = { .data = NULL }; + + // This will trigger the dlopen for libsigmoid_mojo.so + init_global(&so); + + if (so.data == NULL) { + printf("FAILED: init_global did not set so.data. Mojo library likely failed to load or dlsym failed.\n"); + } else { + printf("SUCCESS: init_global executed and so.data = %p\n", so.data); + + // We can't easily peek into the opaque struct without the header, + // but we can check if cleanup works. + if (cleanup_global) { + printf("Calling cleanup_global...\n"); + cleanup_global(&so); + printf("SUCCESS: cleanup_global executed.\n"); + } + } + + dlclose(handle); + printf("--- Validation Complete ---\n"); + return 0; +} diff --git a/src/iop/sigmoid.c b/src/iop/sigmoid.c index cbd96d7725..b9301431b7 100644 --- a/src/iop/sigmoid.c +++ b/src/iop/sigmoid.c @@ -27,9 +27,11 @@ #include "gui/gtk.h" #include "gui/presets.h" #include "iop/iop_api.h" +#include "common/opencl.h" #include #include +#include DT_MODULE_INTROSPECTION(3, dt_iop_sigmoid_params_t) @@ -180,10 +182,38 @@ typedef struct dt_iop_sigmoid_gui_data_t dt_gui_collapsible_section_t display_luminance_section, primaries_section; } dt_iop_sigmoid_gui_data_t; +typedef struct { + float white_target[4]; + float black_target[4]; + float paper_exposure[4]; + float film_fog[4]; + float film_power[4]; + float paper_power[4]; + float contrast_power[4]; + float skew_power[4]; + float hue_preservation[4]; + float pipe_to_base[16]; + float base_to_rendering[16]; + float rendering_to_pipe[16]; +} SigmoidMojoParams; + +typedef void (*mojo_init_fn)(uintptr_t *ctx, int use_gpu); +typedef void 
(*mojo_destroy_fn)(uintptr_t ctx); +typedef void (*mojo_rgb_ratio_fn)(uintptr_t ctx, float *in, float *out, int32_t width, int32_t height, void *params); +typedef void (*mojo_per_channel_fn)(uintptr_t ctx, float *in, float *out, int32_t width, int32_t height, void *params); + typedef struct dt_iop_sigmoid_global_data_t { int kernel_sigmoid_loglogistic_per_channel; int kernel_sigmoid_loglogistic_rgb_ratio; + + void *mojo_lib; + uintptr_t mojo_ctx_cpu; + uintptr_t mojo_ctx_gpu; + mojo_init_fn mojo_init; + mojo_destroy_fn mojo_destroy; + mojo_rgb_ratio_fn mojo_rgb_ratio; + mojo_per_channel_fn mojo_per_channel; } dt_iop_sigmoid_global_data_t; @@ -760,6 +790,40 @@ void process_loglogistic_per_channel(dt_develop_t *dev, } } +static SigmoidMojoParams _build_mojo_params(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece) +{ + const dt_iop_sigmoid_data_t *d = piece->data; + SigmoidMojoParams p = {0}; + + for(int i = 0; i < 4; i++) { + p.white_target[i] = d->white_target; + p.black_target[i] = d->black_target; + p.paper_exposure[i] = d->paper_exposure; + p.film_fog[i] = d->film_fog; + p.film_power[i] = d->film_power; + p.paper_power[i] = d->paper_power; + p.contrast_power[i] = d->film_power; + p.skew_power[i] = d->paper_power; + p.hue_preservation[i] = d->hue_preservation; + } + + const dt_iop_order_iccprofile_info_t *pipe_work_profile = dt_ioppr_get_pipe_work_profile_info(piece->pipe); + const dt_iop_order_iccprofile_info_t *base_profile = _get_base_profile(self->dev, pipe_work_profile, d->base_primaries); + dt_colormatrix_t pipe_to_base_transposed, base_to_rendering_transposed, rendering_to_pipe_transposed; + dt_colormatrix_t pipe_to_base, base_to_rendering, rendering_to_pipe; + + _calculate_adjusted_primaries(d, pipe_work_profile, base_profile, pipe_to_base_transposed, base_to_rendering_transposed, rendering_to_pipe_transposed); + transpose_3xSSE(pipe_to_base_transposed, pipe_to_base); + transpose_3xSSE(base_to_rendering_transposed, base_to_rendering); + 
transpose_3xSSE(rendering_to_pipe_transposed, rendering_to_pipe); + + memcpy(p.pipe_to_base, pipe_to_base, sizeof(pipe_to_base)); + memcpy(p.base_to_rendering, base_to_rendering, sizeof(base_to_rendering)); + memcpy(p.rendering_to_pipe, rendering_to_pipe, sizeof(rendering_to_pipe)); + + return p; +} + /** process, all real work is done here. */ void process(dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, @@ -768,8 +832,22 @@ void process(dt_iop_module_t *self, const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out) { - // this is called for preview and full pipe separately, each with its own pixelpipe piece. const dt_iop_sigmoid_data_t *module_data = piece->data; + const dt_iop_sigmoid_global_data_t *gd = self->global_data; + + if(gd && gd->mojo_lib && gd->mojo_per_channel && gd->mojo_rgb_ratio) + { + SigmoidMojoParams p = _build_mojo_params(self, piece); + const int w = roi_in->width, h = roi_in->height; + int use_gpu = dt_opencl_is_enabled() ? 1 : 0; + uintptr_t ctx = use_gpu ? 
gd->mojo_ctx_gpu : gd->mojo_ctx_cpu; + + if(module_data->color_processing == DT_SIGMOID_METHOD_PER_CHANNEL) + gd->mojo_per_channel(ctx, (float *)ivoid, (float *)ovoid, w, h, &p); + else + gd->mojo_rgb_ratio(ctx, (float *)ivoid, (float *)ovoid, w, h, &p); + return; + } if(module_data->color_processing == DT_SIGMOID_METHOD_PER_CHANNEL) { @@ -781,87 +859,42 @@ void process(dt_iop_module_t *self, } } -#ifdef HAVE_OPENCL -int process_cl(dt_iop_module_t *self, - dt_dev_pixelpipe_iop_t *piece, - cl_mem dev_in, - cl_mem dev_out, - const dt_iop_roi_t *const roi_in, - const dt_iop_roi_t *const roi_out) -{ - const dt_iop_sigmoid_data_t *const d = piece->data; - const dt_iop_sigmoid_global_data_t *const gd = self->global_data; - cl_int err = CL_MEM_OBJECT_ALLOCATION_FAILURE; - const int devid = piece->pipe->devid; - const int width = roi_in->width; - const int height = roi_in->height; - const float white_target = d->white_target; - const float paper_exp = d->paper_exposure; - const float film_fog = d->film_fog; - const float contrast_power = d->film_power; - const float skew_power = d->paper_power; +void init_global(dt_iop_module_so_t *self) +{ + dt_iop_sigmoid_global_data_t *gd = calloc(1, sizeof(dt_iop_sigmoid_global_data_t)); + self->data = gd; - const dt_iop_order_iccprofile_info_t *pipe_work_profile = dt_ioppr_get_pipe_work_profile_info(piece->pipe); - const dt_iop_order_iccprofile_info_t *base_profile = _get_base_profile(self->dev, pipe_work_profile, d->base_primaries); - dt_colormatrix_t pipe_to_base_transposed, base_to_rendering_transposed, - rendering_to_pipe_transposed, pipe_to_base, base_to_rendering, rendering_to_pipe; - _calculate_adjusted_primaries(d, pipe_work_profile, base_profile, pipe_to_base_transposed, base_to_rendering_transposed, rendering_to_pipe_transposed); - transpose_3xSSE(pipe_to_base_transposed, pipe_to_base); - transpose_3xSSE(base_to_rendering_transposed, base_to_rendering); - transpose_3xSSE(rendering_to_pipe_transposed, rendering_to_pipe); - 
const cl_mem dev_pipe_to_base - = dt_opencl_copy_host_to_device_constant(devid, sizeof(pipe_to_base), pipe_to_base); - const cl_mem dev_base_to_rendering - = dt_opencl_copy_host_to_device_constant(devid, sizeof(base_to_rendering), base_to_rendering); - const cl_mem dev_rendering_to_pipe - = dt_opencl_copy_host_to_device_constant(devid, sizeof(rendering_to_pipe), rendering_to_pipe); - if(dev_pipe_to_base == NULL || dev_base_to_rendering == NULL || dev_rendering_to_pipe == NULL) - goto cleanup; - - if(d->color_processing == DT_SIGMOID_METHOD_PER_CHANNEL) + gd->mojo_lib = dlopen("libsigmoid_mojo.so", RTLD_LAZY | RTLD_LOCAL); + if(gd->mojo_lib) { - const float hue_preservation = d->hue_preservation; - err = dt_opencl_enqueue_kernel_2d_args( - devid, gd->kernel_sigmoid_loglogistic_per_channel, width, height, CLARG(dev_in), CLARG(dev_out), - CLARG(width), CLARG(height), CLARG(white_target), CLARG(paper_exp), CLARG(film_fog), CLARG(contrast_power), - CLARG(skew_power), CLARG(hue_preservation), CLARG(dev_pipe_to_base), CLARG(dev_base_to_rendering), CLARG(dev_rendering_to_pipe)); - } - else - { - const float black_target = d->black_target; + gd->mojo_init = (mojo_init_fn)dlsym(gd->mojo_lib, "sigmoid_mojo_init"); + gd->mojo_destroy = (mojo_destroy_fn)dlsym(gd->mojo_lib, "sigmoid_mojo_destroy"); + gd->mojo_rgb_ratio = (mojo_rgb_ratio_fn)dlsym(gd->mojo_lib, "sigmoid_mojo_rgb_ratio"); + gd->mojo_per_channel = (mojo_per_channel_fn)dlsym(gd->mojo_lib, "sigmoid_mojo_per_channel"); - err = dt_opencl_enqueue_kernel_2d_args(devid, gd->kernel_sigmoid_loglogistic_rgb_ratio, width, height, - CLARG(dev_in), CLARG(dev_out), CLARG(width), CLARG(height), - CLARG(white_target), CLARG(black_target), CLARG(paper_exp), - CLARG(film_fog), CLARG(contrast_power), CLARG(skew_power)); + if (gd->mojo_init && gd->mojo_destroy) + { + gd->mojo_init(&gd->mojo_ctx_cpu, 0); // CPU context + gd->mojo_init(&gd->mojo_ctx_gpu, 1); // GPU context + } } - -cleanup: - 
dt_opencl_release_mem_object(dev_pipe_to_base); - dt_opencl_release_mem_object(dev_base_to_rendering); - dt_opencl_release_mem_object(dev_rendering_to_pipe); - return err; -} -#endif // HAVE_OPENCL - -void init_global(dt_iop_module_so_t *self) -{ - const int program = 36; // sigmoid.cl, from programs.conf - dt_iop_sigmoid_global_data_t *gd = malloc(sizeof(dt_iop_sigmoid_global_data_t)); - - self->data = gd; - gd->kernel_sigmoid_loglogistic_per_channel = dt_opencl_create_kernel(program, "sigmoid_loglogistic_per_channel"); - gd->kernel_sigmoid_loglogistic_rgb_ratio = dt_opencl_create_kernel(program, "sigmoid_loglogistic_rgb_ratio"); } void cleanup_global(dt_iop_module_so_t *self) { - const dt_iop_sigmoid_global_data_t *gd = self->data; - dt_opencl_free_kernel(gd->kernel_sigmoid_loglogistic_per_channel); - dt_opencl_free_kernel(gd->kernel_sigmoid_loglogistic_rgb_ratio); - free(self->data); + dt_iop_sigmoid_global_data_t *gd = self->data; + if(gd) + { + if(gd->mojo_destroy) + { + if(gd->mojo_ctx_cpu) gd->mojo_destroy(gd->mojo_ctx_cpu); + if(gd->mojo_ctx_gpu) gd->mojo_destroy(gd->mojo_ctx_gpu); + } + if(gd->mojo_lib) dlclose(gd->mojo_lib); + free(gd); + } self->data = NULL; } diff --git a/src/iop/sigmoid_mojo.h b/src/iop/sigmoid_mojo.h new file mode 100644 index 0000000000..c91b7ee5ef --- /dev/null +++ b/src/iop/sigmoid_mojo.h @@ -0,0 +1,37 @@ +#pragma once +#include + +// Opaque context (hides Mojo DeviceContext internals) +typedef void* SigmoidMojoCtx; + +// Parameters passed per-frame (mirrors SigmoidMojoParams in lib.mojo) +typedef struct { + float white_target; + float black_target; + float paper_exposure; + float film_fog; + float film_power; + float paper_power; + float hue_preservation; + // 4x4 float matrices (row-major, 16 floats each) + float pipe_to_base[16]; + float base_to_rendering[16]; + float rendering_to_pipe[16]; +} SigmoidMojoParams; + +// Lifecycle +SigmoidMojoCtx sigmoid_mojo_init(int use_gpu); // 1=GPU, 0=CPU +void 
sigmoid_mojo_destroy(SigmoidMojoCtx ctx); + +// Processing (in/out are RGBA float32, stride = width * 4 * sizeof(float)) +void sigmoid_mojo_rgb_ratio( + SigmoidMojoCtx ctx, + const float* in, float* out, + int width, int height, + const SigmoidMojoParams* p); + +void sigmoid_mojo_per_channel( + SigmoidMojoCtx ctx, + const float* in, float* out, + int width, int height, + const SigmoidMojoParams* p); From 90ec7447c140e7d614088cbcb6cab27b4079568e Mon Sep 17 00:00:00 2001 From: maxchisto Date: Tue, 14 Apr 2026 17:14:21 -0700 Subject: [PATCH 2/6] detele unused bits --- src/external/OpenCL | 1 - src/external/lua-scripts | 1 - src/iop/sigmoid_mojo.h | 37 ------------------------------------- 3 files changed, 39 deletions(-) delete mode 160000 src/external/OpenCL delete mode 160000 src/external/lua-scripts delete mode 100644 src/iop/sigmoid_mojo.h diff --git a/src/external/OpenCL b/src/external/OpenCL deleted file mode 160000 index 8a97ebc88d..0000000000 --- a/src/external/OpenCL +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 8a97ebc88daa3495d6f57ec10bb515224400186f diff --git a/src/external/lua-scripts b/src/external/lua-scripts deleted file mode 160000 index db505f1e0a..0000000000 --- a/src/external/lua-scripts +++ /dev/null @@ -1 +0,0 @@ -Subproject commit db505f1e0a089b210f5afa653e53ebd32e5a82b0 diff --git a/src/iop/sigmoid_mojo.h b/src/iop/sigmoid_mojo.h deleted file mode 100644 index c91b7ee5ef..0000000000 --- a/src/iop/sigmoid_mojo.h +++ /dev/null @@ -1,37 +0,0 @@ -#pragma once -#include - -// Opaque context (hides Mojo DeviceContext internals) -typedef void* SigmoidMojoCtx; - -// Parameters passed per-frame (mirrors SigmoidMojoParams in lib.mojo) -typedef struct { - float white_target; - float black_target; - float paper_exposure; - float film_fog; - float film_power; - float paper_power; - float hue_preservation; - // 4x4 float matrices (row-major, 16 floats each) - float pipe_to_base[16]; - float base_to_rendering[16]; - float rendering_to_pipe[16]; -} 
SigmoidMojoParams; - -// Lifecycle -SigmoidMojoCtx sigmoid_mojo_init(int use_gpu); // 1=GPU, 0=CPU -void sigmoid_mojo_destroy(SigmoidMojoCtx ctx); - -// Processing (in/out are RGBA float32, stride = width * 4 * sizeof(float)) -void sigmoid_mojo_rgb_ratio( - SigmoidMojoCtx ctx, - const float* in, float* out, - int width, int height, - const SigmoidMojoParams* p); - -void sigmoid_mojo_per_channel( - SigmoidMojoCtx ctx, - const float* in, float* out, - int width, int height, - const SigmoidMojoParams* p); From 920af7a0c5786a01c74a11b7d29dcebe64aaaf92 Mon Sep 17 00:00:00 2001 From: maxchisto Date: Tue, 14 Apr 2026 17:17:18 -0700 Subject: [PATCH 3/6] delete more unused bits --- mojo/reproduce_fault/README.md | 28 ----------- mojo/reproduce_fault/build.sh | 10 ---- mojo/reproduce_fault/lib_fault.mojo | 69 --------------------------- mojo/reproduce_fault/lib_fault.so | Bin 57088 -> 0 bytes mojo/reproduce_fault/main.c | 26 ---------- mojo/reproduce_fault/reproduce_crash | Bin 16192 -> 0 bytes 6 files changed, 133 deletions(-) delete mode 100644 mojo/reproduce_fault/README.md delete mode 100755 mojo/reproduce_fault/build.sh delete mode 100644 mojo/reproduce_fault/lib_fault.mojo delete mode 100755 mojo/reproduce_fault/lib_fault.so delete mode 100644 mojo/reproduce_fault/main.c delete mode 100755 mojo/reproduce_fault/reproduce_crash diff --git a/mojo/reproduce_fault/README.md b/mojo/reproduce_fault/README.md deleted file mode 100644 index 3613ba0438..0000000000 --- a/mojo/reproduce_fault/README.md +++ /dev/null @@ -1,28 +0,0 @@ -## The Problem -When a Mojo function is exported to C and launches a GPU kernel via `elementwise`, any variables "captured" by the kernel's closure are kept on the **Host (CPU) Stack**. - -- **Manual Loop (CPU):** Works fine. The Mojo function can iterate and access its own stack variables locally. -- **`elementwise` (CPU or GPU):** Fails via FFI. 
The utility seems to assume a Mojo-managed stack layout for its closure handling, which is violated when the entry point is a C function. -- **GPU Failure:** Specifically, the GPU attempts to reach back to the CPU's stack address, which is not mapped in the GPU's page tables, causing a `SIGSEGV`. - -## Evidence -- **Host Stack Address:** `0x7fff266bb2a4` -- **GPU Fault Address:** `0x7fff266ba000` -The GPU is attempting to read memory at the exact location of the C program's stack. - -## Files -- `lib_fault.mojo`: Mojo library capturing a `BigParams` struct. -- `main.c`: C program that `dlopen`s the Mojo library. -- `build.sh`: Script to compile and run the reproduction. - -## Running the reproduction -```bash -./build.sh -``` -Expected output: -```text -C: Host Stack Address around 0x7fff... -C: Calling Mojo launch_gpu_kernel()... -Mojo: Launching GPU kernel capturing a BigParams struct... -Memory access fault by GPU node-1 ... on address 0x7fff... -``` diff --git a/mojo/reproduce_fault/build.sh b/mojo/reproduce_fault/build.sh deleted file mode 100755 index a71ee51d48..0000000000 --- a/mojo/reproduce_fault/build.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash -set -e - -echo "--- Building Mojo Library ---" -pixi run mojo build lib_fault.mojo --emit shared-lib -o lib_fault.so - -echo "--- Building C Driver ---" -clang main.c -Wl,-rpath=. -ldl -o reproduce_crash - -./reproduce_crash || echo "Exited with code $?" 
diff --git a/mojo/reproduce_fault/lib_fault.mojo b/mojo/reproduce_fault/lib_fault.mojo deleted file mode 100644 index 8ee28de55a..0000000000 --- a/mojo/reproduce_fault/lib_fault.mojo +++ /dev/null @@ -1,69 +0,0 @@ -from std.gpu.host import DeviceContext -from std.algorithm.functional import elementwise -from std.utils import IndexList -from std.memory.unsafe_pointer import alloc, UnsafePointer - - -fn internal_gpu_launcher(dctx: DeviceContext, p_src: Int, p_dst: Int) raises: - @parameter - @always_inline - fn kernel[ - sw: Int, rank: Int, align: Int - ](indices: IndexList[rank]) capturing -> None: - UnsafePointer[Float32, MutAnyOrigin](unsafe_from_address=p_dst)[ - 0 - ] = UnsafePointer[Float32, ImmutAnyOrigin](unsafe_from_address=p_src)[0] - - elementwise[kernel, 1, target="cpu"](1, dctx) - dctx.synchronize() - - -@export -fn run_working_case(): - try: - print("Mojo: [WORKING] Launching via internal_gpu_launcher...") - var dctx = DeviceContext() - var dev_data = dctx.enqueue_create_buffer[DType.float32](1) - var dev_out = dctx.enqueue_create_buffer[DType.float32](1) - - var p_src = Int(dev_data.unsafe_ptr()) - var p_dst = Int(dev_out.unsafe_ptr()) - - internal_gpu_launcher(dctx, p_src, p_dst) - print("Mojo: [WORKING] Success.") - except e: - print("Mojo: [WORKING] Error:", String(e)) - - -@export -fn run_failing_case(): - try: - print( - "Mojo: [FAILING] Direct launch from @export (SIGSEGV/Fault" - " expected)..." 
- ) - var dctx = DeviceContext() - var dev_data = dctx.enqueue_create_buffer[DType.float32](1) - var dev_out = dctx.enqueue_create_buffer[DType.float32](1) - - var p_src = Int(dev_data.unsafe_ptr()) - var p_dst = Int(dev_out.unsafe_ptr()) - - @parameter - @always_inline - fn kernel[ - sw: Int, rank: Int, align: Int - ](indices: IndexList[rank]) capturing -> None: - UnsafePointer[Float32, MutAnyOrigin](unsafe_from_address=p_dst)[ - 0 - ] = UnsafePointer[Float32, ImmutAnyOrigin]( - unsafe_from_address=p_src - )[ - 0 - ] - - elementwise[kernel, 1, target="cpu"](1, dctx) - dctx.synchronize() - print("Mojo: [FAILING] Success (Unexpected!)") - except e: - print("Mojo: [FAILING] Error:", String(e)) diff --git a/mojo/reproduce_fault/lib_fault.so b/mojo/reproduce_fault/lib_fault.so deleted file mode 100755 index ffd6179a977e2fbe51cbabeae2ddfb829df4404b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 57088 zcmeHw3w%`7o&TLAz@U(cR%?8;4mjG-hA@-oBm&NaKuJi+{{3f zzqzsSdtvS2gy(=;BjhW9NmhzG5mHd&nH8fPUTp8jXYg3gpE!Pm!ZDgcqARVkEb7c zC_Rkl@c3~N%5oH?az{kDBchxPkBh;Qp_y01a3nmPqCR726ZvGw`UKd5zeIy{zxazh zgE2fN%E?fcy9wn8&#_)|*cXcSvW<%oZ%G@}?nU_J;#WWaW@XWu`)+xoy}n|G`M>@+ z=eiv~%6aNbGYJ>!{$==4IgX#zthfpo>~g?f+5XM77vHsOCvG!d7op&V8Bz#;YX-Oj z4Q9$8$pUZ80{;o>&Q#B%S?~#DfdiZo&yQzmw>L|>-^v27%2H2N7JNck>RFMco^NNV z=l5CQ!7TMm$^!pG7JN=*f#02_-D2do;%B_(0Wj4FMEosQ&-qH@l|1pKNj$t6!G+5C zN{5*IxPU7@-?FxFyRS3qk3@Yw#phYv;0tOIZCOiaREsQbn9~|=*B1MmS~Z@QImH*~ z_WMFD?f%x5J2WNI)$Usrj;v^DU*-$=J2l1Kxw<{DaIvpWTiFuO=7ig$T6feJh-m(( zrrc0JcfM~_CT~H+@2+i*8b@(txJ=+>RKXNAetp_XEYMZ-oqdpD0fFomeMUj zLxODc4Be-BXpV&2@O{ka$ zKeeRFmFx;WFE1^(pS7);l3bckn60I#w)Lv@d7d87MTCk)uOi7~thVb%mZA*I) zD0YgJ0K$?=rQYkQo#QJlag---OG_%08O`A~ZAM#QMj#y2W(56_6;UGH8EvR&MoC9Y zcgqZ|eP!p2pce9XwMJ*4DZW04qticm5&xGLho;gfuL=L?GU5JVBuTk*&zHUeageL* z5U{SxS|(Ed$3<}|pQSu)6zg$N$X^LB5b&+rI2IBvtT*_`LQK*A6y@*$%SaopfgS?7#aB5p#b{$-f z1*JK4@bhG5gf1OisA?=#)xjkTq=W@J_+*xo|G7;E&)2~lb#RLg-mHTc=-?eX_ysz6 
zw+^nC%RM^yhjjTj>fofO$ZN9>ez621yjKVRunxXO2fsuIKX-d>1kR1Xxe@qBAA$d| zeDqDV_n2Aj8@C5GhPq)Onv)n*d-s_iXI)EFJOOax>Yw4SkxU4fU}2mnlvn zZm5IBKTmNIZ9|PL{uzps2p?L&;&)J-MBI>z#Um6a(Kcjf@#PdJkv3#y@g|CsC>ttZ z@qeN?iLfDs#TQbXMAy*qQvh6jJ;e(sew4*)DNdqlXpqHgC{7}3Xn@5_DNdqkXd8=v zoZ=*shPEKC_RpFIfZ9K9Dh1Ue(T`v%O1{ab;)29c8uU=8#B$|&6j7AN4RMqoqRjmj z&rvY#lo~s(?itBd_q=?aYC53)>QwX!RQM9y7c{p8RN?9BBYNnW^)IMP=_*ruXZ;uj zm7CSrU!s%MzF8{)Ph7keu=pH|#es3H0GO6Oo*h4`Z|GL!wOrYQnv|Q}x4Lg}-|Swz zNWJ^m5e$>(Cj`c2#Ta}uz`F9gprVkWt+;p4U-e*}&0I-0u@@bO=Q$A#Qe%H|JQ@GDw-X6( zf5q^fisFquVmpK-pdMUr8x&fnH};_I2*EH5chQRT_D{YMdDZ@1wr+y=-^23m90YB; zcMw6Tu>;GU@y}Bid-oOWKUuJz<+N;HZ@U*I>MYxL**b8u|KtUAw!)2dHmACQHxj*E z?c2prIIbpc9O;^*_8uJBKe8(RlanC14Xw83XrbD-#&(+_`I|^~zkbjZ{RCwyu&loT z3r+2tX=C{;8*(Trs)``*(5qOOG$bzS#vmqZCruceJB$jo_<(~D1nNhSqW0ZmBQ@L$ z?92$MD^$v|eUohjq3DU%=A3Ejk~n;ZwUNZ3lcEBLm)`;oO#o~&sj+)(M`_T2$YxZ< z5CIOB?Sq7d4QP1#zGFLtByZm)+W>2Iy^RP=MSL3z@3C!V;Ul&!EZk+g7a<0EJ*!iV zt+5rVv8c_e#unR(h$`$f?CzcsQ2G~?F;N0458Awq8~5r6Yd;JgzI z&KuigBd=E&4?ZIf{imZr^h~CqS|X*g(X$ zPNNd(Yn^3DEpzY0I2$h-oY?b>FB9t^!PtLs_YElDKiPr6+duB>EVjmGcJH)Dh(9-if$H67J=5Y}Cl>$0RaEOPn~HUiT>N)}mi3FjA8aqV_$eomVlzUv_+7Zk zu=uU%i~j<-S_XjOa4zxB>>S`5$U_{!vVmOKUMziDJ--FF+zQyU;G_FTZXIZPb&Yz# zdYc(haXSK3_C0skrQ;r`r()mWl-;+Utx0eHcWiMYJrKvJa< zDY%H5)_))4=!xxfyl%O&4wKt6vs{T55N-CJN^~7p4;}yoLk+n2hh3EKD3-e8a9o>0 zXulYL>Gecn|40GkpW2sa^ThrPASjOd(TJYyA3;Bw+0Cs3`$wkb+2a3-w)c+|<=KES zOYTN8b#o&^y5g4-cKws5F6D45;7r;ej}!N#8d3fQxThi{GgZ=$=!S+9B)`eV*hc^4 zw{IsrAG8$#PGCEQ{gbyxIoKt@DhEF!a^J?lUXmJE^se$-=cynnvrF} zaxb)n!nwGMe+He#(ptcT(-u@lOKSjO`ii2SadAprHpt8NzhfvNbE%&^6AgeNlSgFQ zn9S5YI+IOgqM_k6(U=)(O1wtRVXV0Hb{Ue7vg8MC9mzTxN7wOVQAa`al9Z%M>Ie5B z6}*~=xQQ|n4XuiQHFdj*Zu>|Kg42T0kQfAOwbwz$IOjSJQ}!Y_&X`ClNf$|lYJXJ9 zrG0RwP*;j98}?$thL~iMVn~fWPGU&i^Hx#}JwolK#E?31n2RAuxrVhIXB zHB72~m=(R+#}PUFH?+$*K0{LId_3(TO1pa>DdJ1qx4W0RKjro0kDc;ZwR_Mi)3%c#K@rP$nexch^PFjObgb;6!*a~oy zz8H@Hl2x~T_Em;4QQ!->b-#W95^Djp@?j`4&%~dk_XLaS%J(q+Zi%10;e=2 zz-Z?oOS-)iguU|R_zdz69f!tqdVA-`HX_=jz4PEZuy+Iwce6H<_D+PN0*6yC 
z0|!31s^P`>AVSngT<0iC#kU|`XZe^&ttZRpqv>KsSU!7UCk*{F%IT~d zC|zXToJxw7rKnOTR;1a|F!ALxS~);5O8xF_$Xs$BllULrsb%K+eZ_RN}GlZBiJ*; zGl< zvBsO_8c#QCl551;N!|HPK27h~)g8iDPnTz@?rcZ%Nm?|T-gM1paDJcNPKwQFVSZnod`D4gmy;dvyV)6ec;csym#*n7Xs&Y8vQ$1xbp? zsP6n>NV5FV>ds?G8?Ek;bwF&1?AOV|PBN>qnYh%G5o86?uwJnnac>4t!VBcR&Mu8p zKIV}`dq-JhJu_mdeX?*Vv5_Lw0$;j_P@NR%pcHC@BHa|BMxY<1WN#cR$V_E{Wzv!r zCNKA8yjzR*KjRp<-hD0@vdpM`6fKcOdm4N8xscAD{TFC{#L_>4dQ$c*^c#@XX^h!l zualEp&|vf=SzR`V{;@wZL`iJDid)y~`(gB45A3YX>n!Ul<0@!Ps(ZjnK_e|llOU9RF{ZGgZoo@7en>^b0;}7_Ox*^eh z6?{3^hk7yTqy8OXv0*^%%d=VjWiKex{J7&u>}1s$_JeMNOPEX#Cwb%tDFR307KEPI zYo7iNQ$y?*-q?OxO;&QnyVO26Y~O+I@!r1G=Hc^pvhACu<0nRl)$GL{ptr9TSy9p( zdnNt@0I1?^tYWzO5$@Y_M|hrj|C`|(;9%Qj+W`GvM=ZM;0d>KZ$ZypV&WuqUA!P9q(xuLIe$VWV`_ki3VIHC5wAod3in5p39&l5dF z(S&5JE}h~+S~OhfYiugXBo&>a%RBK+YS>IXn1M)(UO+v^)MA6>ukroZASH!`N~ry{ zCUuqxcWbOPNh~Au1vMT9jB6{h<^y#XsWobXH%0Cyl}O>c*d9J}ZBw$;2KCRHR%QHF zeSLSo$x!=fI%>ZrrBK&pF}B@{#kdaN%&GOc<0l5a7#~c@>!gN$gUabH@jjH17Nr#sMm>ETf0Yzi8tgiUAXRauokG6Du zRr?s8-8BNo;cL?oggfR>@gGqkbaX50C?mY?78OLN_EioqPcNWSfgeJJI75$oqYz22 zze?8cOt0S@zgl3!c>~8LBy%rI&#lHkc4kz5J=r)Wj|j%YE5AurzEEHJ?N}2KNMEJG zf*n!Ut6=fdSaIec7U}mG5wP(Z+B%vDxPSZf6fk2HlAHyqv~c2|#vbJv2K;4g7NpO7 z8j{`F2mBE#i7p+|O@H0M8a9;BNpd5)V>=F)Yu4}qPS4)TY%LICVm2X&d`&PXvqT=zY@To02soRJkPR& zAGsJgu~*&jq-yaT@R|-iu1n8e|G#0$j;g;Oz_j}5xXB`@-cU*Y#{;coL&CE0OSE$O zE8PEtAodJgHC5o}4I5s!tUm-{x5LGfCx3>#@sIudXZ$*VO)wmQ0x$mCzUPKt|7`;u zv-xYT+CTF`{C0b$jnrDU&osFi;zo5q!OZBWI{bEjNgK7;@9hJSGu?% zZr1NR{NWFn%l?W0RmOVvNtlE?T~x*HO=&7o6&pL*9wUldxyhDSOn|4X_XQEm%VP zFChQa`k=K%)36~CUDDmiX>+F}O47GuZxTKJ3N**)u{SYMjs0#_oaoL-a*r}Pw0H+O z-Tg|P_FHInE}R&9oc5zBFdbl&k|GSWpLOC?(+Ex6hJ?%VwS$;Hm-V2_vOS?r`(4)| zwcm~Q)2B_g|0_OsgSz%Za=(PHR#mrJf5hhUpXs=WW&L@uL9k!PG%m2uADfByfw5^M zs}A;3Cc5Mrehj|Nv^8AMxPxwd$FQEqzvJ~>PwTmlp{!G0aSktu?|+WtY&wbYfxHh* zmqikL)fg|Fp{IAEq^_r<7xoj24GWw4oaDFf&H4tNzt`(~f>_@VV0~k$s8}kGi=Epw z`jK6dH+p34YJv^4ETPjHnoV2rqkM z&v|0?1@So4GwA5x6l&}bkd#zSN>UCTChbjHP-^V&WI?HW-o_rp0kbC-E{Grb4LG*s 
z`!2op1f`lNoGx!{m#qNzyj^wTkS8|5bFdzxK{(%7AdHAa{PU0vNo?Y45F;gyEfgpV zyoyutaze_8>f@o!?064TyZJkLdIfp4+nNr^rbaTD-&! z|2n%XA5EBtC%Jdv2!b-4=Yb$1rg!%-HsI9*YYRO@NZ`kB#b3rOc?b#CYyF_8f*c6F ziBgX|FR4xaiWEYct2pBq*+xp_`cwRBV3~QOh`UZ8ekO_KQ)%*v>sNOlqe11m8l=nN z?rW%%UK6A>NMyPvcG$yY?z6cKOy(X^s}HUF*zk33f$)wKhv8pA=1bOHfQG0tCJKl8 z8Fh={@Wg%>f0`zZM&nIdZlDs$@@YUAk;k5=Zm%aILboYd9`Kk#v!BfKr??%J^G+u# zNoM#{95xiFo=fYRr5?pSNn{`k2=~B66=@G#plU9_1>?I`5QMqaSfR zDfIV!d)5@>U4mVNJSZT0t!niXkz?NKSEHARbl`B2W#dI=HL=H2{R^B|6I#_ry|I9K zxY*riiJ9l1!IQOU@T6?;xukXjjpxr?<2jmy=`k5)SS!gM5 zHNdx6*iijqCwwh)%xd*<)pFx;u;#)wQ-`}Lyel&FAco}n{=Byl;52E%5e*3z8y-_y48t%e{R8@TE`8@1OY+_0dzkzrj*_vcZ%X z{t#PC?%gHSX6%pqYDZ4|#an%f9qjd5Zan4bJqSJg6caxXJ#jy}Jf+%k~Dvy)hqFjrZ!roQ>XV6376;qya`6=yD=UBf7MXPvPm=u-~$Q2y^Hm zUTMLqVw5ynHtj)N?VAf}HU9d(x~oTSh}B&)l2)+GKYXXUBSol3qgOi)GlM0mzu;&^ z-DJ3J*Iba7ulDW%!S0C+L*PU&feKkisCmZ$?@3VF20YdKx?V|ZfS!f8mK#eK(wPcR zY(M!hzx#6%K5;tsOl0kim3bx}^v3SQlGtaUP~h*5jjsj%C+7hFlWEm=eS7%F4BeEz zm3rJE_TQe^2{J*2Qa97n_Z8whp1#M3ClLXD3sTK<@UxN|(PYK`AkvcyWoR|!;$>kz zT_#+kSLo`w6*(MF>T5FnP3?UvZ(WtT;VH}d`!Eq$*Aq78;BVcPiH&uaL6*OT$?fGS z^`?!rCYE*zX{W$FdfzHoSBSsnH8*?uCaAF-PhZ`YBUnL~jSS!9*(ImUGv#R<>xlgh ztP#jVKZ?)%84c}UJ=IS~r^SwVP0ux$UhF-3CGxP|?TufK48!^Ez6r4$H|jizDo=V$ z-=r}}c3Jm)N%rxq_4Iv~22uB%OB$|&QgQN2=BDr(C|rvD`a z`}`AU7}&kYaK?e%>=+<%o}$+j|AEt_S&hLD@Rr)Y2D${auLAcDh*cgPpSJDv;D__?iqIFHrXcWc= z50jFJJ30y&&D&-goTmVFDAF#4HTb|8Olzs+x*Ozd_F=_U2Iw3j4h>C7tfPs$)PXJ^!=m~Vk9=f4F zh3bZvy2pD+nNI1w`#t^9JnZ(Jh(AszyRh>|LifMuo|MZu%rGLuEnWr$;U=2|B-mCc zZL(Ym7&o2buhFsBK5UMWF82wdB1m2A;t!Wlv%VzWF17mSo%No+F!Yi5y87V-!j+5s z%#};k9>UpfgrrH|N7eoqoukX8Zn)iMw`@3!-VXg9f7CsH$wf75&u6(0#9v`2^=Xwr z(RE_*=psYVg?mm8@qeYAYACvtEJX|3i=sTVXq|DD&X((HpnlOx?8b7Chq#9=NyM!T zake_+v(8-mU)jm*6n?BtngIum`sp!y*jQGMIKxbgcnUKza(}E~Mf`~M z_4|2RVFq7q38me=kJg2b(dhXxNObiakSOupp)Z1Z4ElxIU!SM|3y88W*R@yRtCH@>O7Re>!zeWtaffw`i-HN^J>m+X^rseGq+Q99hqr4d3io9$` zocuTOuTwq1h_wz1A&k7S7f6|~Jf28E;#f>{$c3zcyZ2ESQ&Tteb2<`kgzD$V;9rU5 
z*{Ij%WUr6XX(+NeF;2|;L`NxPKazHSs{7OrD6B8WpJ%)ab_}}oxA<>}yP*@@EWd{} zAUbjVUUcGd6F(dST&aK51MDc2Wr^Gjp@6p>`L&R7zr}ZfQE8 zreHv!?Cjfn=)^B*ufyVr6U!{Z%@cxc$`hf(=k0s$5KiYp(t-&8n(T}#{@-W{v@*j* zcrTz@ZzHMInBnMT5&G%ffEGs|iJpukV$v!JA8adZvTdO*;0y=c56{xPuOQ+@VNvk{ zt_5+{^UDD_fZo1x5ZXa<0kADUdwkK;iNwZ5MG)>*s5)%7&rT+70c}|v$TdNprz=0} z(Qw*%o!()T4u4AR?}Ev2yUof>Q3!PI0i}m~ zV21tyNa}r_7%p>CuW~VMfErdO9$^#!3x@VCtvJ$5Pk_^;bH_&FD8wi9mp$-}a{U04 zVs^Y9{|_A8_w*+6*44x4wyggiDuZn^$;Nyx-kAv)uujSV`{xTh&1;ixmziYVI!}2` zu=^bf>qP-7e*}}+K0t431mZlmzm^uagNCBHw=9Z$lQJ~Z8yg+f1?>a@P0J0(OW1ixBGd@d_tV1okH5F zA=V+F$DA}X$w}ipDY1Kqth!a)s$1ge%SRVHecq3bVDJh@hHv#u`D4=jPOWE#e?H-F zCiB}{Jrp_Sss3ZMNcfA$ja9fx?e$OOrcncb-QBr1lBh~}46DbmH!`o5wR z|0Exfq9or*(JS9VfzF+~&W*si5jZyj=SJY%2%H;%b0ctW1kR1Xxe+)w0_R5HtVX~r zN?$3$&S&@*>0-W?wvJXG z^7xiTT7q~>HvVc$YHB>~(PFP>@r{v|Wi9QCv}kdtT|liokq1kQC>GSZy?C4U5}TtK z;JU@DJG7#f_Go4Kw57$?8Plu*e@C<{f_EHSr_Z*|hs(0Kre=|+p$;|j%w5Pmjmo0g zA_q$GHss$kF>rt-3KBk`}}-?MO$znrdI2R5jPz;O;6W+ z{OEfYX8g4%li4>4sH_V=x?JpUDjXmeA}$%q^+RCUCd**>Gaxd`kbLxXQ8}FGRPaF! 
zeqEX#_T=!m4E5!3=2hvziGI3zK9L@F@Tk%wkrM4ubz>q?4tq9!>b@dBy0=V~Ke_OD*(_g~ZWapQ@cz=mM zvaIuI#Jf>U)W6D)Hrd-xB zgeM=Y!PV$eOMt#R5?HYpYb4qHgdgyTzWMK$tn#naLilh+l#Q-+X9;~@gpW~2 zBplE>JHruQ8$SAAdPp!ue9&Z>7JU@4Wr6nT2= zlB^gkcGN|D|H52wVIjv(iX7J=ya1A?M+g*`fKz_?yNqCN{LG4#U|;y!0g_boSZ9^ z`*-Il1y78_eUGBNW=Glah)+=R^42Khj*;F#<8Lw*JfWC!FeL|fQ+@cV*6uYuH@>(I`i~4Y^e$8@g&tvB&alvEXXj{%Z#|Mq1+7u(b4Q0P|?)L$Y0nrFu?2YXR zg?g%Be%ZxzUEIA!iW)A^A-j4+`1ezZFx_qw>2g{8TEs=RWZ-RShgMf(^?K{9rDY}d z5)kd_E>0}3CX9I&Fu-W|R|VYI{GYPq=j(oSJ@maP1+UMRdO9nPKWhNb#4qVgS|)y} zl(dKOU7-@l1xOmDWLbMxNwXiH3le_`9*rm^k#Nu-^(!Ux0V$T7qzJ%TYb5OJ@CWHr zK}reNkV{CWK&ZB~b+vUWC4q1o32B8y8NLdHuo+eIJ06}T%hbtNt^=GE|1N3oYV-Ng zg|ozpsv+rmmepV~NNBT;ks}0R`~N|W_&`<{lfGwH*V&aOS?8$PIwAQejzM@*Nr4M0uGm z%L_15bEs~aF8P!Ut%mXve;Hnhbkc2PdC3=L*dxlzfcl`nK86UvWO=Dy$Yp2lgd;VwcIEURri*B> zVS-%&jXOQ*t91OXguYwHDhYRX&VYH-)#{HR#K-I)Tab-{8A(6n4B>~I0f+SrofkV* zO%fq1efdud!uQ-t9{{w{dedJYvxeKP{40c3f?HR?Z*JA7S1hkFztneR9nCFrg>VS) zJm-HW$A1Q2{{KGzJAJ@Y|7png#icGuhlTG*bt-LgR+l$5*-NXct16r!KWgv?o#nww zd$81A9&-Bqe$7$gFKenScLuepU|FattuaK#>R&%jf)&^MMf3N)OTiBo} ziVS5pNruIcW<@v#|N8oeOJHk6AVGYc9r!xxcb94HTBId#I=A~n%eYIkiG8Q`Of_V3 z$*0$l^faII%WG%!{ihte=lt?)6KJ&GKQ%<>{PN_?e~Xm05z+n_Xl$u{KtpYy~2|GW>ubX!dP79uEI!c8;Ezh5l)iL`fnSZ+m; z{{6TpFZIevQV|yUWbl6Z_oaCt+kLnG{iNP5Dl+=_CEu0)eIwuf2lVfcV!dbQ-yc=o zyYug7tmd8j_eWK8*8csBHNH##zOk;emznRke_z^{a@|ROAmhX)h{s6XLh>CMsv;xF zJJLZU89#f{`w#zhwD$>)+of@PD`d{YK%R-=Ied_eT7)`S<^FcK-c& zZjV>*-?tXs+^%(ZXo08}oH9*^lknFk^VhY2RYk4*fUy-PjoPd(6bwhA)}lq8`bBf= zZ<#UA-_;tm%Ic)NJUyQ8$MyrQzoS?zBM1hvqz=9c9vTHD&g9iNVL zM!Qz7>Rx?^zR2nP|K#m&4tGXpaDP3HY~jpwcQnoGuiJ`X)zX_W^!5Ao5!_o1af+HQ z&%HK|klyw7vz|mE50iRDZz55QaMI>Pf^q4>GeYnEiNqR& zn-J34nE4-x#6E-%K9ES9M7Zn0M4}PQW!FQA#5RP-9!?~lM)>si6A3F8;mjW-60H>e zFp>BU!by)L62}qV{-5|b2$Z*7+fgsVr~eD(5Kh9=^F?5?-d%}A6yX|#8xj5t;dc<8 zL^y!(ipLTOdg;^C2s$K|=1TT*NfQ-1%Ftsm8SB_g`t4Bp{T{aK0)#e|$M$JFUo86Iv?6m@koVoc8|bWg79j2R2&~lGoNuQ(=tADtkk|Y@yj>3Wsj+0ad9w|=v7C!a zaz3L2;8h+d<^<1r1a*8W^BB4FUHT!)oomWC5`3QMU9X40AK9TUKDT@<>QIXuieu^B 
zCLDI5+-C68J!8Y+a~W|!8#Etg@YsvMi|0{YNgTitz!!SQZyMLnM+OrF)N~VK&v_vo zgyK}+ZB!q4`i-ZruRgu-+8QD3*BKHIiL;_}RWt;^qHN*nN;jGg6my@t9* zP#4Lw4D@ZBy9={a#qc7T^fY7^y^lIWUG@1r>AavYH!~s7y5EGl1|je018{Va4X_2U zV}Oy2q56Asa@Yc=v|UKELM{^h^RyBAskOy39OC?gKQ6 z_MlR*t4Q2H{IYAKNSFHvxuhrHGVVJFuoC}1PZz}$CJzCUb}E@1bkU31>2e9jzl7f_ z5R`as<=K+Qilko=FG>tkQnSbi?UTjjI3E{r>F2|HR9U*jQ&juD1xLg@7#pk<^E*$3 zw}>z#!j&TYtO&m%!haXxb`c&B;crFwf(Tz1;dr5xeOQFoh_F(G^F(-y2ty)VDZ;JW zIOg?p=G0h=>gV4)&FZKqDJ^kWOYNnV_ELMbv1jRVnwLCf^m7?R*Eyi39?_00WfAZJ z1AMB0%XKF6J8@%DKCEmLcuG1Tz0-spcDW8@{uQ{PcICR2@Kr2-fg;zHgwq@KGvR+f z%RgDMujD!9fxUeK-a!@OlI!7lc7MLo(ZImU4|t$VK?W06d5_2s(}-OzzzNUIE*@t# z7H<1Qe%BW{T>kx@fDe9wL*%p^7WLTaKp-x;F8>(|g6c1r$KlLA!7X;i8GPUd4ww94 zr+{w}a5+7nMFy&8P{8YWF-60|Cb*prI^bgVCPMPxQ-8NKF|aa)2g)Y|yivf(e@)l* z0zN3<4&gX%0i5br1pab;-v9z)@N0Q_<_o~>_plzQU8@*p!LF6V0`3xUqD#8s0$w1- z8NoLz4UxTGtI|D_NpRL=r3 zj&U)&4B?XyJXFtSQO``4swl4tc!7YUT6Psc5K(@spwD^%p8+`4-zWx#+3`qQ1Hnl7 z72#!;bn>qPKCqfsAnD{$0k@0%KM-{Cdjand?aKa+2>50(UsFYgFZn2M_jo_A{~Cd3 zCE#ov#Jqe$z&|bETdFwVMFB6z1{CX;$^p#I#%;cp!#5M-!o_@C2;V>lDgRM1U(8)sP+$B0XOoedmy^7A_H|5U&?yE&ZsWN>>4 zDrEV^#GVrM4+!`+vCx^H1ZmGyaInP_gI}l^<$fpNn0;ljz*FY`tf)uIFA4uLgI|=?t0a6Y;5noffj{Z5 zhXKD3hdZ$@T`2WcfJNKl+-I1zw&7UY7;# z&jRns0{P+Oz}IGhe>DqyXBPO+vcUhE z1wH}%D--^A!jr#Yis%gOL#go%HHijFSYWOI+~n#ETgH)5o)RkmHKg*L!aE#q}d(*N`FOZ z8D5U$sGzeRc5#a;fUR&8m~$8GlFPOYN`ub{%~xZ0O3YHbP7 zJGtnEQ$_Rqt(_X)m9>bqF(-_thW+hP{+_O4>&LHKTFl=1_3ny>TDv-%(Jr-A9K!Qc zxSxlhPU{RrS~@V;wrRzSJ7+CfLa(%9uh^M+)e1ZsP|`wAFKeNhmrrHA3xr!+vE7Wf zPIT7PAO*chg}hf&XU{ISBB?77omtcoW>28aq+MHp={+soBA8}v)7rw3)i<`cuAaSg zsbO%-n#wDJ&Y(T0H912SO=Wy=$IyIDO;>xTKcx9OqIib7D~dO6alEeKZ`86)GqhA1 zv{yRq_KGrRu&lZ~P-;l73)+U+2HXq*h zg%`C2*?ZW4!_5q-1$;P+bM9#aqVD$9a<((Pqs*E18a2=Zy;aT4pX69X4|1Rlyfv)I zK8?L)jdvM6UKH+%1T=3r0Ll%wPxCFD+i>G8bL)KWh4p9bN!pvr@UFS`Pj_i}6H_3f z(aTu)(;)IyZ}f_?wETRurxgc@qGpB4Xk4*8I#+{mXiYOB^wnuiUCX?9L0M}}%_=Mu z%@+-$N?J2ogzD7gD7M+JnO4klMM@UqEoY8dXCl{7IMU|F9QnGUEv;zSw>TX2x5^b+ 
zyE>|MrjaY7&ve54AkZEInWv#W=2iAVbY|2Xu*t1u&k}*z1y&T})rOD@CBC-SmWYo> z8fV>UwcaIC`2DTfXZvcL8A!DC=c@Fqycdj#XD zso^(9{$#T+rL~yiqu@C7+Q>A)1>pvkH0tGw2F7J`KtaYp>cniB)1g6rmNx}T{Z*9} zc0+Qow5q%`5UMIG*QzTjOGy&wOFPTT9QJZ&Q*}j?qtqV~Qus9dnRBUmcpoEiMDXvf zwhkX2WJZ5(@kd%9bVbjoI}7~LX4b8DBe1QZ_9DYeGI@>kddmMV+e%3RIlfDXEa?5A zwo?4c@GHl!V(B#Y!c6uC%~{4ln!+>>qJ8mJO-9zqhsDpm16BV%)%Wxn?5Q(rlFW-# zx7k+AB)xu=Np&lG@u{_Dre3WRJZ*N-l#Xx*-s%XoeCEfC?8Oc!FrqFhMw~G1^|7f* zw_g4MzDCtLs>67bstnb9Mz@zJeWJGZl$9VrKhhfEvM#I@>W-tJ6T#a(n_7k!;2ypZ)5t(3P))Ten_&T zB>h1m*mn*6ncj|SQ1>F#=&qPv@0i}+Qd3pqtf|HTu($Cx!F*$HKV7?TiYe zw3yB6lBM+W<0VV)Dz<{NfheqYC@Qcvy78W&miAEChu88og*&xb_F^Vl{GDx_N*EC# zJ0}X3*4nxOL=bb`u>;Lp=WJl`Z~#EmYN8&pNnBSVW`KX zL#AnJnRKjaqYY8NKM?S1AxBwtHB7{S!=QcyouTSVtvpaxt=Y>fnwkvBp=--< z?~Y-dRs_Ab<9*KSBz^7gf!8U`AyXY}sxF7iCJ+h*g3fA#R^oRC%i*rJ2Ltf8SK3Kw z0gF7hYx3?~k{_H}WZbZE1ge_sWtvt|S&h$u1OtX?FLhSg{pDIk2-<9wHY(ZaEHA5| z?}$`am1D0(Z1ULFWx@m680ELd7uK5CvZxlO@fb+tWU*WkG|kU=?$IfH(!GE^DT zN&`lVrYwZ_-37~<%1ZGq8SK4s=3o?gY}a6M1Y9KP?Dn;Uu=z{dtUIaDf?!GnD$Y$`);?M#$sA!>r>k1-zcr+jD2{& z^nee3a*RA+Z2pAPt^RX+Qha0ogI3P&}ai_mLZ*C7XJ8Hq1NP zWyMj6R`|o{APNbmv`thT*APgA{)7W&pzicTWyYO;d$7vc1c$oaSz1;V43e`Pm5hn{eRNzw9AMMw z_x5c1J==9Ag`XXH)0HU8R)XzK^K%Y#xPR#}nyAExy*UKPY$l6`yBu1J;y0*@=eeT<2mUZ&n|D_zHH3FW~RglvQ-X z%T@37)Xwph;<2F$C4^(3o#~)pS4UcwbR_r@A{^al)u_hwM2S_FWZ41msZd1IFfPm5 z!uUoAc4wm)RcsRmI#ws!c9bhO)X$yoo5RnoMKBI-`X-tdoJWPzTb5O4#!II1Lv*4G zej3~?)reDfvOoNFKUwnfN|g{>jY;fja5JOW63mi7_%!3AW%NC`P%zw~wWrl)9E^G` znzG5VR2k8t_)K56adeZE%u+JQNY35S!YM2xeffNWZa=LBe5>w`bRyDyHb;ImC;9mt z`g9NbVh@SM;B<_K?)yFRi$3xTKJ;ZD@yRuwO+QuAtYeCDxs26`qSOffnLQS2zPXvuVcKuEv^xWtK z{N(c?PH`hcJQk96<)y_Gr37*P_gfl8c^S&*IdVlgv!Oga{&GHkvVHlysSLM>1ZtO_ z>5%2|SQEQk_)B%m^78pr8OrBXS!Vp9I`o%{2t7M0%gg6QWhfrcNxr5{=93{lZ>rF< zs4^~}N0p&nG-kwK;t<3QrOER0`Bxck6&a1=C;Q(n%GZi~@_AYr%IEy3{6+W~$FCbO z8k6z(u}eNrB0{1M{YALyuTDht<*lNe3>ystZm3>aYbYyhCxnNfts_O}48FYnsS zGs@5^Crp$#M3wIWo{yiT5Bc2cq5F6NV}2Qz;dW%R=*r9IWCzDselN-r?a21!^G-)Z 
z`Qu{#Wxr*+avXn(vXn)ZKlFV>a2@&<53M{%{*~orIEWN|dHMW#T$CS3mgG0GoD6?& zC@-Jm+PWn@KaZu>kBmQq8>&m -#include - -typedef void (*launch_fn)(); - -int main() { - void* handle = dlopen("./lib_fault.so", RTLD_NOW); - if (!handle) { - fprintf(stderr, "dlopen failed: %s\n", dlerror()); - return 1; - } - - launch_fn run_fail = (launch_fn)dlsym(handle, "run_failing_case"); - launch_fn run_work = (launch_fn)dlsym(handle, "run_working_case"); - - printf("--- Running WORKING Case (Indirection) ---\n"); - run_work(); - printf("C: WORKING case worked.\n\n"); - - printf("--- Running FAILING Case (Direct) ---\n"); - run_fail(); - printf("C: FAILING case worked (Unexpected).\n"); - - dlclose(handle); - return 0; -} diff --git a/mojo/reproduce_fault/reproduce_crash b/mojo/reproduce_fault/reproduce_crash deleted file mode 100755 index aab4c4f7fa32a70140f3c8489d58bcf811064e98..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16192 zcmeHOZ)_Y#6`wmdiAj^#NgI;pPqPV0lmvNWJBdkJ(wyTzSqCREj?)&2us!?M@rm=D zd$&&Qv=NpMjR-~&-rP>P^(8c0LMhg2XDK@kxp=&V37qM#O9AHO&I z-g>>ctE%z=A$F|WH}CiU%-grK>zUn|&$V^7$73-irAj@Zh?}VOkOT`RKgJc1gxal^ z;CQcEtLB5hLt~yi;Sor^Qd>2bRx?g8*|{ta(>1IoR3aowc9lxk0#71T@p+KlJQ6V< zreBYHc0xt&w`X{A6{3@*fFd+#N4dFJr^V{x-nsM(+};FJu_Ko3uCm=#wi7zd?Fp59 zQcmdC#rX`=jgV0hO1e~t(r;`%ako=Lqsl++aC_(3E=5u zSLA}OSCcx>*|ERHZZsRsU4DJXPGu^)qvwzf$Igj#!FBRIhgveZtkaY1%}@*{26I`C z!S+GnAN8iV?om{yS{3pVSBJq#e;rR`sm%fT_A6F^xROI<1_wRX5 z(j-4$1n**ecS4&f!G*uL&%^1SqG+T{25^1^Y0=dH&W~n|Ukl*WN2Od3;CBiLdLw`@ z3g9Z?G31;2ij?YvS0h}W1%%Wl=%^Io`u~CElv9}-VP!>LKlKSw+b38yY-{yW6ekZkI_=D$fi z4aKG=z*9U8f9@E6^)uG^@2xX`nC|LnYnW{KnRRY2y`?BEuY&rU{btR&&%*=(pW8bM z#5y-GPh`(q?pmD6A5HI6mX>Z%(EYRemqR@4_h|cFjYPK2SmV>yrMIfAOKrd|#n#!qSHP8)KZ0Yi1t)uD-j7g? 
z9lu;TKJqv95OizSjlq+4h3i%Sa@#w^BK!xIDD@)cyEryeDp})g)7F{O(`vW@Vb9<# zf^}8_yk6W-(zBQ4NMZT6FG9#My#S%_#qlvUNmvbjR76fQbdpn{?^DD_CCWgQfhYq} z2BHi^8Hh3vWgyBxl!5<`49NODS+6Jafx~?IYnkSl`fKKR!Qf)XCwOt?JF(>xwjiL6J3e7g#Q9=6>Lbm8|t!$^3GrLfA|d zc^x2q@T4-~5q};>>-VUv@Q9k^h6OKjU=sIr){`}e(*D0n-uXbRRq?G-pWj8c#}`@6 zOWrZf)BHDB{x7fQGJ;tUIv_W@&!!&jYi1K1~I>lQXcP6e=UUtbX{Ut0W9mw4OX}d zw4DAr;BmD`O@`uK&vRgwfWM&q)T$9Wkx(h;=Vj=`)N&QR?$@+!je5

*RHLPE;nTY(#59H2cKU#aR70T9(T;I(R{pJ$PF0$&zitmOG5c%E=!0<@d` zg#FtU^dGH&KMC9j_?7BcfG>@&RMPJSe}-`Eh7luv+^Gw|YtetKglKy7Vc^TqPh@}1 zW)^>+^`*aw)g{)y!0WR0yCev&Xk4^r>92rOoMAt&12<;X{Mx$}{M6zByTR9*t$Y;t zvN)dJq5G==IEOf^OtqtZdAGsLVafzJ0-aqTZ@YswR_JG)0#s8u z`$Q(!o6OiLH9;zmm3_)IC!^b)|c0yW%_A5na?LjZ71vIM^%45Iq2A_;laUC zm;^W*n(iD`+6iFxQEW~&MUmNddw27pHoNU`tBoc4t)DvFe5j)ZFrIaGo5fPAwOiQ- zI*;sc?zE4zw;yfmv3r{LcedHGRUjc71h6UKwyg(qgzvRqz)225de!0$-_g)*dc6DTU18VD}aZ+=AXBq4z}iF7>Hk zLxbdoA}_mrf%ja@H4JX3@FhA*x+uIGV~#+X*pcBTdqLg2M+c;q&AECn{8dlLS?G>5Sv%R8AQDRONnjN8OV0u>rNioJ|4h3@7CX;?;8v6pe41&qdMVlU%Wp)x*&iZ0>! zyMWSIQ0!&=CsfAG62IsPJq~*sTM94ZOQH2%v-j;Folrv*nTG9u8nQpn_Cmw<;rM?6dyJm+JqY5D zlV#s!xIN*8(s-yQU@!C2NvwROB6=P-%L{%B0_wXmPm}So?7x+Lwmhx#{YgK16SlE{ zz3h*>xE&x$y+{0`v@4Xx*Fk%k-xk^4@-4lJ*a?lpIA|~9-+FFX;uJSx=kqWoOwK=% z@FWSg59?D~!Lpd!51*&lhjFnFQtA6aze8*|&4JPX0lGCM9(jJ#{7_;K+CM@QJCxRt Yb7|L;dSljd_TQp^Oeh}?8H8B%Z@In3VE_OC From c69e3e6e8a5816944fd631eec5e8e759bbd8b228 Mon Sep 17 00:00:00 2001 From: maxchisto Date: Tue, 14 Apr 2026 17:19:16 -0700 Subject: [PATCH 4/6] and even more --- mojo/bench/sigmoid_bench.mojo | 148 ---------------------------------- mojo/validate_sigmoid | Bin 16344 -> 0 bytes mojo/validate_sigmoid.c | 62 -------------- 3 files changed, 210 deletions(-) delete mode 100644 mojo/bench/sigmoid_bench.mojo delete mode 100755 mojo/validate_sigmoid delete mode 100644 mojo/validate_sigmoid.c diff --git a/mojo/bench/sigmoid_bench.mojo b/mojo/bench/sigmoid_bench.mojo deleted file mode 100644 index ced674b102..0000000000 --- a/mojo/bench/sigmoid_bench.mojo +++ /dev/null @@ -1,148 +0,0 @@ -from std.gpu.host import DeviceContext -from layout import Layout, LayoutTensor -from std.utils import Index, IndexList -from std.math import sqrt, pow, max, min -from std.benchmark import Bench, BenchConfig, Bencher, BenchId -from std.algorithm.functional import elementwise -from iop.sigmoid.kernels import apply_sigmoid_rgb_ratio, apply_sigmoid_per_channel - -# Configuration -comptime WIDTH = 6016 
-comptime HEIGHT = 4016 -comptime CHANNELS = 4 -comptime IMAGE_LAYOUT = Layout.row_major(HEIGHT, WIDTH, CHANNELS) -comptime DTYPE = DType.float32 - -fn run_sigmoid_rgb_ratio( - ctx: DeviceContext, - output: LayoutTensor[DTYPE, IMAGE_LAYOUT, MutAnyOrigin], - input: LayoutTensor[DTYPE, IMAGE_LAYOUT, ImmutAnyOrigin], - white_target: Float32, - black_target: Float32, - paper_exp: Float32, - film_fog: Float32, - film_power: Float32, - paper_power: Float32, - num_pixels: Int, -) raises: - @parameter - @always_inline - fn rgb_ratio_closure[ - width: Int, rank: Int, alignment: Int - ](indices: IndexList[rank]) capturing -> None: - var px_idx = indices[0] - var y = px_idx // WIDTH - var x = px_idx % WIDTH - var pix = input.load[width=4](Index(y, x, 0)) - var res = apply_sigmoid_rgb_ratio( - pix[0], pix[1], pix[2], pix[3], - white_target, black_target, paper_exp, film_fog, film_power, paper_power - ) - output.store[width=4](Index(y, x, 0), res) - - elementwise[rgb_ratio_closure, 1, target="gpu"](num_pixels, ctx) - - -fn run_sigmoid_per_channel( - ctx: DeviceContext, - output: LayoutTensor[DTYPE, IMAGE_LAYOUT, MutAnyOrigin], - input: LayoutTensor[DTYPE, IMAGE_LAYOUT, ImmutAnyOrigin], - white_target: Float32, - paper_exp: Float32, - film_fog: Float32, - contrast_power: Float32, - skew_power: Float32, - hue_preservation: Float32, - pipe_to_base: SIMD[DType.float32, 16], - base_to_rendering: SIMD[DType.float32, 16], - rendering_to_pipe: SIMD[DType.float32, 16], - num_pixels: Int, -) raises: - @parameter - @always_inline - fn per_channel_closure[ - width: Int, rank: Int, alignment: Int - ](indices: IndexList[rank]) capturing -> None: - var px_idx = indices[0] - var y = px_idx // WIDTH - var x = px_idx % WIDTH - var pix = input.load[width=4](Index(y, x, 0)) - var res = apply_sigmoid_per_channel( - pix[0], pix[1], pix[2], pix[3], - white_target, paper_exp, film_fog, contrast_power, skew_power, hue_preservation, - pipe_to_base, base_to_rendering, rendering_to_pipe - ) - 
output.store[width=4](Index(y, x, 0), res) - - elementwise[per_channel_closure, 1, target="gpu"](num_pixels, ctx) - - -fn main() raises: - print("Mojo Sigmoid Benchmark - New Structure") - var total_floats = HEIGHT * WIDTH * CHANNELS - var ctx = DeviceContext() - print("Using GPU API:", ctx.api()) - - var input_buffer_host = ctx.enqueue_create_host_buffer[DTYPE](total_floats) - var input_buffer_device = ctx.enqueue_create_buffer[DTYPE](total_floats) - var output_buffer_device = ctx.enqueue_create_buffer[DTYPE](total_floats) - - var input_image_host = LayoutTensor[DTYPE, IMAGE_LAYOUT, MutAnyOrigin]( - input_buffer_host - ) - var input_image_device = LayoutTensor[DTYPE, IMAGE_LAYOUT, ImmutAnyOrigin]( - input_buffer_device - ) - var output_image_device = LayoutTensor[DTYPE, IMAGE_LAYOUT, MutAnyOrigin]( - output_buffer_device - ) - - # Initialize input data (simplified ramp for testing) - for y in range(HEIGHT): - for x in range(WIDTH): - var r = Float32(x) / WIDTH - var g = Float32(y) / HEIGHT - var b = Float32(0.5) - input_image_host.store[width=4]( - Index(y, x, 0), SIMD[DType.float32, 4](r, g, b, 1.0) - ) - - ctx.enqueue_copy(input_buffer_device, input_buffer_host) - - var white_target = Float32(1.0) - var black_target = Float32(0.000152) - var paper_exp = Float32(0.5) - var film_fog = Float32(0.0) - var contrast_power = Float32(2.5) - var skew_power = Float32(1.0) - var hue_preservation = Float32(1.0) - - var identity = SIMD[DType.float32, 16](0) - identity[0] = 1; identity[5] = 1; identity[10] = 1; identity[15] = 1 - - # Warmup - run_sigmoid_rgb_ratio(ctx, output_image_device, input_image_device, white_target, black_target, paper_exp, film_fog, contrast_power, skew_power, WIDTH * HEIGHT) - ctx.synchronize() - - # Benchmarking - var bench = Bench(BenchConfig(max_iters=100, num_warmup_iters=10)) - - @parameter - fn bench_rgb(mut b: Bencher) raises: - @parameter - fn run(ctx: DeviceContext) raises: - run_sigmoid_rgb_ratio(ctx, output_image_device, 
input_image_device, white_target, black_target, paper_exp, film_fog, contrast_power, skew_power, WIDTH * HEIGHT) - b.iter_custom[run](ctx) - ctx.synchronize() - - @parameter - fn bench_per(mut b: Bencher) raises: - @parameter - fn run(ctx: DeviceContext) raises: - run_sigmoid_per_channel(ctx, output_image_device, input_image_device, white_target, paper_exp, film_fog, contrast_power, skew_power, hue_preservation, identity, identity, identity, WIDTH * HEIGHT) - b.iter_custom[run](ctx) - ctx.synchronize() - - bench.bench_function[bench_rgb](BenchId("Mojo-Sigmoid-RGB-Ratio-New")) - bench.bench_function[bench_per](BenchId("Mojo-Sigmoid-Per-Channel-New")) - print(bench) diff --git a/mojo/validate_sigmoid b/mojo/validate_sigmoid deleted file mode 100755 index 413771c9134ccd5e4f3fc8ed0b776def001e6d1c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16344 zcmeHOeQX@X6`wm}a08AVh{1%9EKvdh>W%Frt_d|+$2n(B<6z<>O+R3>_O9)1&bRK} zUa)~EmO>*AV>M7IMf{cakBU;MqKH;01!OmEQfO3}R%(rC)d&^oTpHpQN)t+ReQ##o zdT+U-75`Liccj}l@Ap3D&D+_%+1;4~iLIUSSd3ArV0SR&rfLPENx|+`SplN4R<;<9 z*R$)`#lT<0F(KCk0jVgp6~(kl;s~RiC*@&!t<)n*5h6vqa%Ht_fGCswK)Xsbl6;uH zDdQkYa=$$VjiD&~=+kp?!4nPIVQyv`gnXv0m_97+j!H^)$fDh8X?I%M`4j^}xuld& z%nAN_WIw`mB^p5?in>^k;_rzS$bFb<9K~}kO1QmQX&0u7)F(>q^}gv@AE&(@X*Yj^ za2%#9WqU-ay}Q8<^ZD-<8Xntad*#kwBc3QIMCIw=xm*f3o`x;kcCz#9uG)9yOvl+~ zecST~)V}RsuK(c!Ym!c1%epm5t0C#6vZD>7tt|~L>(q>^HZycv13&b~;}(v2Ylz?W zmkGKgfctRyB0!ga7-q}M;70&gO6X&=rTkz_rSRLz;GJb~yq%Qt^DN*B{K7>8pp?E< z2A>98fnT^>1wbi3-GIxXTx8M&+!A&*+oK7X#y=a$dKqRI8PDwBYxEE8H3m#4$*g35 z(#_Z)fOFiB(zIs`n@);lJj+g}*}zEJNqGaHm>C;}qdQ`!K)Zj`GzOfMnRM>A0R>GL zbni<$p3Tq`4vrSKWoy@_4x?FJ=hvFm^?rTrI%ah3=r*7od(g>vc6vv5N77B%JIuZ$ zhBP?rresKlP%Wtl9gXvGSOKHC0%|yx7mnfv69aTF;B>t#b)0IvlVl~9UFLiOuYv=T zf9|m(Fm&Q<0(dNRKcsxlED`vuyx$Q%F7X~s$O$KYYNNn$T=I)Vus3@AKA&x&Pt2I$WjBR&3V{HSSyK^#tKRH6(-8Hh3v zWgyBxl!5=j47^iw{rmdCKUL`ymD6h&)4ww1#S1g~!Jkwe=e;ege+A&es#9>RS*}41 z`5~0eoi0FG^<(65sxvpk`R9?xso2~U=buF$r(ko>aQ1BPg?S0TG 
z)V;1U_U?dMv-}}op@WlcCOFV1D?g9qwsYQf(Dj!<3A?_ea2h)~u%LfD#Do0}+l1C?1+Gxn3dej0boNB$b5M=lbG&?f=x^?`po>1>#A>|)6nD_4d`(vcv*jf zdy#Ga5i~@b%hAVV<+PCIA4Aj0x>t~$0FSv}V*@|u4ikBhyihFpo{-E#(|MM?1)6jl z&|%wC+~M@|pM);HguTlj@!R|ku+ZlBxys4>b?D<{Vg^uv6EhHS9$GE@5+ay^5tCg4 z%EzSg{0D`?TiskTSy!!9_?@i0TFSnGGTzaX z`AUpqvQm+vLm=V_hj`9e`mIa(sr4%J`$6zBUb+5t?O(~*g{VXsh%yjmAj&|LfhYq} z2BHi^8Hh6Q|D1uC{A#i@V-DIIlvNq!(<7!gWOy!q!OA%B&B(Ff>(d?b<3U41gR%=g zQ(30xqz09qWOmR=aXXMKVx8?>TN9fV&sD^yEpUj!MXYycM@OQ!cZ0N0lCEjlmdc!z z;~9fVx6e$n{-kZDvLnRu(;^&hP&(Xf(o#~cr@&`2OM#DPeQr{@DPVpRkI-+Y)6Ifj zk%Fppm`V8LI;c1vxVleORT=OGCE!T09MMw7_7wQ;23<8(rQ7|It3XQ9W_k?hUOPFa z;3q%Z!ro$c6gLgMhR7tYVh8+qf(_WCc7N6bOB1^yT2R`QRU?ZSMOd5*j8qigU$KHh zE>#vhLv9KZH$0NGJzD`BT*R)axP2v1d?C~LcA>Bv=nPN|=-k;tVH_wH{DzGscK>!3 z8?B99bJ-}VIS?J93`7}-G7x1T%0QHXC<9Rjq73|(XMonx(YiUB z@8ViLEdJUyK5JiC<28GQkY6o`zdr98$O~3NG@8@c{!0t!ZnO z6`X@(IVR<_Chc_r`)kqaC0{2g5oyNqa$O#NXvQ)ti2vMzAB?f!dQU7p5+Hn3210R@ zJ*^3(_WxcG{Q(zT6+UP3{PCm%}UdHwOMUanj4#28k-w&q3}l(%;QDu z&xNoGo|hP`+@*Eqid4X1TS|X9;Bgr3^P%`|ka(k9S1R4`XIIpx>xS?lFHx+453sFaHTM`iGzazC}~Fn^jRFM^ix^DC}j z$HM3Tx7?(L(K>qabe8L1sM;v zMq29pt^ge459eP6T=A_JDtZ||UjTf?f|_63hx+kF;$@AGGWH-^%tnBdc7?=b&gTLJis_ql-NNs~&q4r~(cFMvi4Wn~=dw0S} zY};(WO8m{A+t%LQ)d4WvFO7sQrTXUW%-FK^u1)P*jk`KKdlNg19qpU8CJfpjpwWH+ zT=^f~a8M-th;0G5kHOc)?F5F6FFYCVJ-992;NWXwi>N2|6F@|`k-?WjKyssl4{_I_ zEeM8{ag8B(35wejy6ysBmXk8F8QX#=QgAT(G8x$d-$cP*ulg>r6~0BnM?@pA?SWE@ z9WS}bL2lIWEqMxqyD)qS_lf6MZs8~rFcseSdS)L`FD>X0)!=2dtq!KLYG2lYy*`e` zRM`4qr$-n^hs?|nQ?0QSZUo`fOAE<9JDqXdR1srsax-2OtcLfv3#0p>lJ zyHWczpC!6dDpR{Ok0c)&RB&Gf+0%TN=(ubUr^Q$$dzyFZfZ?2x>}mc?l;+Ez!b>>* z9zbz!O7=7#C8`KBKX0T*^d6AoT$gy7?-JFdeK>vzvm{h7H)K!qZK5>a4(E^hzfan4 zk`8J9PBdtbbNq1r#sR~e$Z7b(|BL+p#PJm@6bE4sKqY9eNIRlONl{WMhW~C+4gm?; z)BiTny3hjw*-3NuXvm)CIYeQ}VNnUk|G2cL{4+(WOZqkj7buiJf}Vs5s*pX+6N#=Q zMM=Z$KNYgCUm+BUJ{YnOxBo22gV+BP(w?aHFYIxi9JHU6_C%GCeK>xcH^c0VUxTPh z`_*VaXgFTtiT)mJY6AB3oG^P7;1+no-vR-ST^c4dfA{wV%kziV9~QEsar7== zs6zI%Pi|@rYBRQ45dTQ_M9+gHXiv}Ed1=4Lw-gn!BZ}*PgZ4DP)*FRB#Yt|+&ga1x 
zF^YeWCM3Jd*egQpAFE~iQr*YNK79RStXP6HE})|0sC3vW2MoUdz -#include -#include -#include - -// Mock the darktable module structure minimally -typedef struct dt_iop_module_so_t { - void *data; -} dt_iop_module_so_t; - -typedef void (*init_global_fn)(dt_iop_module_so_t *); -typedef void (*cleanup_global_fn)(dt_iop_module_so_t *); - -int main(int argc, char **argv) { - if (argc < 2) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return 1; - } - - const char *plugin_path = argv[1]; - printf("--- Validating Plugin: %s ---\n", plugin_path); - - void *handle = dlopen(plugin_path, RTLD_NOW | RTLD_GLOBAL); - if (!handle) { - fprintf(stderr, "FAILED to dlopen %s: %s\n", plugin_path, dlerror()); - return 1; - } - printf("SUCCESS: Plugin loaded.\n"); - - init_global_fn init_global = (init_global_fn)dlsym(handle, "init_global"); - cleanup_global_fn cleanup_global = (cleanup_global_fn)dlsym(handle, "cleanup_global"); - - if (!init_global) { - fprintf(stderr, "FAILED: Could not find symbol 'init_global'\n"); - return 1; - } - - printf("SUCCESS: Found 'init_global'. Calling it now...\n"); - - dt_iop_module_so_t so = { .data = NULL }; - - // This will trigger the dlopen for libsigmoid_mojo.so - init_global(&so); - - if (so.data == NULL) { - printf("FAILED: init_global did not set so.data. Mojo library likely failed to load or dlsym failed.\n"); - } else { - printf("SUCCESS: init_global executed and so.data = %p\n", so.data); - - // We can't easily peek into the opaque struct without the header, - // but we can check if cleanup works. 
- if (cleanup_global) { - printf("Calling cleanup_global...\n"); - cleanup_global(&so); - printf("SUCCESS: cleanup_global executed.\n"); - } - } - - dlclose(handle); - printf("--- Validation Complete ---\n"); - return 0; -} From d5355f7272f133e584046064d584d902ed709e57 Mon Sep 17 00:00:00 2001 From: maxchisto Date: Tue, 14 Apr 2026 17:31:57 -0700 Subject: [PATCH 5/6] docs --- mojo/Makefile | 3 --- mojo/darktable-mojo.sh | 7 ------ mojo/sigmoid_build_instructions.md | 36 ++++++------------------------ 3 files changed, 7 insertions(+), 39 deletions(-) delete mode 100755 mojo/darktable-mojo.sh diff --git a/mojo/Makefile b/mojo/Makefile index 3db8bd8b44..c8648a298b 100644 --- a/mojo/Makefile +++ b/mojo/Makefile @@ -13,8 +13,5 @@ libsigmoid_mojo.so: iop/sigmoid/lib.mojo iop/sigmoid/kernels.mojo install: libsigmoid_mojo.so sudo cp libsigmoid_mojo.so $(PLUGIN_DIR)/ -bench: - $(MOJO) bench/sigmoid_bench.mojo - clean: rm -f libsigmoid_mojo.so diff --git a/mojo/darktable-mojo.sh b/mojo/darktable-mojo.sh deleted file mode 100755 index 0e10c48be9..0000000000 --- a/mojo/darktable-mojo.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -MOJO_LIB_DIR="/home/mc/code/darktable/mojo/.pixi/envs/default/lib" -# We need both the Mojo libs and the directory where libsigmoid_mojo.so lives (usually /usr/lib) -export LD_LIBRARY_PATH="$MOJO_LIB_DIR:/usr/lib:$LD_LIBRARY_PATH" - -echo "--- Launching Darktable with Mojo GPU Support ---" -/usr/bin/darktable -d opencl "$@" diff --git a/mojo/sigmoid_build_instructions.md b/mojo/sigmoid_build_instructions.md index 72acae27c4..dc107c6639 100644 --- a/mojo/sigmoid_build_instructions.md +++ b/mojo/sigmoid_build_instructions.md @@ -1,14 +1,12 @@ -# Darktable Sigmoid Mojo Build Instructions +# Build Instructions -This guide outlines how to compile the Mojo-based Sigmoid module and its C-bridge for integration with darktable. - -## 1. Compile the Mojo Module (`lib_sigmoid`) +## 1. 
Compile the Mojo Module (`libsigmoid_mojo.so`) The Mojo code provides the core processing logic and kernels. ```bash cd ~/code/darktable/mojo -pixi run mojo build -I . iop/sigmoid/lib.mojo --emit shared-lib -o libsigmoid_mojo.so +make build ``` The resulting `libsigmoid_mojo.so` contains the following exported symbols used by the C bridge: @@ -17,45 +15,25 @@ The resulting `libsigmoid_mojo.so` contains the following exported symbols used - `sigmoid_mojo_rgb_ratio` - `sigmoid_mojo_per_channel` -## 2. Compile the C-Bridge Plugin (`sigmoid.c`) +## 2. Compile the C-Bridge Plugin (`libsigmoid.so`) The C part (`src/iop/sigmoid.c`) handles the darktable user interface and parameter management. It loads the Mojo shared library at runtime. -### Step-by-Step Build - -**A. Generate Introspection Code:** -Use darktable's introspection tool to generate boilerplate for the plugin parameters. ```bash -perl tools/introspection/parser.pl src/ src/iop/sigmoid.c /tmp/introspection_sigmoid.c +./build_sigmoid_iop.sh ``` -**B. Compile and Link:** -Compile the generated code and link it into a shared library. - -```bash -# Compilation -gcc -O3 -march=native -fPIC -fopenmp \ - $(pkg-config --cflags gtk+-3.0 glib-2.0 lcms2) \ - -Isrc -Isrc/iop -Ibuild/bin -include common/module_api.h -include iop/iop_api.h \ - -c /tmp/introspection_sigmoid.c -o /tmp/introspection_sigmoid.o - -# Linking -gcc -shared -fPIC -fopenmp /tmp/introspection_sigmoid.o \ - -L/usr/lib/darktable -ldarktable -lm -lgomp \ - -o libsigmoid.so -``` -## 3. Deployment +## 3. Installation Both shared libraries must be placed in the darktable plugins directory so that darktable can find the plugin and the plugin can find the Mojo library. 
```bash # Copy the Mojo library -sudo cp mojo/libsigmoid_mojo.so /usr/lib/darktable/plugins/ +sudo cp libsigmoid_mojo.so /usr/lib/darktable/plugins/ # Copy the C bridge plugin sudo cp libsigmoid.so /usr/lib/darktable/plugins/ ``` -> [!IMPORTANT] > The C code in `src/iop/sigmoid.c` uses `dlopen("libsigmoid_mojo.so", RTLD_LAZY | RTLD_LOCAL)` in `init_global` to load the Mojo module. From ecc268cb6cfb06dc6ae0e95f0fd06b8b31ae925f Mon Sep 17 00:00:00 2001 From: maxchisto Date: Thu, 16 Apr 2026 15:55:48 -0700 Subject: [PATCH 6/6] restore submodules --- src/external/OpenCL/.gitignore | 0 src/external/lua-scripts/.gitignore | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/external/OpenCL/.gitignore create mode 100644 src/external/lua-scripts/.gitignore diff --git a/src/external/OpenCL/.gitignore b/src/external/OpenCL/.gitignore new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/external/lua-scripts/.gitignore b/src/external/lua-scripts/.gitignore new file mode 100644 index 0000000000..e69de29bb2