From bb053fbe689fb32b7004833a7c56ccd7c473eaaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Tue, 11 Nov 2025 14:45:20 +0000 Subject: [PATCH 1/3] Allow dashes in kernel names This allows us to upload kernels with dashes in their names without setting the repo ID in `general.hub`. --- build2cmake/src/config/v2.rs | 7 +++++++ build2cmake/src/main.rs | 4 +++- build2cmake/src/torch/cpu.rs | 4 ++-- build2cmake/src/torch/cuda.rs | 4 ++-- build2cmake/src/torch/metal.rs | 2 +- build2cmake/src/torch/universal.rs | 4 ++-- build2cmake/src/torch/xpu.rs | 4 ++-- examples/silu-and-mul-universal/build.toml | 2 +- lib/build.nix | 4 ++-- lib/torch-extension/arch.nix | 4 +++- lib/torch-extension/no-arch.nix | 8 ++++++-- 11 files changed, 31 insertions(+), 16 deletions(-) diff --git a/build2cmake/src/config/v2.rs b/build2cmake/src/config/v2.rs index ecbdd9ec..07fe2a1a 100644 --- a/build2cmake/src/config/v2.rs +++ b/build2cmake/src/config/v2.rs @@ -56,6 +56,13 @@ pub struct General { pub hub: Option, } +impl General { + /// Name of the kernel as a Python extension. + pub fn python_name(&self) -> String { + self.name.replace("-", "_") + } +} + #[derive(Debug, Deserialize, Serialize)] #[serde(deny_unknown_fields, rename_all = "kebab-case")] pub struct Hub { diff --git a/build2cmake/src/main.rs b/build2cmake/src/main.rs index 528b37ec..ca419389 100644 --- a/build2cmake/src/main.rs +++ b/build2cmake/src/main.rs @@ -344,7 +344,9 @@ fn clean( // Clean up empty directories let dirs_to_check = [ target_dir.join("cmake"), - target_dir.join("torch-ext").join(&build.general.name), + target_dir + .join("torch-ext") + .join(build.general.python_name()), target_dir.join("torch-ext"), ]; diff --git a/build2cmake/src/torch/cpu.rs b/build2cmake/src/torch/cpu.rs index bad4ea38..a2ef77d6 100644 --- a/build2cmake/src/torch/cpu.rs +++ b/build2cmake/src/torch/cpu.rs @@ -26,7 +26,7 @@ pub fn write_torch_ext_cpu( let mut file_set = FileSet::default(); - let ops_name = kernel_ops_identifier(&target_dir, &build.general.name, ops_id); + let ops_name = kernel_ops_identifier(&target_dir, &build.general.python_name(), ops_id); write_cmake( env, @@ -45,7 +45,7 @@ pub fn write_torch_ext_cpu( &mut file_set, )?; - write_ops_py(env, &build.general.name, &ops_name, &mut file_set)?; + write_ops_py(env, &build.general.python_name(), &ops_name, &mut file_set)?; write_pyproject_toml(env, &mut file_set)?; diff --git a/build2cmake/src/torch/cuda.rs b/build2cmake/src/torch/cuda.rs index 267fe576..0a189104 100644 --- a/build2cmake/src/torch/cuda.rs +++ b/build2cmake/src/torch/cuda.rs @@ -38,7 +38,7 @@ pub fn write_torch_ext_cuda( let mut file_set = FileSet::default(); - let ops_name = kernel_ops_identifier(&target_dir, &build.general.name, ops_id); + let ops_name = kernel_ops_identifier(&target_dir, &build.general.python_name(), ops_id); write_cmake( env, @@ -58,7 +58,7 @@ pub fn write_torch_ext_cuda( &mut file_set, )?; - write_ops_py(env, &build.general.name, &ops_name, &mut file_set)?; + write_ops_py(env, &build.general.python_name(), &ops_name, &mut file_set)?; write_pyproject_toml(env, &mut file_set)?; diff --git a/build2cmake/src/torch/metal.rs b/build2cmake/src/torch/metal.rs index 4b1edcf2..0d6198b8 100644 --- a/build2cmake/src/torch/metal.rs +++ b/build2cmake/src/torch/metal.rs @@ -47,7 +47,7 @@ pub fn write_torch_ext_metal( &mut file_set, )?; - write_ops_py(env, &build.general.name, &ops_name, &mut file_set)?; + write_ops_py(env, &build.general.python_name(), &ops_name, &mut file_set)?; write_pyproject_toml(env, &mut file_set)?; diff --git a/build2cmake/src/torch/universal.rs b/build2cmake/src/torch/universal.rs index c92257ad..b52525a5 100644 --- a/build2cmake/src/torch/universal.rs +++ b/build2cmake/src/torch/universal.rs @@ -17,9 +17,9 @@ pub fn write_torch_ext_universal( ) -> Result { let mut file_set = FileSet::default(); - let ops_name = kernel_ops_identifier(&target_dir, &build.general.name, ops_id); + let ops_name = kernel_ops_identifier(&target_dir, &build.general.python_name(), ops_id); - write_ops_py(env, &build.general.name, &ops_name, &mut file_set)?; + write_ops_py(env, &build.general.python_name(), &ops_name, &mut file_set)?; write_pyproject_toml( env, build.torch.as_ref(), diff --git a/build2cmake/src/torch/xpu.rs b/build2cmake/src/torch/xpu.rs index 41bd4518..a515180e 100644 --- a/build2cmake/src/torch/xpu.rs +++ b/build2cmake/src/torch/xpu.rs @@ -26,7 +26,7 @@ pub fn write_torch_ext_xpu( let mut file_set = FileSet::default(); - let ops_name = kernel_ops_identifier(&target_dir, &build.general.name, ops_id); + let ops_name = kernel_ops_identifier(&target_dir, &build.general.python_name(), ops_id); write_cmake( env, @@ -45,7 +45,7 @@ pub fn write_torch_ext_xpu( &mut file_set, )?; - write_ops_py(env, &build.general.name, &ops_name, &mut file_set)?; + write_ops_py(env, &build.general.python_name(), &ops_name, &mut file_set)?; write_pyproject_toml(env, &mut file_set)?; diff --git a/examples/silu-and-mul-universal/build.toml b/examples/silu-and-mul-universal/build.toml index c7515935..826e880e 100644 --- a/examples/silu-and-mul-universal/build.toml +++ b/examples/silu-and-mul-universal/build.toml @@ -1,3 +1,3 @@ [general] -name = "silu_and_mul_universal" +name = "silu-and-mul-universal" universal = true diff --git a/lib/build.nix b/lib/build.nix index d6f77aa1..ae2e6dbe 100644 --- a/lib/build.nix +++ b/lib/build.nix @@ -145,7 +145,7 @@ rec { rev doGetKernelCheck ; - extensionName = buildToml.general.name; + kernelName = buildToml.general.name; } else extension.mkExtension { @@ -159,7 +159,7 @@ rec { rev ; - extensionName = buildToml.general.name; + kernelName = buildToml.general.name; doAbiCheck = true; }; diff --git a/lib/torch-extension/arch.nix b/lib/torch-extension/arch.nix index 0a4b4e02..507f5915 100644 --- a/lib/torch-extension/arch.nix +++ b/lib/torch-extension/arch.nix @@ -42,7 +42,7 @@ # Whether to run get-kernel-check. doGetKernelCheck ? true, - extensionName, + kernelName, # Extra dependencies (such as CUTLASS). extraDeps ? [ ], @@ -65,6 +65,8 @@ assert (buildConfig ? xpuVersion) -> xpuSupport; assert (buildConfig.metal or false) -> stdenv.hostPlatform.isDarwin; let + extensionName = builtins.replaceStrings [ "-" ] [ "_" ] kernelName; + # On Darwin, we need the host's xcrun for `xcrun metal` to compile Metal shaders. # It's not supported by the nixpkgs shim. xcrunHost = writeScriptBin "xcrunHost" '' diff --git a/lib/torch-extension/no-arch.nix b/lib/torch-extension/no-arch.nix index 783a5ac4..fc57a7d3 100644 --- a/lib/torch-extension/no-arch.nix +++ b/lib/torch-extension/no-arch.nix @@ -13,7 +13,7 @@ # Whether to run get-kernel-check. doGetKernelCheck ? true, - extensionName, + kernelName, # Revision to bake into the ops name. rev, @@ -21,8 +21,12 @@ src, }: +let + extensionName = builtins.replaceStrings [ "-" ] [ "_" ] kernelName; +in + stdenv.mkDerivation (prevAttrs: { - name = "${extensionName}-torch-ext"; + name = "${kernelName}-torch-ext"; inherit extensionName src; From ae7065f82b9b0b33f16ab2039cbf46310c5c7383 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Tue, 11 Nov 2025 15:06:20 +0000 Subject: [PATCH 2/3] CI: remove macos-13 from the matrix This was used to build x86_64 macOS kernel-abi-check packages. Building these does not make much sense anyway, since we only support ARM64 macOS in kernel-builder. --- .github/workflows/kernel_abi_python_release.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/kernel_abi_python_release.yaml b/.github/workflows/kernel_abi_python_release.yaml index d0514178..37b7ae34 100644 --- a/.github/workflows/kernel_abi_python_release.yaml +++ b/.github/workflows/kernel_abi_python_release.yaml @@ -141,8 +141,6 @@ jobs: strategy: matrix: platform: - - runner: macos-13 - target: x86_64 - runner: macos-14 target: aarch64 steps: From 0aabdf2b2bc7e65f2d26f5e998d1eef15223bd38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Tue, 11 Nov 2025 15:37:14 +0000 Subject: [PATCH 3/3] Update some of the example kernel names --- examples/cutlass-gemm/build.toml | 2 +- examples/relu-backprop-compile/build.toml | 2 +- .../relu-backprop-compile/tests/test_relu.py | 18 +++++++++--------- .../__init__.py | 0 examples/relu-compiler-flags/build.toml | 2 +- .../{relu => relu_compiler_flags}/__init__.py | 0 examples/relu-specific-torch/build.toml | 2 +- .../relu-specific-torch/tests/test_relu.py | 4 ++-- .../{relu => relu_specific_torch}/__init__.py | 0 9 files changed, 15 insertions(+), 15 deletions(-) rename examples/relu-backprop-compile/torch-ext/{relu => relu_backprop_compile}/__init__.py (100%) rename examples/relu-compiler-flags/torch-ext/{relu => relu_compiler_flags}/__init__.py (100%) rename examples/relu-specific-torch/torch-ext/{relu => relu_specific_torch}/__init__.py (100%) diff --git a/examples/cutlass-gemm/build.toml b/examples/cutlass-gemm/build.toml index 09199fe0..dc5d10c6 100644 --- a/examples/cutlass-gemm/build.toml +++ b/examples/cutlass-gemm/build.toml @@ -1,5 +1,5 @@ [general] -name = "cutlass_gemm" +name = "cutlass-gemm" universal = false [torch] diff --git a/examples/relu-backprop-compile/build.toml b/examples/relu-backprop-compile/build.toml index c9bfab3c..130b6474 100644 --- a/examples/relu-backprop-compile/build.toml +++ b/examples/relu-backprop-compile/build.toml @@ -1,5 +1,5 @@ [general] -name = "relu" +name = "relu-backprop-compile" universal = false [torch] diff --git a/examples/relu-backprop-compile/tests/test_relu.py b/examples/relu-backprop-compile/tests/test_relu.py index 450c10e2..104be7d8 100644 --- a/examples/relu-backprop-compile/tests/test_relu.py +++ b/examples/relu-backprop-compile/tests/test_relu.py @@ -5,7 +5,7 @@ import torch.nn.functional as F from torch.library import opcheck -import relu +import relu_backprop_compile def get_device(): @@ -30,21 +30,21 @@ def test_relu_forward(dtype): device = get_device() x = torch.randn(1024, 1024, dtype=dtype, device=device) expected = F.relu(x) - actual = relu.relu(x) + actual = relu_backprop_compile.relu(x) torch.testing.assert_close(expected, actual) def test_relu_gradient_numerical(): device = get_device() x = torch.randn(32, 32, dtype=torch.float64, device=device, requires_grad=True) - assert torch.autograd.gradcheck(relu.relu, x) + assert torch.autograd.gradcheck(relu_backprop_compile.relu, x) @pytest.mark.parametrize("dtype", DTYPES) def test_relu_gradient_large_tensor(dtype): device = get_device() x = torch.randn(1024, 1024, dtype=dtype, device=device, requires_grad=True) - y = relu.relu(x) + y = relu_backprop_compile.relu(x) loss = y.sum() loss.backward() @@ -69,7 +69,7 @@ def test_relu_gradient_comparison(dtype): ) x_kernel = x_data.clone().requires_grad_(True) - y_kernel = relu.relu(x_kernel) + y_kernel = relu_backprop_compile.relu(x_kernel) loss_custom = y_kernel.sum() loss_custom.backward() @@ -86,7 +86,7 @@ def test_relu_gradient_comparison(dtype): def test_relu_backward_chain(dtype): device = get_device() x = torch.randn(64, 128, dtype=dtype, device=device, requires_grad=True) - y = relu.relu(x) + y = relu_backprop_compile.relu(x) z = y * 2.0 loss = z.sum() loss.backward() @@ -115,7 +115,7 @@ def test_relu_backward_chain(dtype): def test_relu_fwd_opcheck(shape, dtype): device = get_device() x = torch.randn(shape, dtype=dtype, device=device, requires_grad=True) - opcheck(relu.ops.relu_fwd, (x,)) + opcheck(relu_backprop_compile.ops.relu_fwd, (x,)) @pytest.mark.parametrize("dtype", DTYPES) @@ -128,7 +128,7 @@ def __init__(self): self.linear = torch.nn.Linear(1024, 1024) def forward(self, x): - return relu.relu(self.linear(x)) + return relu_backprop_compile.relu(self.linear(x)) model = SimpleModel().to(device).to(dtype) compiled_model = torch.compile(model, fullgraph=True) @@ -168,7 +168,7 @@ def __init__(self): self.linear = torch.nn.Linear(16, 16) def forward(self, x): - return relu.relu(self.linear(x)) + return relu_backprop_compile.relu(self.linear(x)) model = SimpleModel().to(device).to(dtype) compiled_model = torch.compile(model, fullgraph=True) diff --git a/examples/relu-backprop-compile/torch-ext/relu/__init__.py b/examples/relu-backprop-compile/torch-ext/relu_backprop_compile/__init__.py similarity index 100% rename from examples/relu-backprop-compile/torch-ext/relu/__init__.py rename to examples/relu-backprop-compile/torch-ext/relu_backprop_compile/__init__.py diff --git a/examples/relu-compiler-flags/build.toml b/examples/relu-compiler-flags/build.toml index e99d7ae3..d595e09e 100644 --- a/examples/relu-compiler-flags/build.toml +++ b/examples/relu-compiler-flags/build.toml @@ -1,5 +1,5 @@ [general] -name = "relu" +name = "relu-compiler-flags" universal = false [torch] diff --git a/examples/relu-compiler-flags/torch-ext/relu/__init__.py b/examples/relu-compiler-flags/torch-ext/relu_compiler_flags/__init__.py similarity index 100% rename from examples/relu-compiler-flags/torch-ext/relu/__init__.py rename to examples/relu-compiler-flags/torch-ext/relu_compiler_flags/__init__.py diff --git a/examples/relu-specific-torch/build.toml b/examples/relu-specific-torch/build.toml index c9bfab3c..3db0e7e7 100644 --- a/examples/relu-specific-torch/build.toml +++ b/examples/relu-specific-torch/build.toml @@ -1,5 +1,5 @@ [general] -name = "relu" +name = "relu-specific-torch" universal = false [torch] diff --git a/examples/relu-specific-torch/tests/test_relu.py b/examples/relu-specific-torch/tests/test_relu.py index 98b292b9..4ef804d4 100644 --- a/examples/relu-specific-torch/tests/test_relu.py +++ b/examples/relu-specific-torch/tests/test_relu.py @@ -3,7 +3,7 @@ import torch import torch.nn.functional as F -import relu +import relu_specific_torch def test_relu(): @@ -12,4 +12,4 @@ def test_relu(): else: device = torch.device("cuda") x = torch.randn(1024, 1024, dtype=torch.float32, device=device) - torch.testing.assert_allclose(F.relu(x), relu.relu(x)) + torch.testing.assert_allclose(F.relu(x), relu_specific_torch.relu(x)) diff --git a/examples/relu-specific-torch/torch-ext/relu/__init__.py b/examples/relu-specific-torch/torch-ext/relu_specific_torch/__init__.py similarity index 100% rename from examples/relu-specific-torch/torch-ext/relu/__init__.py rename to examples/relu-specific-torch/torch-ext/relu_specific_torch/__init__.py