Skip to content
This repository was archived by the owner on Jan 27, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .github/workflows/kernel_abi_python_release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,6 @@ jobs:
strategy:
matrix:
platform:
- runner: macos-13
target: x86_64
- runner: macos-14
target: aarch64
steps:
Expand Down
7 changes: 7 additions & 0 deletions build2cmake/src/config/v2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,13 @@ pub struct General {
pub hub: Option<Hub>,
}

impl General {
    /// Name of the kernel as a Python extension.
    ///
    /// Python module names cannot contain hyphens, so every `-` in the
    /// kebab-case kernel name is mapped to `_` to obtain a valid
    /// importable module/package name.
    pub fn python_name(&self) -> String {
        // Char pattern is the idiomatic (and slightly faster) form for a
        // single-character replacement (clippy::single_char_pattern).
        self.name.replace('-', '_')
    }
}

#[derive(Debug, Deserialize, Serialize)]
#[serde(deny_unknown_fields, rename_all = "kebab-case")]
pub struct Hub {
Expand Down
4 changes: 3 additions & 1 deletion build2cmake/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,9 @@ fn clean(
// Clean up empty directories
let dirs_to_check = [
target_dir.join("cmake"),
target_dir.join("torch-ext").join(&build.general.name),
target_dir
.join("torch-ext")
.join(build.general.python_name()),
target_dir.join("torch-ext"),
];

Expand Down
4 changes: 2 additions & 2 deletions build2cmake/src/torch/cpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ pub fn write_torch_ext_cpu(

let mut file_set = FileSet::default();

let ops_name = kernel_ops_identifier(&target_dir, &build.general.name, ops_id);
let ops_name = kernel_ops_identifier(&target_dir, &build.general.python_name(), ops_id);

write_cmake(
env,
Expand All @@ -45,7 +45,7 @@ pub fn write_torch_ext_cpu(
&mut file_set,
)?;

write_ops_py(env, &build.general.name, &ops_name, &mut file_set)?;
write_ops_py(env, &build.general.python_name(), &ops_name, &mut file_set)?;

write_pyproject_toml(env, &mut file_set)?;

Expand Down
4 changes: 2 additions & 2 deletions build2cmake/src/torch/cuda.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ pub fn write_torch_ext_cuda(

let mut file_set = FileSet::default();

let ops_name = kernel_ops_identifier(&target_dir, &build.general.name, ops_id);
let ops_name = kernel_ops_identifier(&target_dir, &build.general.python_name(), ops_id);

write_cmake(
env,
Expand All @@ -58,7 +58,7 @@ pub fn write_torch_ext_cuda(
&mut file_set,
)?;

write_ops_py(env, &build.general.name, &ops_name, &mut file_set)?;
write_ops_py(env, &build.general.python_name(), &ops_name, &mut file_set)?;

write_pyproject_toml(env, &mut file_set)?;

Expand Down
2 changes: 1 addition & 1 deletion build2cmake/src/torch/metal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ pub fn write_torch_ext_metal(
&mut file_set,
)?;

write_ops_py(env, &build.general.name, &ops_name, &mut file_set)?;
write_ops_py(env, &build.general.python_name(), &ops_name, &mut file_set)?;

write_pyproject_toml(env, &mut file_set)?;

Expand Down
4 changes: 2 additions & 2 deletions build2cmake/src/torch/universal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ pub fn write_torch_ext_universal(
) -> Result<FileSet> {
let mut file_set = FileSet::default();

let ops_name = kernel_ops_identifier(&target_dir, &build.general.name, ops_id);
let ops_name = kernel_ops_identifier(&target_dir, &build.general.python_name(), ops_id);

write_ops_py(env, &build.general.name, &ops_name, &mut file_set)?;
write_ops_py(env, &build.general.python_name(), &ops_name, &mut file_set)?;
write_pyproject_toml(
env,
build.torch.as_ref(),
Expand Down
4 changes: 2 additions & 2 deletions build2cmake/src/torch/xpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ pub fn write_torch_ext_xpu(

let mut file_set = FileSet::default();

let ops_name = kernel_ops_identifier(&target_dir, &build.general.name, ops_id);
let ops_name = kernel_ops_identifier(&target_dir, &build.general.python_name(), ops_id);

write_cmake(
env,
Expand All @@ -45,7 +45,7 @@ pub fn write_torch_ext_xpu(
&mut file_set,
)?;

write_ops_py(env, &build.general.name, &ops_name, &mut file_set)?;
write_ops_py(env, &build.general.python_name(), &ops_name, &mut file_set)?;

write_pyproject_toml(env, &mut file_set)?;

Expand Down
2 changes: 1 addition & 1 deletion examples/cutlass-gemm/build.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[general]
name = "cutlass_gemm"
name = "cutlass-gemm"
universal = false

[torch]
Expand Down
2 changes: 1 addition & 1 deletion examples/relu-backprop-compile/build.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[general]
name = "relu"
name = "relu-backprop-compile"
universal = false

[torch]
Expand Down
18 changes: 9 additions & 9 deletions examples/relu-backprop-compile/tests/test_relu.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import torch.nn.functional as F
from torch.library import opcheck

import relu
import relu_backprop_compile


def get_device():
Expand All @@ -30,21 +30,21 @@ def test_relu_forward(dtype):
device = get_device()
x = torch.randn(1024, 1024, dtype=dtype, device=device)
expected = F.relu(x)
actual = relu.relu(x)
actual = relu_backprop_compile.relu(x)
torch.testing.assert_close(expected, actual)


def test_relu_gradient_numerical():
device = get_device()
x = torch.randn(32, 32, dtype=torch.float64, device=device, requires_grad=True)
assert torch.autograd.gradcheck(relu.relu, x)
assert torch.autograd.gradcheck(relu_backprop_compile.relu, x)


@pytest.mark.parametrize("dtype", DTYPES)
def test_relu_gradient_large_tensor(dtype):
device = get_device()
x = torch.randn(1024, 1024, dtype=dtype, device=device, requires_grad=True)
y = relu.relu(x)
y = relu_backprop_compile.relu(x)
loss = y.sum()
loss.backward()

Expand All @@ -69,7 +69,7 @@ def test_relu_gradient_comparison(dtype):
)

x_kernel = x_data.clone().requires_grad_(True)
y_kernel = relu.relu(x_kernel)
y_kernel = relu_backprop_compile.relu(x_kernel)
loss_custom = y_kernel.sum()
loss_custom.backward()

Expand All @@ -86,7 +86,7 @@ def test_relu_gradient_comparison(dtype):
def test_relu_backward_chain(dtype):
device = get_device()
x = torch.randn(64, 128, dtype=dtype, device=device, requires_grad=True)
y = relu.relu(x)
y = relu_backprop_compile.relu(x)
z = y * 2.0
loss = z.sum()
loss.backward()
Expand Down Expand Up @@ -115,7 +115,7 @@ def test_relu_backward_chain(dtype):
def test_relu_fwd_opcheck(shape, dtype):
device = get_device()
x = torch.randn(shape, dtype=dtype, device=device, requires_grad=True)
opcheck(relu.ops.relu_fwd, (x,))
opcheck(relu_backprop_compile.ops.relu_fwd, (x,))


@pytest.mark.parametrize("dtype", DTYPES)
Expand All @@ -128,7 +128,7 @@ def __init__(self):
self.linear = torch.nn.Linear(1024, 1024)

def forward(self, x):
return relu.relu(self.linear(x))
return relu_backprop_compile.relu(self.linear(x))

model = SimpleModel().to(device).to(dtype)
compiled_model = torch.compile(model, fullgraph=True)
Expand Down Expand Up @@ -168,7 +168,7 @@ def __init__(self):
self.linear = torch.nn.Linear(16, 16)

def forward(self, x):
return relu.relu(self.linear(x))
return relu_backprop_compile.relu(self.linear(x))

model = SimpleModel().to(device).to(dtype)
compiled_model = torch.compile(model, fullgraph=True)
Expand Down
2 changes: 1 addition & 1 deletion examples/relu-compiler-flags/build.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[general]
name = "relu"
name = "relu-compiler-flags"
universal = false

[torch]
Expand Down
2 changes: 1 addition & 1 deletion examples/relu-specific-torch/build.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[general]
name = "relu"
name = "relu-specific-torch"
universal = false

[torch]
Expand Down
4 changes: 2 additions & 2 deletions examples/relu-specific-torch/tests/test_relu.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import torch
import torch.nn.functional as F

import relu
import relu_specific_torch


def test_relu():
Expand All @@ -12,4 +12,4 @@ def test_relu():
else:
device = torch.device("cuda")
x = torch.randn(1024, 1024, dtype=torch.float32, device=device)
torch.testing.assert_allclose(F.relu(x), relu.relu(x))
torch.testing.assert_allclose(F.relu(x), relu_specific_torch.relu(x))
2 changes: 1 addition & 1 deletion examples/silu-and-mul-universal/build.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[general]
name = "silu_and_mul_universal"
name = "silu-and-mul-universal"
universal = true
4 changes: 2 additions & 2 deletions lib/build.nix
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ rec {
rev
doGetKernelCheck
;
extensionName = buildToml.general.name;
kernelName = buildToml.general.name;
}
else
extension.mkExtension {
Expand All @@ -159,7 +159,7 @@ rec {
rev
;

extensionName = buildToml.general.name;
kernelName = buildToml.general.name;
doAbiCheck = true;
};

Expand Down
4 changes: 3 additions & 1 deletion lib/torch-extension/arch.nix
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
# Whether to run get-kernel-check.
doGetKernelCheck ? true,

extensionName,
kernelName,

# Extra dependencies (such as CUTLASS).
extraDeps ? [ ],
Expand All @@ -65,6 +65,8 @@ assert (buildConfig ? xpuVersion) -> xpuSupport;
assert (buildConfig.metal or false) -> stdenv.hostPlatform.isDarwin;

let
extensionName = builtins.replaceStrings [ "-" ] [ "_" ] kernelName;

# On Darwin, we need the host's xcrun for `xcrun metal` to compile Metal shaders.
# It's not supported by the nixpkgs shim.
xcrunHost = writeScriptBin "xcrunHost" ''
Expand Down
8 changes: 6 additions & 2 deletions lib/torch-extension/no-arch.nix
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,20 @@
# Whether to run get-kernel-check.
doGetKernelCheck ? true,

extensionName,
kernelName,

# Revision to bake into the ops name.
rev,

src,
}:

let
extensionName = builtins.replaceStrings [ "-" ] [ "_" ] kernelName;
in

stdenv.mkDerivation (prevAttrs: {
name = "${extensionName}-torch-ext";
name = "${kernelName}-torch-ext";

inherit extensionName src;

Expand Down
Loading