8 changes: 4 additions & 4 deletions .github/workflows/test.yml
@@ -676,8 +676,8 @@ jobs:
env:
AMD: 0
run: |
-PYTHONPATH=. DEV=NULL::gfx1100 python extra/mmapeak/mmapeak.py
-PYTHONPATH=. DEV=NULL::gfx1201 python3 -m pytest -n=auto test/testextra/test_tk.py test/backend/test_asm_gemm.py
+PYTHONPATH=. DEV=NULL:HIP:gfx1100 python extra/mmapeak/mmapeak.py
+PYTHONPATH=. DEV=NULL:HIP:gfx950 python3 -m pytest -n=auto test/testextra/test_tk.py test/backend/test_asm_gemm.py
- name: Run matmul on MOCKKFD
run: |
PYTHONPATH="." DEV=MOCKKFD+AMD N=256 python3 extra/gemm/amd_asm_matmul.py
@@ -773,7 +773,7 @@ jobs:
cuda: 'true'
ocelot: 'true'
- name: Set env
run: printf "${{ matrix.backend == 'ptx' && 'DEV=MOCK+CUDA:PTX' || matrix.backend == 'nv' && 'DEV=MOCKNVK+NV\nSKIP_SLOW_TEST=1' }}" >> $GITHUB_ENV
run: printf "${{ matrix.backend == 'ptx' && 'DEV=MOCK+CUDA:PTX' || matrix.backend == 'nv' && 'DEV=MOCK+NV\nSKIP_SLOW_TEST=1' }}" >> $GITHUB_ENV
- name: Check Device.DEFAULT and print some source
run: |
python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['CUDA','NV'], Device.DEFAULT"
@@ -870,7 +870,7 @@ jobs:
python -m pytest -n=auto test/device/test_hcq.py test/test_tiny.py test/device/test_amd_llvm.py --durations=20
- name: Run pytest (ptx)
env:
DEV: "MOCKNVK+NV:PTX"
DEV: "MOCK+NV:PTX"
FORWARD_ONLY: 1
# TODO: failing due to library loading error
CAPTURE_PROCESS_REPLAY: 0
3 changes: 1 addition & 2 deletions examples/tools/gpuburn.py
@@ -1,7 +1,6 @@
from tinygrad import Tensor, Device, TinyJit, dtypes
from tinygrad.helpers import getenv

-GPUS = getenv("GPUS", 4) # TODO: expose a way in tinygrad to access this
+GPUS = Device[Device.DEFAULT].count()
N = 6144

@TinyJit
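The query replaces the hardcoded `GPUS` env var above. A minimal consumer sketch under the new API (the per-device loop is illustrative, not from the PR; device strings use tinygrad's usual `BACKEND:index` form):

```python
from tinygrad import Device, Tensor

GPUS = Device[Device.DEFAULT].count()
# place one tensor on each visible accelerator: "AMD:0", "AMD:1", ...
ts = [Tensor.ones(16, device=f"{Device.DEFAULT}:{i}").realize() for i in range(GPUS)]
```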
2 changes: 1 addition & 1 deletion test/amd/test_sqtt_encoder.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""Tests for SQTT encoder: verifies the emulator produces correct SQTT traces for known kernels.

-Run with: DEV=MOCKKFD+AMD python -m pytest test/amd/test_sqtt_encoder.py -v
+Run with: DEV=MOCK+AMD python -m pytest test/amd/test_sqtt_encoder.py -v
"""
import ctypes, unittest
from tinygrad.helpers import Context
9 changes: 9 additions & 0 deletions test/backend/test_device.py
@@ -0,0 +1,9 @@
import unittest
from tinygrad import Device

class TestDeviceCount(unittest.TestCase):
def test_count(self):
self.assertGreaterEqual(Device[Device.DEFAULT].count(), 1)

if __name__ == "__main__":
unittest.main()
6 changes: 2 additions & 4 deletions test/backend/test_ops.py
@@ -3298,13 +3298,11 @@ def test_one_hot(self):
data = [1, 2, 4]
helper_test_op([], lambda: torch.nn.functional.one_hot(torch.tensor(data), 6).type(torch.int32),
lambda: Tensor(data).one_hot(6), forward_only=True)
-helper_test_op([], lambda: torch.nn.functional.one_hot(torch.tensor(data)).type(torch.int32),
-lambda: Tensor(data).one_hot(), forward_only=True)
+# like jax.nn.one_hot, num_classes must be non-negative (torch accepts -1 for auto-inference, we don't)
+with self.assertRaises(ValueError): Tensor(data).one_hot(-1)
data = [[[1, 2, 3], [0, 3, 5]], [[1, 2, 3], [0, 3, 5]]]
helper_test_op([], lambda: torch.nn.functional.one_hot(torch.tensor(data), 8).type(torch.int32),
lambda: Tensor(data).one_hot(8), forward_only=True)
-helper_test_op([], lambda: torch.nn.functional.one_hot(torch.tensor(data)).type(torch.int32),
-lambda: Tensor(data).one_hot(), forward_only=True)

def test_masked_fill(self):
helper_test_op([(32,10)], lambda x: x.masked_fill((x>0.1).detach(), -math.inf))
10 changes: 5 additions & 5 deletions test/mockgpu/amd/README
@@ -20,17 +20,17 @@ test_llvm.py tests asm/disasm on the LLVM tests, confirming it behaves the same

tinygrad's dtype tests should pass with and without LLVM. they run in about 12 seconds.

-`DEV=MOCKKFD+AMD pytest -n=12 test/backend/test_dtype_alu.py test/backend/test_dtype.py`
-`DEV=MOCKKFD+AMD:LLVM pytest -n=12 test/backend/test_dtype_alu.py test/backend/test_dtype.py`
+`DEV=MOCK+AMD pytest -n=12 test/backend/test_dtype_alu.py test/backend/test_dtype.py`
+`DEV=MOCK+AMD:LLVM pytest -n=12 test/backend/test_dtype_alu.py test/backend/test_dtype.py`

The ops tests also pass, but they are very slow, so you should run them one at a time.

-`SKIP_SLOW_TEST=1 DEV=MOCKKFD+AMD pytest -n=12 test/backend/test_ops.py`
-`SKIP_SLOW_TEST=1 DEV=MOCKKFD+AMD:LLVM pytest -n=12 test/backend/test_ops.py`
+`SKIP_SLOW_TEST=1 DEV=MOCK+AMD pytest -n=12 test/backend/test_ops.py`
+`SKIP_SLOW_TEST=1 DEV=MOCK+AMD:LLVM pytest -n=12 test/backend/test_ops.py`

When something is caught by main tinygrad tests, a local regression test should be added to `test/amd`.
While working with tinygrad, you can dump the assembly with `DEBUG=7`. These tests all pass on real hardware.
-If a test is failing with `DEV=MOCKKFD+AMD` it's because an instruction is emulated incorrectly.
+If a test is failing with `DEV=MOCK+AMD`, it's because an instruction is emulated incorrectly.
You can test with just `DEV=AMD` on real hardware; if it works there, the bug is in the emulator.
IMPORTANT: if a test is failing in the emulator, it's an instruction bug. Use DEBUG=7, get the instructions, and debug.

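As a concrete example of the `DEBUG=7` workflow described in the README above, any small kernel works; something like:

`DEBUG=7 DEV=MOCK+AMD python3 -c "from tinygrad import Tensor; (Tensor.ones(16)+1).realize()"`

prints the generated AMD assembly while running under the emulator.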
4 changes: 4 additions & 0 deletions test/mockgpu/cuda/cuda.py
@@ -169,3 +169,7 @@ def cuGetErrorString(error: int, pStr) -> int:
# Set the pointer to point to our error string buffer
pStr._obj.value = ctypes.cast(buf, ctypes.POINTER(ctypes.c_char))
return orig_cuda.CUDA_SUCCESS

def cuDeviceGetCount(count) -> int:
count._obj.value = 1
return orig_cuda.CUDA_SUCCESS
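The mock follows the CUDA driver API's out-parameter convention (`CUresult cuDeviceGetCount(int* count)`), writing through the `_obj` attribute that `ctypes.byref` arguments carry. A runnable sketch of the same pattern (the demo function is hypothetical):

```python
import ctypes

def device_get_count_demo(count_ptr) -> int:
  # same shape as the mock above: write through the pointer, return a status code
  count_ptr._obj.value = 1
  return 0  # CUDA_SUCCESS

n = ctypes.c_int()
device_get_count_demo(ctypes.byref(n))
print(n.value)  # 1
```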
6 changes: 3 additions & 3 deletions test/mockgpu/mockgpu.py
@@ -11,8 +11,8 @@
libc.mmap.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_long]
libc.mmap.restype = ctypes.c_void_p

-drivers = [cls() for t in DEV.value if (cls:={"MOCKPCI+AMD": AMDriver, "MOCKKFD+AMD": AMDDriver, "MOCKUSB+AMD": AMUSBDriver,
-"MOCKNVK+NV": NVDriver}.get(f"{t.interface}+{t.device}"))]
+drivers = [cls() for t in DEV.value if (cls:={"MOCKPCI+AMD": AMDriver, "MOCKKFD+AMD": AMDDriver, "MOCK+AMD": AMDDriver, "MOCKUSB+AMD": AMUSBDriver,
+"MOCK+NV": NVDriver}.get(f"{t.interface}+{t.device}"))]
tracked_fds = {}

original_memoryview = builtins.memoryview
@@ -67,7 +67,7 @@ def __del__(self):
if self.fd in tracked_fds:
tracked_fds[self.fd].close(self.fd)
tracked_fds.pop(self.fd)
-else: os.close(self.fd)
+elif self.fd is not None: os.close(self.fd)

def ioctl(self, request, arg):
if self.fd in tracked_fds:
7 changes: 7 additions & 0 deletions test/null/test_device.py
@@ -38,6 +38,13 @@ def test_nonexistent_iface(self):
self.assertNotEqual(result.returncode, 0)
self.assertIn(b"did you mean: 'USB'", result.stderr)

@unittest.skipIf(Device.DEFAULT != "AMD", "only run on AMD")
def test_dev_id_out_of_range(self):
result = subprocess.run(['python3', '-c', 'from tinygrad import Device; Device[Device.DEFAULT]'],
env={**os.environ, "DEV":":99+AMD"}, capture_output=True)
self.assertNotEqual(result.returncode, 0)
self.assertIn(b"invalid visibility filter", result.stderr)

def test_lowercase_canonicalizes(self):
device = Device.DEFAULT
with Context(DEV=device.lower()):
27 changes: 26 additions & 1 deletion test/null/test_tensor_uop_mixin.py
@@ -62,6 +62,11 @@ def test_cumsum_non_last(self): _check(self, _t(3, 4), lambda x: x.cumsum(0))
def test_cumsum_large(self): _check(self, _t(600), lambda x: x.cumsum()) # exercises _split_cumalu
def test_cumprod(self): _check(self, _t(4), lambda x: x.cumprod(0))

class TestTensorUOpOneHot(unittest.TestCase):
def test_one_hot(self):
t = _t(5)
self.assertIs(_strip_unique(t.one_hot(5).uop), _strip_unique(t.uop.one_hot(5)))

class TestTensorUOpCat(unittest.TestCase):
def test_cat_dim0(self): _check(self, _t(2, 3), lambda x: x.cat(x, dim=0))
def test_cat_dim1(self): _check(self, _t(2, 3), lambda x: x.cat(x, dim=1))
@@ -136,7 +141,7 @@ def test_empty_direct_singleton_tuple_device(self):
u = UOp.empty((4,), dtype=dtypes.float32, device=("NULL:0",), axis=0)
self.assertEqual((u.shape, u.device, u.axis), ((4,), "NULL", None))

-class TestTensorUOpFull(unittest.TestCase):
+class TestTensorUOpCreation(unittest.TestCase):
def test_full(self):
self.assertIs(_strip_unique(Tensor.full((2, 3), 42).uop), _strip_unique(UOp.full((2, 3), 42)))
def test_full_kwargs(self):
@@ -159,6 +164,26 @@ def test_arange_empty(self):
self.assertIs(_strip_unique(Tensor.arange(5, 5).uop), _strip_unique(UOp.arange(5, 5)))
def test_arange_step(self):
self.assertIs(_strip_unique(Tensor.arange(5, 10, 2).uop), _strip_unique(UOp.arange(5, 10, 2)))
def test_linspace(self):
self.assertIs(_strip_unique(Tensor.linspace(0, 10, 5).uop), _strip_unique(UOp.linspace(0, 10, 5)))
def test_linspace_one_step(self):
self.assertIs(_strip_unique(Tensor.linspace(5, 10, 1).uop), _strip_unique(UOp.linspace(5, 10, 1)))
def test_eye(self):
self.assertIs(_strip_unique(Tensor.eye(3).uop), _strip_unique(UOp.eye(3)))
def test_eye_rect(self):
self.assertIs(_strip_unique(Tensor.eye(2, 4).uop), _strip_unique(UOp.eye(2, 4)))
def test_triu(self):
t = _t(3, 4)
self.assertIs(_strip_unique(t.triu().uop), _strip_unique(t.uop.triu()))
def test_triu_diagonal(self):
t = _t(3, 4)
self.assertIs(_strip_unique(t.triu(diagonal=1).uop), _strip_unique(t.uop.triu(diagonal=1)))
def test_tril(self):
t = _t(3, 4)
self.assertIs(_strip_unique(t.tril().uop), _strip_unique(t.uop.tril()))
def test_tril_diagonal(self):
t = _t(3, 4)
self.assertIs(_strip_unique(t.tril(diagonal=-1).uop), _strip_unique(t.uop.tril(diagonal=-1)))

if __name__ == "__main__":
unittest.main()
2 changes: 1 addition & 1 deletion test/null/test_viz.py
@@ -924,7 +924,7 @@ def custom_empty_prg(B:UOp, A:UOp) -> UOp:
self.assertIn("TINY", times)
self.assertIn("NULL", times)
with Context(DEBUG=3):
-json_lines = run_cli("--rewrites-path", str(r), "--profile-path", str(p), "-p", "-s", "ALL", "--jsonl")
+json_lines = run_cli("--rewrites-path", str(r), "--profile-path", str(p), "-p", "-s", "ALL", "--json")
for line in json_lines.split("\n"): _ = json.loads(line)

if __name__ == "__main__":
1 change: 1 addition & 0 deletions test/testextra/test_tk.py
@@ -15,6 +15,7 @@ def assert_allclose(cmp:Tensor, ref:Tensor, **kwargs) -> None:
if Device.DEFAULT == "NULL": Tensor.realize(cmp, ref)
else: np.testing.assert_allclose(cmp.numpy(), ref.numpy(), **kwargs)

@unittest.skip("TODO: broken after ranges on store instead of after")
class TestTK(unittest.TestCase):
def setUp(self):
arch = Device[Device.DEFAULT].renderer.target.arch
4 changes: 2 additions & 2 deletions test/unit/test_disk_tensor.py
@@ -447,13 +447,13 @@ def test_disk_device_reuse(self):
# get the DiskDevice and check internal state
disk_device = Device[f"DISK:{fn}"]
assert isinstance(disk_device, DiskDevice)
-assert disk_device.count == 1
+assert disk_device.refcount == 1
assert hasattr(disk_device, "mem")
first_fd = disk_device.fd
# create second tensor on same file - should reuse the device, not re-open
t2 = Tensor.empty(64, device=f"disk:{fn}", dtype=dtypes.uint8)
t2.to("CPU").realize()
-assert disk_device.count == 2
+assert disk_device.refcount == 2
assert disk_device.fd == first_fd, "file descriptor changed - file was unnecessarily re-opened"
# verify data is correct
np.testing.assert_equal(t1.numpy(), np.arange(128, dtype=np.uint8))
6 changes: 6 additions & 0 deletions tinygrad/device.py
@@ -295,6 +295,12 @@ def _select_renderer(self) -> Renderer:
return select_first_inited(select_by_name(self.renderers, self._renderer_name, t.renderer, f"{self.device} has no renderer {t.renderer!r}"),
f"No renderer for {self.device} is available", self.cached_renderer, target=t)

def count(self) -> int:
"""
Returns the number of physical accelerators available to the runtime.
"""
return 1

def synchronize(self):
"""
Synchronize all pending operations on the device.
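The base method returns 1; backends that can enumerate hardware override it (see `KFDIface.count` in `ops_amd.py` below). A minimal sketch of the override pattern, assuming the method lands on the `Compiled` base class as the surrounding context suggests (the subclass is hypothetical):

```python
from tinygrad.device import Compiled

class FourDeviceBackend(Compiled):  # hypothetical backend
  def count(self) -> int:
    return 4  # a real backend would enumerate its visible devices here
```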
98 changes: 98 additions & 0 deletions tinygrad/mixin/__init__.py
@@ -114,6 +114,83 @@ def arange(cls, start, stop=None, step=1, **kwargs) -> Self:
if (output_len:=ceildiv(stop-start, step)) <= 0: return cls.full((0,), 0, dtype=dtype, **kwargs)
return (cls.full((output_len,), step, dtype=dtype, **kwargs)._cumalu(0, Ops.ADD) + (start - step)).cast(dtype)

@classmethod
def linspace(cls, start:int|float, stop:int|float, steps:int, **kwargs) -> Self:
"""
Returns a 1-D tensor of `steps` evenly spaced values from `start` to `stop`, inclusive.

You can pass in `dtype` and `device` keyword arguments to control the data type and device of the tensor.
Additionally, all other keyword arguments are passed to the constructor of the tensor.

```python exec="true" source="above" session="tensor" result="python"
print(Tensor.linspace(0, 10, 5).numpy())
```
```python exec="true" source="above" session="tensor" result="python"
print(Tensor.linspace(-1, 1, 5).numpy())
```
"""
if steps < 0: raise ValueError("number of steps must be non-negative")
if (dtype := to_dtype(kwargs.pop("dtype", dtypes.default_float))) == dtypes.bool: raise ValueError("linspace with bool dtype is not supported")
if steps == 1: return cls.full((1,), start, dtype=dtype, **kwargs)
return (start + cls.arange(steps, dtype=dtypes.default_float, **kwargs) * ((stop - start) / (steps - 1))).cast(dtype)

@classmethod
def eye(cls, n:int, m:int|None=None, dtype:DTypeLike|None=None, device:str|tuple[str, ...]|None=None) -> Self:
m_ = n if m is None else m
if n < 0 or m_ < 0: raise ValueError(f"cannot have negative {n=}, {m_=}")
out_dtype = to_dtype(dtype) if dtype is not None else dtypes.default_float
return cls.arange(n, device=device).unsqueeze(-1).eq(cls.arange(m_, device=device)).cast(out_dtype)

@classmethod
def _tri(cls, r:sint, c:sint, diagonal=0, device:str|tuple[str, ...]|None=None) -> Self:
return cls.arange(r, device=device).unsqueeze(-1) + diagonal <= cls.arange(c, device=device)

def triu(self, diagonal:sint=0) -> Self:
"""
Returns the upper triangular part of the tensor, the other elements are set to 0.

The argument `diagonal` determines which diagonal is on the boundary. `diagonal = 0` means the main diagonal.
Positive `diagonal` means above the main diagonal, and negative `diagonal` means below the main diagonal.

```python exec="true" source="above" session="tensor" result="python"
t = Tensor([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
print(t.numpy())
```
```python exec="true" source="above" session="tensor" result="python"
print(t.triu(diagonal=0).numpy())
```
```python exec="true" source="above" session="tensor" result="python"
print(t.triu(diagonal=1).numpy())
```
```python exec="true" source="above" session="tensor" result="python"
print(t.triu(diagonal=-1).numpy())
```
"""
return self._tri(self.shape[-2], self.shape[-1], diagonal, self.device).where(self, self.zeros_like())

def tril(self, diagonal:sint=0) -> Self:
"""
Returns the lower triangular part of the tensor, the other elements are set to 0.

The argument `diagonal` determines which diagonal is on the boundary. `diagonal = 0` means the main diagonal.
Positive `diagonal` means above the main diagonal, and negative `diagonal` means below the main diagonal.

```python exec="true" source="above" session="tensor" result="python"
t = Tensor([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
print(t.numpy())
```
```python exec="true" source="above" session="tensor" result="python"
print(t.tril(diagonal=0).numpy())
```
```python exec="true" source="above" session="tensor" result="python"
print(t.tril(diagonal=1).numpy())
```
```python exec="true" source="above" session="tensor" result="python"
print(t.tril(diagonal=-1).numpy())
```
"""
return self._tri(self.shape[-2], self.shape[-1], diagonal+1, self.device).where(self.zeros_like(), self)

def _pad_constant(self, pX, value:float) -> Self:
# shrink first for negative pads, then pad with only non-negative values
pX = tuple((0, 0) if p is None else p for p in pX)
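A worked sketch of the creation ops added above (plain Python, mirroring the broadcasts):

```python
# linspace(0, 10, 5): start + k * (stop - start) / (steps - 1) for k in 0..steps-1
print([0 + k * (10 - 0) / (5 - 1) for k in range(5)])  # [0.0, 2.5, 5.0, 7.5, 10.0]

# _tri(3, 4, diagonal=1): mask[i][j] = (i + diagonal <= j), True on and above
# the shifted diagonal. triu keeps `self` where the mask is True; tril passes
# diagonal+1 and swaps the where() branches to keep the complement.
r, c, diagonal = 3, 4, 1
print([[int(i + diagonal <= j) for j in range(c)] for i in range(r)])
# [[0, 1, 1, 1], [0, 0, 1, 1], [0, 0, 0, 1]]
```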
@@ -498,6 +575,27 @@ def cumprod(self, axis:int) -> Self:
"""
return self._split_cumalu(axis, Ops.MUL)

# helper function commonly used for indexing
def _one_hot_along_dim(self, num_classes:sint, dim:int=-1) -> Self:
from tinygrad.uop.ops import sint_to_uop
if not dtypes.is_int(self.dtype): raise RuntimeError(f"_one_hot_along_dim expects int index tensor, getting {self.dtype}")
offset = self.ndim - self._resolve_dim(dim) - 1
dt = dtypes.int64 if sint_to_uop(num_classes).overflows(dtypes.int32) else dtypes.int32
return self.eq(type(self).arange(num_classes, dtype=dt, device=self.device).reshape((num_classes,) + (1,) * offset))

def one_hot(self, num_classes:int) -> Self:
"""
Converts `self` to a one-hot tensor.

```python exec="true" source="above" session="tensor" result="python"
t = Tensor([0, 1, 3, 3, 4])
print(t.one_hot(5).numpy())
```
"""
if not dtypes.is_int(self.dtype): raise RuntimeError(f"expect integer dtype, getting {self.dtype=}")
if num_classes < 0: raise ValueError(f"num_classes must be non-negative, got {num_classes}")
return self[..., None]._one_hot_along_dim(num_classes).where(1, 0)

# ***** functional nn ops *****

def linear(self, weight:Self, bias:Self|None=None, dtype:DTypeLike|None=None) -> Self:
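Spelled out, `one_hot` unsqueezes the index tensor, compares it against `arange(num_classes)`, and materializes the boolean mask with `where`. A small sketch of the same steps (dtype handling simplified; `_one_hot_along_dim` additionally picks int32 or int64 based on overflow):

```python
from tinygrad import Tensor

t = Tensor([0, 1, 3, 3, 4])
mask = t[:, None].eq(Tensor.arange(5, dtype=t.dtype))  # (5, 1) vs (5,) broadcasts to (5, 5)
print(mask.where(1, 0).numpy())  # identical to t.one_hot(5).numpy()
```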
4 changes: 4 additions & 0 deletions tinygrad/mixin/movement.py
@@ -18,6 +18,10 @@ def _mop(self, op: Ops, arg) -> Self:
def shape(self) -> tuple[sint, ...]:
raise NotImplementedError

@property
def device(self) -> str|tuple[str, ...]:
raise NotImplementedError

# great functions you get!
@property
def ndim(self) -> int:
11 changes: 8 additions & 3 deletions tinygrad/runtime/ops_amd.py
@@ -695,6 +695,7 @@ class KFDIface:
kfd:FileIOInterface|None = None
event_page:HCQBuffer|None = None
gpus:list[FileIOInterface] = []
count:int = 0

def _is_usable_gpu(self, gpu_id):
with contextlib.suppress(OSError): return int(gpu_id.read()) != 0
@@ -710,6 +711,7 @@ def __init__(self, dev, device_id):
KFDIface.kfd = FileIOInterface("/dev/kfd", os.O_RDWR)
gpus = [g for g in FileIOInterface(kfd_topo_path).listdir() if self._is_usable_gpu(FileIOInterface(f"{kfd_topo_path}/{g}/gpu_id"))]
KFDIface.gpus = hcq_filter_visible_devices(sorted(gpus, key=lambda x: int(x.split('/')[-1])), "AMD")
KFDIface.count = len(KFDIface.gpus)

if device_id >= len(KFDIface.gpus): raise RuntimeError(f"No device found for {device_id}. Requesting more devices than the system has?")

@@ -910,6 +912,8 @@ def on_device_hang(self):
def device_fini(self): self.dev_impl.fini()

class USBIface(PCIIface):
count = 1 # TODO: support multiple usbgpus, see usb.py

def __init__(self, dev, dev_id): # pylint: disable=super-init-not-called
self.dev, self.pci_dev, self.vram_bar = dev, USBPCIDevice(dev.__class__.__name__[:2], f"usb:{dev_id}"), 0
self.dev_impl = AMDev(self.pci_dev)
@@ -941,15 +945,16 @@ def create_queue(self, queue_type, ring, gart, rptr, wptr, eop_buffer=None, cwsr

def sleep(self, timeout): pass

-def mock_iface(iface): return type(f"MOCK{iface.__name__}", (iface,), {})

class AMDDevice(HCQCompiled):
def is_am(self) -> bool: return isinstance(self.iface, (PCIIface, USBIface))
def is_usb(self) -> bool: return isinstance(self.iface, USBIface)

def __init__(self, device:str=""):
self.device_id = int(device.split(":")[1]) if ":" in device else 0
-self.iface = self._select_iface(KFDIface, PCIIface, USBIface, mock_iface(KFDIface), mock_iface(PCIIface), mock_iface(USBIface))

+def mock(iface, name=None): return type(name or f"MOCK{iface.__name__}", (iface,), {})
+self.iface = self._select_iface(KFDIface, PCIIface, USBIface, mock(KFDIface, "MOCKIface"), mock(KFDIface), mock(PCIIface), mock(USBIface))

self.target:tuple[int, ...] = ((trgt:=self.iface.props['gfx_target_version']) // 10000, (trgt // 100) % 100, trgt % 100)
self.arch = "gfx%d%x%x" % self.target
if self.target < (9,4,2) or self.target >= (13,0,0): raise RuntimeError(f"Unsupported arch: {self.arch}")
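The rewritten `mock()` helper leans on the three-argument `type(name, bases, dict)` form to mint a named subclass at runtime; isolated, the idiom looks like this (the demo class is a stand-in):

```python
class KFDIfaceDemo: pass  # stand-in for the real iface class

def mock(iface, name=None): return type(name or f"MOCK{iface.__name__}", (iface,), {})

m = mock(KFDIfaceDemo)
assert m.__name__ == "MOCKKFDIfaceDemo" and issubclass(m, KFDIfaceDemo)
```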