Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions test/null/test_attention.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ def test_half_qkv_buffers(self):
k = Tensor.ones(BS, seqlen, dim, dtype=dtypes.half).contiguous().realize()
v = Tensor.ones(BS, seqlen, dim, dtype=dtypes.half).contiguous().realize()
attn = q.scaled_dot_product_attention(k, v)
sched = attn.schedule()
sched = attn.schedule_linear()
# attention has 4 kernels now
self.assertEqual(len(sched), 4)
self.assertEqual(len(sched.src), 4)

def test_apply_rope_jit_prune(self):
def rope_fn(x_in, pos): return apply_rope(x_in, pos)
Expand Down
8 changes: 4 additions & 4 deletions test/null/test_compile_failures.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
from tinygrad import Tensor, dtypes, Device
from tinygrad.helpers import OSX, DEV
from tinygrad.device import is_dtype_supported
from tinygrad.engine.realize import get_program
from tinygrad.engine.realize import get_program, compile_linear

class TestCompileFailures(unittest.TestCase):
def compile(self, out:Tensor):
for si in out.schedule(): si.lower()
compile_linear(out.schedule_linear())

@unittest.skipUnless(is_dtype_supported(dtypes.uchar), f"no uint8 on {Device.DEFAULT}")
def test_interpolate_atari(self):
Expand All @@ -21,8 +21,8 @@ class TestDisassembly(unittest.TestCase):
@unittest.skipUnless(Device.DEFAULT in ("CPU",) and DEV.renderer not in ("LLVM", "LVP") and OSX, "m series cpus support fp16 arithmetic")
def test_float16_alu(self):
c = Tensor([1], dtype=dtypes.float16) + Tensor([1], dtype=dtypes.float16)
s = c.schedule()[-1]
p = get_program(s.ast, Device[Device.DEFAULT].renderer)
s = c.schedule_linear().src[-1]
p = get_program(s.src[0], Device[Device.DEFAULT].renderer)
lib = Device[Device.DEFAULT].compiler.compile(p.src)
out = io.StringIO()
with redirect_stdout(out): Device[Device.DEFAULT].compiler.disassemble(lib)
Expand Down
4 changes: 2 additions & 2 deletions test/null/test_const_folding.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@

def _check_ast_count(desired_count:int, t:Tensor):
# NOTE: this has side effect because everything can be scheduled only once
schedule = t.schedule()
asts = [s for s in schedule if s.ast.op is Ops.SINK]
linear = t.schedule_linear()
asts = [s for s in linear.src if s.src[0].op is Ops.SINK]
len(asts)
# NOT SUPPORTED ANYMORE
#assert len(asts) == desired_count, f"{len(asts)} != {desired_count}"
Expand Down
2 changes: 1 addition & 1 deletion test/null/test_gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def test_schedule_gc(self):
init = bufs_allocated()
x = Tensor.ones(256).contiguous().realize()
y = Tensor.ones(5, 5).contiguous()
y.schedule()
y.schedule_linear()
del x
del y
self.assertEqual(bufs_allocated()-init, 0)
Expand Down
12 changes: 6 additions & 6 deletions test/null/test_linearizer_rewrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,29 +9,29 @@ def test_reduction(self):
t = Tensor.ones((64,64), device="NULL").contiguous().realize()
out = (t*2).sum(axis=1)
with Context(SPLIT_REDUCEOP=0, DEVECTORIZE=0):
si = out.schedule()[-1]
si = out.schedule_linear().src[-1]
opts_to_apply = []
opts_to_apply.append(Opt(OptOps.UPCAST, 0, 4))
opts_to_apply.append(Opt(OptOps.UNROLL, 0, 4))
ast = si.ast.replace(arg=KernelInfo(opts_to_apply=tuple(opts_to_apply)))
ast = si.src[0].replace(arg=KernelInfo(opts_to_apply=tuple(opts_to_apply)))
prg = get_program(ast, Device["CPU"].renderer)
print(prg.src)

def test_arange(self):
out = Tensor.arange(32, device="NULL")
with Context(SPLIT_REDUCEOP=0, DEVECTORIZE=0):
si = out.schedule()[-1]
si = out.schedule_linear().src[-1]
opts_to_apply = []
opts_to_apply.append(Opt(OptOps.UPCAST, 0, 4))
ast = si.ast.replace(arg=KernelInfo(opts_to_apply=tuple(opts_to_apply)))
ast = si.src[0].replace(arg=KernelInfo(opts_to_apply=tuple(opts_to_apply)))
prg = get_program(ast, Device["CPU"].renderer)
print(prg.src)

def test_kernel_info(self):
out = Tensor.arange(4, device="NULL")
si = out.schedule()[-1]
si = out.schedule_linear().src[-1]

ast = si.ast.replace(arg=KernelInfo(opts_to_apply=()))
ast = si.src[0].replace(arg=KernelInfo(opts_to_apply=()))
prg = get_program(ast, Device["CPU"].renderer)
assert prg.applied_opts == (), f"expected no opts, got {prg}"

Expand Down
8 changes: 4 additions & 4 deletions test/null/test_process_replay.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
class TestProcessReplay(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.ast = (Tensor.empty(N, N) @ Tensor.empty(N, N)).schedule()[-1].ast
cls.ast = (Tensor.empty(N, N) @ Tensor.empty(N, N)).schedule_linear().src[-1].src[0]
cls.renderer = Device[Device.DEFAULT].renderer

def test_replay_no_opts(self):
Expand All @@ -35,9 +35,9 @@ def test_replay_with_opt(self):

def test_beam(self):
with Context(BEAM=1):
si = (Tensor.empty(N, N) @ Tensor.empty(N, N)).schedule()[-1]
p = do_to_program(si.ast, self.renderer)
good, compare, _ = replay_to_program(p, si.ast, self.renderer)
ast = (Tensor.empty(N, N) @ Tensor.empty(N, N)).schedule_linear().src[-1].src[0]
p = do_to_program(ast, self.renderer)
good, compare, _ = replay_to_program(p, ast, self.renderer)
self.assertEqual(good, compare)

if __name__ == '__main__':
Expand Down
6 changes: 3 additions & 3 deletions test/null/test_schedule.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def test_unused_var_not_in_var_vals(self):
# unused variable should not appear in var_vals even when there's other work
a = Tensor(UOp.variable("unused", 0, 10).bind(1))
b = Tensor.empty(3) + 1
_, var_vals = Tensor.schedule_with_vars(a, b)
_, var_vals = Tensor.linear_with_vars(a, b)
self.assertEqual(var_vals, {})
self.assertIsNone(a.uop.base.realized)

Expand Down Expand Up @@ -208,8 +208,8 @@ def test_realize_view_of_realized_has_empty_schedule(self):
t = Tensor.zeros((3, 3)).contiguous().realize()
v = t[1] # view - is_realized but not has_buffer_identity
assert v.uop.is_realized
sched, _ = Tensor.schedule_with_vars(v)
self.assertEqual(len(sched), 0)
linear, _ = Tensor.linear_with_vars(v)
self.assertEqual(len(linear.src), 0)

# NOTE: because empty does not have a lowered ExecItem if realize is called on a childless empty, it never gets allocated.
def test_childless_empty_never_allocates(self):
Expand Down
4 changes: 2 additions & 2 deletions test/null/test_schedule_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
from tinygrad.schedule import schedule_cache

def schedule_one():
Tensor([1]).schedule()
Tensor([1]).schedule_linear()

class TestScheduleCache(unittest.TestCase):
def test_bound_variable_var_vals(self):
v = Variable('pos', 1, 100)
x = Tensor.ones(10).contiguous().realize()

t = x + Tensor(v.bind(42))
_, var_vals = t.schedule_with_vars()
_, var_vals = t.linear_with_vars()
self.assertEqual(var_vals, {'pos': 42})

def test_disable_schedule_cache(self):
Expand Down
17 changes: 9 additions & 8 deletions test/null/test_tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,12 @@ def _find_op(self, ast: UOp, op: Ops):
for src in ast.src:
if (ret:=self._find_op(src, op)) is not None: return ret
def _schedule_render(self, a: Tensor):
schedule, _ = a.schedule_with_vars()
for s in schedule:
if s.ast.op is Ops.SINK:
renderer = Device[s.bufs[0].device].renderer
prg = get_program(s.ast, renderer)
linear, _ = a.linear_with_vars()
for si in linear.src:
ast = si.src[0]
if ast.op is Ops.SINK:
renderer = Device[si.src[1].buffer.device].renderer
prg = get_program(ast, renderer)
return prg.uops

def _assert(self, dtype: DType, a: Tensor):
Expand Down Expand Up @@ -162,9 +163,9 @@ class TestRand(unittest.TestCase):
def test_rand_large_tensor(self):
# large tensor rand (num > uint32.max) should not crash in frontend
Tensor.manual_seed(0)
Tensor.rand(2**17, 2**17).schedule()
Tensor.rand(2**17, 2**17).schedule()
Tensor.rand(2**17, 2**17).schedule()
Tensor.rand(2**17, 2**17).schedule_linear()
Tensor.rand(2**17, 2**17).schedule_linear()
Tensor.rand(2**17, 2**17).schedule_linear()

class TestTensorConstLike(unittest.TestCase):
def test_const_like_shape(self):
Expand Down
42 changes: 42 additions & 0 deletions test/null/test_tensor_uop_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,48 @@ def test_sparse_categorical_crossentropy_ignore_index(self):
t, Y = _t(2, 3).float(), Tensor([1, 2], dtype=dtypes.int32)
self.assertIs(_strip_unique(t.sparse_categorical_crossentropy(Y, ignore_index=0).uop),
_strip_unique(t.uop.sparse_categorical_crossentropy(Y.uop, ignore_index=0)))
def test_nll_loss(self):
t, Y = _t(2, 3).float().log_softmax(), Tensor([1, 2], dtype=dtypes.int32)
self.assertIs(_strip_unique(t.nll_loss(Y).uop), _strip_unique(t.uop.nll_loss(Y.uop)))
def test_nll_loss_weight(self):
t, Y, w = _t(2, 3).float().log_softmax(), Tensor([1, 2], dtype=dtypes.int32), _t(3).float()
self.assertIs(_strip_unique(t.nll_loss(Y, weight=w).uop), _strip_unique(t.uop.nll_loss(Y.uop, weight=w.uop)))
def test_nll_loss_ignore_index(self):
t, Y = _t(2, 3).float().log_softmax(), Tensor([1, 2], dtype=dtypes.int32)
self.assertIs(_strip_unique(t.nll_loss(Y, ignore_index=1).uop), _strip_unique(t.uop.nll_loss(Y.uop, ignore_index=1)))
def test_nll_loss_none_reduction(self):
t, Y = _t(2, 3).float().log_softmax(), Tensor([1, 2], dtype=dtypes.int32)
self.assertIs(_strip_unique(t.nll_loss(Y, reduction="none").uop), _strip_unique(t.uop.nll_loss(Y.uop, reduction="none")))
def test_nll_loss_weight_ignore_index(self):
t, Y, w = _t(2, 3).float().log_softmax(), Tensor([1, 2], dtype=dtypes.int32), _t(3).float()
self.assertIs(_strip_unique(t.nll_loss(Y, weight=w, ignore_index=1).uop),
_strip_unique(t.uop.nll_loss(Y.uop, weight=w.uop, ignore_index=1)))

class TestTensorUOpScatter(unittest.TestCase):
def test_scatter(self):
x, idx, src = _t(3, 4).float(), Tensor([[0, 1, 2, 0]], dtype=dtypes.int32), _t(1, 4).float()
self.assertIs(_strip_unique(x.scatter(0, idx, src).uop), _strip_unique(x.uop.scatter(0, idx.uop, src.uop)))
def test_scatter_scalar_src(self):
x, idx = _t(3, 4).float(), Tensor([[0, 1]], dtype=dtypes.int32)
self.assertIs(_strip_unique(x.scatter(1, idx, 3.14).uop), _strip_unique(x.uop.scatter(1, idx.uop, 3.14)))
# inf cannot be cast to int — this regresses if scalar src is routed through index.dtype first
def test_scatter_inf_src(self):
x, idx = _t(3, 4).float(), Tensor([[0, 1]], dtype=dtypes.int32)
self.assertIs(_strip_unique(x.scatter(1, idx, float("inf")).uop),
_strip_unique(x.uop.scatter(1, idx.uop, float("inf"))))
def test_scatter_add(self):
x, idx = _t(3, 4).float(), Tensor([[0, 1]], dtype=dtypes.int32)
self.assertIs(_strip_unique(x.scatter(1, idx, 3.14, reduce="add").uop),
_strip_unique(x.uop.scatter(1, idx.uop, 3.14, reduce="add")))
def test_scatter_multiply(self):
x, idx = _t(3, 4).float(), Tensor([[0, 1]], dtype=dtypes.int32)
self.assertIs(_strip_unique(x.scatter(1, idx, 3.14, reduce="multiply").uop),
_strip_unique(x.uop.scatter(1, idx.uop, 3.14, reduce="multiply")))
# tensor src with reduce hits the "elif reduce: raise" branch in both Tensor and UOp paths
def test_scatter_tensor_src_with_reduce_raises(self):
x, idx, src = _t(3, 4).float(), Tensor([[0, 1]], dtype=dtypes.int32), _t(1, 2).float()
with self.assertRaises(TypeError): x.scatter(1, idx, src, reduce="add")
with self.assertRaises(TypeError): x.uop.scatter(1, idx.uop, src.uop, reduce="add")

class TestTensorUOpScatterReduce(unittest.TestCase):
def _check(self, x, idx, src, **kw):
Expand Down
2 changes: 1 addition & 1 deletion test/null/test_tensor_uop_representation.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def test_mutate_add(self):
pa = a.uop
pb = b.uop
pr = ret.uop
ret.schedule()
ret.schedule_linear()
self.assertIsNot(pa, a.uop)
self.assertIsNot(pb, b.uop)
self.assertIsNot(pr, ret.uop)
Expand Down
8 changes: 4 additions & 4 deletions test/null/test_tinyfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,22 @@ class TestLoadStore(unittest.TestCase):
def test_load_shape(self):
t = Tensor(bytes(16)).fs_load(1024)
assert t.shape == (1024,), t.shape
t.schedule()
t.schedule_linear()

def test_store_shape(self):
t = Tensor.zeros(1024).fs_store()
assert t.shape == (16,), t.shape
t.schedule()
t.schedule_linear()

def test_load_large_shape(self):
t = Tensor(bytes(16)).fs_load(10_000_000)
assert t.shape == (10_000_000,), t.shape
t.schedule()
t.schedule_linear()

def test_store_large_shape(self):
t = Tensor.zeros(10_000_000).fs_store()
assert t.shape == (16,), t.shape
t.schedule()
t.schedule_linear()

if __name__ == "__main__":
unittest.main()
2 changes: 1 addition & 1 deletion test/null/test_uops.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def test_uop_variables(self):
a = UOp.variable("a", 1, 10)
uop_var = Tensor(a.bind(1))
st_var = Tensor.empty((2, 10))[:, :a.bind(1)]
_, var_vals = (uop_var+st_var).schedule_with_vars()
_, var_vals = (uop_var+st_var).linear_with_vars()
self.assertEqual(len(var_vals), 1)
self.assertEqual(list(var_vals)[0], a.expr)

Expand Down
10 changes: 5 additions & 5 deletions test/null/test_uops_stats.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import unittest
from tinygrad import Tensor
from tinygrad.helpers import GlobalCounters, DEV
from tinygrad.engine.realize import get_program
from tinygrad.engine.realize import get_program, compile_linear, estimate_uop
from tinygrad.renderer import ProgramSpec
from tinygrad.renderer import Estimates
from tinygrad.uop.ops import Ops, UOp
Expand All @@ -18,8 +18,8 @@ def flops_mem(uops, ignore_indexing=False):
# **************** new FlopCounter ****************

def get_stats(x:Tensor):
si = x.schedule()[-1].lower()
return si.prg.estimates.ops, si.prg.estimates.mem
est = estimate_uop(compile_linear(x.schedule_linear()).src[-1])
return est.ops, est.mem

@unittest.skipIf(Device.DEFAULT == "WEBGPU", "webgpu does extra load/store for packed types")
class TestMemoryCount(unittest.TestCase):
Expand Down Expand Up @@ -165,8 +165,8 @@ def test_mulacc(self):
class TestStatsOptimized(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.ast_gemm = (Tensor.empty(N, N) @ Tensor.empty(N, N)).schedule()[-1].ast
cls.ast_reduce = (Tensor.empty(N*N).sum()).schedule()[-1].ast
cls.ast_gemm = (Tensor.empty(N, N) @ Tensor.empty(N, N)).schedule_linear().src[-1].src[0]
cls.ast_reduce = (Tensor.empty(N*N).sum()).schedule_linear().src[-1].src[0]

def check_gemm(self, p:ProgramSpec, extra_flops=0):
#p.uops.print()
Expand Down
Loading
Loading