diff --git a/build.zig.zon b/build.zig.zon index e06308d..6790c53 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -2,7 +2,7 @@ .name = .zmath, .fingerprint = 0xfd23d422bd223cc2, .version = "0.11.0-dev", - .minimum_zig_version = "0.15.1", + .minimum_zig_version = "0.16.0", .paths = .{ "build.zig", "build.zig.zon", diff --git a/src/benchmark.zig b/src/benchmark.zig index 775a1fc..02f95a7 100644 --- a/src/benchmark.zig +++ b/src/benchmark.zig @@ -49,43 +49,41 @@ /// wave benchmark (SOA) - scalar version: 3.7832s, zmath version: 0.3642s /// /// ------------------------------------------------------------------------------------------------- -pub fn main() !void { - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; - defer _ = gpa.deinit(); - const allocator = gpa.allocator(); +pub fn main(init: std.process.Init) !void { + const allocator = init.gpa; // m = mul(ma, mb); data set fits in L1 cache; AOS data layout. - try mat4MulBenchmark(allocator, 100_000); + try mat4MulBenchmark(init.io, allocator, 100_000); // v = 0.01 * cross3(va, vb) + vec3(1.0); data set fits in L1 cache; AOS data layout. - try cross3ScaleBiasBenchmark(allocator, 10_000); + try cross3ScaleBiasBenchmark(init.io, allocator, 10_000); // v = dot3(va, vb) * (0.1 * cross3(va, vb) + vec3(1.0)); data set fits in L1 cache; AOS data layout. - try cross3Dot3ScaleBiasBenchmark(allocator, 10_000); + try cross3Dot3ScaleBiasBenchmark(init.io, allocator, 10_000); // q = qmul(qa, qb); data set fits in L1 cache; AOS data layout. - try quatBenchmark(allocator, 10_000); + try quatBenchmark(init.io, allocator, 10_000); // d = sqrt(x * x + z * z); y = sin(d - t); SOA layout. - try waveBenchmark(allocator, 1_000); + try waveBenchmark(init.io, allocator, 1_000); } const std = @import("std"); const time = std.time; -const Timer = time.Timer; +const Timer = std.Io.Timestamp; const zm = @import("zmath"); var prng = std.Random.DefaultPrng.init(0); const random = prng.random(); -noinline fn mat4MulBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void { +noinline fn mat4MulBenchmark(io: std.Io, allocator: std.mem.Allocator, comptime count: comptime_int) !void { std.debug.print("\n", .{}); std.debug.print("{s:>42} - ", .{"matrix mul benchmark (AOS)"}); var data0 = try std.ArrayList([16]f32).initCapacity(allocator, 64); - defer data0.deinit(); + defer data0.deinit(allocator); var data1 = try std.ArrayList([16]f32).initCapacity(allocator, 64); - defer data1.deinit(); + defer data1.deinit(allocator); var i: usize = 0; while (i < 64) : (i += 1) { @@ -118,8 +116,7 @@ noinline fn mat4MulBenchmark(allocator: std.mem.Allocator, comptime count: compt { i = 0; - var timer = try Timer.start(); - const start = timer.lap(); + const start = std.Io.Clock.awake.now(io); while (i < count) : (i += 1) { for (data1.items) |b| { for (data0.items) |a| { @@ -145,16 +142,15 @@ noinline fn mat4MulBenchmark(allocator: std.mem.Allocator, comptime count: compt } } } - const end = timer.read(); - const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s; + const elapsed = start.untilNow(io, .awake); - std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s}); + std.debug.print("scalar version: {f}, ", .{elapsed}); } { i = 0; - var timer = try Timer.start(); - const start = timer.lap(); + + const start = std.Io.Clock.awake.now(io); while (i < count) : (i += 1) { for (data1.items) |b| { for (data0.items) |a| { @@ -165,20 +161,19 @@ noinline fn mat4MulBenchmark(allocator: std.mem.Allocator, comptime count: compt } } } - const end = timer.read(); - const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s; - std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s}); + const elapsed = start.untilNow(io, .awake); + std.debug.print("zmath version: {f}\n", .{elapsed}); } } -noinline fn cross3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void { +noinline fn cross3ScaleBiasBenchmark(io: std.Io, allocator: std.mem.Allocator, comptime count: comptime_int) !void { std.debug.print("{s:>42} - ", .{"cross3, scale, bias benchmark (AOS)"}); var data0 = try std.ArrayList([3]f32).initCapacity(allocator, 256); - defer data0.deinit(); + defer data0.deinit(allocator); var data1 = try std.ArrayList([3]f32).initCapacity(allocator, 256); - defer data1.deinit(); + defer data1.deinit(allocator); var i: usize = 0; while (i < 256) : (i += 1) { @@ -201,8 +196,8 @@ noinline fn cross3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime coun { i = 0; - var timer = try Timer.start(); - const start = timer.lap(); + + const start = std.Io.Clock.awake.now(io); while (i < count) : (i += 1) { for (data1.items) |b| { for (data0.items) |a| { @@ -215,16 +210,15 @@ noinline fn cross3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime coun } } } - const end = timer.read(); - const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s; + const elapsed = start.untilNow(io, .awake); - std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s}); + std.debug.print("scalar version: {f}, ", .{elapsed}); } { i = 0; - var timer = try Timer.start(); - const start = timer.lap(); + + const start = std.Io.Clock.awake.now(io); while (i < count) : (i += 1) { for (data1.items) |b| { for (data0.items) |a| { @@ -235,14 +229,13 @@ noinline fn cross3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime coun } } } - const end = timer.read(); - const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s; - std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s}); + const elapsed = start.untilNow(io, .awake); + std.debug.print("zmath version: {f}\n", .{elapsed}); } } -noinline fn cross3Dot3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void { +noinline fn cross3Dot3ScaleBiasBenchmark(io: std.Io, allocator: std.mem.Allocator, comptime count: comptime_int) !void { std.debug.print("{s:>42} - ", .{"cross3, dot3, scale, bias benchmark (AOS)"}); var data0 = try std.ArrayList([3]f32).initCapacity(allocator, 256); @@ -271,8 +264,8 @@ noinline fn cross3Dot3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime { i = 0; - var timer = try Timer.start(); - const start = timer.lap(); + + const start = std.Io.Clock.awake.now(io); while (i < count) : (i += 1) { for (data1.items) |b| { for (data0.items) |a| { @@ -286,16 +279,15 @@ noinline fn cross3Dot3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime } } } - const end = timer.read(); - const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s; + const elapsed = start.untilNow(io, .awake); - std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s}); + std.debug.print("scalar version: {f}, ", .{elapsed}); } { i = 0; - var timer = try Timer.start(); - const start = timer.lap(); + + const start = std.Io.Clock.awake.now(io); while (i < count) : (i += 1) { for (data1.items) |b| { for (data0.items) |a| { @@ -306,14 +298,13 @@ noinline fn cross3Dot3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime } } } - const end = timer.read(); - const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s; + const elapsed = start.untilNow(io, .awake); - std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s}); + std.debug.print("zmath version: {f}\n", .{elapsed}); } } -noinline fn quatBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void { +noinline fn quatBenchmark(io: std.Io, allocator: std.mem.Allocator, comptime count: comptime_int) !void { std.debug.print("{s:>42} - ", .{"quaternion mul benchmark (AOS)"}); var data0 = try std.ArrayList([4]f32).initCapacity(allocator, 256); @@ -342,8 +333,8 @@ noinline fn quatBenchmark(allocator: std.mem.Allocator, comptime count: comptime { i = 0; - var timer = try Timer.start(); - const start = timer.lap(); + + const start = std.Io.Clock.awake.now(io); while (i < count) : (i += 1) { for (data1.items) |b| { for (data0.items) |a| { @@ -357,16 +348,14 @@ noinline fn quatBenchmark(allocator: std.mem.Allocator, comptime count: comptime } } } - const end = timer.read(); - const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s; + const elapsed = start.untilNow(io, .awake); - std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s}); + std.debug.print("scalar version: {f}, ", .{elapsed}); } { i = 0; - var timer = try Timer.start(); - const start = timer.lap(); + const start = std.Io.Clock.awake.now(io); while (i < count) : (i += 1) { for (data1.items) |b| { for (data0.items) |a| { @@ -377,14 +366,13 @@ noinline fn quatBenchmark(allocator: std.mem.Allocator, comptime count: comptime } } } - const end = timer.read(); - const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s; + const elapsed = start.untilNow(io, .awake); - std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s}); + std.debug.print("zmath version: {f}\n", .{elapsed}); } } -noinline fn waveBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void { +noinline fn waveBenchmark(io: std.Io, allocator: std.mem.Allocator, comptime count: comptime_int) !void { _ = allocator; std.debug.print("{s:>42} - ", .{"wave benchmark (SOA)"}); @@ -394,8 +382,7 @@ noinline fn waveBenchmark(allocator: std.mem.Allocator, comptime count: comptime const scale: f32 = 0.05; - var timer = try Timer.start(); - const start = timer.lap(); + const start = std.Io.Clock.awake.now(io); var iter: usize = 0; while (iter < count) : (iter += 1) { @@ -428,10 +415,9 @@ noinline fn waveBenchmark(allocator: std.mem.Allocator, comptime count: comptime } t += 0.001; } - const end = timer.read(); - const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s; + const elapsed = start.untilNow(io, .awake); - std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s}); + std.debug.print("scalar version: {f}, ", .{elapsed}); } { @@ -445,8 +431,7 @@ noinline fn waveBenchmark(allocator: std.mem.Allocator, comptime count: comptime const scale: f32 = 0.05; - var timer = try Timer.start(); - const start = timer.lap(); + const start = std.Io.Clock.awake.now(io); var iter: usize = 0; while (iter < count) : (iter += 1) { @@ -469,9 +454,8 @@ noinline fn waveBenchmark(allocator: std.mem.Allocator, comptime count: comptime } vt += zm.splat(T, 0.001); } - const end = timer.read(); - const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s; + const elapsed = start.untilNow(io, .awake); - std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s}); + std.debug.print("zmath version: {f}\n", .{elapsed}); } } diff --git a/src/root.zig b/src/root.zig index d02c7dd..dc7dcb5 100644 --- a/src/root.zig +++ b/src/root.zig @@ -4122,7 +4122,8 @@ test "zmath.fftN" { -77.254834, 0.000000, -105.489863, 0.000000, -160.874864, 0.000000, -324.901452, 0.000000, }; for (expected, 0..) |e, ie| { - try expect(std.math.approxEqAbs(f32, e, im[(ie / 4)][ie % 4], epsilon)); + const v: [4]f32 = im[(ie / 4)]; + try expect(std.math.approxEqAbs(f32, e, v[ie % 4], epsilon)); } } @@ -4185,7 +4186,8 @@ test "zmath.fftN" { -321.749727, 0.000000, 0.000000, 0.000000, -649.802905, 0.000000, 0.000000, 0.000000, }; for (expected, 0..) |e, ie| { - try expect(std.math.approxEqAbs(f32, e, im[(ie / 4)][ie % 4], epsilon)); + const v: [4]f32 = im[(ie / 4)]; + try expect(std.math.approxEqAbs(f32, e, v[ie % 4], epsilon)); } } }