Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build.zig.zon
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
.name = .zmath,
.fingerprint = 0xfd23d422bd223cc2,
.version = "0.11.0-dev",
.minimum_zig_version = "0.15.1",
.minimum_zig_version = "0.16.0",
.paths = .{
"build.zig",
"build.zig.zon",
Expand Down
122 changes: 53 additions & 69 deletions src/benchmark.zig
Original file line number Diff line number Diff line change
Expand Up @@ -49,43 +49,41 @@
/// wave benchmark (SOA) - scalar version: 3.7832s, zmath version: 0.3642s
///
/// -------------------------------------------------------------------------------------------------
pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer _ = gpa.deinit();
const allocator = gpa.allocator();
pub fn main(init: std.process.Init) !void {
const allocator = init.gpa;

// m = mul(ma, mb); data set fits in L1 cache; AOS data layout.
try mat4MulBenchmark(allocator, 100_000);
try mat4MulBenchmark(init.io, allocator, 100_000);

// v = 0.01 * cross3(va, vb) + vec3(1.0); data set fits in L1 cache; AOS data layout.
try cross3ScaleBiasBenchmark(allocator, 10_000);
try cross3ScaleBiasBenchmark(init.io, allocator, 10_000);

// v = dot3(va, vb) * (0.1 * cross3(va, vb) + vec3(1.0)); data set fits in L1 cache; AOS data layout.
try cross3Dot3ScaleBiasBenchmark(allocator, 10_000);
try cross3Dot3ScaleBiasBenchmark(init.io, allocator, 10_000);

// q = qmul(qa, qb); data set fits in L1 cache; AOS data layout.
try quatBenchmark(allocator, 10_000);
try quatBenchmark(init.io, allocator, 10_000);

// d = sqrt(x * x + z * z); y = sin(d - t); SOA layout.
try waveBenchmark(allocator, 1_000);
try waveBenchmark(init.io, allocator, 1_000);
}

const std = @import("std");
const time = std.time;
const Timer = time.Timer;
const Timer = std.Io.Timestamp;
const zm = @import("zmath");

var prng = std.Random.DefaultPrng.init(0);
const random = prng.random();

noinline fn mat4MulBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
noinline fn mat4MulBenchmark(io: std.Io, allocator: std.mem.Allocator, comptime count: comptime_int) !void {
std.debug.print("\n", .{});
std.debug.print("{s:>42} - ", .{"matrix mul benchmark (AOS)"});

var data0 = try std.ArrayList([16]f32).initCapacity(allocator, 64);
defer data0.deinit();
defer data0.deinit(allocator);
var data1 = try std.ArrayList([16]f32).initCapacity(allocator, 64);
defer data1.deinit();
defer data1.deinit(allocator);

var i: usize = 0;
while (i < 64) : (i += 1) {
Expand Down Expand Up @@ -118,8 +116,7 @@ noinline fn mat4MulBenchmark(allocator: std.mem.Allocator, comptime count: compt

{
i = 0;
var timer = try Timer.start();
const start = timer.lap();
const start = std.Io.Clock.awake.now(io);
while (i < count) : (i += 1) {
for (data1.items) |b| {
for (data0.items) |a| {
Expand All @@ -145,16 +142,15 @@ noinline fn mat4MulBenchmark(allocator: std.mem.Allocator, comptime count: compt
}
}
}
const end = timer.read();
const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s;
const elapsed = start.untilNow(io, .awake);

std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
std.debug.print("scalar version: {f}, ", .{elapsed});
}

{
i = 0;
var timer = try Timer.start();
const start = timer.lap();

const start = std.Io.Clock.awake.now(io);
while (i < count) : (i += 1) {
for (data1.items) |b| {
for (data0.items) |a| {
Expand All @@ -165,20 +161,19 @@ noinline fn mat4MulBenchmark(allocator: std.mem.Allocator, comptime count: compt
}
}
}
const end = timer.read();
const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s;

std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
const elapsed = start.untilNow(io, .awake);
std.debug.print("zmath version: {f}\n", .{elapsed});
}
}

noinline fn cross3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
noinline fn cross3ScaleBiasBenchmark(io: std.Io, allocator: std.mem.Allocator, comptime count: comptime_int) !void {
std.debug.print("{s:>42} - ", .{"cross3, scale, bias benchmark (AOS)"});

var data0 = try std.ArrayList([3]f32).initCapacity(allocator, 256);
defer data0.deinit();
defer data0.deinit(allocator);
var data1 = try std.ArrayList([3]f32).initCapacity(allocator, 256);
defer data1.deinit();
defer data1.deinit(allocator);

var i: usize = 0;
while (i < 256) : (i += 1) {
Expand All @@ -201,8 +196,8 @@ noinline fn cross3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime coun

{
i = 0;
var timer = try Timer.start();
const start = timer.lap();

const start = std.Io.Clock.awake.now(io);
while (i < count) : (i += 1) {
for (data1.items) |b| {
for (data0.items) |a| {
Expand All @@ -215,16 +210,15 @@ noinline fn cross3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime coun
}
}
}
const end = timer.read();
const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s;
const elapsed = start.untilNow(io, .awake);

std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
std.debug.print("scalar version: {f}, ", .{elapsed});
}

{
i = 0;
var timer = try Timer.start();
const start = timer.lap();

const start = std.Io.Clock.awake.now(io);
while (i < count) : (i += 1) {
for (data1.items) |b| {
for (data0.items) |a| {
Expand All @@ -235,14 +229,13 @@ noinline fn cross3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime coun
}
}
}
const end = timer.read();
const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s;

std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
const elapsed = start.untilNow(io, .awake);
std.debug.print("zmath version: {f}\n", .{elapsed});
}
}

noinline fn cross3Dot3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
noinline fn cross3Dot3ScaleBiasBenchmark(io: std.Io, allocator: std.mem.Allocator, comptime count: comptime_int) !void {
std.debug.print("{s:>42} - ", .{"cross3, dot3, scale, bias benchmark (AOS)"});

var data0 = try std.ArrayList([3]f32).initCapacity(allocator, 256);
Expand Down Expand Up @@ -271,8 +264,8 @@ noinline fn cross3Dot3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime

{
i = 0;
var timer = try Timer.start();
const start = timer.lap();

const start = std.Io.Clock.awake.now(io);
while (i < count) : (i += 1) {
for (data1.items) |b| {
for (data0.items) |a| {
Expand All @@ -286,16 +279,15 @@ noinline fn cross3Dot3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime
}
}
}
const end = timer.read();
const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s;
const elapsed = start.untilNow(io, .awake);

std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
std.debug.print("scalar version: {f}, ", .{elapsed});
}

{
i = 0;
var timer = try Timer.start();
const start = timer.lap();

const start = std.Io.Clock.awake.now(io);
while (i < count) : (i += 1) {
for (data1.items) |b| {
for (data0.items) |a| {
Expand All @@ -306,14 +298,13 @@ noinline fn cross3Dot3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime
}
}
}
const end = timer.read();
const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s;
const elapsed = start.untilNow(io, .awake);

std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
std.debug.print("zmath version: {f}\n", .{elapsed});
}
}

noinline fn quatBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
noinline fn quatBenchmark(io: std.Io, allocator: std.mem.Allocator, comptime count: comptime_int) !void {
std.debug.print("{s:>42} - ", .{"quaternion mul benchmark (AOS)"});

var data0 = try std.ArrayList([4]f32).initCapacity(allocator, 256);
Expand Down Expand Up @@ -342,8 +333,8 @@ noinline fn quatBenchmark(allocator: std.mem.Allocator, comptime count: comptime

{
i = 0;
var timer = try Timer.start();
const start = timer.lap();

const start = std.Io.Clock.awake.now(io);
while (i < count) : (i += 1) {
for (data1.items) |b| {
for (data0.items) |a| {
Expand All @@ -357,16 +348,14 @@ noinline fn quatBenchmark(allocator: std.mem.Allocator, comptime count: comptime
}
}
}
const end = timer.read();
const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s;
const elapsed = start.untilNow(io, .awake);

std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
std.debug.print("scalar version: {f}, ", .{elapsed});
}

{
i = 0;
var timer = try Timer.start();
const start = timer.lap();
const start = std.Io.Clock.awake.now(io);
while (i < count) : (i += 1) {
for (data1.items) |b| {
for (data0.items) |a| {
Expand All @@ -377,14 +366,13 @@ noinline fn quatBenchmark(allocator: std.mem.Allocator, comptime count: comptime
}
}
}
const end = timer.read();
const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s;
const elapsed = start.untilNow(io, .awake);

std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
std.debug.print("zmath version: {f}\n", .{elapsed});
}
}

noinline fn waveBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
noinline fn waveBenchmark(io: std.Io, allocator: std.mem.Allocator, comptime count: comptime_int) !void {
_ = allocator;
std.debug.print("{s:>42} - ", .{"wave benchmark (SOA)"});

Expand All @@ -394,8 +382,7 @@ noinline fn waveBenchmark(allocator: std.mem.Allocator, comptime count: comptime

const scale: f32 = 0.05;

var timer = try Timer.start();
const start = timer.lap();
const start = std.Io.Clock.awake.now(io);

var iter: usize = 0;
while (iter < count) : (iter += 1) {
Expand Down Expand Up @@ -428,10 +415,9 @@ noinline fn waveBenchmark(allocator: std.mem.Allocator, comptime count: comptime
}
t += 0.001;
}
const end = timer.read();
const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s;
const elapsed = start.untilNow(io, .awake);

std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
std.debug.print("scalar version: {f}, ", .{elapsed});
}

{
Expand All @@ -445,8 +431,7 @@ noinline fn waveBenchmark(allocator: std.mem.Allocator, comptime count: comptime

const scale: f32 = 0.05;

var timer = try Timer.start();
const start = timer.lap();
const start = std.Io.Clock.awake.now(io);

var iter: usize = 0;
while (iter < count) : (iter += 1) {
Expand All @@ -469,9 +454,8 @@ noinline fn waveBenchmark(allocator: std.mem.Allocator, comptime count: comptime
}
vt += zm.splat(T, 0.001);
}
const end = timer.read();
const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s;
const elapsed = start.untilNow(io, .awake);

std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
std.debug.print("zmath version: {f}\n", .{elapsed});
}
}
6 changes: 4 additions & 2 deletions src/root.zig
Original file line number Diff line number Diff line change
Expand Up @@ -4122,7 +4122,8 @@ test "zmath.fftN" {
-77.254834, 0.000000, -105.489863, 0.000000, -160.874864, 0.000000, -324.901452, 0.000000,
};
for (expected, 0..) |e, ie| {
try expect(std.math.approxEqAbs(f32, e, im[(ie / 4)][ie % 4], epsilon));
const v: [4]f32 = im[(ie / 4)];
try expect(std.math.approxEqAbs(f32, e, v[ie % 4], epsilon));
}
}

Expand Down Expand Up @@ -4185,7 +4186,8 @@ test "zmath.fftN" {
-321.749727, 0.000000, 0.000000, 0.000000, -649.802905, 0.000000, 0.000000, 0.000000,
};
for (expected, 0..) |e, ie| {
try expect(std.math.approxEqAbs(f32, e, im[(ie / 4)][ie % 4], epsilon));
const v: [4]f32 = im[(ie / 4)];
try expect(std.math.approxEqAbs(f32, e, v[ie % 4], epsilon));
}
}
}
Expand Down
Loading