Moved benchmarks into a separate file and exe

This commit is contained in:
adrien 2026-04-21 23:52:13 +02:00
parent a518a86fb2
commit 1fd86a5807
3 changed files with 493 additions and 535 deletions

View File

@ -197,48 +197,39 @@ test "Add" {
const added = distance.add(distance2);
try std.testing.expectEqual(30, added.value);
try std.testing.expectEqual(1, @TypeOf(added).dims.get(.L));
std.debug.print("KiloMeter {d} + {d} = {d} OK\n", .{ distance, distance2, added });
const KiloMeter = Scalar(i128, Dimensions.init(.{ .L = 1 }), Scales.init(.{ .L = .k }));
const distance3 = KiloMeter{ .value = 2 };
const added2 = distance.add(distance3);
try std.testing.expectEqual(2010, added2.value);
try std.testing.expectEqual(1, @TypeOf(added2).dims.get(.L));
std.debug.print("KiloMeter {d} + {d} = {d} OK\n", .{ distance, distance3, added2 });
const added3 = distance3.add(distance).to(KiloMeter);
try std.testing.expectEqual(2, added3.value);
try std.testing.expectEqual(1, @TypeOf(added3).dims.get(.L));
std.debug.print("KiloMeter {d} + {d} = {d} OK\n", .{ distance3, distance, added3 });
const KiloMeter_f = Scalar(f64, Dimensions.init(.{ .L = 1 }), Scales.init(.{ .L = .k }));
const distance4 = KiloMeter_f{ .value = 2 };
const added4 = distance4.add(distance).to(KiloMeter_f);
try std.testing.expectApproxEqAbs(2.01, added4.value, 0.000001);
try std.testing.expectEqual(1, @TypeOf(added4).dims.get(.L));
std.debug.print("KiloMeter_f {d} + {d} = {d:.2} OK\n", .{ distance4, distance, added4 });
}
// NOTE(review): this span is a rendered diff without +/- markers, so removed
// and added lines appear merged. As written it declares `diff2` twice and
// would not compile as any single version of the file.
test "Sub" {
const Meter = Scalar(i128, Dimensions.init(.{ .L = 1 }), Scales.init(.{}));
const KiloMeter = Scalar(i128, Dimensions.init(.{ .L = 1 }), Scales.init(.{ .L = .k }));
const KiloMeter_f = Scalar(f64, Dimensions.init(.{ .L = 1 }), Scales.init(.{ .L = .k }));
const a = Meter{ .value = 500 };
const b = Meter{ .value = 200 };
// Same-scale subtraction: 500 m - 200 m = 300 m.
const diff = a.sub(b);
try std.testing.expectEqual(300, diff.value);
std.debug.print("Sub: {d} - {d} = {d} OK\n", .{ a, b, diff });
const km = KiloMeter{ .value = 1 };
// Cross-scale subtraction — apparently the pre-commit version of this test
// (its result is only printed, never asserted).
const diff2 = a.sub(km);
std.debug.print("Sub cross-scale: {d} - {d} = {d}\n", .{ a, km, diff2 });
// Negative-result subtraction — apparently the post-commit replacement;
// `diff2` is redeclared here (diff-rendering artifact, see note above).
const diff2 = b.sub(a);
try std.testing.expectEqual(-300, diff2.value);
const km_f = KiloMeter_f{ .value = 2.5 };
const m_f = Meter{ .value = 500 };
// Float cross-scale subtraction; the asserted raw value is 2000 — presumably
// sub() converts to the finer scale's units here — TODO confirm against
// Scalar.sub's scale-resolution rules.
const diff3 = km_f.sub(m_f);
try std.testing.expectApproxEqAbs(2000, diff3.value, 1e-4);
std.debug.print("Sub float cross-scale: {d} - {d} = {d} OK\n", .{ km_f, m_f, diff3 });
}
test "MulBy" {
@ -252,14 +243,12 @@ test "MulBy" {
try std.testing.expectEqual(12, area_time.value);
try std.testing.expectEqual(1, @TypeOf(area_time).dims.get(.L));
try std.testing.expectEqual(1, @TypeOf(area_time).dims.get(.T));
std.debug.print("MulBy: {d} * {d} = {d} OK\n", .{ d, t, area_time });
const d2 = Meter{ .value = 5.0 };
const area = d.mulBy(d2);
try std.testing.expectEqual(15, area.value);
try std.testing.expectEqual(2, @TypeOf(area).dims.get(.L));
try std.testing.expectEqual(0, @TypeOf(area).dims.get(.T));
std.debug.print("MulBy: {d} * {d} = {d} OK\n", .{ d, d2, area });
}
test "MulBy with scale" {
@ -271,7 +260,6 @@ test "MulBy with scale" {
const prod = dist.mulBy(mass);
try std.testing.expectEqual(1, @TypeOf(prod).dims.get(.L));
try std.testing.expectEqual(1, @TypeOf(prod).dims.get(.M));
std.debug.print("MulBy scaled: {d} * {d} = {d} OK\n", .{ dist, mass, prod });
}
test "MulBy with type change" {
@ -289,7 +277,6 @@ test "MulBy with type change" {
try std.testing.expectApproxEqAbs(12, area_time_f.value, 0.0001);
try std.testing.expectEqual(1, @TypeOf(area_time).dims.get(.L));
try std.testing.expectEqual(1, @TypeOf(area_time).dims.get(.T));
std.debug.print("MulBy: {d} * {d} = {d} OK\n", .{ d, t, area_time });
}
test "MulBy small" {
@ -303,7 +290,6 @@ test "MulBy small" {
try std.testing.expectEqual(12, area_time.value);
try std.testing.expectEqual(1, @TypeOf(area_time).dims.get(.L));
try std.testing.expectEqual(1, @TypeOf(area_time).dims.get(.T));
std.debug.print("MulBy: {d} * {d} = {d} OK\n", .{ d, t, area_time });
}
test "Scale" {
@ -314,12 +300,10 @@ test "Scale" {
const scaled = d.scale(3);
try std.testing.expectEqual(21, scaled.value);
try std.testing.expectEqual(1, @TypeOf(scaled).dims.get(.L));
std.debug.print("Scale int: {d} * 3 = {d} OK\n", .{ d, scaled });
const t = Second{ .value = 1.5 };
const scaled_f = t.scale(4.0);
try std.testing.expectApproxEqAbs(@as(f32, 6.0), scaled_f.value, 1e-4);
std.debug.print("Scale float: {d} * 4 = {d} OK\n", .{ t, scaled_f });
}
test "Chained: velocity and acceleration" {
@ -338,8 +322,6 @@ test "Chained: velocity and acceleration" {
try std.testing.expectEqual(5, accel.value);
try std.testing.expectEqual(1, @TypeOf(accel).dims.get(.L));
try std.testing.expectEqual(-2, @TypeOf(accel).dims.get(.T));
std.debug.print("Velocity: {d}, Acceleration: {d} OK\n", .{ velocity, accel });
}
test "DivBy integer exact" {
@ -353,7 +335,6 @@ test "DivBy integer exact" {
try std.testing.expectEqual(30, vel.value);
try std.testing.expectEqual(1, @TypeOf(vel).dims.get(.L));
try std.testing.expectEqual(-1, @TypeOf(vel).dims.get(.T));
std.debug.print("DivBy int: {d} / {d} = {d} OK\n", .{ dist, time, vel });
}
test "Conversion chain: km -> m -> cm" {
@ -367,7 +348,6 @@ test "Conversion chain: km -> m -> cm" {
try std.testing.expectEqual(15_000, m.value);
try std.testing.expectEqual(1_500_000, cm.value);
std.debug.print("Chain: {d} -> {d} -> {d} OK\n", .{ km, m, cm });
}
test "Conversion: hours -> minutes -> seconds" {
@ -381,7 +361,6 @@ test "Conversion: hours -> minutes -> seconds" {
try std.testing.expectEqual(60, min.value);
try std.testing.expectEqual(3600, sec.value);
std.debug.print("Time chain: {d} -> {d} -> {d} OK\n", .{ h, min, sec });
}
test "Negative values" {
@ -391,7 +370,6 @@ test "Negative values" {
const b = Meter{ .value = 20 };
const diff = a.sub(b);
try std.testing.expectEqual(-15, diff.value);
std.debug.print("Negative sub: {d} - {d} = {d} OK\n", .{ a, b, diff });
}
test "Format Scalar" {
@ -405,396 +383,22 @@ test "Format Scalar" {
Dimensions.init(.{ .M = 1, .L = 1, .T = -1 }),
Scales.init(.{ .M = .k }),
);
const Meter = Scalar(f32, Dimensions.init(.{ .L = 1 }), Scales.init(.{}));
const m = Meter{ .value = 1.23456 };
const accel = MeterPerSecondSq{ .value = 9.81 };
const momentum = KgMeterPerSecond{ .value = 42.0 };
std.debug.print("Acceleration: {d}\n", .{accel});
std.debug.print("Momentum: {d}\n", .{momentum});
}
test "Benchmark Scalar" {
const Io = std.Io;
const ITERS: usize = 100_000;
const SAMPLES: usize = 10; // Number of samples for stats
var gsink: f64 = 0;
const io = std.testing.io;
// Standard Zig 0.16 timestamp retrieval
const getTime = struct {
fn f(i: Io) Io.Timestamp {
return Io.Clock.awake.now(i);
}
}.f;
const fold = struct {
fn f(comptime TT: type, s: *f64, v: TT) void {
s.* += if (comptime @typeInfo(TT) == .float)
@as(f64, @floatCast(v))
else
@as(f64, @floatFromInt(v));
}
}.f;
const getVal = struct {
fn f(comptime TT: type, i: usize, comptime mask: u7) TT {
const v: u8 = @as(u8, @truncate(i & @as(usize, mask))) + 1;
return if (comptime @typeInfo(TT) == .float) @floatFromInt(v) else @intCast(v);
}
}.f;
const Stats = struct {
median: f64,
delta: f64,
ops_per_sec: f64,
};
const computeStats = struct {
fn f(samples: []f64, iters: usize) Stats {
std.mem.sort(f64, samples, {}, std.sort.asc(f64));
const mid = samples.len / 2;
const median_ns = if (samples.len % 2 == 0) (samples[mid - 1] + samples[mid]) / 2.0 else samples[mid];
const low = samples[0];
const high = samples[samples.len - 1];
const delta_ns = (high - low) / 2.0;
const ns_per_op = median_ns / @as(f64, @floatFromInt(iters));
return .{
.median = ns_per_op,
.delta = (delta_ns / @as(f64, @floatFromInt(iters))),
.ops_per_sec = 1_000_000_000.0 / ns_per_op,
};
}
}.f;
std.debug.print(
\\
\\ Scalar<T> benchmark — {d} iterations, {d} samples/cell
\\
\\┌───────────────────┬──────┬─────────────────────┬─────────────────────┐
\\│ Operation │ Type │ ns / op (± delta) │ Throughput (ops/s) │
\\├───────────────────┼──────┼─────────────────────┼─────────────────────┤
\\
, .{ ITERS, SAMPLES });
const Types = .{ i16, i32, i64, i128, i256, f32, f64, f128 };
const TNames = .{ "i16", "i32", "i64", "i128", "i256", "f32", "f64", "f128" };
const Ops = .{ "add", "sub", "mulBy", "divBy", "scale", "to" };
var results_matrix: [Ops.len][Types.len]f64 = undefined;
comptime var tidx: usize = 0;
inline for (Types, TNames) |T, tname| {
const M = Scalar(T, Dimensions.init(.{ .L = 1 }), Scales.init(.{}));
const KM = Scalar(T, Dimensions.init(.{ .L = 1 }), Scales.init(.{ .L = .k }));
const S = Scalar(T, Dimensions.init(.{ .T = 1 }), Scales.init(.{}));
inline for (Ops, 0..) |op_name, oidx| {
var samples: [SAMPLES]f64 = undefined;
for (0..SAMPLES) |s_idx| {
var sink: T = 0;
const t_start = getTime(io);
for (0..ITERS) |i| {
const r = if (comptime std.mem.eql(u8, op_name, "add"))
(M{ .value = getVal(T, i, 63) }).add(M{ .value = getVal(T, i +% 7, 63) })
else if (comptime std.mem.eql(u8, op_name, "sub"))
(M{ .value = getVal(T, i +% 10, 63) }).sub(M{ .value = getVal(T, i, 63) })
else if (comptime std.mem.eql(u8, op_name, "mulBy"))
(M{ .value = getVal(T, i, 63) }).mulBy(M{ .value = getVal(T, i +% 1, 63) })
else if (comptime std.mem.eql(u8, op_name, "divBy"))
(M{ .value = getVal(T, i +% 10, 63) }).divBy(S{ .value = getVal(T, i, 63) })
else if (comptime std.mem.eql(u8, op_name, "scale"))
(M{ .value = getVal(T, i, 63) }).scale(getVal(T, i +% 2, 63))
else
(KM{ .value = getVal(T, i, 15) }).to(M);
if (comptime @typeInfo(T) == .float) sink += r.value else sink ^= r.value;
}
const t_end = getTime(io);
samples[s_idx] = @as(f64, @floatFromInt(t_start.durationTo(t_end).toNanoseconds()));
fold(T, &gsink, sink);
}
const stats = computeStats(&samples, ITERS);
results_matrix[oidx][tidx] = stats.median;
std.debug.print("│ {s:<17} │ {s:<4} │ {d:>8.2} ns ±{d:<6.2} │ {d:>19.0} │\n", .{ op_name, tname, stats.median, stats.delta, stats.ops_per_sec });
}
if (comptime tidx < Types.len - 1) {
std.debug.print("├───────────────────┼──────┼─────────────────────┼─────────────────────┤\n", .{});
}
tidx += 1;
}
// Median Summary Table
std.debug.print("└───────────────────┴──────┴─────────────────────┴─────────────────────┘\n\n", .{});
std.debug.print("Median Summary (ns/op):\n", .{});
std.debug.print("┌──────────────┬───────┬───────┬───────┬───────┬───────┬───────┬───────┬───────┐\n", .{});
std.debug.print("│ Operation │ i16 │ i32 │ i64 │ i128 │ i256 │ f32 │ f64 │ f128 │\n", .{});
std.debug.print("├──────────────┼───────┼───────┼───────┼───────┼───────┼───────┼───────┼───────┤\n", .{});
inline for (Ops, 0..) |op_name, oidx| {
std.debug.print("│ {s:<11} │", .{op_name});
var i: usize = 0;
while (i < Types.len) : (i += 1) {
std.debug.print("{d:>6.1} │", .{results_matrix[oidx][i]});
}
std.debug.print("\n", .{});
}
std.debug.print("└──────────────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┘\n", .{});
std.debug.print("\nAnti-optimisation sink: {d:.4}\n", .{gsink});
try std.testing.expect(gsink != 0);
}
test "Overhead Analysis: Scalar vs Native" {
const Io = std.Io;
const ITERS: usize = 100_000;
const SAMPLES: usize = 5;
const io = std.testing.io;
const getTime = struct {
fn f(i: Io) Io.Timestamp {
return Io.Clock.awake.now(i);
}
}.f;
const fold = struct {
fn f(comptime TT: type, s: *f64, v: TT) void {
s.* += if (comptime @typeInfo(TT) == .float)
@as(f64, @floatCast(v))
else
@as(f64, @floatFromInt(v));
}
}.f;
// Helper to safely get a value of type T from a loop index
const getValT = struct {
fn f(comptime TT: type, i: usize) TT {
const v = (i % 100) + 1;
return if (comptime @typeInfo(TT) == .float) @floatFromInt(v) else @intCast(v);
}
}.f;
const Types = .{ i32, i64, i128, f32, f64 };
const TNames = .{ "i32", "i64", "i128", "f32", "f64" };
const Ops = .{ "add", "mulBy", "divBy" };
var gsink: f64 = 0;
std.debug.print(
\\
\\ Scalar vs Native Overhead Analysis
\\
\\┌───────────┬──────┬───────────┬───────────┬───────────┐
\\│ Operation │ Type │ Native │ Scalar │ Slowdown │
\\├───────────┼──────┼───────────┼───────────┼───────────┤
\\
, .{});
inline for (Ops, 0..) |op_name, j| {
inline for (Types, 0..) |T, tidx| {
var native_total_ns: f64 = 0;
var quantity_total_ns: f64 = 0;
const M = Scalar(T, Dimensions.init(.{ .L = 1 }), Scales.init(.{}));
const S = Scalar(T, Dimensions.init(.{ .T = 1 }), Scales.init(.{}));
for (0..SAMPLES) |_| {
// --- 1. Benchmark Native ---
var n_sink: T = 0;
const n_start = getTime(io);
for (0..ITERS) |i| {
const a = getValT(T, i);
const b = getValT(T, 2);
const r = if (comptime std.mem.eql(u8, op_name, "add"))
a + b
else if (comptime std.mem.eql(u8, op_name, "mulBy"))
a * b
else if (comptime @typeInfo(T) == .int) @divTrunc(a, b) else a / b;
if (comptime @typeInfo(T) == .float) n_sink += r else n_sink ^= r;
}
const n_end = getTime(io);
native_total_ns += @as(f64, @floatFromInt(n_start.durationTo(n_end).toNanoseconds()));
fold(T, &gsink, n_sink);
// --- 2. Benchmark Scalar ---
var q_sink: T = 0;
const q_start = getTime(io);
for (0..ITERS) |i| {
const qa = M{ .value = getValT(T, i) };
const qb = if (comptime std.mem.eql(u8, op_name, "divBy")) S{ .value = getValT(T, 2) } else M{ .value = getValT(T, 2) };
const r = if (comptime std.mem.eql(u8, op_name, "add"))
qa.add(qb)
else if (comptime std.mem.eql(u8, op_name, "mulBy"))
qa.mulBy(qb)
else
qa.divBy(qb);
if (comptime @typeInfo(T) == .float) q_sink += r.value else q_sink ^= r.value;
}
const q_end = getTime(io);
quantity_total_ns += @as(f64, @floatFromInt(q_start.durationTo(q_end).toNanoseconds()));
fold(T, &gsink, q_sink);
}
const avg_n = (native_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
const avg_q = (quantity_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
const slowdown = avg_q / avg_n;
std.debug.print("│ {s:<9} │ {s:<4} │ {d:>7.2}ns │ {d:>7.2}ns │ {d:>8.2}x │\n", .{
op_name, TNames[tidx], avg_n, avg_q, slowdown,
});
}
if (j != Ops.len - 1) std.debug.print("├───────────┼──────┼───────────┼───────────┼───────────┤\n", .{});
}
std.debug.print("└───────────┴──────┴───────────┴───────────┴───────────┘\n", .{});
try std.testing.expect(gsink != 0);
}
test "Cross-Type Overhead Analysis: Scalar vs Native" {
const Io = std.Io;
const ITERS: usize = 100_000;
const SAMPLES: usize = 5;
const io = std.testing.io;
const getTime = struct {
fn f(i: Io) Io.Timestamp {
return Io.Clock.awake.now(i);
}
}.f;
const fold = struct {
fn f(comptime TT: type, s: *f64, v: TT) void {
s.* += if (comptime @typeInfo(TT) == .float)
@as(f64, @floatCast(v))
else
@as(f64, @floatFromInt(v));
}
}.f;
const getValT = struct {
fn f(comptime TT: type, i: usize) TT {
// Keep values safe and non-zero to avoid division by zero or overflows during cross-casting
const v = (i % 50) + 1;
return if (comptime @typeInfo(TT) == .float) @floatFromInt(v) else @intCast(v);
}
}.f;
// Helper for the Native baseline: explicitly casting T2 to T1 before the operation
const castTo = struct {
fn f(comptime DestT: type, comptime SrcT: type, val: SrcT) DestT {
if (comptime DestT == SrcT) return val;
const src_info = @typeInfo(SrcT);
const dest_info = @typeInfo(DestT);
if (dest_info == .int and src_info == .int) return @intCast(val);
if (dest_info == .float and src_info == .int) return @floatFromInt(val);
if (dest_info == .int and src_info == .float) return @intFromFloat(val);
if (dest_info == .float and src_info == .float) return @floatCast(val);
unreachable;
}
}.f;
const Types = .{ i16, i64, i128, f32, f64 };
const TNames = .{ "i16", "i64", "i128", "f32", "f64" };
const Ops = .{ "add", "mulBy", "divBy" };
var gsink: f64 = 0;
std.debug.print(
\\
\\ Cross-Type Overhead Analysis: Scalar vs Native
\\
\\┌─────────┬──────┬──────┬───────────┬───────────┬───────────┐
\\│ Op │ T1 │ T2 │ Native │ Scalar │ Slowdown │
\\├─────────┼──────┼──────┼───────────┼───────────┼───────────┤
\\
, .{});
inline for (Ops, 0..) |op_name, j| {
inline for (Types, 0..) |T1, t1_idx| {
inline for (Types, 0..) |T2, t2_idx| {
var native_total_ns: f64 = 0;
var quantity_total_ns: f64 = 0;
const M1 = Scalar(T1, Dimensions.init(.{ .L = 1 }), Scales.init(.{}));
const M2 = Scalar(T2, Dimensions.init(.{ .L = 1 }), Scales.init(.{}));
const S2 = Scalar(T2, Dimensions.init(.{ .T = 1 }), Scales.init(.{}));
for (0..SAMPLES) |_| {
// --- 1. Benchmark Native (Cast T2 to T1, then math) ---
var n_sink: T1 = 0;
const n_start = getTime(io);
for (0..ITERS) |i| {
const a = getValT(T1, i);
const b_raw = getValT(T2, 2);
const b = castTo(T1, T2, b_raw);
const r = if (comptime std.mem.eql(u8, op_name, "add"))
a + b
else if (comptime std.mem.eql(u8, op_name, "mulBy"))
a * b
else if (comptime @typeInfo(T1) == .int)
@divTrunc(a, b)
else
a / b;
if (comptime @typeInfo(T1) == .float) n_sink += r else n_sink ^= r;
}
const n_end = getTime(io);
native_total_ns += @as(f64, @floatFromInt(n_start.durationTo(n_end).toNanoseconds()));
fold(T1, &gsink, n_sink);
// --- 2. Benchmark Scalar ---
var q_sink: T1 = 0;
const q_start = getTime(io);
for (0..ITERS) |i| {
const qa = M1{ .value = getValT(T1, i) };
const qb = if (comptime std.mem.eql(u8, op_name, "divBy"))
S2{ .value = getValT(T2, 2) }
else
M2{ .value = getValT(T2, 2) };
const r = if (comptime std.mem.eql(u8, op_name, "add"))
qa.add(qb)
else if (comptime std.mem.eql(u8, op_name, "mulBy"))
qa.mulBy(qb)
else
qa.divBy(qb);
if (comptime @typeInfo(T1) == .float) q_sink += r.value else q_sink ^= r.value;
}
const q_end = getTime(io);
quantity_total_ns += @as(f64, @floatFromInt(q_start.durationTo(q_end).toNanoseconds()));
fold(T1, &gsink, q_sink);
}
const avg_n = (native_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
const avg_q = (quantity_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
const slowdown = avg_q / avg_n;
std.debug.print("│ {s:<7} │ {s:<4} │ {s:<4} │ {d:>7.2}ns │ {d:>7.2}ns │ {d:>8.2}x │\n", .{
op_name, TNames[t1_idx], TNames[t2_idx], avg_n, avg_q, slowdown,
});
}
}
if (j != Ops.len - 1) {
std.debug.print("├─────────┼──────┼──────┼───────────┼───────────┼───────────┤\n", .{});
}
}
std.debug.print("└─────────┴──────┴──────┴───────────┴───────────┴───────────┘\n", .{});
try std.testing.expect(gsink != 0);
var buf: [64]u8 = undefined;
var res = try std.fmt.bufPrint(&buf, "{d:.2}", .{m});
try std.testing.expectEqualStrings("1.23m", res);
res = try std.fmt.bufPrint(&buf, "{d}", .{accel});
try std.testing.expectEqualStrings("9.81m.ns⁻²", res);
res = try std.fmt.bufPrint(&buf, "{d}", .{momentum});
try std.testing.expectEqualStrings("42m.kg.s⁻¹", res);
res = try std.fmt.bufPrint(&buf, "{d:_>10.1}", .{m});
try std.testing.expectEqualStrings("_______1.2m", res);
}

View File

@ -204,8 +204,12 @@ test "Format VectorX" {
const accel = MeterPerSecondSq.Vec3.initDefault(9.81);
const momentum = KgMeterPerSecond.Vec3{ .data = .{ 43, 0, 11 } };
std.debug.print("Acceleration: {d}\n", .{accel});
std.debug.print("Momentum: {d:.2}\n", .{momentum});
var buf: [64]u8 = undefined;
var res = try std.fmt.bufPrint(&buf, "{d}", .{accel});
try std.testing.expectEqualStrings("(9.81, 9.81, 9.81)m.ns⁻²", res);
res = try std.fmt.bufPrint(&buf, "{d:.2}", .{momentum});
try std.testing.expectEqualStrings("(43.00, 0.00, 11.00)m.kg.s⁻¹", res);
}
test "VecX Init and Basic Arithmetic" {
@ -326,124 +330,3 @@ test "VecX Length" {
try std.testing.expectApproxEqAbs(@as(f32, 25.0), v_float.lengthSqr(), 1e-4);
try std.testing.expectApproxEqAbs(@as(f32, 5.0), v_float.length(), 1e-4);
}
test "Benchmark Vector ops" {
const Io = std.Io;
const ITERS: usize = 10_000;
const SAMPLES: usize = 10;
var gsink: f64 = 0;
// In Zig 0.14+, we use the testing IO for clock access in tests
const io = std.testing.io;
const getTime = struct {
fn f(i: Io) Io.Timestamp {
return Io.Clock.awake.now(i);
}
}.f;
const getVal = struct {
fn f(comptime TT: type, i: usize, comptime mask: u7) TT {
const v: u8 = @as(u8, @truncate(i & @as(usize, mask))) + 1;
return if (comptime @typeInfo(TT) == .float) @floatFromInt(v) else @intCast(v);
}
}.f;
const fold = struct {
fn f(comptime TT: type, s: *f64, v: TT) void {
s.* += if (comptime @typeInfo(TT) == .float)
@as(f64, @floatCast(v))
else
@as(f64, @floatFromInt(v));
}
}.f;
const computeStats = struct {
fn f(samples: []f64, iters: usize) f64 {
std.mem.sort(f64, samples, {}, std.sort.asc(f64));
const mid = samples.len / 2;
const median_ns = if (samples.len % 2 == 0)
(samples[mid - 1] + samples[mid]) / 2.0
else
samples[mid];
return median_ns / @as(f64, @floatFromInt(iters));
}
}.f;
std.debug.print(
\\
\\ Vector<N, T> benchmark — {d} iterations, {d} samples/cell
\\ (Results in ns/op)
\\
\\┌─────────────┬──────┬─────────┬─────────┬─────────┐
\\│ Operation │ Type │ Len=3 │ Len=4 │ Len=16 │
\\├─────────────┼──────┼─────────┼─────────┼─────────┤
\\
, .{ ITERS, SAMPLES });
const Types = .{ i32, i64, i128, f32, f64 };
const TNames = .{ "i32", "i64", "i128", "f32", "f64" };
const Lengths = .{ 3, 4, 16 };
const Ops = .{ "add", "scale", "mulByScalar", "length" };
inline for (Ops, 0..) |op_name, o_idx| {
inline for (Types, TNames) |T, tname| {
std.debug.print("│ {s:<11} │ {s:<4} │", .{ op_name, tname });
inline for (Lengths) |len| {
const Q_base = Scalar(T, Dimensions.init(.{ .L = 1 }), Scales.init(.{}));
const Q_time = Scalar(T, Dimensions.init(.{ .T = 1 }), Scales.init(.{}));
const V = Vector(len, Q_base);
var samples: [SAMPLES]f64 = undefined;
for (0..SAMPLES) |s_idx| {
var sink: T = 0;
const t_start = getTime(io);
for (0..ITERS) |i| {
const v1 = V.initDefault(getVal(T, i, 63));
if (comptime std.mem.eql(u8, op_name, "add")) {
const v2 = V.initDefault(getVal(T, i +% 7, 63));
const res = v1.add(v2);
for (res.data) |val| {
if (comptime @typeInfo(T) == .float) sink += val else sink ^= val;
}
} else if (comptime std.mem.eql(u8, op_name, "scale")) {
const sc = getVal(T, i +% 2, 63);
const res = v1.scale(sc);
for (res.data) |val| {
if (comptime @typeInfo(T) == .float) sink += val else sink ^= val;
}
} else if (comptime std.mem.eql(u8, op_name, "mulByScalar")) {
const s_val = Q_time{ .value = getVal(T, i +% 2, 63) };
const res = v1.mulByScalar(s_val);
for (res.data) |val| {
if (comptime @typeInfo(T) == .float) sink += val else sink ^= val;
}
} else if (comptime std.mem.eql(u8, op_name, "length")) {
const r = v1.length();
if (comptime @typeInfo(T) == .float) sink += r else sink ^= r;
}
}
const t_end = getTime(io);
samples[s_idx] = @as(f64, @floatFromInt(t_start.durationTo(t_end).toNanoseconds()));
fold(T, &gsink, sink);
}
const median_ns_per_op = computeStats(&samples, ITERS);
std.debug.print(" {d:>7.1} │", .{median_ns_per_op});
}
std.debug.print("\n", .{});
}
if (o_idx < Ops.len - 1) {
std.debug.print("├─────────────┼──────┼─────────┼─────────┼─────────┤\n", .{});
}
}
std.debug.print("└─────────────┴──────┴─────────┴─────────┴─────────┘\n", .{});
std.debug.print("\nAnti-optimisation sink: {d:.4}\n", .{gsink});
try std.testing.expect(gsink != 0);
}

471
src/benchmark.zig Normal file
View File

@ -0,0 +1,471 @@
const std = @import("std");
const Io = std.Io;
const Scalar = @import("Scalar.zig").Scalar;
const Vector = @import("Vector.zig").Vector;
var io: Io = undefined;
/// Entry point of the benchmark executable.
/// Sets up a buffered stdout writer, stores the runtime `Io` handle in the
/// module-level `io` global (read by `getTime`), then runs each benchmark
/// suite in turn, flushing between suites so results appear incrementally.
pub fn main(init: std.process.Init) !void {
    var stdout_buf: [4096]u8 = undefined;
    var stdout_writer: std.Io.File.Writer = .init(.stdout(), init.io, &stdout_buf);
    try stdout_writer.interface.print("Starting Benchmarks...\n", .{});
    // Flush right away: without this the startup message sits in the buffer
    // and nothing is visible until the first (long-running) suite finishes.
    try stdout_writer.flush();
    io = init.io;
    try bench_Scalar(&stdout_writer.interface);
    try stdout_writer.flush();
    try bench_vsNative(&stdout_writer.interface);
    try stdout_writer.flush();
    try bench_crossTypeVsNative(&stdout_writer.interface);
    try stdout_writer.flush();
    try bench_Vector(&stdout_writer.interface);
    try stdout_writer.flush();
}
/// Reads the current timestamp from the awake clock via the module-level
/// `io` handle (set in `main` before any benchmark runs).
fn getTime() Io.Timestamp {
    const clock: Io.Clock = .awake;
    return clock.now(io);
}
/// Accumulates `v` into the f64 sink `s`, converting from either a float or
/// an integer source type. Used to keep benchmark results observably live.
fn fold(comptime TT: type, s: *f64, v: TT) void {
    const widened: f64 = switch (@typeInfo(TT)) {
        .float => @floatCast(v),
        else => @floatFromInt(v),
    };
    s.* += widened;
}
/// Benchmarks every `Scalar` operation (add/sub/mulBy/divBy/scale/to) across
/// a matrix of numeric types, printing a per-cell timing table followed by a
/// median summary. Each cell is the median over SAMPLES runs of ITERS ops.
fn bench_Scalar(writer: *std.Io.Writer) !void {
    const ITERS: usize = 100_000;
    const SAMPLES: usize = 10;
    // Folded result of every loop; checked at the end so the optimizer cannot
    // discard the benchmark work as dead code.
    var gsink: f64 = 0;
    // Produces a small non-zero operand (1..mask+1) derived from the loop index.
    const getVal = struct {
        fn f(comptime TT: type, i: usize, comptime mask: u7) TT {
            const v: u8 = @as(u8, @truncate(i & @as(usize, mask))) + 1;
            return if (comptime @typeInfo(TT) == .float) @floatFromInt(v) else @intCast(v);
        }
    }.f;
    const Stats = struct {
        median: f64, // median ns per op
        delta: f64, // half of (max - min), ns per op
        ops_per_sec: f64,
    };
    // Sorts the raw per-sample totals and converts them to per-op statistics.
    const computeStats = struct {
        fn f(samples: []f64, iters: usize) Stats {
            std.mem.sort(f64, samples, {}, std.sort.asc(f64));
            const mid = samples.len / 2;
            const median_ns = if (samples.len % 2 == 0) (samples[mid - 1] + samples[mid]) / 2.0 else samples[mid];
            const low = samples[0];
            const high = samples[samples.len - 1];
            const delta_ns = (high - low) / 2.0;
            const ns_per_op = median_ns / @as(f64, @floatFromInt(iters));
            return .{
                .median = ns_per_op,
                .delta = (delta_ns / @as(f64, @floatFromInt(iters))),
                .ops_per_sec = 1_000_000_000.0 / ns_per_op,
            };
        }
    }.f;
    try writer.print(
        \\
        \\ Scalar<T> benchmark — {d} iterations, {d} samples/cell
        \\
        \\┌───────────────────┬──────┬─────────────────────┬─────────────────────┐
        \\│ Operation │ Type │ ns / op (± delta) │ Throughput (ops/s) │
        \\├───────────────────┼──────┼─────────────────────┼─────────────────────┤
        \\
    , .{ ITERS, SAMPLES });
    const Types = .{ i16, i32, i64, i128, i256, f32, f64, f128 };
    const TNames = .{ "i16", "i32", "i64", "i128", "i256", "f32", "f64", "f128" };
    const Ops = .{ "add", "sub", "mulBy", "divBy", "scale", "to" };
    var results_matrix: [Ops.len][Types.len]f64 = undefined;
    // The `0..` index capture replaces the previous manually-incremented
    // `comptime var tidx` counter.
    inline for (Types, TNames, 0..) |T, tname, tidx| {
        const M = Scalar(T, .init(.{ .L = 1 }), .init(.{}));
        const KM = Scalar(T, .init(.{ .L = 1 }), .init(.{ .L = .k }));
        const S = Scalar(T, .init(.{ .T = 1 }), .init(.{}));
        inline for (Ops, 0..) |op_name, oidx| {
            var samples: [SAMPLES]f64 = undefined;
            for (0..SAMPLES) |s_idx| {
                var sink: T = 0;
                const t_start = getTime();
                for (0..ITERS) |i| {
                    // Comptime dispatch on the op name; each branch performs a
                    // single Scalar operation on index-derived operands.
                    const r = if (comptime std.mem.eql(u8, op_name, "add"))
                        (M{ .value = getVal(T, i, 63) }).add(M{ .value = getVal(T, i +% 7, 63) })
                    else if (comptime std.mem.eql(u8, op_name, "sub"))
                        (M{ .value = getVal(T, i +% 10, 63) }).sub(M{ .value = getVal(T, i, 63) })
                    else if (comptime std.mem.eql(u8, op_name, "mulBy"))
                        (M{ .value = getVal(T, i, 63) }).mulBy(M{ .value = getVal(T, i +% 1, 63) })
                    else if (comptime std.mem.eql(u8, op_name, "divBy"))
                        (M{ .value = getVal(T, i +% 10, 63) }).divBy(S{ .value = getVal(T, i, 63) })
                    else if (comptime std.mem.eql(u8, op_name, "scale"))
                        (M{ .value = getVal(T, i, 63) }).scale(getVal(T, i +% 2, 63))
                    else
                        (KM{ .value = getVal(T, i, 15) }).to(M);
                    if (comptime @typeInfo(T) == .float) sink += r.value else sink ^= r.value;
                }
                const t_end = getTime();
                samples[s_idx] = @as(f64, @floatFromInt(t_start.durationTo(t_end).toNanoseconds()));
                fold(T, &gsink, sink);
            }
            const stats = computeStats(&samples, ITERS);
            results_matrix[oidx][tidx] = stats.median;
            try writer.print("│ {s:<17} │ {s:<4} │ {d:>8.2} ns ±{d:<6.2} │ {d:>19.0} │\n", .{ op_name, tname, stats.median, stats.delta, stats.ops_per_sec });
        }
        if (comptime tidx < Types.len - 1) {
            try writer.print("├───────────────────┼──────┼─────────────────────┼─────────────────────┤\n", .{});
        }
    }
    // Median Summary Table
    try writer.print("└───────────────────┴──────┴─────────────────────┴─────────────────────┘\n\n", .{});
    try writer.print("Median Summary (ns/op):\n", .{});
    try writer.print("┌──────────────┬───────┬───────┬───────┬───────┬───────┬───────┬───────┬───────┐\n", .{});
    try writer.print("│ Operation │ i16 │ i32 │ i64 │ i128 │ i256 │ f32 │ f64 │ f128 │\n", .{});
    try writer.print("├──────────────┼───────┼───────┼───────┼───────┼───────┼───────┼───────┼───────┤\n", .{});
    inline for (Ops, 0..) |op_name, oidx| {
        try writer.print("│ {s:<11} │", .{op_name});
        var i: usize = 0;
        while (i < Types.len) : (i += 1)
            try writer.print("{d:>6.1} │", .{results_matrix[oidx][i]});
        try writer.print("\n", .{});
    }
    try writer.print("└──────────────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┘\n", .{});
    try writer.print("\nAnti-optimisation sink: {d:.4}\n", .{gsink});
    // std.testing.expect belongs in `test` blocks; in this standalone
    // benchmark executable, fail with an explicit error instead.
    if (gsink == 0) return error.BenchmarkSinkZero;
}
/// Measures the overhead of `Scalar` arithmetic against the same arithmetic
/// performed directly on the raw numeric type, printing average ns/op for
/// both sides and the resulting slowdown factor per (op, type) pair.
fn bench_vsNative(writer: *std.Io.Writer) !void {
    const ITERS: usize = 100_000;
    const SAMPLES: usize = 5;
    // Helper to safely get a value of type T from a loop index; the +1 keeps
    // it non-zero so the divBy/native-division branches never divide by zero.
    const getValT = struct {
        fn f(comptime TT: type, i: usize) TT {
            const v = (i % 100) + 1;
            return if (comptime @typeInfo(TT) == .float) @floatFromInt(v) else @intCast(v);
        }
    }.f;
    const Types = .{ i32, i64, i128, f32, f64 };
    const TNames = .{ "i32", "i64", "i128", "f32", "f64" };
    const Ops = .{ "add", "mulBy", "divBy" };
    // Folded sink keeps the optimizer from deleting the benchmark loops.
    var gsink: f64 = 0;
    try writer.print(
        \\
        \\ Scalar vs Native Overhead Analysis
        \\
        \\┌───────────┬──────┬───────────┬───────────┬───────────┐
        \\│ Operation │ Type │ Native │ Scalar │ Slowdown │
        \\├───────────┼──────┼───────────┼───────────┼───────────┤
        \\
    , .{});
    inline for (Ops, 0..) |op_name, j| {
        inline for (Types, 0..) |T, tidx| {
            var native_total_ns: f64 = 0;
            var quantity_total_ns: f64 = 0;
            const M = Scalar(T, .init(.{ .L = 1 }), .init(.{}));
            const S = Scalar(T, .init(.{ .T = 1 }), .init(.{}));
            for (0..SAMPLES) |_| {
                // --- 1. Benchmark Native ---
                var n_sink: T = 0;
                const n_start = getTime();
                for (0..ITERS) |i| {
                    const a = getValT(T, i);
                    const b = getValT(T, 2);
                    const r = if (comptime std.mem.eql(u8, op_name, "add"))
                        a + b
                    else if (comptime std.mem.eql(u8, op_name, "mulBy"))
                        a * b
                    else if (comptime @typeInfo(T) == .int) @divTrunc(a, b) else a / b;
                    if (comptime @typeInfo(T) == .float) n_sink += r else n_sink ^= r;
                }
                const n_end = getTime();
                native_total_ns += @as(f64, @floatFromInt(n_start.durationTo(n_end).toNanoseconds()));
                fold(T, &gsink, n_sink);
                // --- 2. Benchmark Scalar ---
                var q_sink: T = 0;
                const q_start = getTime();
                for (0..ITERS) |i| {
                    const qa = M{ .value = getValT(T, i) };
                    // divBy uses a time-dimensioned divisor so the result type differs.
                    const qb = if (comptime std.mem.eql(u8, op_name, "divBy")) S{ .value = getValT(T, 2) } else M{ .value = getValT(T, 2) };
                    const r = if (comptime std.mem.eql(u8, op_name, "add"))
                        qa.add(qb)
                    else if (comptime std.mem.eql(u8, op_name, "mulBy"))
                        qa.mulBy(qb)
                    else
                        qa.divBy(qb);
                    if (comptime @typeInfo(T) == .float) q_sink += r.value else q_sink ^= r.value;
                }
                const q_end = getTime();
                quantity_total_ns += @as(f64, @floatFromInt(q_start.durationTo(q_end).toNanoseconds()));
                fold(T, &gsink, q_sink);
            }
            const avg_n = (native_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
            const avg_q = (quantity_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
            const slowdown = avg_q / avg_n;
            try writer.print("│ {s:<9} │ {s:<4} │ {d:>7.2}ns │ {d:>7.2}ns │ {d:>8.2}x │\n", .{
                op_name, TNames[tidx], avg_n, avg_q, slowdown,
            });
        }
        if (j != Ops.len - 1) try writer.print("├───────────┼──────┼───────────┼───────────┼───────────┤\n", .{});
    }
    try writer.print("└───────────┴──────┴───────────┴───────────┴───────────┘\n", .{});
    try writer.print("\nAnti-optimisation sink: {d:.4}\n", .{gsink});
    // std.testing.expect belongs in `test` blocks; in this standalone
    // benchmark executable, fail with an explicit error instead.
    if (gsink == 0) return error.BenchmarkSinkZero;
}
fn bench_crossTypeVsNative(writer: *std.Io.Writer) !void {
    const ITERS: usize = 100_000;
    const SAMPLES: usize = 5;
    // Deterministic, strictly positive value in [1, 50] so cross-type casts
    // never overflow the narrowest types and divisions never see a zero.
    const sampleValue = struct {
        fn f(comptime TT: type, i: usize) TT {
            const v = (i % 50) + 1;
            return if (comptime @typeInfo(TT) == .float) @floatFromInt(v) else @intCast(v);
        }
    }.f;
    // Native-baseline helper: the explicit SrcT -> DestT conversion a user
    // would write by hand before doing mixed-type arithmetic.
    const convert = struct {
        fn f(comptime DestT: type, comptime SrcT: type, val: SrcT) DestT {
            if (comptime DestT == SrcT) return val;
            switch (@typeInfo(DestT)) {
                .int => switch (@typeInfo(SrcT)) {
                    .int => return @intCast(val),
                    .float => return @intFromFloat(val),
                    else => unreachable,
                },
                .float => switch (@typeInfo(SrcT)) {
                    .int => return @floatFromInt(val),
                    .float => return @floatCast(val),
                    else => unreachable,
                },
                else => unreachable,
            }
        }
    }.f;
    const Types = .{ i16, i64, i128, f32, f64 };
    const TNames = .{ "i16", "i64", "i128", "f32", "f64" };
    const Ops = .{ "add", "mulBy", "divBy" };
    var global_sink: f64 = 0;
    try writer.print(
        \\
        \\ Cross-Type Overhead Analysis: Scalar vs Native
        \\
        \\┌─────────┬──────┬──────┬───────────┬───────────┬───────────┐
        \\│ Op │ T1 │ T2 │ Native │ Scalar │ Slowdown │
        \\├─────────┼──────┼──────┼───────────┼───────────┼───────────┤
        \\
    , .{});
    inline for (Ops, 0..) |op_name, op_idx| {
        // Comptime-resolved op selectors; the untaken branches below vanish.
        const is_add = comptime std.mem.eql(u8, op_name, "add");
        const is_mul = comptime std.mem.eql(u8, op_name, "mulBy");
        const is_div = comptime std.mem.eql(u8, op_name, "divBy");
        inline for (Types, 0..) |T1, lhs_idx| {
            inline for (Types, 0..) |T2, rhs_idx| {
                const M1 = Scalar(T1, .init(.{ .L = 1 }), .init(.{}));
                const M2 = Scalar(T2, .init(.{ .L = 1 }), .init(.{}));
                // Distinct dimension for the divBy operand so the quotient is
                // a valid derived quantity.
                const S2 = Scalar(T2, .init(.{ .T = 1 }), .init(.{}));
                var native_ns: f64 = 0;
                var scalar_ns: f64 = 0;
                for (0..SAMPLES) |_| {
                    // --- 1. Native baseline: cast T2 to T1 up front, raw math ---
                    var native_sink: T1 = 0;
                    const n_t0 = getTime();
                    for (0..ITERS) |i| {
                        const lhs = sampleValue(T1, i);
                        const rhs = convert(T1, T2, sampleValue(T2, 2));
                        const r = if (is_add)
                            lhs + rhs
                        else if (is_mul)
                            lhs * rhs
                        else if (comptime @typeInfo(T1) == .int)
                            @divTrunc(lhs, rhs)
                        else
                            lhs / rhs;
                        // Fold the result so the optimiser cannot drop the loop.
                        if (comptime @typeInfo(T1) == .float) native_sink += r else native_sink ^= r;
                    }
                    const n_t1 = getTime();
                    native_ns += @floatFromInt(n_t0.durationTo(n_t1).toNanoseconds());
                    fold(T1, &global_sink, native_sink);
                    // --- 2. Scalar wrapper: identical values through unit types ---
                    var scalar_sink: T1 = 0;
                    const q_t0 = getTime();
                    for (0..ITERS) |i| {
                        const qa = M1{ .value = sampleValue(T1, i) };
                        const qb = if (is_div)
                            S2{ .value = sampleValue(T2, 2) }
                        else
                            M2{ .value = sampleValue(T2, 2) };
                        const r = if (is_add)
                            qa.add(qb)
                        else if (is_mul)
                            qa.mulBy(qb)
                        else
                            qa.divBy(qb);
                        if (comptime @typeInfo(T1) == .float) scalar_sink += r.value else scalar_sink ^= r.value;
                    }
                    const q_t1 = getTime();
                    scalar_ns += @floatFromInt(q_t0.durationTo(q_t1).toNanoseconds());
                    fold(T1, &global_sink, scalar_sink);
                }
                const ns_per_native = (native_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
                const ns_per_scalar = (scalar_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
                try writer.print("│ {s:<7} │ {s:<4} │ {s:<4} │ {d:>7.2}ns │ {d:>7.2}ns │ {d:>8.2}x │\n", .{
                    op_name, TNames[lhs_idx], TNames[rhs_idx], ns_per_native, ns_per_scalar, ns_per_scalar / ns_per_native,
                });
            }
        }
        if (op_idx != Ops.len - 1) {
            try writer.print("├─────────┼──────┼──────┼───────────┼───────────┼───────────┤\n", .{});
        }
    }
    try writer.print("└─────────┴──────┴──────┴───────────┴───────────┴───────────┘\n", .{});
    try writer.print("\nAnti-optimisation sink: {d:.4}\n", .{global_sink});
    // The sink must have absorbed real work; a zero means everything was
    // optimised away and the timings above are meaningless.
    try std.testing.expect(global_sink != 0);
}
fn bench_Vector(writer: *std.Io.Writer) !void {
    const ITERS: usize = 10_000;
    const SAMPLES: usize = 10;
    var global_sink: f64 = 0;
    // Deterministic, non-zero value derived from the loop counter; the mask
    // keeps magnitudes small so integer component math cannot overflow.
    const makeValue = struct {
        fn f(comptime TT: type, i: usize, comptime mask: u7) TT {
            const v: u8 = @as(u8, @truncate(i & @as(usize, mask))) + 1;
            return if (comptime @typeInfo(TT) == .float) @floatFromInt(v) else @intCast(v);
        }
    }.f;
    // Folds every component of a vector result into the per-type accumulator
    // so the optimiser cannot discard the benchmarked work.
    const absorb = struct {
        fn f(comptime TT: type, acc: *TT, components: anytype) void {
            for (components) |c| {
                if (comptime @typeInfo(TT) == .float) acc.* += c else acc.* ^= c;
            }
        }
    }.f;
    // Median of the raw per-sample timings, normalised to ns per iteration.
    const medianPerOp = struct {
        fn f(timings: []f64, iters: usize) f64 {
            std.mem.sort(f64, timings, {}, std.sort.asc(f64));
            const mid = timings.len / 2;
            const median = if (timings.len % 2 == 0)
                (timings[mid - 1] + timings[mid]) / 2.0
            else
                timings[mid];
            return median / @as(f64, @floatFromInt(iters));
        }
    }.f;
    try writer.print(
        \\
        \\ Vector<N, T> benchmark — {d} iterations, {d} samples/cell
        \\ (Results in ns/op)
        \\
        \\┌─────────────┬──────┬─────────┬─────────┬─────────┐
        \\│ Operation │ Type │ Len=3 │ Len=4 │ Len=16 │
        \\├─────────────┼──────┼─────────┼─────────┼─────────┤
        \\
    , .{ ITERS, SAMPLES });
    const Types = .{ i32, i64, i128, f32, f64 };
    const TNames = .{ "i32", "i64", "i128", "f32", "f64" };
    const Lengths = .{ 3, 4, 16 };
    const Ops = .{ "add", "scale", "mulByScalar", "length" };
    inline for (Ops, 0..) |op_name, op_idx| {
        inline for (Types, TNames) |T, type_label| {
            try writer.print("│ {s:<11} │ {s:<4} │", .{ op_name, type_label });
            inline for (Lengths) |len| {
                const Q_base = Scalar(T, .init(.{ .L = 1 }), .init(.{}));
                const Q_time = Scalar(T, .init(.{ .T = 1 }), .init(.{}));
                const V = Vector(len, Q_base);
                var timings: [SAMPLES]f64 = undefined;
                for (&timings) |*slot| {
                    var acc: T = 0;
                    const t0 = getTime();
                    for (0..ITERS) |i| {
                        const lhs = V.initDefault(makeValue(T, i, 63));
                        if (comptime std.mem.eql(u8, op_name, "add")) {
                            const rhs = V.initDefault(makeValue(T, i +% 7, 63));
                            absorb(T, &acc, lhs.add(rhs).data);
                        } else if (comptime std.mem.eql(u8, op_name, "scale")) {
                            absorb(T, &acc, lhs.scale(makeValue(T, i +% 2, 63)).data);
                        } else if (comptime std.mem.eql(u8, op_name, "mulByScalar")) {
                            // Multiplying by a time-dimensioned scalar exercises
                            // the dimension-combining path.
                            const factor = Q_time{ .value = makeValue(T, i +% 2, 63) };
                            absorb(T, &acc, lhs.mulByScalar(factor).data);
                        } else if (comptime std.mem.eql(u8, op_name, "length")) {
                            const r = lhs.length();
                            if (comptime @typeInfo(T) == .float) acc += r else acc ^= r;
                        }
                    }
                    const t1 = getTime();
                    slot.* = @floatFromInt(t0.durationTo(t1).toNanoseconds());
                    fold(T, &global_sink, acc);
                }
                try writer.print(" {d:>7.1} │", .{medianPerOp(&timings, ITERS)});
            }
            try writer.print("\n", .{});
        }
        if (op_idx < Ops.len - 1) {
            try writer.print("├─────────────┼──────┼─────────┼─────────┼─────────┤\n", .{});
        }
    }
    try writer.print("└─────────────┴──────┴─────────┴─────────┴─────────┘\n", .{});
    try writer.print("\nAnti-optimisation sink: {d:.4}\n", .{global_sink});
    // A zero sink means the work above was optimised away and the timings
    // are meaningless.
    try std.testing.expect(global_sink != 0);
}