Compare commits

...

2 Commits

Author SHA1 Message Date
adrien  cc39c1a6f4  Same for all bench, no more sink  2026-04-22 00:17:58 +02:00
adrien  d96a1ac4bc  Replaced a sink with std.mem.doNotOptimizeAway  2026-04-22 00:12:38 +02:00


@@ -6,6 +6,9 @@ const Vector = @import("Vector.zig").Vector;
 var io: Io = undefined;
 
 pub fn main(init: std.process.Init) !void {
+    const zone = tracy.ZoneN(@src(), "Main Loop");
+    defer zone.End();
+
     var stdout_buf: [4096]u8 = undefined;
     var stdout_writer: std.Io.File.Writer = .init(.stdout(), init.io, &stdout_buf);
     try stdout_writer.interface.print("Starting Benchmarks...", .{});
@@ -20,6 +23,8 @@ pub fn main(init: std.process.Init) !void {
     try stdout_writer.flush();
     try bench_Vector(&stdout_writer.interface);
     try stdout_writer.flush();
+
+    tracy.FrameMark();
 }
 
 fn getTime() Io.Timestamp {
@@ -37,8 +42,6 @@ fn bench_Scalar(writer: *std.Io.Writer) !void {
     const ITERS: usize = 100_000;
     const SAMPLES: usize = 10;
 
-    var gsink: f64 = 0;
-
     const getVal = struct {
         fn f(comptime TT: type, i: usize, comptime mask: u7) TT {
             const v: u8 = @as(u8, @truncate(i & @as(usize, mask))) + 1;
@@ -97,11 +100,12 @@ fn bench_Scalar(writer: *std.Io.Writer) !void {
         var samples: [SAMPLES]f64 = undefined;
         for (0..SAMPLES) |s_idx| {
-            var sink: T = 0;
             const t_start = getTime();
             for (0..ITERS) |i| {
-                const r = if (comptime std.mem.eql(u8, op_name, "add"))
+                std.mem.doNotOptimizeAway(
+                    {
+                        _ = if (comptime std.mem.eql(u8, op_name, "add"))
                     (M{ .value = getVal(T, i, 63) }).add(M{ .value = getVal(T, i +% 7, 63) })
                 else if (comptime std.mem.eql(u8, op_name, "sub"))
                     (M{ .value = getVal(T, i +% 10, 63) }).sub(M{ .value = getVal(T, i, 63) })
@@ -113,13 +117,12 @@ fn bench_Scalar(writer: *std.Io.Writer) !void {
                     (M{ .value = getVal(T, i, 63) }).scale(getVal(T, i +% 2, 63))
                 else
                     (KM{ .value = getVal(T, i, 15) }).to(M);
-                if (comptime @typeInfo(T) == .float) sink += r.value else sink ^= r.value;
+                    },
+                );
             }
             const t_end = getTime();
             samples[s_idx] = @as(f64, @floatFromInt(t_start.durationTo(t_end).toNanoseconds()));
-            fold(T, &gsink, sink);
         }
         const stats = computeStats(&samples, ITERS);
@@ -152,9 +155,6 @@ fn bench_Scalar(writer: *std.Io.Writer) !void {
     }
     try writer.print("└──────────────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┘\n", .{});
-
-    try writer.print("\nAnti-optimisation sink: {d:.4}\n", .{gsink});
-    try std.testing.expect(gsink != 0);
 }
 
 fn bench_vsNative(writer: *std.Io.Writer) !void {
@@ -173,8 +173,6 @@ fn bench_vsNative(writer: *std.Io.Writer) !void {
     const TNames = .{ "i32", "i64", "i128", "f32", "f64" };
     const Ops = .{ "add", "mulBy", "divBy" };
 
-    var gsink: f64 = 0;
-
     try writer.print(
         \\
         \\ Scalar vs Native Overhead Analysis
@@ -193,45 +191,39 @@ fn bench_vsNative(writer: *std.Io.Writer) !void {
             const M = Scalar(T, .init(.{ .L = 1 }), .init(.{}));
             const S = Scalar(T, .init(.{ .T = 1 }), .init(.{}));
 
+            std.mem.doNotOptimizeAway({
             for (0..SAMPLES) |_| {
                 // --- 1. Benchmark Native ---
-                var n_sink: T = 0;
                 const n_start = getTime();
                 for (0..ITERS) |i| {
                     const a = getValT(T, i);
                     const b = getValT(T, 2);
-                    const r = if (comptime std.mem.eql(u8, op_name, "add"))
+                    _ = if (comptime std.mem.eql(u8, op_name, "add"))
                         a + b
                     else if (comptime std.mem.eql(u8, op_name, "mulBy"))
                         a * b
                     else if (comptime @typeInfo(T) == .int) @divTrunc(a, b) else a / b;
-                    if (comptime @typeInfo(T) == .float) n_sink += r else n_sink ^= r;
                 }
                 const n_end = getTime();
                 native_total_ns += @as(f64, @floatFromInt(n_start.durationTo(n_end).toNanoseconds()));
-                fold(T, &gsink, n_sink);
 
                 // --- 2. Benchmark Scalar ---
-                var q_sink: T = 0;
                 const q_start = getTime();
                 for (0..ITERS) |i| {
                     const qa = M{ .value = getValT(T, i) };
                     const qb = if (comptime std.mem.eql(u8, op_name, "divBy")) S{ .value = getValT(T, 2) } else M{ .value = getValT(T, 2) };
-                    const r = if (comptime std.mem.eql(u8, op_name, "add"))
+                    _ = if (comptime std.mem.eql(u8, op_name, "add"))
                         qa.add(qb)
                     else if (comptime std.mem.eql(u8, op_name, "mulBy"))
                         qa.mulBy(qb)
                     else
                         qa.divBy(qb);
-                    if (comptime @typeInfo(T) == .float) q_sink += r.value else q_sink ^= r.value;
                 }
                 const q_end = getTime();
                 quantity_total_ns += @as(f64, @floatFromInt(q_start.durationTo(q_end).toNanoseconds()));
-                fold(T, &gsink, q_sink);
             }
+            });
 
             const avg_n = (native_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
             const avg_q = (quantity_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
@@ -245,8 +237,6 @@ fn bench_vsNative(writer: *std.Io.Writer) !void {
     }
     try writer.print("└───────────┴──────┴───────────┴───────────┴───────────┘\n", .{});
-    try writer.print("\nAnti-optimisation sink: {d:.4}\n", .{gsink});
-    try std.testing.expect(gsink != 0);
 }
 
 fn bench_crossTypeVsNative(writer: *std.Io.Writer) !void {
@@ -280,8 +270,6 @@ fn bench_crossTypeVsNative(writer: *std.Io.Writer) !void {
     const TNames = .{ "i16", "i64", "i128", "f32", "f64" };
     const Ops = .{ "add", "mulBy", "divBy" };
 
-    var gsink: f64 = 0;
-
     try writer.print(
         \\
         \\ Cross-Type Overhead Analysis: Scalar vs Native
@@ -302,16 +290,16 @@ fn bench_crossTypeVsNative(writer: *std.Io.Writer) !void {
                 const M2 = Scalar(T2, .init(.{ .L = 1 }), .init(.{}));
                 const S2 = Scalar(T2, .init(.{ .T = 1 }), .init(.{}));
 
+                std.mem.doNotOptimizeAway({
                 for (0..SAMPLES) |_| {
                     // --- 1. Benchmark Native (Cast T2 to T1, then math) ---
-                    var n_sink: T1 = 0;
                     const n_start = getTime();
                     for (0..ITERS) |i| {
                         const a = getValT(T1, i);
                         const b_raw = getValT(T2, 2);
                         const b = castTo(T1, T2, b_raw);
-                        const r = if (comptime std.mem.eql(u8, op_name, "add"))
+                        _ = if (comptime std.mem.eql(u8, op_name, "add"))
                             a + b
                         else if (comptime std.mem.eql(u8, op_name, "mulBy"))
                             a * b
@@ -319,15 +307,11 @@ fn bench_crossTypeVsNative(writer: *std.Io.Writer) !void {
                            @divTrunc(a, b)
                         else
                             a / b;
-                        if (comptime @typeInfo(T1) == .float) n_sink += r else n_sink ^= r;
                     }
                     const n_end = getTime();
                     native_total_ns += @as(f64, @floatFromInt(n_start.durationTo(n_end).toNanoseconds()));
-                    fold(T1, &gsink, n_sink);
 
                     // --- 2. Benchmark Scalar ---
-                    var q_sink: T1 = 0;
                     const q_start = getTime();
                     for (0..ITERS) |i| {
                         const qa = M1{ .value = getValT(T1, i) };
@@ -336,18 +320,15 @@ fn bench_crossTypeVsNative(writer: *std.Io.Writer) !void {
                         else
                             M2{ .value = getValT(T2, 2) };
-                        const r = if (comptime std.mem.eql(u8, op_name, "add"))
+                        _ = if (comptime std.mem.eql(u8, op_name, "add"))
                             qa.add(qb)
                         else if (comptime std.mem.eql(u8, op_name, "mulBy"))
                             qa.mulBy(qb)
                         else
                             qa.divBy(qb);
-                        if (comptime @typeInfo(T1) == .float) q_sink += r.value else q_sink ^= r.value;
                     }
                     const q_end = getTime();
                     quantity_total_ns += @as(f64, @floatFromInt(q_start.durationTo(q_end).toNanoseconds()));
-                    fold(T1, &gsink, q_sink);
                 }
 
                 const avg_n = (native_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
@@ -357,6 +338,7 @@ fn bench_crossTypeVsNative(writer: *std.Io.Writer) !void {
                 try writer.print("│ {s:<7} │ {s:<4} │ {s:<4} │ {d:>7.2}ns │ {d:>7.2}ns │ {d:>8.2}x │\n", .{
                     op_name, TNames[t1_idx], TNames[t2_idx], avg_n, avg_q, slowdown,
                 });
+                });
             }
         }
         if (j != Ops.len - 1) {
@@ -365,14 +347,11 @@ fn bench_crossTypeVsNative(writer: *std.Io.Writer) !void {
     }
     try writer.print("└─────────┴──────┴──────┴───────────┴───────────┴───────────┘\n", .{});
-    try writer.print("\nAnti-optimisation sink: {d:.4}\n", .{gsink});
-    try std.testing.expect(gsink != 0);
 }
 
 fn bench_Vector(writer: *std.Io.Writer) !void {
     const ITERS: usize = 10_000;
     const SAMPLES: usize = 10;
 
-    var gsink: f64 = 0;
-
     const getVal = struct {
         fn f(comptime TT: type, i: usize, comptime mask: u7) TT {
@@ -420,44 +399,33 @@ fn bench_Vector(writer: *std.Io.Writer) !void {
             var samples: [SAMPLES]f64 = undefined;
 
+            std.mem.doNotOptimizeAway({
             for (0..SAMPLES) |s_idx| {
-                var sink: T = 0;
                 const t_start = getTime();
                 for (0..ITERS) |i| {
                     const v1 = V.initDefault(getVal(T, i, 63));
                     if (comptime std.mem.eql(u8, op_name, "add")) {
                         const v2 = V.initDefault(getVal(T, i +% 7, 63));
-                        const res = v1.add(v2);
-                        for (res.data) |val| {
-                            if (comptime @typeInfo(T) == .float) sink += val else sink ^= val;
-                        }
+                        _ = v1.add(v2);
                     } else if (comptime std.mem.eql(u8, op_name, "scale")) {
                         const sc = getVal(T, i +% 2, 63);
-                        const res = v1.scale(sc);
-                        for (res.data) |val| {
-                            if (comptime @typeInfo(T) == .float) sink += val else sink ^= val;
-                        }
+                        _ = v1.scale(sc);
                     } else if (comptime std.mem.eql(u8, op_name, "mulByScalar")) {
                         const s_val = Q_time{ .value = getVal(T, i +% 2, 63) };
-                        const res = v1.mulByScalar(s_val);
-                        for (res.data) |val| {
-                            if (comptime @typeInfo(T) == .float) sink += val else sink ^= val;
-                        }
+                        _ = v1.mulByScalar(s_val);
                     } else if (comptime std.mem.eql(u8, op_name, "length")) {
-                        const r = v1.length();
-                        if (comptime @typeInfo(T) == .float) sink += r else sink ^= r;
+                        _ = v1.length();
                     }
                 }
                 const t_end = getTime();
                 samples[s_idx] = @as(f64, @floatFromInt(t_start.durationTo(t_end).toNanoseconds()));
-                fold(T, &gsink, sink);
             }
             const median_ns_per_op = computeStats(&samples, ITERS);
             try writer.print(" {d:>7.1} │", .{median_ns_per_op});
+            });
         }
         try writer.print("\n", .{});
     }
@@ -467,6 +435,4 @@ fn bench_Vector(writer: *std.Io.Writer) !void {
         }
     }
     try writer.print("└─────────────┴──────┴─────────┴─────────┴─────────┘\n", .{});
-    try writer.print("\nAnti-optimisation sink: {d:.4}\n", .{gsink});
-    try std.testing.expect(gsink != 0);
 }
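
The compared range also threads Tracy instrumentation through main: a named zone opened on entry and a frame mark after the final flush. A minimal sketch of that zone/frame pattern, assuming a Zig Tracy binding that exposes ZoneN, End, and FrameMark with the signatures used in the diff (the module name below is hypothetical):

    const tracy = @import("tracy"); // hypothetical module name for the binding

    pub fn main() void {
        // Open a named zone covering the whole run; End() is deferred so
        // the zone closes on every exit path.
        const zone = tracy.ZoneN(@src(), "Main Loop");
        defer zone.End();

        // ... run the benchmarks ...

        // Mark a frame boundary so Tracy groups this run's samples.
        tracy.FrameMark();
    }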