Same for all bench, no more sink
This commit is contained in:
parent
d96a1ac4bc
commit
cc39c1a6f4
@ -173,8 +173,6 @@ fn bench_vsNative(writer: *std.Io.Writer) !void {
|
|||||||
const TNames = .{ "i32", "i64", "i128", "f32", "f64" };
|
const TNames = .{ "i32", "i64", "i128", "f32", "f64" };
|
||||||
const Ops = .{ "add", "mulBy", "divBy" };
|
const Ops = .{ "add", "mulBy", "divBy" };
|
||||||
|
|
||||||
var gsink: f64 = 0;
|
|
||||||
|
|
||||||
try writer.print(
|
try writer.print(
|
||||||
\\
|
\\
|
||||||
\\ Scalar vs Native Overhead Analysis
|
\\ Scalar vs Native Overhead Analysis
|
||||||
@ -193,45 +191,39 @@ fn bench_vsNative(writer: *std.Io.Writer) !void {
|
|||||||
const M = Scalar(T, .init(.{ .L = 1 }), .init(.{}));
|
const M = Scalar(T, .init(.{ .L = 1 }), .init(.{}));
|
||||||
const S = Scalar(T, .init(.{ .T = 1 }), .init(.{}));
|
const S = Scalar(T, .init(.{ .T = 1 }), .init(.{}));
|
||||||
|
|
||||||
for (0..SAMPLES) |_| {
|
std.mem.doNotOptimizeAway({
|
||||||
// --- 1. Benchmark Native ---
|
for (0..SAMPLES) |_| {
|
||||||
var n_sink: T = 0;
|
// --- 1. Benchmark Native ---
|
||||||
const n_start = getTime();
|
const n_start = getTime();
|
||||||
for (0..ITERS) |i| {
|
for (0..ITERS) |i| {
|
||||||
const a = getValT(T, i);
|
const a = getValT(T, i);
|
||||||
const b = getValT(T, 2);
|
const b = getValT(T, 2);
|
||||||
const r = if (comptime std.mem.eql(u8, op_name, "add"))
|
_ = if (comptime std.mem.eql(u8, op_name, "add"))
|
||||||
a + b
|
a + b
|
||||||
else if (comptime std.mem.eql(u8, op_name, "mulBy"))
|
else if (comptime std.mem.eql(u8, op_name, "mulBy"))
|
||||||
a * b
|
a * b
|
||||||
else if (comptime @typeInfo(T) == .int) @divTrunc(a, b) else a / b;
|
else if (comptime @typeInfo(T) == .int) @divTrunc(a, b) else a / b;
|
||||||
|
}
|
||||||
|
const n_end = getTime();
|
||||||
|
native_total_ns += @as(f64, @floatFromInt(n_start.durationTo(n_end).toNanoseconds()));
|
||||||
|
|
||||||
if (comptime @typeInfo(T) == .float) n_sink += r else n_sink ^= r;
|
// --- 2. Benchmark Scalar ---
|
||||||
|
const q_start = getTime();
|
||||||
|
for (0..ITERS) |i| {
|
||||||
|
const qa = M{ .value = getValT(T, i) };
|
||||||
|
const qb = if (comptime std.mem.eql(u8, op_name, "divBy")) S{ .value = getValT(T, 2) } else M{ .value = getValT(T, 2) };
|
||||||
|
|
||||||
|
_ = if (comptime std.mem.eql(u8, op_name, "add"))
|
||||||
|
qa.add(qb)
|
||||||
|
else if (comptime std.mem.eql(u8, op_name, "mulBy"))
|
||||||
|
qa.mulBy(qb)
|
||||||
|
else
|
||||||
|
qa.divBy(qb);
|
||||||
|
}
|
||||||
|
const q_end = getTime();
|
||||||
|
quantity_total_ns += @as(f64, @floatFromInt(q_start.durationTo(q_end).toNanoseconds()));
|
||||||
}
|
}
|
||||||
const n_end = getTime();
|
});
|
||||||
native_total_ns += @as(f64, @floatFromInt(n_start.durationTo(n_end).toNanoseconds()));
|
|
||||||
fold(T, &gsink, n_sink);
|
|
||||||
|
|
||||||
// --- 2. Benchmark Scalar ---
|
|
||||||
var q_sink: T = 0;
|
|
||||||
const q_start = getTime();
|
|
||||||
for (0..ITERS) |i| {
|
|
||||||
const qa = M{ .value = getValT(T, i) };
|
|
||||||
const qb = if (comptime std.mem.eql(u8, op_name, "divBy")) S{ .value = getValT(T, 2) } else M{ .value = getValT(T, 2) };
|
|
||||||
|
|
||||||
const r = if (comptime std.mem.eql(u8, op_name, "add"))
|
|
||||||
qa.add(qb)
|
|
||||||
else if (comptime std.mem.eql(u8, op_name, "mulBy"))
|
|
||||||
qa.mulBy(qb)
|
|
||||||
else
|
|
||||||
qa.divBy(qb);
|
|
||||||
|
|
||||||
if (comptime @typeInfo(T) == .float) q_sink += r.value else q_sink ^= r.value;
|
|
||||||
}
|
|
||||||
const q_end = getTime();
|
|
||||||
quantity_total_ns += @as(f64, @floatFromInt(q_start.durationTo(q_end).toNanoseconds()));
|
|
||||||
fold(T, &gsink, q_sink);
|
|
||||||
}
|
|
||||||
|
|
||||||
const avg_n = (native_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
|
const avg_n = (native_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
|
||||||
const avg_q = (quantity_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
|
const avg_q = (quantity_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
|
||||||
@ -245,8 +237,6 @@ fn bench_vsNative(writer: *std.Io.Writer) !void {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try writer.print("└───────────┴──────┴───────────┴───────────┴───────────┘\n", .{});
|
try writer.print("└───────────┴──────┴───────────┴───────────┴───────────┘\n", .{});
|
||||||
try writer.print("\nAnti-optimisation sink: {d:.4}\n", .{gsink});
|
|
||||||
try std.testing.expect(gsink != 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn bench_crossTypeVsNative(writer: *std.Io.Writer) !void {
|
fn bench_crossTypeVsNative(writer: *std.Io.Writer) !void {
|
||||||
@ -280,8 +270,6 @@ fn bench_crossTypeVsNative(writer: *std.Io.Writer) !void {
|
|||||||
const TNames = .{ "i16", "i64", "i128", "f32", "f64" };
|
const TNames = .{ "i16", "i64", "i128", "f32", "f64" };
|
||||||
const Ops = .{ "add", "mulBy", "divBy" };
|
const Ops = .{ "add", "mulBy", "divBy" };
|
||||||
|
|
||||||
var gsink: f64 = 0;
|
|
||||||
|
|
||||||
try writer.print(
|
try writer.print(
|
||||||
\\
|
\\
|
||||||
\\ Cross-Type Overhead Analysis: Scalar vs Native
|
\\ Cross-Type Overhead Analysis: Scalar vs Native
|
||||||
@ -302,60 +290,54 @@ fn bench_crossTypeVsNative(writer: *std.Io.Writer) !void {
|
|||||||
const M2 = Scalar(T2, .init(.{ .L = 1 }), .init(.{}));
|
const M2 = Scalar(T2, .init(.{ .L = 1 }), .init(.{}));
|
||||||
const S2 = Scalar(T2, .init(.{ .T = 1 }), .init(.{}));
|
const S2 = Scalar(T2, .init(.{ .T = 1 }), .init(.{}));
|
||||||
|
|
||||||
for (0..SAMPLES) |_| {
|
std.mem.doNotOptimizeAway({
|
||||||
// --- 1. Benchmark Native (Cast T2 to T1, then math) ---
|
for (0..SAMPLES) |_| {
|
||||||
var n_sink: T1 = 0;
|
// --- 1. Benchmark Native (Cast T2 to T1, then math) ---
|
||||||
const n_start = getTime();
|
const n_start = getTime();
|
||||||
for (0..ITERS) |i| {
|
for (0..ITERS) |i| {
|
||||||
const a = getValT(T1, i);
|
const a = getValT(T1, i);
|
||||||
const b_raw = getValT(T2, 2);
|
const b_raw = getValT(T2, 2);
|
||||||
const b = castTo(T1, T2, b_raw);
|
const b = castTo(T1, T2, b_raw);
|
||||||
|
|
||||||
const r = if (comptime std.mem.eql(u8, op_name, "add"))
|
_ = if (comptime std.mem.eql(u8, op_name, "add"))
|
||||||
a + b
|
a + b
|
||||||
else if (comptime std.mem.eql(u8, op_name, "mulBy"))
|
else if (comptime std.mem.eql(u8, op_name, "mulBy"))
|
||||||
a * b
|
a * b
|
||||||
else if (comptime @typeInfo(T1) == .int)
|
else if (comptime @typeInfo(T1) == .int)
|
||||||
@divTrunc(a, b)
|
@divTrunc(a, b)
|
||||||
else
|
else
|
||||||
a / b;
|
a / b;
|
||||||
|
}
|
||||||
|
const n_end = getTime();
|
||||||
|
native_total_ns += @as(f64, @floatFromInt(n_start.durationTo(n_end).toNanoseconds()));
|
||||||
|
|
||||||
if (comptime @typeInfo(T1) == .float) n_sink += r else n_sink ^= r;
|
// --- 2. Benchmark Scalar ---
|
||||||
|
const q_start = getTime();
|
||||||
|
for (0..ITERS) |i| {
|
||||||
|
const qa = M1{ .value = getValT(T1, i) };
|
||||||
|
const qb = if (comptime std.mem.eql(u8, op_name, "divBy"))
|
||||||
|
S2{ .value = getValT(T2, 2) }
|
||||||
|
else
|
||||||
|
M2{ .value = getValT(T2, 2) };
|
||||||
|
|
||||||
|
_ = if (comptime std.mem.eql(u8, op_name, "add"))
|
||||||
|
qa.add(qb)
|
||||||
|
else if (comptime std.mem.eql(u8, op_name, "mulBy"))
|
||||||
|
qa.mulBy(qb)
|
||||||
|
else
|
||||||
|
qa.divBy(qb);
|
||||||
|
}
|
||||||
|
const q_end = getTime();
|
||||||
|
quantity_total_ns += @as(f64, @floatFromInt(q_start.durationTo(q_end).toNanoseconds()));
|
||||||
}
|
}
|
||||||
const n_end = getTime();
|
|
||||||
native_total_ns += @as(f64, @floatFromInt(n_start.durationTo(n_end).toNanoseconds()));
|
|
||||||
fold(T1, &gsink, n_sink);
|
|
||||||
|
|
||||||
// --- 2. Benchmark Scalar ---
|
const avg_n = (native_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
|
||||||
var q_sink: T1 = 0;
|
const avg_q = (quantity_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
|
||||||
const q_start = getTime();
|
const slowdown = avg_q / avg_n;
|
||||||
for (0..ITERS) |i| {
|
|
||||||
const qa = M1{ .value = getValT(T1, i) };
|
|
||||||
const qb = if (comptime std.mem.eql(u8, op_name, "divBy"))
|
|
||||||
S2{ .value = getValT(T2, 2) }
|
|
||||||
else
|
|
||||||
M2{ .value = getValT(T2, 2) };
|
|
||||||
|
|
||||||
const r = if (comptime std.mem.eql(u8, op_name, "add"))
|
try writer.print("│ {s:<7} │ {s:<4} │ {s:<4} │ {d:>7.2}ns │ {d:>7.2}ns │ {d:>8.2}x │\n", .{
|
||||||
qa.add(qb)
|
op_name, TNames[t1_idx], TNames[t2_idx], avg_n, avg_q, slowdown,
|
||||||
else if (comptime std.mem.eql(u8, op_name, "mulBy"))
|
});
|
||||||
qa.mulBy(qb)
|
|
||||||
else
|
|
||||||
qa.divBy(qb);
|
|
||||||
|
|
||||||
if (comptime @typeInfo(T1) == .float) q_sink += r.value else q_sink ^= r.value;
|
|
||||||
}
|
|
||||||
const q_end = getTime();
|
|
||||||
quantity_total_ns += @as(f64, @floatFromInt(q_start.durationTo(q_end).toNanoseconds()));
|
|
||||||
fold(T1, &gsink, q_sink);
|
|
||||||
}
|
|
||||||
|
|
||||||
const avg_n = (native_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
|
|
||||||
const avg_q = (quantity_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
|
|
||||||
const slowdown = avg_q / avg_n;
|
|
||||||
|
|
||||||
try writer.print("│ {s:<7} │ {s:<4} │ {s:<4} │ {d:>7.2}ns │ {d:>7.2}ns │ {d:>8.2}x │\n", .{
|
|
||||||
op_name, TNames[t1_idx], TNames[t2_idx], avg_n, avg_q, slowdown,
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -365,14 +347,11 @@ fn bench_crossTypeVsNative(writer: *std.Io.Writer) !void {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try writer.print("└─────────┴──────┴──────┴───────────┴───────────┴───────────┘\n", .{});
|
try writer.print("└─────────┴──────┴──────┴───────────┴───────────┴───────────┘\n", .{});
|
||||||
try writer.print("\nAnti-optimisation sink: {d:.4}\n", .{gsink});
|
|
||||||
try std.testing.expect(gsink != 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn bench_Vector(writer: *std.Io.Writer) !void {
|
fn bench_Vector(writer: *std.Io.Writer) !void {
|
||||||
const ITERS: usize = 10_000;
|
const ITERS: usize = 10_000;
|
||||||
const SAMPLES: usize = 10;
|
const SAMPLES: usize = 10;
|
||||||
var gsink: f64 = 0;
|
|
||||||
|
|
||||||
const getVal = struct {
|
const getVal = struct {
|
||||||
fn f(comptime TT: type, i: usize, comptime mask: u7) TT {
|
fn f(comptime TT: type, i: usize, comptime mask: u7) TT {
|
||||||
@ -420,44 +399,33 @@ fn bench_Vector(writer: *std.Io.Writer) !void {
|
|||||||
|
|
||||||
var samples: [SAMPLES]f64 = undefined;
|
var samples: [SAMPLES]f64 = undefined;
|
||||||
|
|
||||||
for (0..SAMPLES) |s_idx| {
|
std.mem.doNotOptimizeAway({
|
||||||
var sink: T = 0;
|
for (0..SAMPLES) |s_idx| {
|
||||||
|
const t_start = getTime();
|
||||||
|
for (0..ITERS) |i| {
|
||||||
|
const v1 = V.initDefault(getVal(T, i, 63));
|
||||||
|
|
||||||
const t_start = getTime();
|
if (comptime std.mem.eql(u8, op_name, "add")) {
|
||||||
for (0..ITERS) |i| {
|
const v2 = V.initDefault(getVal(T, i +% 7, 63));
|
||||||
const v1 = V.initDefault(getVal(T, i, 63));
|
_ = v1.add(v2);
|
||||||
|
} else if (comptime std.mem.eql(u8, op_name, "scale")) {
|
||||||
if (comptime std.mem.eql(u8, op_name, "add")) {
|
const sc = getVal(T, i +% 2, 63);
|
||||||
const v2 = V.initDefault(getVal(T, i +% 7, 63));
|
_ = v1.scale(sc);
|
||||||
const res = v1.add(v2);
|
} else if (comptime std.mem.eql(u8, op_name, "mulByScalar")) {
|
||||||
for (res.data) |val| {
|
const s_val = Q_time{ .value = getVal(T, i +% 2, 63) };
|
||||||
if (comptime @typeInfo(T) == .float) sink += val else sink ^= val;
|
_ = v1.mulByScalar(s_val);
|
||||||
|
} else if (comptime std.mem.eql(u8, op_name, "length")) {
|
||||||
|
_ = v1.length();
|
||||||
}
|
}
|
||||||
} else if (comptime std.mem.eql(u8, op_name, "scale")) {
|
|
||||||
const sc = getVal(T, i +% 2, 63);
|
|
||||||
const res = v1.scale(sc);
|
|
||||||
for (res.data) |val| {
|
|
||||||
if (comptime @typeInfo(T) == .float) sink += val else sink ^= val;
|
|
||||||
}
|
|
||||||
} else if (comptime std.mem.eql(u8, op_name, "mulByScalar")) {
|
|
||||||
const s_val = Q_time{ .value = getVal(T, i +% 2, 63) };
|
|
||||||
const res = v1.mulByScalar(s_val);
|
|
||||||
for (res.data) |val| {
|
|
||||||
if (comptime @typeInfo(T) == .float) sink += val else sink ^= val;
|
|
||||||
}
|
|
||||||
} else if (comptime std.mem.eql(u8, op_name, "length")) {
|
|
||||||
const r = v1.length();
|
|
||||||
if (comptime @typeInfo(T) == .float) sink += r else sink ^= r;
|
|
||||||
}
|
}
|
||||||
|
const t_end = getTime();
|
||||||
|
|
||||||
|
samples[s_idx] = @as(f64, @floatFromInt(t_start.durationTo(t_end).toNanoseconds()));
|
||||||
}
|
}
|
||||||
const t_end = getTime();
|
|
||||||
|
|
||||||
samples[s_idx] = @as(f64, @floatFromInt(t_start.durationTo(t_end).toNanoseconds()));
|
const median_ns_per_op = computeStats(&samples, ITERS);
|
||||||
fold(T, &gsink, sink);
|
try writer.print(" {d:>7.1} │", .{median_ns_per_op});
|
||||||
}
|
});
|
||||||
|
|
||||||
const median_ns_per_op = computeStats(&samples, ITERS);
|
|
||||||
try writer.print(" {d:>7.1} │", .{median_ns_per_op});
|
|
||||||
}
|
}
|
||||||
try writer.print("\n", .{});
|
try writer.print("\n", .{});
|
||||||
}
|
}
|
||||||
@ -467,6 +435,4 @@ fn bench_Vector(writer: *std.Io.Writer) !void {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
try writer.print("└─────────────┴──────┴─────────┴─────────┴─────────┘\n", .{});
|
try writer.print("└─────────────┴──────┴─────────┴─────────┴─────────┘\n", .{});
|
||||||
try writer.print("\nAnti-optimisation sink: {d:.4}\n", .{gsink});
|
|
||||||
try std.testing.expect(gsink != 0);
|
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user