Compare commits

..

No commits in common. "main" and "0.2.2" have entirely different histories.
main ... 0.2.2

12 changed files with 502 additions and 4031 deletions

1
.gitignore vendored
View File

@ -1,4 +1,3 @@
zig-out
.zig-cache
mkdocs.yaml
zig-pkg

View File

@ -4,27 +4,20 @@ pub fn build(b: *std.Build) void {
const target = b.standardTargetOptions(.{});
const optimize = b.standardOptimizeOption(.{});
const zig_wgpu = b.dependency("zig_wgpu", .{
.target = target,
.optimize = optimize,
});
// 1. Define the module so other projects can import it
const mod = b.addModule("dimal", .{
.root_source_file = b.path("src/lib.zig"),
_ = b.addModule("dimal", .{
.root_source_file = b.path("src/main.zig"),
});
mod.addImport("gpu", zig_wgpu.module("zig-wgpu"));
const exe_tests = b.addTest(.{
.root_module = b.createModule(.{
.root_source_file = b.path("src/test.zig"),
.root_source_file = b.path("src/main.zig"),
.target = target,
.optimize = optimize,
}),
.test_runner = .{ .path = b.path("test_runner.zig"), .mode = .simple },
});
exe_tests.root_module.addImport("gpu", zig_wgpu.module("zig-wgpu"));
const run_exe_tests = b.addRunArtifact(exe_tests);
const test_step = b.step("test", "Run tests");
test_step.dependOn(&run_exe_tests.step);
@ -37,7 +30,6 @@ pub fn build(b: *std.Build) void {
.imports = &.{},
}),
});
bench_exe.root_module.addImport("gpu", zig_wgpu.module("zig-wgpu"));
b.installArtifact(bench_exe);

View File

@ -1,14 +1,9 @@
.{
.name = .dimal,
.version = "0.3.0",
.version = "0.2.2",
.fingerprint = 0x9453b1ff1e52d858,
.minimum_zig_version = "0.16.0",
.dependencies = .{
.zig_wgpu = .{
.url = "git+https://git.bouvais.lu/adrien/zig-wgpu?ref=0.2.2#5f8da0940d77c40eacd39c268d09acbeaea0b2a5",
.hash = "zig_wgpu-0.2.0-xsLAy2-s0QPNwR2QNd8ZX2kWiVfV5oB92N3ga1V1Uwpu",
},
},
.dependencies = .{},
.paths = .{
"build.zig",
"build.zig.zon",

View File

@ -3,7 +3,7 @@ const std = @import("std");
// Adjust these imports to match your actual file names
const Dimensions = @import("Dimensions.zig");
const Scales = @import("Scales.zig");
const Tensor = @import("TensorStatic.zig").Tensor;
const Tensor = @import("Tensor.zig").TensorStatic;
fn PhysicalConstant(comptime d: Dimensions.ArgOpts, comptime val: f64, comptime s: Scales.ArgOpts) type {
return struct {

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
const std = @import("std");
const Io = std.Io;
const Tensor = @import("Tensor.zig").Tensor;
const Tensor = @import("Tensor.zig").TensorStatic;
var io: Io = undefined;
pub fn main(init: std.process.Init) !void {
@ -10,8 +10,23 @@ pub fn main(init: std.process.Init) !void {
io = init.io;
try vectorSIMDvsNative(f64, &stdout_writer.interface);
try stdout_writer.flush();
try vectorSIMDvsNative(f32, &stdout_writer.interface);
try stdout_writer.flush();
try vectorSIMDvsNative(i32, &stdout_writer.interface);
try stdout_writer.flush();
try vectorSIMDvsNative(i64, &stdout_writer.interface);
try stdout_writer.flush();
try vectorSIMDvsNative(i128, &stdout_writer.interface);
try stdout_writer.flush();
try bench_Scalar(&stdout_writer.interface);
try stdout_writer.flush();
try bench_vsNative(&stdout_writer.interface);
try stdout_writer.flush();
try bench_crossTypeVsNative(&stdout_writer.interface);
try stdout_writer.flush();
try bench_Vector(&stdout_writer.interface);
try stdout_writer.flush();
try bench_HighDimTensor(&stdout_writer.interface);
@ -154,6 +169,245 @@ fn bench_Scalar(writer: *std.Io.Writer) !void {
try writer.print("└──────────────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┘\n", .{});
}
fn bench_vsNative(writer: *std.Io.Writer) !void {
const ITERS: usize = 100_000;
const SAMPLES: usize = 100;
const getValT = struct {
fn f(comptime TT: type, i: usize) TT {
const v = (i % 100) + 1;
return if (comptime @typeInfo(TT) == .float) @floatFromInt(v) else @intCast(v);
}
}.f;
const Types = .{ i32, i64, i128, f32, f64 };
const TNames = .{ "i32", "i64", "i128", "f32", "f64" };
// Expanded Ops to match bench_Scalar
const Ops = .{ "add", "sub", "mul", "div", "abs", "eq", "gt" };
try writer.print(
\\
\\ Scalar vs Native Overhead Analysis
\\
\\┌───────────┬──────┬───────────┬───────────┬───────────┬───────────────────────┐
\\│ Operation │ Type │ Native │ @Vector │ Tensor{{1}} │ Slowdown Nat | Vec │
\\├───────────┼──────┼───────────┼───────────┼───────────┼───────────────────────┤
\\
, .{});
inline for (Ops, 0..) |op_name, j| {
inline for (Types, 0..) |T, tidx| {
var native_total_ns: f64 = 0;
var vector_total_ns: f64 = 0;
var tensor_total_ns: f64 = 0;
const M = Tensor(T, .{}, .{}, &.{1});
for (0..SAMPLES) |_| {
// --- 1. Benchmark Native ---
const n_start = getTime();
const a = getValT(T, 10);
const b = getValT(T, 2);
for (0..ITERS) |_| {
// Native logic branch
_ = if (comptime std.mem.eql(u8, op_name, "add"))
if (comptime @typeInfo(T) == .int) a +| b else a + b
else if (comptime std.mem.eql(u8, op_name, "sub"))
if (comptime @typeInfo(T) == .int) a -| b else a - b
else if (comptime std.mem.eql(u8, op_name, "mul"))
if (comptime @typeInfo(T) == .int) a *| b else a * b
else if (comptime std.mem.eql(u8, op_name, "div"))
if (comptime @typeInfo(T) == .int) @divTrunc(a, b) else a / b
else if (comptime std.mem.eql(u8, op_name, "abs"))
if (comptime @typeInfo(T) == .int) @abs(a) else @as(T, @abs(a))
else if (comptime std.mem.eql(u8, op_name, "eq"))
a == b
else if (comptime std.mem.eql(u8, op_name, "gt"))
a > b
else
unreachable;
}
const n_end = getTime();
native_total_ns += @as(f64, @floatFromInt(n_start.durationTo(n_end).toNanoseconds()));
const v_start = getTime();
const va = @Vector(1, T){getValT(T, 10)};
const vb = @Vector(1, T){getValT(T, 2)};
for (0..ITERS) |_| {
// Native logic branch
_ = if (comptime std.mem.eql(u8, op_name, "add"))
if (comptime @typeInfo(T) == .int) va +| vb else va + vb
else if (comptime std.mem.eql(u8, op_name, "sub"))
if (comptime @typeInfo(T) == .int) va -| vb else va - vb
else if (comptime std.mem.eql(u8, op_name, "mul"))
if (comptime @typeInfo(T) == .int) va *| vb else va * vb
else if (comptime std.mem.eql(u8, op_name, "div"))
if (comptime @typeInfo(T) == .int) @divTrunc(va, vb) else va / vb
else if (comptime std.mem.eql(u8, op_name, "abs"))
if (comptime @typeInfo(T) == .int) @as(T, @intCast(@abs(va[0]))) else @abs(va)
else if (comptime std.mem.eql(u8, op_name, "eq"))
va == vb
else if (comptime std.mem.eql(u8, op_name, "gt"))
va > vb
else
unreachable;
}
const v_end = getTime();
vector_total_ns += @as(f64, @floatFromInt(v_start.durationTo(v_end).toNanoseconds()));
// --- 2. Benchmark Scalar ---
const q_start = getTime();
const qa = M.splat(getValT(T, 10));
const qb = M.splat(getValT(T, 2));
for (0..ITERS) |_| {
// Scalar logic branch
_ = if (comptime std.mem.eql(u8, op_name, "add"))
qa.add(qb)
else if (comptime std.mem.eql(u8, op_name, "sub"))
qa.sub(qb)
else if (comptime std.mem.eql(u8, op_name, "mul"))
qa.mul(qb)
else if (comptime std.mem.eql(u8, op_name, "div"))
qa.div(qb)
else if (comptime std.mem.eql(u8, op_name, "abs"))
qa.abs()
else if (comptime std.mem.eql(u8, op_name, "eq"))
qa.eq(qb)
else if (comptime std.mem.eql(u8, op_name, "gt"))
qa.gt(qb)
else
unreachable;
}
const q_end = getTime();
tensor_total_ns += @as(f64, @floatFromInt(q_start.durationTo(q_end).toNanoseconds()));
}
const avg_n = (native_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
const avg_v = (vector_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
const avg_t = (tensor_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
const slowdown_nt = avg_t / avg_n;
const slowdown_vt = avg_t / avg_v;
try writer.print("│ {s:<9} │ {s:<4} │ {d:>7.2}ns │ {d:>7.2}ns │ {d:>7.2}ns │ {d:>8.2}x {d:>8.2}x │\n", .{
op_name, TNames[tidx], avg_n, avg_v, avg_t, slowdown_nt, slowdown_vt,
});
}
if (j != Ops.len - 1) try writer.print("├───────────┼──────┼───────────┼───────────┼───────────┼───────────────────────┤\n", .{});
}
try writer.print("└───────────┴──────┴───────────┴───────────┴───────────┴───────────────────────┘\n", .{});
}
fn bench_crossTypeVsNative(writer: *std.Io.Writer) !void {
const ITERS: usize = 100_000;
const SAMPLES: usize = 5;
const getValT = struct {
fn f(comptime TT: type, i: usize) TT {
// Keep values safe and non-zero to avoid division by zero or overflows during cross-casting
const v = (i % 50) + 1;
return if (comptime @typeInfo(TT) == .float) @floatFromInt(v) else @intCast(v);
}
}.f;
// Helper for the Native baseline: explicitly casting T2 to T1 before the operation
const castTo = struct {
fn f(comptime DestT: type, comptime SrcT: type, val: SrcT) DestT {
if (comptime DestT == SrcT) return val;
const src_info = @typeInfo(SrcT);
const dest_info = @typeInfo(DestT);
if (dest_info == .int and src_info == .int) return @intCast(val);
if (dest_info == .float and src_info == .int) return @floatFromInt(val);
if (dest_info == .int and src_info == .float) return @intFromFloat(val);
if (dest_info == .float and src_info == .float) return @floatCast(val);
unreachable;
}
}.f;
const Types = .{ i16, i64, i128, f32, f64 };
const TNames = .{ "i16", "i64", "i128", "f32", "f64" };
const Ops = .{ "add", "mul", "div" };
try writer.print(
\\
\\ Cross-Type Overhead Analysis: Scalar vs Native
\\
\\┌─────────┬──────┬──────┬───────────┬───────────┬───────────┐
\\│ Op │ T1 │ T2 │ Native │ Scalar │ Slowdown │
\\├─────────┼──────┼──────┼───────────┼───────────┼───────────┤
\\
, .{});
inline for (Ops, 0..) |op_name, j| {
inline for (Types, 0..) |T1, t1_idx| {
inline for (Types, 0..) |T2, t2_idx| {
var native_total_ns: f64 = 0;
var quantity_total_ns: f64 = 0;
const M1 = Tensor(T1, .{ .L = 1 }, .{}, &.{1});
const M2 = Tensor(T2, .{ .L = 1 }, .{}, &.{1});
const S2 = Tensor(T2, .{ .T = 1 }, .{}, &.{1});
std.mem.doNotOptimizeAway({
for (0..SAMPLES) |_| {
// --- 1. Benchmark Native (Cast T2 to T1, then math) ---
const n_start = getTime();
for (0..ITERS) |i| {
const a = getValT(T1, i);
const b_raw = getValT(T2, 2);
const b = castTo(T1, T2, b_raw);
_ = if (comptime std.mem.eql(u8, op_name, "add"))
a + b
else if (comptime std.mem.eql(u8, op_name, "mul"))
a * b
else if (comptime @typeInfo(T1) == .int)
@divTrunc(a, b)
else
a / b;
}
const n_end = getTime();
native_total_ns += @as(f64, @floatFromInt(n_start.durationTo(n_end).toNanoseconds()));
// --- 2. Benchmark Scalar ---
const q_start = getTime();
for (0..ITERS) |i| {
const qa = M1.splat(getValT(T1, i));
const qb = if (comptime std.mem.eql(u8, op_name, "div"))
S2.splat(getValT(T2, 2))
else
M2.splat(getValT(T2, 2));
_ = if (comptime std.mem.eql(u8, op_name, "add"))
qa.add(qb)
else if (comptime std.mem.eql(u8, op_name, "mul"))
qa.mul(qb)
else
qa.div(qb);
}
const q_end = getTime();
quantity_total_ns += @as(f64, @floatFromInt(q_start.durationTo(q_end).toNanoseconds()));
}
const avg_n = (native_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
const avg_q = (quantity_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
const slowdown = avg_q / avg_n;
try writer.print("│ {s:<7} │ {s:<4} │ {s:<4} │ {d:>7.2}ns │ {d:>7.2}ns │ {d:>8.2}x │\n", .{
op_name, TNames[t1_idx], TNames[t2_idx], avg_n, avg_q, slowdown,
});
});
}
}
if (j != Ops.len - 1) {
try writer.print("├─────────┼──────┼──────┼───────────┼───────────┼───────────┤\n", .{});
}
}
try writer.print("└─────────┴──────┴──────┴───────────┴───────────┴───────────┘\n", .{});
}
fn bench_Vector(writer: *std.Io.Writer) !void {
const ITERS: usize = 10_000;
const SAMPLES: usize = 10;
@ -192,7 +446,7 @@ fn bench_Vector(writer: *std.Io.Writer) !void {
const TNames = .{ "i32", "i64", "i128", "f32", "f64" };
const Lengths = .{ 1, 3, 4, 16, 100 };
// "cross" is only valid for len=3; other cells will show " --- "
const Ops = .{ "add", "div", "mulScalar", "dot", "product", "pow", "length" };
const Ops = .{ "add", "div", "mulScalar", "dot", "cross", "product", "pow", "length" };
inline for (Ops, 0..) |op_name, o_idx| {
inline for (Types, TNames) |T, tname| {
@ -228,6 +482,10 @@ fn bench_Vector(writer: *std.Io.Writer) !void {
} else if (comptime std.mem.eql(u8, op_name, "dot")) {
const v2 = V.splat(getVal(T, i +% 5, 63));
_ = v1.contract(v2, 0, 0);
} else if (comptime std.mem.eql(u8, op_name, "cross")) {
// len == 3 guaranteed by the guard above
const v2 = V.splat(getVal(T, i +% 5, 63));
_ = v1.cross(v2);
} else if (comptime std.mem.eql(u8, op_name, "product")) {
_ = v1.product();
} else if (comptime std.mem.eql(u8, op_name, "pow")) {
@ -350,3 +608,62 @@ fn bench_HighDimTensor(writer: *std.Io.Writer) !void {
}
try writer.print("└─────────────────┴──────┴──────────────┴──────────────┴──────────────┴──────────────┘\n", .{});
}
fn vectorSIMDvsNative(comptime T: type, writer: *std.Io.Writer) !void {
const iterations: u64 = 10_000;
const lens = [_]u32{ 1, 2, 3, 4, 5, 10, 100, 1_000, 10_000 };
try writer.print("\nSIMD Speedup Analysis: {s}\n", .{@typeName(T)});
try writer.print("┌────────────┬────────────┬────────────┬────────────┐\n", .{});
try writer.print("│ Vector Len │ Scalar (us)│ Vector (us)│ Speedup │\n", .{});
try writer.print("├────────────┼────────────┼────────────┼────────────┤\n", .{});
inline for (lens) |vector_len| {
// --- Scalar Test ---
var scalar_val: T = 10;
const start_scalar = getTime();
var i: u64 = 0;
while (i < iterations * vector_len) : (i += 1) {
if (comptime @typeInfo(T) == .int)
scalar_val = scalar_val +% 1
else
scalar_val = scalar_val + 1;
}
const scalar_time = start_scalar.durationTo(getTime()).toMicroseconds();
// --- Vector Test ---
var vector_val: @Vector(vector_len, T) = @splat(20);
const start_vector = getTime();
i = 0;
const increment: @Vector(vector_len, T) = @splat(1);
while (i < iterations) : (i += 1) {
if (comptime @typeInfo(T) == .int)
vector_val = vector_val +% increment
else
vector_val = vector_val + increment;
}
const vector_time = start_vector.durationTo(getTime()).toMicroseconds();
// --- Results ---
const s_float = @as(f64, @floatFromInt(scalar_time));
const v_float = @as(f64, @floatFromInt(vector_time));
// Speedup = ScalarTime / VectorTime.
// > 1.0 means SIMD is faster.
const speedup = if (vector_time > 0) s_float / v_float else 0;
try writer.print("│ {d:<10} │ {d:>10} │ {d:>10} │ {d:>9.2}x │\n", .{
vector_len,
scalar_time,
vector_time,
speedup,
});
try writer.flush();
std.mem.doNotOptimizeAway(scalar_val);
std.mem.doNotOptimizeAway(vector_val);
}
try writer.print("└────────────┴────────────┴────────────┴────────────┘\n", .{});
}

View File

@ -1,9 +0,0 @@
const std = @import("std");
pub const TensorStatic = @import("TensorStatic.zig").Tensor;
pub const TensorAlloc = @import("TensorAlloc.zig").Tensor;
pub const TensorGpu = @import("TensorGpu.zig").Tensor;
pub const Dimensions = @import("Dimensions.zig");
pub const Scales = @import("Scales.zig");
pub const Base = @import("Base.zig");
pub const UnitParser = @import("UnitParser.zig");

15
src/main.zig Normal file
View File

@ -0,0 +1,15 @@
const std = @import("std");
pub const Tensor = @import("Tensor.zig").TensorStatic;
pub const Dimensions = @import("Dimensions.zig");
pub const Scales = @import("Scales.zig");
pub const Base = @import("Base.zig");
pub const UnitParser = @import("UnitParser.zig");
test {
_ = @import("Tensor.zig");
_ = @import("Dimensions.zig");
_ = @import("Scales.zig");
_ = @import("Base.zig");
_ = @import("UnitParser.zig");
}

View File

@ -4,12 +4,6 @@ const UnitScale = Scales.UnitScale;
const Dimensions = @import("Dimensions.zig");
const Dimension = Dimensions.Dimension;
pub const TensorKind = enum { static, alloc, gpu };
pub fn isTensor(comptime T: type) bool {
return comptime @typeInfo(T) == .@"struct" and @hasDecl(T, "ISTENSOR");
}
pub fn shapeTotal(shape: []const comptime_int) usize {
var t: comptime_int = 1;
for (shape) |s| t *= s;

View File

@ -1,9 +0,0 @@
test {
_ = @import("TensorStatic.zig");
_ = @import("TensorAlloc.zig");
_ = @import("TensorGpu.zig");
_ = @import("Dimensions.zig");
_ = @import("Scales.zig");
_ = @import("Base.zig");
_ = @import("UnitParser.zig");
}