From 9ac3d4d699bcb0aabfa74633b0376d0eae268403 Mon Sep 17 00:00:00 2001 From: adrien Date: Tue, 26 May 2026 23:34:53 +0200 Subject: [PATCH] Squeletton TensorGpu that init --- src/TensorGpu.zig | 1775 +++++++++++++++++++++++++++++++++++++++++++++ src/lib.zig | 1 + src/test.zig | 1 + 3 files changed, 1777 insertions(+) create mode 100644 src/TensorGpu.zig diff --git a/src/TensorGpu.zig b/src/TensorGpu.zig new file mode 100644 index 0000000..79102a7 --- /dev/null +++ b/src/TensorGpu.zig @@ -0,0 +1,1775 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const Scales = @import("Scales.zig");
const UnitScale = Scales.UnitScale;
const Dimensions = @import("Dimensions.zig");
const Dimension = Dimensions.Dimension;
const sh = @import("shared.zig");
const gpu = @import("gpu");
const GpuAllocator = gpu.GpuAllocator;
const TensorAlloc = @import("TensorAlloc.zig").Tensor;

/// Return a tensor type whose element storage is a single `gpu.GpuBuffer`.
///
/// `T` is the element type, `d_opt` / `s_opt` are forwarded to `Dimensions.init`
/// and `Scales.init`, and `shape_` is the comptime shape.
///
/// Compile errors:
///   * `shape_` is empty,
///   * any extent in `shape_` is < 1,
///   * `total * @bitSizeOf(T)` exceeds 1_000_000 bits.
pub fn Tensor(
    comptime T: type,
    comptime d_opt: Dimensions.ArgOpts,
    comptime s_opt: Scales.ArgOpts,
    comptime shape_: []const comptime_int,
) type {
    if (shape_.len == 0)
        @compileError("Tensor shape must have at least 1 dimension (rank >= 1).");
    for (shape_) |s|
        if (s < 1) @compileError("Tensor shape dimensions must be strictly >= 1.");
    @setEvalBranchQuota(100_000_000);

    const _total: usize = comptime sh.shapeTotal(shape_);
    const _strides = comptime sh.shapeStrides(shape_);

    // NOTE(review): this limit also bounds the size of `static_buf` below, but the
    // message recommends "TensorGPU", which is this very type — confirm the intended
    // limit and wording for the GPU tensor.
    if (comptime _total * @bitSizeOf(T) > 1_000_000)
        @compileError("Tensor too big, consider using a TensorGPU or TensorAlloc.");

    return struct {
        /// Device buffer sized for `total` elements of `T`.
        data: gpu.GpuBuffer,

        const Self = @This();

        pub const ValueType: type = T;
        pub const dims: Dimensions = Dimensions.init(d_opt);
        pub const scales: Scales = Scales.init(s_opt);
        pub const shape: []const comptime_int = shape_;
        pub const rank: comptime_int = shape_.len;
        pub const total: comptime_int = _total;
        pub const strides_arr: [shape_.len]comptime_int = _strides;
        pub const ISTENSOR = true;
        // NOTE(review): `.alloc` looks inherited from TensorAlloc; confirm whether a
        // GPU-specific `sh.TensorKind` value is intended here.
        pub const TENSORKIND: sh.TensorKind = .alloc;

        /// Host-side scratch array that `splat` fills before uploading.
        /// It is a container-level variable, so every `splat` call on this
        /// instantiation writes to the same storage.
        /// NOTE(review): concurrent `splat` calls would race on it — confirm
        /// single-threaded use.
        var static_buf: [total]T = undefined;

        /// Allocate an uninitialized device buffer for `total` elements of `T`,
        /// with the usage flags shared by every tensor buffer created in this type.
        fn allocStorage(alloc: GpuAllocator) !gpu.GpuBuffer {
            return gpu.GpuBuffer.init(alloc, .{
                .size = @sizeOf(T) * total,
                .usage = .initMany(&.{ .Storage, .CopyDst, .CopySrc }),
            });
        }

        /// Broadcast a single value across all elements.
        ///
        /// The returned tensor owns a new GPU buffer; release it with `deinit`.
        /// If the upload fails the buffer is released before the error propagates.
        pub fn splat(alloc: GpuAllocator, v: T) !Self {
            var new: Self = .{ .data = try allocStorage(alloc) };
            // Without this the freshly created buffer would leak when `load` fails.
            errdefer new.data.deinit();
            static_buf = @splat(v);
            try new.data.load(T, &static_buf);
            return new;
        }

        /// Create a tensor on the GPU from host data.
        ///
        /// `data` must contain exactly `total` elements; otherwise
        /// `error.LengthMismatch` is returned and nothing is allocated.
        /// The returned tensor owns a new GPU buffer; release it with `deinit`.
        pub fn load(alloc: GpuAllocator, data: []const T) !Self {
            // The device buffer is sized for `total` elements, so any other length
            // would either overrun it or leave part of it uninitialized.
            if (data.len != total) return error.LengthMismatch;

            var new: Self = .{ .data = try allocStorage(alloc) };
            errdefer new.data.deinit();
            try new.data.load(T, data);
            return new;
        }

        /// Release the GPU buffer. The allocator argument is ignored.
        pub fn deinit(self: @This(), _: GpuAllocator) void {
            self.data.deinit();
        }

        /// Download the tensor into a host-side `TensorAlloc` of the same
        /// element type, dimensions, scales and shape.
        ///
        /// A temporary `MapRead | CopyDst` staging buffer of the same size is
        /// created from `self.data.gloc`, filled from `self.data`, read into memory
        /// obtained from `alloc`, and released before returning. The intermediate
        /// host slice is freed after `TensorAlloc.load` has consumed it.
        pub fn toCpu(self: @This(), alloc: Allocator) !TensorAlloc(T, d_opt, s_opt, shape_) {
            const staging = try gpu.GpuBuffer.init(self.data.gloc, .{
                .size = self.data.def.size,
                .usage = .initMany(&.{ .MapRead, .CopyDst }),
            });
            defer staging.deinit();

            try self.data.copy(staging);
            const data = try staging.read(alloc, T);
            defer alloc.free(data);
            return try .load(alloc, data);
        }

        /// Duplicate this tensor into a new GPU buffer allocated from `alloc`.
        ///
        /// The contents are transferred with `GpuBuffer.copy(dst)`, the same call
        /// `toCpu` uses to fill its staging buffer. The returned tensor owns its
        /// buffer; release it with `deinit`.
        pub fn copy(self: *const Self, alloc: GpuAllocator) !Self {
            const new: Self = .{ .data = try allocStorage(alloc) };
            errdefer new.data.deinit();
            try self.data.copy(new.data);
            return new;
        }

        // /// Convert N-D coords (row-major) to flat index — fully comptime.
        // /// Usage: Tensor.idx(.{row, col})
        // pub inline fn idx(comptime coords: [rank]usize) usize {
        //     comptime {
        //         var flat: usize = 0;
        //         for (0..rank) |i| {
        //             if (coords[i] >= shape[i]) @compileError("idx: Coordinate out of bounds");
        //             flat += coords[i] * strides_arr[i];
        //         }
        //         return flat;
        //     }
        // }
        //
        // /// Element-wise add. Dimensions must match; scales resolve to finer.
        // /// RHS must have the same shape as self, or total == 1 (broadcast).
+ // pub fn add(self: *const Self, alloc: Allocator, rhs: anytype) !Tensor( + // T, + // dims.argsOpt(), + // sh.finerScales(Self, @TypeOf(rhs)).argsOpt(), + // shape, + // ) { + // const RhsType = @TypeOf(rhs); + // if (comptime !sh.isTensor(RhsType)) + // @compileError("rhs can only be a Tensor "); + // if (comptime !dims.eql(RhsType.dims)) + // @compileError("Dimension mismatch in add: " ++ dims.str() ++ " vs " ++ RhsType.dims.str()); + // if (comptime RhsType.total != 1 and !sh.shapeEql(shape, RhsType.shape)) + // @compileError("Shape mismatch in add: element-wise operations require identical shapes, or a scalar RHS."); + // + // const TargetType = Tensor(T, dims.argsOpt(), sh.finerScales(Self, RhsType).argsOpt(), shape); + // const l: TargetType = try self.to(alloc, TargetType); + // defer l.deinit(alloc); + // const r: TargetType = try rhs.to(alloc, TargetType); + // defer r.deinit(alloc); + // + // const result_vec = if (comptime sh.isInt(T)) + // l.data.* +| r.data.* + // else + // l.data.* + r.data.*; + // + // const vec_ptr = try alloc.create(@TypeOf(result_vec)); + // vec_ptr.* = result_vec; + // + // return TargetType{ .data = vec_ptr }; + // } + // + // /// Element-wise sub. Dimensions must match; scales resolve to finer. + // /// RHS must have the same shape as self, or total == 1 (broadcast). 
+ // pub fn sub(self: *const Self, alloc: Allocator, rhs: anytype) !Tensor( + // T, + // dims.argsOpt(), + // sh.finerScales(Self, @TypeOf(rhs)).argsOpt(), + // shape, + // ) { + // const RhsType = @TypeOf(rhs); + // if (comptime !sh.isTensor(RhsType)) + // @compileError("rhs can only be a Tensor "); + // if (comptime !dims.eql(RhsType.dims)) + // @compileError("Dimension mismatch in sub: " ++ dims.str() ++ " vs " ++ RhsType.dims.str()); + // if (comptime RhsType.total != 1 and !sh.shapeEql(shape, RhsType.shape)) + // @compileError("Shape mismatch in sub: element-wise operations require identical shapes, or a scalar RHS."); + // + // const TargetType = Tensor(T, dims.argsOpt(), sh.finerScales(Self, RhsType).argsOpt(), shape); + // const l: TargetType = try self.to(alloc, TargetType); + // defer l.deinit(alloc); + // const r: TargetType = try rhs.to(alloc, TargetType); + // defer r.deinit(alloc); + // + // const result_vec = if (comptime sh.isInt(T)) + // l.data.* -| r.data.* + // else + // l.data.* - r.data.*; + // + // const vec_ptr = try alloc.create(@TypeOf(result_vec)); + // vec_ptr.* = result_vec; + // + // return TargetType{ .data = vec_ptr }; + // } + // + // /// Element-wise multiply. Dimension exponents summed. + // /// Shape {1} RHS is automatically broadcast across all elements. 
+ // pub fn mul(self: *const Self, alloc: Allocator, rhs: anytype) !Tensor( + // T, + // dims.add(@TypeOf(rhs).dims).argsOpt(), + // sh.finerScales(Self, @TypeOf(rhs)).argsOpt(), + // shape, + // ) { + // const RhsType = @TypeOf(rhs); + // if (comptime !sh.isTensor(RhsType)) + // @compileError("rhs can only be a Tensor "); + // if (comptime RhsType.total != 1 and !sh.shapeEql(shape, RhsType.shape)) + // @compileError("Shape mismatch in mul: element-wise operations require identical shapes, or a scalar RHS."); + // + // const SelfNorm = Tensor(T, dims.argsOpt(), sh.finerScales(Self, RhsType).argsOpt(), shape); + // const RhsNorm = Tensor(T, RhsType.dims.argsOpt(), sh.finerScales(Self, RhsType).argsOpt(), shape); + // const TargetType = Tensor(T, dims.add(RhsType.dims).argsOpt(), sh.finerScales(Self, RhsType).argsOpt(), shape); + // + // const l: SelfNorm = try self.to(alloc, SelfNorm); + // defer l.deinit(alloc); + // const r: RhsNorm = try rhs.to(alloc, RhsNorm); + // defer r.deinit(alloc); + // + // const result_vec = if (comptime sh.isInt(T)) + // l.data.* *| r.data.* + // else + // l.data.* * r.data.*; + // + // const vec_ptr = try alloc.create(@TypeOf(result_vec)); + // vec_ptr.* = result_vec; + // + // return TargetType{ .data = vec_ptr }; + // } + // + // /// Element-wise divide. Dimension exponents subtracted. + // /// Shape {1} RHS is automatically broadcast across all elements. 
+ // pub fn div(self: *const Self, alloc: Allocator, rhs: anytype) !Tensor( + // T, + // dims.sub(@TypeOf(rhs).dims).argsOpt(), + // sh.finerScales(Self, @TypeOf(rhs)).argsOpt(), + // shape, + // ) { + // const RhsType = @TypeOf(rhs); + // if (comptime !sh.isTensor(RhsType)) + // @compileError("rhs can only be a Tensor "); + // if (comptime RhsType.total != 1 and !sh.shapeEql(shape, RhsType.shape)) + // @compileError("Shape mismatch in div: element-wise operations require identical shapes, or a scalar RHS."); + // + // const SelfNorm = Tensor(T, dims.argsOpt(), sh.finerScales(Self, RhsType).argsOpt(), shape); + // const RhsNorm = Tensor(T, RhsType.dims.argsOpt(), sh.finerScales(Self, RhsType).argsOpt(), shape); + // const TargetType = Tensor(T, dims.sub(RhsType.dims).argsOpt(), sh.finerScales(Self, RhsType).argsOpt(), shape); + // + // const l: SelfNorm = try self.to(alloc, SelfNorm); + // defer l.deinit(alloc); + // const r: RhsNorm = try rhs.to(alloc, RhsNorm); + // defer r.deinit(alloc); + // + // const result_vec = if (comptime sh.isInt(T)) + // @divTrunc(l.data.*, r.data.*) + // else + // l.data.* / r.data.*; + // + // const vec_ptr = try alloc.create(@TypeOf(result_vec)); + // vec_ptr.* = result_vec; + // + // return TargetType{ .data = vec_ptr }; + // } + // + // /// Absolute value of every element. + // pub fn abs(self: *const Self, alloc: Allocator) !Self { + // const result_vec = @as(Vec, @bitCast(@abs(self.data.*))); + // + // const vec_ptr = try alloc.create(@TypeOf(result_vec)); + // vec_ptr.* = result_vec; + // + // return Self{ .data = vec_ptr }; + // } + // + // /// Raise every element to a comptime integer exponent. 
+ // pub fn pow(self: *const Self, alloc: Allocator, comptime exp: comptime_int) !Tensor( + // T, + // dims.scale(exp).argsOpt(), + // scales.argsOpt(), + // shape, + // ) { + // if (comptime exp < 0) @compileError("Pow only support exp >= 0"); + // + // const TargetType = Tensor(T, dims.scale(exp).argsOpt(), scales.argsOpt(), shape); + // + // if (comptime exp == 0) { + // const result_vec: Vec = @splat(1); + // const vec_ptr = try alloc.create(@TypeOf(result_vec)); + // vec_ptr.* = result_vec; + // return TargetType{ .data = vec_ptr }; + // } + // + // if (comptime exp == 1) { + // // Copy allocation to ensure `.deinit(alloc)` works cleanly for the caller + // const vec_ptr = try alloc.create(Vec); + // vec_ptr.* = self.data.*; + // return TargetType{ .data = vec_ptr }; + // } + // + // var data: Vec = self.data.*; + // for (0..exp - 1) |_| + // data = data * self.data.*; + // + // const vec_ptr = try alloc.create(@TypeOf(data)); + // vec_ptr.* = data; + // + // return TargetType{ .data = vec_ptr }; + // } + // + // /// Square root of every element. All dimension exponents must be even. 
+ // pub fn sqrt(self: *const Self, alloc: Allocator) !Tensor( + // T, + // dims.div(2).argsOpt(), + // scales.argsOpt(), + // shape, + // ) { + // if (comptime !dims.isSquare()) + // @compileError("Cannot take sqrt of " ++ dims.str() ++ ": exponents must be even."); + // + // const TargetType = Tensor(T, dims.div(2).argsOpt(), scales.argsOpt(), shape); + // + // if (comptime @typeInfo(T) == .float) { + // const result_vec = @sqrt(self.data.*); + // const vec_ptr = try alloc.create(@TypeOf(result_vec)); + // vec_ptr.* = result_vec; + // return TargetType{ .data = vec_ptr }; + // } + // + // const arr: [total]T = self.data.*; + // var res_arr: [total]T = undefined; + // const UnsignedT = @Int(.unsigned, @typeInfo(T).int.bits); + // + // for (0..total) |i| { + // const v = arr[i]; + // res_arr[i] = if (v < 0) 0 else @as(T, @intCast(std.math.sqrt(@as(UnsignedT, @intCast(v))))); + // } + // + // const vec_ptr = try alloc.create(Vec); + // vec_ptr.* = res_arr; + // + // return TargetType{ .data = vec_ptr }; + // } + // + // /// Negate every element. + // pub fn negate(self: *const Self, alloc: Allocator) !Self { + // const result_vec = -self.data.*; + // + // const vec_ptr = try alloc.create(@TypeOf(result_vec)); + // vec_ptr.* = result_vec; + // + // return Self{ .data = vec_ptr }; + // } + // + // /// Extract sub-tensor by half-open ranges [start, end) per axis. + // /// All bounds comptime. Dims and scales preserved. + // /// Negative indices count from end: -1 = last element. 
+ // pub inline fn slice( + // self: *const Self, + // alloc: Allocator, + // comptime ranges: [rank]struct { start: ?isize = null, end: ?isize = null }, + // ) !blk: { + // var ns: [rank]comptime_int = undefined; + // for (0..rank) |i| { + // const dim = @as(isize, @intCast(shape[i])); + // const s: isize = blk2: { + // const raw = ranges[i].start orelse 0; + // break :blk2 if (raw < 0) raw + dim else raw; + // }; + // const e: isize = blk2: { + // const raw = ranges[i].end orelse dim; + // break :blk2 if (raw < 0) raw + dim else raw; + // }; + // if (s < 0) @compileError("slice: start out of bounds after normalization"); + // if (e < 0) @compileError("slice: end out of bounds after normalization"); + // if (s >= e) @compileError("slice: start must be < end"); + // if (e > dim) @compileError("slice: end exceeds shape"); + // ns[i] = e - s; + // } + // const new_shape: [rank]comptime_int = ns; + // break :blk Tensor(T, dims.argsOpt(), scales.argsOpt(), &new_shape); + // } { + // const new_shape: [rank]comptime_int = comptime blk: { + // var ns: [rank]comptime_int = undefined; + // for (0..rank) |i| { + // const dim = @as(isize, @intCast(shape[i])); + // const raw_s = ranges[i].start orelse 0; + // const raw_e = ranges[i].end orelse dim; + // const s: isize = if (raw_s < 0) raw_s + dim else raw_s; + // const e: isize = if (raw_e < 0) raw_e + dim else raw_e; + // ns[i] = e - s; + // } + // break :blk ns; + // }; + // const ResultType = Tensor(T, dims.argsOpt(), scales.argsOpt(), &new_shape); + // const DestVec = @Vector(ResultType.total, T); + // + // const src: [total]T = self.data.*; + // var dst: [ResultType.total]T = undefined; + // for (0..ResultType.total) |flat| { + // var src_flat: usize = 0; + // inline for (0..rank) |i| { + // const dim = @as(isize, @intCast(shape[i])); + // const raw_s = ranges[i].start orelse 0; + // const s: isize = if (raw_s < 0) raw_s + dim else raw_s; + // const coord = (flat / ResultType.strides_arr[i]) % new_shape[i]; + // src_flat 
+= (coord + @as(usize, @intCast(s))) * strides_arr[i]; + // } + // dst[flat] = src[src_flat]; + // } + // + // const vec_ptr = try alloc.create(DestVec); + // vec_ptr.* = dst; + // return ResultType{ .data = vec_ptr }; + // } + // + // /// Convert to a compatible Tensor type. + // /// • Dimension mismatch → compile error. + // /// • Dest.shape must equal self.shape, or total == 1 -> splat to Dest shape (scalar pattern). + // /// • Scale ratio is computed fully at comptime; only a SIMD multiply at runtime. + // pub fn to( + // self: *const Self, + // alloc: Allocator, + // comptime Dest: type, + // ) !Dest { + // if (comptime Self == Dest) return self.copy(alloc); + // + // // Run validation checks FIRST before dealing with types + // if (comptime !dims.eql(Dest.dims)) + // @compileError("Dimension mismatch in to: " ++ dims.str() ++ " vs " ++ Dest.dims.str()); + // if (comptime total != 1 and !sh.shapeEql(shape, Dest.shape)) + // @compileError("Shape mismatch in to: destination type must have the identical shape, or be a scalar."); + // + // const ratio = comptime (scales.getFactor(dims) / Dest.scales.getFactor(Dest.dims)); + // const DestT = Dest.ValueType; + // const DestVec = @Vector(Dest.total, DestT); + // + // // 1. Prepare the source vector (handling scalar -> tensor broadcast) + // const SrcVec = @Vector(Dest.total, T); + // const src_vec: SrcVec = if (comptime total == 1 and Dest.total != 1) + // @splat(self.data[0]) + // else + // self.data.*; + // + // var result_vec: DestVec = undefined; + // + // // 2. 
Perform the vectorized conversion safely + // if (comptime ratio == 1.0) { + // if (comptime T == DestT) { + // result_vec = src_vec; + // } else { + // const T_info = @typeInfo(T); + // const Dest_info = @typeInfo(DestT); + // + // result_vec = if (comptime T_info == .int and Dest_info == .int) + // @as(DestVec, @intCast(src_vec)) + // else if (comptime T_info == .float and Dest_info == .float) + // @as(DestVec, @floatCast(src_vec)) + // else if (comptime T_info == .int and Dest_info == .float) + // @as(DestVec, @floatFromInt(src_vec)) + // else if (comptime T_info == .float and Dest_info == .int) + // @as(DestVec, @intFromFloat(src_vec)) + // else + // unreachable; + // } + // } else if (comptime T == DestT) { + // if (comptime @typeInfo(T) == .float) { + // result_vec = src_vec * @as(DestVec, @splat(@as(T, @floatCast(ratio)))); + // } else { + // if (comptime ratio >= 1.0) { + // const mult: T = comptime @intFromFloat(@round(ratio)); + // result_vec = src_vec *| @as(DestVec, @splat(mult)); + // } else { + // const div_val: T = comptime @intFromFloat(@round(1.0 / ratio)); + // const half: T = comptime @divTrunc(div_val, 2); + // + // if (comptime @typeInfo(T).int.signedness == .unsigned) { + // result_vec = @divTrunc(src_vec + @as(DestVec, @splat(half)), @as(DestVec, @splat(div_val))); + // } else { + // // Vectorized branchless negative handling + // const is_pos = src_vec >= @as(DestVec, @splat(0)); + // const offsets = @select(T, is_pos, @as(DestVec, @splat(half)), @as(DestVec, @splat(-half))); + // result_vec = @divTrunc(src_vec + offsets, @as(DestVec, @splat(div_val))); + // } + // } + // } + // } else { + // // Cross-type fully vectorized casting with scales + // const FVec = @Vector(Dest.total, f64); + // const float_vec: FVec = switch (comptime @typeInfo(T)) { + // .float => @floatCast(src_vec), + // .int => @floatFromInt(src_vec), + // else => unreachable, + // }; + // + // const scaled = float_vec * @as(FVec, @splat(ratio)); + // + // result_vec = 
switch (comptime @typeInfo(DestT)) { + // .float => @floatCast(scaled), + // .int => @intFromFloat(@round(scaled)), + // else => unreachable, + // }; + // } + // + // // 3. Allocate once and assign the computed result + // const vec_ptr = try alloc.create(DestVec); + // vec_ptr.* = result_vec; + // return Dest{ .data = vec_ptr }; + // } + // + // const CmpResult = if (total == 1) bool else [total]bool; + // + // fn cmpResult(v: @Vector(total, bool)) CmpResult { + // return if (comptime total == 1) @reduce(.And, v) else @as([total]bool, v); + // } + // + // /// Resolve both sides to the finer scale, broadcasting shape {1} RHS if needed. + // fn resolveScalePair(self: *const Self, alloc: Allocator, rhs: anytype) !struct { + // l: Tensor(T, dims.argsOpt(), sh.finerScales(Self, @TypeOf(rhs)).argsOpt(), shape), + // r: Tensor(T, dims.argsOpt(), sh.finerScales(Self, @TypeOf(rhs)).argsOpt(), shape), + // + // fn deinit(s: @This(), a: Allocator) void { + // s.l.deinit(a); + // s.r.deinit(a); + // } + // } { + // const RhsType = @TypeOf(rhs); + // if (comptime !sh.isTensor(RhsType)) + // @compileError("rhs can only be a Tensor "); + // if (comptime RhsType.total != 1 and !sh.shapeEql(shape, RhsType.shape)) + // @compileError("Shape mismatch in comparison: element-wise operations require identical shapes, or a scalar RHS."); + // + // const TargetType = Tensor(T, dims.argsOpt(), sh.finerScales(Self, RhsType).argsOpt(), shape); + // return .{ .l = try self.to(alloc, TargetType), .r = try rhs.to(alloc, TargetType) }; + // } + // + // pub fn eq(self: *const Self, alloc: Allocator, rhs: anytype) !CmpResult { + // if (comptime !dims.eql(@TypeOf(rhs).dims)) + // @compileError("Dimension mismatch in eq."); + // const p = try resolveScalePair(self, alloc, rhs); + // defer p.deinit(alloc); + // return cmpResult(p.l.data.* == p.r.data.*); + // } + // + // pub fn ne(self: *const Self, alloc: Allocator, rhs: anytype) !CmpResult { + // if (comptime !dims.eql(@TypeOf(rhs).dims)) + // 
@compileError("Dimension mismatch in ne."); + // const p = try resolveScalePair(self, alloc, rhs); + // defer p.deinit(alloc); + // return cmpResult(p.l.data.* != p.r.data.*); + // } + // + // pub fn gt(self: *const Self, alloc: Allocator, rhs: anytype) !CmpResult { + // if (comptime !dims.eql(@TypeOf(rhs).dims)) + // @compileError("Dimension mismatch in gt."); + // const p = try resolveScalePair(self, alloc, rhs); + // defer p.deinit(alloc); + // return cmpResult(p.l.data.* > p.r.data.*); + // } + // + // pub fn gte(self: *const Self, alloc: Allocator, rhs: anytype) !CmpResult { + // if (comptime !dims.eql(@TypeOf(rhs).dims)) + // @compileError("Dimension mismatch in gte."); + // const p = try resolveScalePair(self, alloc, rhs); + // defer p.deinit(alloc); + // return cmpResult(p.l.data.* >= p.r.data.*); + // } + // + // pub fn lt(self: *const Self, alloc: Allocator, rhs: anytype) !CmpResult { + // if (comptime !dims.eql(@TypeOf(rhs).dims)) + // @compileError("Dimension mismatch in lt."); + // const p = try resolveScalePair(self, alloc, rhs); + // defer p.deinit(alloc); + // return cmpResult(p.l.data.* < p.r.data.*); + // } + // + // pub fn lte(self: *const Self, alloc: Allocator, rhs: anytype) !CmpResult { + // if (comptime !dims.eql(@TypeOf(rhs).dims)) + // @compileError("Dimension mismatch in lte."); + // const p = try resolveScalePair(self, alloc, rhs); + // defer p.deinit(alloc); + // return cmpResult(p.l.data.* <= p.r.data.*); + // } + // + // /// True iff every element is equal after scale resolution. + // pub fn eqAll(self: *const Self, alloc: Allocator, other: anytype) !bool { + // if (comptime !dims.eql(@TypeOf(other).dims)) + // @compileError("Dimension mismatch in eqAll."); + // const p = try resolveScalePair(self, alloc, other); + // defer p.deinit(alloc); + // return @reduce(.And, p.l.data.* == p.r.data.*); + // } + // + // /// True iff any element differs after scale resolution. 
+ // pub fn neAll(self: *const Self, alloc: Allocator, other: anytype) !bool { + // return !(try self.eqAll(alloc, other)); + // } + // + // pub fn contract( + // self: *const Self, + // alloc: Allocator, + // rhs: anytype, + // comptime axis_a: usize, + // comptime axis_b: usize, + // ) !blk: { + // const RhsType = @TypeOf(rhs); + // if (!sh.isTensor(RhsType)) + // @compileError("rhs can only be a Tensor "); + // if (axis_a >= rank) @compileError("contract: axis_a out of bounds"); + // if (axis_b >= RhsType.rank) @compileError("contract: axis_b out of bounds"); + // if (shape[axis_a] != RhsType.shape[axis_b]) @compileError("contract: shape mismatch at contraction axes"); + // + // const sa = sh.shapeRemoveAxis(shape, axis_a); + // const sb = sh.shapeRemoveAxis(RhsType.shape, axis_b); + // const rs_raw = sh.shapeCat(&sa, &sb); + // const rs: []const comptime_int = if (rs_raw.len == 0) &.{1} else &rs_raw; + // break :blk Tensor( + // T, + // dims.add(RhsType.dims).argsOpt(), + // sh.finerScales(Self, RhsType).argsOpt(), + // rs, + // ); + // } { + // const RhsType = @TypeOf(rhs); + // const k: usize = comptime shape[axis_a]; // contraction dimension + // + // const sa = comptime sh.shapeRemoveAxis(shape, axis_a); + // const sb = comptime sh.shapeRemoveAxis(RhsType.shape, axis_b); + // const rs_raw = comptime sh.shapeCat(&sa, &sb); + // const rs: []const comptime_int = comptime if (rs_raw.len == 0) &.{1} else &rs_raw; + // + // const ResultType = Tensor( + // T, + // dims.add(RhsType.dims).argsOpt(), + // sh.finerScales(Self, RhsType).argsOpt(), + // rs, + // ); + // + // const SelfNorm = Tensor(T, dims.argsOpt(), sh.finerScales(Self, RhsType).argsOpt(), shape); + // const OtherNorm = Tensor(T, RhsType.dims.argsOpt(), sh.finerScales(Self, RhsType).argsOpt(), RhsType.shape); + // + // // Normalize both tensors to matching, finer scales safely with allocations + // const l = try self.to(alloc, SelfNorm); + // defer l.deinit(alloc); + // const r = try rhs.to(alloc, 
OtherNorm); + // defer r.deinit(alloc); + // + // const ResVec = @Vector(ResultType.total, T); + // + // // FAST PATH: Dot Product + // if (comptime rank == 1 and RhsType.rank == 1 and axis_a == 0 and axis_b == 0) { + // const result_vec: ResVec = if (comptime !sh.isInt(T)) blk: { + // break :blk @splat(@reduce(.Add, l.data.* * r.data.*)); + // } else blk: { + // const mul_arr: [total]T = l.data.* *| r.data.*; + // var acc: T = 0; + // for (mul_arr) |val| acc +|= val; + // break :blk @splat(acc); + // }; + // + // const vec_ptr = try alloc.create(ResVec); + // vec_ptr.* = result_vec; + // return ResultType{ .data = vec_ptr }; + // } + // + // // --- ZERO-COST COERCION TO ARRAYS FOR RUNTIME INDEXING --- + // const a_arr: [total]T = l.data.*; + // const b_arr: [RhsType.total]T = r.data.*; + // + // // FAST PATH: 2D Matrix Multiplication + // if (comptime rank == 2 and RhsType.rank == 2 and axis_a == 1 and axis_b == 0) { + // const rows = shape[0]; + // const cols = RhsType.shape[1]; + // const inner = shape[1]; + // + // var res_arr: [ResultType.total]T = undefined; + // + // for (0..rows) |i| { + // for (0..cols) |j| { + // var acc: T = 0; + // for (0..inner) |id| { + // const a_flat = i * _strides[0] + id * _strides[1]; + // const b_flat = id * RhsType.strides_arr[0] + j * RhsType.strides_arr[1]; + // + // if (comptime sh.isInt(T)) acc +|= a_arr[a_flat] *| b_arr[b_flat] else acc += a_arr[a_flat] * b_arr[b_flat]; + // } + // res_arr[i * cols + j] = acc; + // } + // } + // + // const vec_ptr = try alloc.create(ResVec); + // vec_ptr.* = res_arr; + // return ResultType{ .data = vec_ptr }; + // } + // + // // FALLBACK PATH + // const rs_raw_strides = comptime sh.shapeStrides(&rs_raw); + // var result_arr: [ResultType.total]T = undefined; + // + // for (0..ResultType.total) |res_flat| { + // const res_coords = sh.decodeFlatCoords(res_flat, rs_raw.len, rs_raw_strides); + // + // var a_free: [sa.len]usize = undefined; + // for (0..sa.len) |i| a_free[i] = res_coords[i]; + 
// var b_free: [sb.len]usize = undefined; + // for (0..sb.len) |i| b_free[i] = res_coords[sa.len + i]; + // + // var acc: T = 0; + // for (0..k) |ki| { + // const a_coords = sh.insertAxis(rank, axis_a, ki, &a_free); + // const b_coords = sh.insertAxis(RhsType.rank, axis_b, ki, &b_free); + // const a_flat = sh.encodeFlatCoords(&a_coords, rank, _strides); + // const b_flat = sh.encodeFlatCoords(&b_coords, RhsType.rank, RhsType.strides_arr); + // + // if (comptime sh.isInt(T)) acc +|= a_arr[a_flat] *| b_arr[b_flat] else acc += a_arr[a_flat] * b_arr[b_flat]; + // } + // result_arr[res_flat] = acc; + // } + // + // const vec_ptr = try alloc.create(ResVec); + // vec_ptr.* = result_arr; + // return ResultType{ .data = vec_ptr }; + // } + // + // /// 3D Cross Product. Only defined for Rank-1 tensors of length 3. + // /// Result dimensions are the sum of input dimensions. + // pub fn cross(self: *const Self, alloc: Allocator, rhs: anytype) !Tensor( + // T, + // dims.add(@TypeOf(rhs).dims).argsOpt(), + // sh.finerScales(Self, @TypeOf(rhs)).argsOpt(), + // &.{3}, + // ) { + // const RhsType = @TypeOf(rhs); + // + // if (!sh.isTensor(RhsType)) + // @compileError("rhs can only be a Tensor "); + // if (comptime rank != 1 or shape[0] != 3 or RhsType.rank != 1 or RhsType.shape[0] != 3) + // @compileError("cross product is only defined for 3D vectors (rank-1, length 3)"); + // + // // Bring both to the same scale (e.g., mm vs m) + // const p = try self.resolveScalePair(alloc, rhs); + // defer p.deinit(alloc); + // const l = p.l.data; + // const r = p.r.data; + // + // var res: [3]T = undefined; + // if (comptime sh.isInt(T)) { + // res[0] = (l[1] *| r[2]) -| (l[2] *| r[1]); + // res[1] = (l[2] *| r[0]) -| (l[0] *| r[2]); + // res[2] = (l[0] *| r[1]) -| (l[1] *| r[0]); + // } else { + // res[0] = (l[1] * r[2]) - (l[2] * r[1]); + // res[1] = (l[2] * r[0]) - (l[0] * r[2]); + // res[2] = (l[0] * r[1]) - (l[1] * r[0]); + // } + // + // return try .load(alloc, res); + // } + // + // /// 
Sum of squared elements. Cheaper than length(); use for ordering. + // pub fn lengthSqr(self: *const Self) T { + // return @reduce(.Add, self.data.* * self.data.*); + // } + // + // /// Euclidean length (L2 norm). + // pub fn length(self: *const Self) T { + // const sq = self.lengthSqr(); + // if (comptime @typeInfo(T) == .int) { + // const UnsignedT = @Int(.unsigned, @typeInfo(T).int.bits); + // return @as(T, @intCast(std.math.sqrt(@as(UnsignedT, @intCast(sq))))); + // } + // return @sqrt(sq); + // } + // + // /// Product of all elements. Result has shape {1}; dimension exponent * total. + // pub fn product(self: *const Self, alloc: Allocator) !Tensor( + // T, + // dims.scale(@as(comptime_int, total)).argsOpt(), + // scales.argsOpt(), + // &.{1}, + // ) { + // return Tensor( + // T, + // dims.scale(@as(comptime_int, total)).argsOpt(), + // scales.argsOpt(), + // &.{1}, + // ).splat(alloc, @reduce(.Mul, self.data.*)); + // } + // + // pub fn formatNumber( + // self: *const Self, + // writer: *std.Io.Writer, + // options: std.fmt.Number, + // ) !void { + // if (comptime total == 1) { + // switch (@typeInfo(T)) { + // .float, .comptime_float => try writer.printFloat(self.data[0], options), + // .int, .comptime_int => try writer.printInt(self.data[0], 10, .lower, .{ + // .width = options.width, + // .alignment = options.alignment, + // .fill = options.fill, + // .precision = options.precision, + // }), + // else => unreachable, + // } + // } else { + // try writer.writeAll("("); + // const max_to_print = 6; + // inline for (0..@min(total, max_to_print)) |i| { + // if (i > 0) try writer.writeAll(", "); + // switch (@typeInfo(T)) { + // .float, .comptime_float => try writer.printFloat(self.data[i], options), + // .int, .comptime_int => try writer.printInt(self.data[i], 10, .lower, .{ + // .width = options.width, + // .alignment = options.alignment, + // .fill = options.fill, + // .precision = options.precision, + // }), + // else => unreachable, + // } + // if (comptime 
// i == max_to_print - 1 and total != max_to_print - 1)
        //                     try writer.writeAll(", ...");
        //             }
        //             try writer.writeAll(")");
        //         }
        //
        //         var first = true;
        //         inline for (std.enums.values(Dimension)) |bu| {
        //             const v = dims.get(bu);
        //             if (comptime v == 0) continue;
        //             if (!first) try writer.writeAll(".");
        //             first = false;
        //
        //             const uscale = scales.get(bu);
        //             if (bu == .T and (uscale == .min or uscale == .hour or uscale == .year))
        //                 try writer.print("{s}", .{uscale.str()})
        //             else
        //                 try writer.print("{s}{s}", .{ uscale.str(), bu.unit() });
        //
        //             if (v != 1) try sh.printSuperscript(writer, v);
        //         }
        //     }
    };
}

// ═════════════════════════════════════════════════════════════════════════════
// Tests
// ─────────────────────────────────────────────────────────────────────────────

// ─── Scalar tests ─────────────────────────────────────────────────────────

// Round-trip check: a value uploaded with `splat` must come back unchanged
// through `toCpu`, for both an integer and a float element type.
test "TensorGpu | Scalar init" {
    // Host allocations (the TensorAlloc copies) are released together by the arena.
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // NOTE(review): `device` is never explicitly released in this test — confirm
    // that `gpu.GpuDevice` needs no matching deinit.
    const device = try gpu.GpuDevice.init(.{});
    var grena = gpu.GpuArenaAllocator.init(std.testing.allocator, device.gpuAllocator());
    defer grena.deinit();
    const glloc = grena.gpuAllocator();

    const Meter = Tensor(i128, .{ .L = 1 }, .{ .L = @enumFromInt(-3) }, &.{1});
    const Second = Tensor(f32, .{ .T = 1 }, .{ .T = .n }, &.{1});

    const distance = try Meter.splat(glloc, 10);
    defer distance.deinit(glloc);
    const time = try Second.splat(glloc, 2);
    defer time.deinit(glloc);

    const distance_cpu = try distance.toCpu(alloc);
    const time_cpu = try time.toCpu(alloc);

    try std.testing.expectEqual(10, distance_cpu.data[0]);
    try std.testing.expectEqual(2, time_cpu.data[0]);
}

// test "TensorAlloc | Scalar comparisons (eq, ne, gt, gte, lt, lte)" {
//     var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
//     defer arena.deinit();
//     const alloc = arena.allocator();
//
+// const Meter = Tensor(i128, .{ .L = 1 }, .{}, &.{1}); +// const KiloMeter = Tensor(i128, .{ .L = 1 }, .{ .L = .k }, &.{1}); +// +// const m1000 = try Meter.splat(alloc, 1000); +// const km1 = try KiloMeter.splat(alloc, 1); +// const km2 = try KiloMeter.splat(alloc, 2); +// +// try std.testing.expect(try m1000.eq(alloc, km1)); +// try std.testing.expect(try km1.eq(alloc, m1000)); +// try std.testing.expect(try km2.ne(alloc, m1000)); +// +// try std.testing.expect(try km2.gt(alloc, m1000)); +// try std.testing.expect(try km2.gt(alloc, km1)); +// try std.testing.expect(try km1.gte(alloc, m1000)); +// try std.testing.expect(try km2.gte(alloc, m1000)); +// +// try std.testing.expect(try m1000.lt(alloc, km2)); +// try std.testing.expect(try km1.lt(alloc, km2)); +// try std.testing.expect(try km1.lte(alloc, m1000)); +// try std.testing.expect(try m1000.lte(alloc, km2)); +// } +// +// test "TensorAlloc | Scalar Add" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const Meter = Tensor(i128, .{ .L = 1 }, .{}, &.{1}); +// const KiloMeter = Tensor(i128, .{ .L = 1 }, .{ .L = .k }, &.{1}); +// const KiloMeter_f = Tensor(f64, .{ .L = 1 }, .{ .L = .k }, &.{1}); +// +// const distance = try Meter.splat(alloc, 10); +// const distance2 = try Meter.splat(alloc, 20); +// const added = try distance.add(alloc, distance2); +// try std.testing.expectEqual(30, added.data[0]); +// try std.testing.expectEqual(1, @TypeOf(added).dims.get(.L)); +// +// const distance3 = try KiloMeter.splat(alloc, 2); +// const added2 = try distance.add(alloc, distance3); +// try std.testing.expectEqual(2010, added2.data[0]); +// +// const added3_tmp = try distance3.add(alloc, distance); +// const added3 = try added3_tmp.to(alloc, KiloMeter); +// try std.testing.expectEqual(2, added3.data[0]); +// +// const distance4 = try KiloMeter_f.splat(alloc, 2); +// const added4_tmp = try distance4.add(alloc, distance); +// 
const added4 = try added4_tmp.to(alloc, KiloMeter_f); +// try std.testing.expectApproxEqAbs(2.01, added4.data[0], 0.000001); +// } +// +// test "TensorAlloc | Scalar Sub" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const Meter = Tensor(i128, .{ .L = 1 }, .{}, &.{1}); +// const KiloMeter_f = Tensor(f64, .{ .L = 1 }, .{ .L = .k }, &.{1}); +// +// const a = try Meter.splat(alloc, 500); +// const b = try Meter.splat(alloc, 200); +// const diff = try a.sub(alloc, b); +// try std.testing.expectEqual(300, diff.data[0]); +// const diff2 = try b.sub(alloc, a); +// try std.testing.expectEqual(-300, diff2.data[0]); +// +// const km_f = try KiloMeter_f.splat(alloc, 2.5); +// const m_f = try Meter.splat(alloc, 500); +// const diff3 = try km_f.sub(alloc, m_f); +// try std.testing.expectApproxEqAbs(2000.0, diff3.data[0], 1e-4); +// } +// +// test "TensorAlloc | Scalar MulBy" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const Meter = Tensor(i128, .{ .L = 1 }, .{}, &.{1}); +// const Second = Tensor(f32, .{ .T = 1 }, .{}, &.{1}); +// +// const d = try Meter.splat(alloc, 3); +// const t = try Second.splat(alloc, 4); +// const at = try d.mul(alloc, t); +// try std.testing.expectEqual(12, at.data[0]); +// try std.testing.expectEqual(1, @TypeOf(at).dims.get(.L)); +// try std.testing.expectEqual(1, @TypeOf(at).dims.get(.T)); +// +// const d2 = try Meter.splat(alloc, 5); +// const area = try d.mul(alloc, d2); +// try std.testing.expectEqual(15, area.data[0]); +// try std.testing.expectEqual(2, @TypeOf(area).dims.get(.L)); +// } +// +// test "TensorAlloc | Scalar MulBy with scale" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const KiloMeter = Tensor(f32, .{ .L = 1 }, .{ .L = .k }, &.{1}); +// const 
KiloGram = Tensor(f32, .{ .M = 1 }, .{ .M = .k }, &.{1}); +// +// const dist = try KiloMeter.splat(alloc, 2.0); +// const mass = try KiloGram.splat(alloc, 3.0); +// const prod = try dist.mul(alloc, mass); +// try std.testing.expectEqual(1, @TypeOf(prod).dims.get(.L)); +// try std.testing.expectEqual(1, @TypeOf(prod).dims.get(.M)); +// } +// +// test "TensorAlloc | Scalar MulBy with type change" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const Meter = Tensor(i128, .{ .L = 1 }, .{ .L = .k }, &.{1}); +// const Second = Tensor(f64, .{ .T = 1 }, .{}, &.{1}); +// const KmSec = Tensor(i64, .{ .L = 1, .T = 1 }, .{ .L = .k }, &.{1}); +// const KmSec_f = Tensor(f32, .{ .L = 1, .T = 1 }, .{ .L = .k }, &.{1}); +// +// const d = try Meter.splat(alloc, 3); +// const t = try Second.splat(alloc, 4); +// const dt_prod = try d.mul(alloc, t); +// +// const kmsec_val = try dt_prod.to(alloc, KmSec); +// try std.testing.expectEqual(12, kmsec_val.data[0]); +// +// const kmsec_f_val = try dt_prod.to(alloc, KmSec_f); +// try std.testing.expectApproxEqAbs(12.0, kmsec_f_val.data[0], 0.0001); +// } +// +// test "TensorAlloc | Scalar MulBy small" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const Meter = Tensor(i128, .{ .L = 1 }, .{ .L = .n }, &.{1}); +// const Second = Tensor(f32, .{ .T = 1 }, .{}, &.{1}); +// const d = try Meter.splat(alloc, 3); +// const t = try Second.splat(alloc, 4); +// const dt = try d.mul(alloc, t); +// try std.testing.expectEqual(12, dt.data[0]); +// } +// +// test "TensorAlloc | Scalar MulBy dimensionless" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const DimLess = Tensor(i128, .{}, .{}, &.{1}); +// const Meter = Tensor(i128, .{ .L = 1 }, .{}, &.{1}); +// const d = try 
Meter.splat(alloc, 7); +// const dl = try DimLess.splat(alloc, 3); +// const scaled = try d.mul(alloc, dl); +// try std.testing.expectEqual(21, scaled.data[0]); +// } +// +// test "TensorAlloc | Scalar Sqrt" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const MeterSquare = Tensor(i128, .{ .L = 2 }, .{}, &.{1}); +// const MeterSquare_f = Tensor(f64, .{ .L = 2 }, .{}, &.{1}); +// +// var d = try MeterSquare.splat(alloc, 9); +// var scaled = try d.sqrt(alloc); +// try std.testing.expectEqual(3, scaled.data[0]); +// try std.testing.expectEqual(1, @TypeOf(scaled).dims.get(.L)); +// +// d = try MeterSquare.splat(alloc, -5); +// scaled = try d.sqrt(alloc); +// try std.testing.expectEqual(0, scaled.data[0]); +// +// const d2 = try MeterSquare_f.splat(alloc, 20); +// const scaled2 = try d2.sqrt(alloc); +// try std.testing.expectApproxEqAbs(4.472135955, scaled2.data[0], 1e-4); +// } +// +// test "TensorAlloc | Scalar Chained: velocity and acceleration" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const Meter = Tensor(i128, .{ .L = 1 }, .{}, &.{1}); +// const Second = Tensor(f32, .{ .T = 1 }, .{}, &.{1}); +// +// const dist = try Meter.splat(alloc, 100); +// const t1 = try Second.splat(alloc, 5); +// const velocity = try dist.div(alloc, t1); +// try std.testing.expectEqual(20, velocity.data[0]); +// +// const t2 = try Second.splat(alloc, 4); +// const accel = try velocity.div(alloc, t2); +// try std.testing.expectEqual(5, accel.data[0]); +// } +// +// test "TensorAlloc | Scalar DivBy integer exact" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const Meter = Tensor(i128, .{ .L = 1 }, .{}, &.{1}); +// const Second = Tensor(f32, .{ .T = 1 }, .{}, &.{1}); +// +// const dist = try 
Meter.splat(alloc, 120); +// const time = try Second.splat(alloc, 4); +// const vel = try dist.div(alloc, time); +// try std.testing.expectEqual(30, vel.data[0]); +// } +// +// test "TensorAlloc | Scalar Finer scales skip dim 0" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const Dimless = Tensor(i128, .{}, .{}, &.{1}); +// const KiloMetre = Tensor(i128, .{ .L = 1 }, .{ .L = .k }, &.{1}); +// +// const r = try Dimless.splat(alloc, 30); +// const km = try KiloMetre.splat(alloc, 4); +// const vel = try r.mul(alloc, km); +// try std.testing.expectEqual(120, vel.data[0]); +// try std.testing.expectEqual(Scales.UnitScale.k, @TypeOf(vel).scales.get(.L)); +// } +// +// test "TensorAlloc | Scalar Conversion chain: km -> m -> cm" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const KiloMeter = Tensor(i128, .{ .L = 1 }, .{ .L = .k }, &.{1}); +// const Meter = Tensor(i128, .{ .L = 1 }, .{}, &.{1}); +// const CentiMeter = Tensor(i128, .{ .L = 1 }, .{ .L = .c }, &.{1}); +// +// const km = try KiloMeter.splat(alloc, 15); +// const m = try km.to(alloc, Meter); +// const cm = try m.to(alloc, CentiMeter); +// try std.testing.expectEqual(15_000, m.data[0]); +// try std.testing.expectEqual(1_500_000, cm.data[0]); +// } +// +// test "TensorAlloc | Scalar Conversion: hours -> minutes -> seconds" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const Hour = Tensor(i128, .{ .T = 1 }, .{ .T = .hour }, &.{1}); +// const Minute = Tensor(i128, .{ .T = 1 }, .{ .T = .min }, &.{1}); +// const Second = Tensor(i128, .{ .T = 1 }, .{}, &.{1}); +// +// const h = try Hour.splat(alloc, 1); +// const min = try h.to(alloc, Minute); +// const sec = try min.to(alloc, Second); +// try std.testing.expectEqual(60, min.data[0]); 
+// try std.testing.expectEqual(3600, sec.data[0]); +// } +// +// test "TensorAlloc | Scalar Format" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const MeterPerSecondSq = Tensor(f32, .{ .L = 1, .T = -2 }, .{ .T = .n }, &.{1}); +// const Meter = Tensor(f32, .{ .L = 1 }, .{}, &.{1}); +// +// const m = try Meter.splat(alloc, 1.23456); +// const accel = try MeterPerSecondSq.splat(alloc, 9.81); +// +// var buf: [64]u8 = undefined; +// var res = try std.fmt.bufPrint(&buf, "{d:.2}", .{m}); +// try std.testing.expectEqualStrings("1.23m", res); +// +// res = try std.fmt.bufPrint(&buf, "{d}", .{accel}); +// try std.testing.expectEqualStrings("9.81m.ns⁻²", res); +// } +// +// test "TensorAlloc | Scalar Abs" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const Meter = Tensor(i128, .{ .L = 1 }, .{}, &.{1}); +// const MeterF = Tensor(f32, .{ .L = 1 }, .{}, &.{1}); +// +// try std.testing.expectEqual(50, (try (try Meter.splat(alloc, -50)).abs(alloc)).data[0]); +// try std.testing.expectEqual(42.5, (try (try MeterF.splat(alloc, -42.5)).abs(alloc)).data[0]); +// } +// +// test "TensorAlloc | Scalar Pow" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const Meter = Tensor(i128, .{ .L = 1 }, .{}, &.{1}); +// const d = try Meter.splat(alloc, 4); +// try std.testing.expectEqual(16, (try d.pow(alloc, 2)).data[0]); +// try std.testing.expectEqual(64, (try d.pow(alloc, 3)).data[0]); +// } +// +// test "TensorAlloc | Scalar add/sub bare number on dimensionless scalar" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const DimLess = Tensor(i128, .{}, .{}, &.{1}); +// const a = try DimLess.splat(alloc, 10); +// try 
std.testing.expectEqual(15, (try a.add(alloc, try DimLess.splat(alloc, 5))).data[0]); +// try std.testing.expectEqual(7, (try a.sub(alloc, try DimLess.splat(alloc, 3))).data[0]); +// } +// +// test "TensorAlloc | Scalar Imperial length scales" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const Foot = Tensor(f64, .{ .L = 1 }, .{ .L = .ft }, &.{1}); +// const Meter = Tensor(f64, .{ .L = 1 }, .{}, &.{1}); +// const Inch = Tensor(f64, .{ .L = 1 }, .{ .L = .inch }, &.{1}); +// +// try std.testing.expectApproxEqAbs(0.3048, (try (try Foot.splat(alloc, 1.0)).to(alloc, Meter)).data[0], 1e-9); +// try std.testing.expectApproxEqAbs(1.0, (try (try Inch.splat(alloc, 12.0)).to(alloc, Foot)).data[0], 1e-9); +// } +// +// test "TensorAlloc | Scalar Imperial mass scales" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const Pound = Tensor(f64, .{ .M = 1 }, .{ .M = .lb }, &.{1}); +// const Ounce = Tensor(f64, .{ .M = 1 }, .{ .M = .oz }, &.{1}); +// +// const total = try (try (try Pound.splat(alloc, 2.0)).add(alloc, try Ounce.splat(alloc, 8.0))).to(alloc, Pound); +// try std.testing.expectApproxEqAbs(2.5, total.data[0], 1e-6); +// } +// +// // ─── Vector / Tensor tests ──────────────────────────────────────────────── +// +// test "TensorAlloc | Vector initiate" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const Meter4 = Tensor(f32, .{ .L = 1 }, .{}, &.{4}); +// const m = try Meter4.splat(alloc, 1); +// try std.testing.expect(m.data[0] == 1); +// try std.testing.expect(m.data[3] == 1); +// } +// +// test "TensorAlloc | Vector format" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const MeterPerSecondSq = 
Tensor(f32, .{ .L = 1, .T = -2 }, .{ .T = .n }, &.{3}); +// const KgMeterPerSecond = Tensor(f32, .{ .M = 1, .L = 1, .T = -1 }, .{ .M = .k }, &.{3}); +// +// const accel = try MeterPerSecondSq.splat(alloc, 9.81); +// const momentum = try KgMeterPerSecond.load(alloc, &.{ 43, 0, 11 }); +// +// var buf: [64]u8 = undefined; +// var res = try std.fmt.bufPrint(&buf, "{d}", .{accel}); +// try std.testing.expectEqualStrings("(9.81, 9.81, 9.81)m.ns⁻²", res); +// +// res = try std.fmt.bufPrint(&buf, "{d:.2}", .{momentum}); +// try std.testing.expectEqualStrings("(43.00, 0.00, 11.00)m.kg.s⁻¹", res); +// } +// +// test "TensorAlloc | Vector Vec3 Init and Basic Arithmetic" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const Meter3 = Tensor(i32, .{ .L = 1 }, .{}, &.{3}); +// +// const v_zero = try Meter3.splat(alloc, 0); +// try std.testing.expectEqual(0, v_zero.data[0]); +// try std.testing.expectEqual(0, v_zero.data[2]); +// +// const v_one = try Meter3.splat(alloc, 1); +// try std.testing.expectEqual(1, v_one.data[0]); +// +// const v_def = try Meter3.splat(alloc, 5); +// try std.testing.expectEqual(5, v_def.data[2]); +// +// const v1 = try Meter3.load(alloc, &.{ 10, 20, 30 }); +// const v2 = try Meter3.load(alloc, &.{ 2, 4, 6 }); +// +// const added = try v1.add(alloc, v2); +// try std.testing.expectEqual(12, added.data[0]); +// try std.testing.expectEqual(24, added.data[1]); +// try std.testing.expectEqual(36, added.data[2]); +// +// const subbed = try v1.sub(alloc, v2); +// try std.testing.expectEqual(8, subbed.data[0]); +// try std.testing.expectEqual(16, subbed.data[1]); +// try std.testing.expectEqual(24, subbed.data[2]); +// +// const neg = try v1.negate(alloc); +// try std.testing.expectEqual(-10, neg.data[0]); +// try std.testing.expectEqual(-20, neg.data[1]); +// try std.testing.expectEqual(-30, neg.data[2]); +// } +// +// test "TensorAlloc | Vector Kinematics (scalar 
mul/div broadcast)" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const Meter3 = Tensor(i32, .{ .L = 1 }, .{}, &.{3}); +// const Second1 = Tensor(i32, .{ .T = 1 }, .{}, &.{1}); +// +// const pos = try Meter3.load(alloc, &.{ 100, 200, 300 }); +// const time = try Second1.splat(alloc, 10); +// +// const vel = try pos.div(alloc, time); +// try std.testing.expectEqual(10, vel.data[0]); +// try std.testing.expectEqual(20, vel.data[1]); +// try std.testing.expectEqual(30, vel.data[2]); +// try std.testing.expectEqual(1, @TypeOf(vel).dims.get(.L)); +// try std.testing.expectEqual(-1, @TypeOf(vel).dims.get(.T)); +// +// const new_pos = try vel.mul(alloc, time); +// try std.testing.expectEqual(100, new_pos.data[0]); +// try std.testing.expectEqual(0, @TypeOf(new_pos).dims.get(.T)); +// } +// +// test "TensorAlloc | Vector Element-wise Math and Scaling" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const Meter3 = Tensor(i32, .{ .L = 1 }, .{}, &.{3}); +// +// const v1 = try Meter3.load(alloc, &.{ 10, 20, 30 }); +// const v2 = try Meter3.load(alloc, &.{ 2, 5, 10 }); +// +// const dv = try v1.div(alloc, v2); +// try std.testing.expectEqual(5, dv.data[0]); +// try std.testing.expectEqual(4, dv.data[1]); +// try std.testing.expectEqual(3, dv.data[2]); +// try std.testing.expectEqual(0, @TypeOf(dv).dims.get(.L)); +// } +// +// test "TensorAlloc | Vector Conversions" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const KiloMeter3 = Tensor(i32, .{ .L = 1 }, .{ .L = .k }, &.{3}); +// const Meter3 = Tensor(i32, .{ .L = 1 }, .{}, &.{3}); +// +// const v_km = try KiloMeter3.load(alloc, &.{ 1, 2, 3 }); +// const v_m = try v_km.to(alloc, Meter3); +// +// try std.testing.expectEqual(1000, v_m.data[0]); +// 
try std.testing.expectEqual(2000, v_m.data[1]); +// try std.testing.expectEqual(3000, v_m.data[2]); +// try std.testing.expectEqual(UnitScale.none, @TypeOf(v_m).scales.get(.L)); +// } +// +// test "TensorAlloc | Vector Length" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const MeterInt3 = Tensor(i32, .{ .L = 1 }, .{}, &.{3}); +// const MeterFloat3 = Tensor(f32, .{ .L = 1 }, .{}, &.{3}); +// +// const v_int = try MeterInt3.load(alloc, &.{ 3, 4, 0 }); +// try std.testing.expectEqual(25, v_int.lengthSqr()); +// try std.testing.expectEqual(5, v_int.length()); +// +// const v_float = try MeterFloat3.load(alloc, &.{ 3.0, 4.0, 0.0 }); +// try std.testing.expectApproxEqAbs(@as(f32, 25.0), v_float.lengthSqr(), 1e-4); +// try std.testing.expectApproxEqAbs(@as(f32, 5.0), v_float.length(), 1e-4); +// } +// +// test "TensorAlloc | Vector Comparisons" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const Meter3 = Tensor(f32, .{ .L = 1 }, .{}, &.{3}); +// const KiloMeter3 = Tensor(f32, .{ .L = 1 }, .{ .L = .k }, &.{3}); +// +// const v1 = try Meter3.load(alloc, &.{ 1000.0, 500.0, 0.0 }); +// const v2 = try KiloMeter3.load(alloc, &.{ 1.0, 0.5, 0.0 }); +// const v3 = try KiloMeter3.load(alloc, &.{ 1.0, 0.6, 0.0 }); +// +// try std.testing.expect(try v1.eqAll(alloc, v2)); +// try std.testing.expect(try v1.neAll(alloc, v3)); +// +// const higher = try v3.gt(alloc, v1); +// try std.testing.expectEqual(false, higher[0]); +// try std.testing.expectEqual(true, higher[1]); +// try std.testing.expectEqual(false, higher[2]); +// +// const equal = try v3.eq(alloc, v1); +// try std.testing.expectEqual(true, equal[0]); +// try std.testing.expectEqual(false, equal[1]); +// try std.testing.expectEqual(true, equal[2]); +// +// const low_eq = try v1.lte(alloc, v3); +// try std.testing.expect(low_eq[0] and 
low_eq[1] and low_eq[2]); +// } +// +// test "TensorAlloc | Vector vs Scalar broadcast comparison" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const Meter3 = Tensor(f32, .{ .L = 1 }, .{}, &.{3}); +// const KiloMeter1 = Tensor(f32, .{ .L = 1 }, .{ .L = .k }, &.{1}); +// +// const positions = try Meter3.load(alloc, &.{ 500.0, 1200.0, 3000.0 }); +// const threshold = try KiloMeter1.splat(alloc, 1); // 1 km = 1000 m +// +// const exceeded = try positions.gt(alloc, threshold); +// try std.testing.expectEqual(false, exceeded[0]); +// try std.testing.expectEqual(true, exceeded[1]); +// try std.testing.expectEqual(true, exceeded[2]); +// +// const Meter1 = Tensor(f32, .{ .L = 1 }, .{}, &.{1}); +// const exact = try positions.eq(alloc, try Meter1.splat(alloc, 500)); +// try std.testing.expect(exact[0] == true); +// try std.testing.expect(exact[1] == false); +// } +// +// test "TensorAlloc | Vector contract — dot product (rank-1 * rank-1)" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const Meter3 = Tensor(f32, .{ .L = 1 }, .{}, &.{3}); +// const Newton3 = Tensor(f32, .{ .M = 1, .L = 1, .T = -2 }, .{}, &.{3}); +// +// const pos = try Meter3.load(alloc, &.{ 10.0, 0.0, 0.0 }); +// const force = try Newton3.load(alloc, &.{ 5.0, 5.0, 0.0 }); +// +// const work = try force.contract(alloc, pos, 0, 0); +// try std.testing.expectEqual(50.0, work.data[0]); +// try std.testing.expectEqual(1, @TypeOf(work).dims.get(.M)); +// try std.testing.expectEqual(2, @TypeOf(work).dims.get(.L)); +// try std.testing.expectEqual(-2, @TypeOf(work).dims.get(.T)); +// } +// +// test "TensorAlloc | Vector contract — matrix multiply (rank-2 * rank-2)" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const A = Tensor(f32, 
.{}, .{}, &.{ 2, 3 }); +// const B = Tensor(f32, .{}, .{}, &.{ 3, 2 }); +// +// const a = try A.load(alloc, &.{ 1, 2, 3, 4, 5, 6 }); +// const b = try B.load(alloc, &.{ 7, 8, 9, 10, 11, 12 }); +// +// const c = try a.contract(alloc, b, 1, 0); +// try std.testing.expectEqual(58, c.data[Tensor(f32, .{}, .{}, &.{ 2, 2 }).idx(.{ 0, 0 })]); +// try std.testing.expectEqual(64, c.data[Tensor(f32, .{}, .{}, &.{ 2, 2 }).idx(.{ 0, 1 })]); +// try std.testing.expectEqual(139, c.data[Tensor(f32, .{}, .{}, &.{ 2, 2 }).idx(.{ 1, 0 })]); +// try std.testing.expectEqual(154, c.data[Tensor(f32, .{}, .{}, &.{ 2, 2 }).idx(.{ 1, 1 })]); +// } +// +// test "TensorAlloc | Vector Abs, Pow, Sqrt and Product" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const Meter3 = Tensor(f32, .{ .L = 1 }, .{}, &.{3}); +// +// const v1 = try Meter3.load(alloc, &.{ -2.0, 3.0, -4.0 }); +// const v_abs = try v1.abs(alloc); +// try std.testing.expectEqual(2.0, v_abs.data[0]); +// try std.testing.expectEqual(4.0, v_abs.data[2]); +// +// const vol = try v_abs.product(alloc); +// try std.testing.expectEqual(24.0, vol.data[0]); +// try std.testing.expectEqual(3, @TypeOf(vol).dims.get(.L)); +// +// const area_vec = try v_abs.pow(alloc, 2); +// try std.testing.expectEqual(4.0, area_vec.data[0]); +// try std.testing.expectEqual(16.0, area_vec.data[2]); +// try std.testing.expectEqual(2, @TypeOf(area_vec).dims.get(.L)); +// +// const sqrted = try area_vec.sqrt(alloc); +// try std.testing.expectEqual(2, sqrted.data[0]); +// try std.testing.expectEqual(4, sqrted.data[2]); +// try std.testing.expectEqual(1, @TypeOf(sqrted).dims.get(.L)); +// } +// +// test "TensorAlloc | Vector eq broadcast on dimensionless" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const DimLess3 = Tensor(i32, .{}, .{}, &.{3}); +// const v = try 
DimLess3.load(alloc, &.{ 1, 2, 3 }); +// +// const eq_res = try v.eq(alloc, try DimLess3.splat(alloc, 2)); +// try std.testing.expectEqual(false, eq_res[0]); +// try std.testing.expectEqual(true, eq_res[1]); +// try std.testing.expectEqual(false, eq_res[2]); +// } +// +// test "TensorAlloc | Tensor idx helper and matrix access" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const Mat3x3 = Tensor(f32, .{}, .{}, &.{ 3, 3 }); +// var m = try Mat3x3.splat(alloc, 0); +// m.data[Mat3x3.idx(.{ 0, 0 })] = 1; +// m.data[Mat3x3.idx(.{ 1, 1 })] = 2; +// m.data[Mat3x3.idx(.{ 2, 2 })] = 3; +// +// try std.testing.expectEqual(1.0, m.data[0]); +// try std.testing.expectEqual(2.0, m.data[4]); +// try std.testing.expectEqual(3.0, m.data[8]); +// try std.testing.expectEqual(0.0, m.data[1]); +// } +// +// test "TensorAlloc | Tensor strides_arr correctness" { +// const T1 = Tensor(f32, .{}, .{}, &.{3}); +// const T2 = Tensor(f32, .{}, .{}, &.{ 3, 4 }); +// const T3 = Tensor(f32, .{}, .{}, &.{ 2, 3, 4 }); +// +// try std.testing.expectEqual(1, T1.strides_arr[0]); +// try std.testing.expectEqual(4, T2.strides_arr[0]); +// try std.testing.expectEqual(1, T2.strides_arr[1]); +// try std.testing.expectEqual(12, T3.strides_arr[0]); +// try std.testing.expectEqual(4, T3.strides_arr[1]); +// try std.testing.expectEqual(1, T3.strides_arr[2]); +// } +// +// test "TensorAlloc | Slice 1D basic" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// +// const Vec = Tensor(i32, .{}, .{}, &.{5}); +// var v = try Vec.load(alloc, &.{ 10, 20, 30, 40, 50 }); +// defer v.deinit(alloc); +// +// const s = try v.slice(alloc, .{.{ .start = 1, .end = 4 }}); +// defer s.deinit(alloc); +// +// try std.testing.expectEqual(3, @TypeOf(s).total); +// try std.testing.expectEqual(20, s.data[0]); +// try std.testing.expectEqual(30, s.data[1]); 
+// try std.testing.expectEqual(40, s.data[2]); +// } +// +// test "TensorAlloc | Slice 1D full range" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// const Vec = Tensor(f32, .{}, .{}, &.{4}); +// const v = try Vec.load(alloc, &.{ 1.0, 2.0, 3.0, 4.0 }); +// defer v.deinit(alloc); +// +// const s = try v.slice(alloc, .{.{ .start = 0, .end = 4 }}); +// defer s.deinit(alloc); +// +// try std.testing.expectEqual(4, @TypeOf(s).total); +// inline for (0..4) |i| try std.testing.expectEqual(v.data[i], s.data[i]); +// } +// +// test "TensorAlloc | Slice 1D single element" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// const Vec = Tensor(i64, .{}, .{}, &.{6}); +// const v = try Vec.load(alloc, &.{ 5, 10, 15, 20, 25, 30 }); +// defer v.deinit(alloc); +// +// const s = try v.slice(alloc, .{.{ .start = 3, .end = 4 }}); +// defer s.deinit(alloc); +// +// try std.testing.expectEqual(1, @TypeOf(s).total); +// try std.testing.expectEqual(20, s.data[0]); +// } +// +// test "TensorAlloc | Slice 1D preserves dims and scales" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// const Meter = Tensor(i128, .{ .L = 1 }, .{ .L = .k }, &.{5}); +// const v = try Meter.load(alloc, &.{ 1, 2, 3, 4, 5 }); +// defer v.deinit(alloc); +// +// const s = try v.slice(alloc, .{.{ .start = 0, .end = 3 }}); +// defer s.deinit(alloc); +// +// const S = @TypeOf(s); +// try std.testing.expectEqual(1, S.dims.get(.L)); +// try std.testing.expectEqual(Meter.scales.get(.L), S.scales.get(.L)); +// } +// +// test "TensorAlloc | Slice 2D rows" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// const Mat = Tensor(i32, .{}, .{}, &.{ 4, 3 }); +// const m = try 
Mat.load(alloc, &.{ +// 1, 2, 3, +// 4, 5, 6, +// 7, 8, 9, +// 10, 11, 12, +// }); +// defer m.deinit(alloc); +// +// // rows [1,3), all cols +// const s = try m.slice(alloc, .{ .{ .start = 1, .end = 3 }, .{ .start = 0, .end = 3 } }); +// defer s.deinit(alloc); +// +// try std.testing.expectEqual(6, @TypeOf(s).total); +// try std.testing.expectEqual(4, s.data[0]); +// try std.testing.expectEqual(5, s.data[1]); +// try std.testing.expectEqual(6, s.data[2]); +// try std.testing.expectEqual(7, s.data[3]); +// try std.testing.expectEqual(8, s.data[4]); +// try std.testing.expectEqual(9, s.data[5]); +// } +// +// test "TensorAlloc | Slice 2D cols" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// const Mat = Tensor(i32, .{}, .{}, &.{ 3, 4 }); +// const m = try Mat.load(alloc, &.{ +// 1, 2, 3, 4, +// 5, 6, 7, 8, +// 9, 10, 11, 12, +// }); +// defer m.deinit(alloc); +// +// // all rows, cols [1,3) +// const s = try m.slice(alloc, .{ .{ .start = 0, .end = 3 }, .{ .start = 1, .end = 3 } }); +// defer s.deinit(alloc); +// +// const S = @TypeOf(s); +// try std.testing.expectEqual(3, S.shape[0]); +// try std.testing.expectEqual(2, S.shape[1]); +// try std.testing.expectEqual(2, s.data[0]); +// try std.testing.expectEqual(3, s.data[1]); +// try std.testing.expectEqual(6, s.data[2]); +// try std.testing.expectEqual(7, s.data[3]); +// try std.testing.expectEqual(10, s.data[4]); +// try std.testing.expectEqual(11, s.data[5]); +// } +// +// test "TensorAlloc | Slice 2D subblock" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// const Mat = Tensor(f64, .{}, .{}, &.{ 4, 4 }); +// const m = try Mat.load(alloc, &.{ +// 1, 2, 3, 4, +// 5, 6, 7, 8, +// 9, 10, 11, 12, +// 13, 14, 15, 16, +// }); +// defer m.deinit(alloc); +// +// // centre 2x2 +// const s = try m.slice(alloc, .{ .{ .start = 1, .end = 3 }, .{ .start 
= 1, .end = 3 } }); +// defer s.deinit(alloc); +// +// try std.testing.expectEqual(4, @TypeOf(s).total); +// try std.testing.expectApproxEqAbs(6.0, s.data[0], 1e-9); +// try std.testing.expectApproxEqAbs(7.0, s.data[1], 1e-9); +// try std.testing.expectApproxEqAbs(10.0, s.data[2], 1e-9); +// try std.testing.expectApproxEqAbs(11.0, s.data[3], 1e-9); +// } +// +// test "TensorAlloc | Slice then add" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// const Meter = Tensor(i32, .{ .L = 1 }, .{}, &.{5}); +// const a = try Meter.load(alloc, &.{ 1, 2, 3, 4, 5 }); +// defer a.deinit(alloc); +// const b = try Meter.load(alloc, &.{ 10, 20, 30, 40, 50 }); +// defer b.deinit(alloc); +// +// const sa = try a.slice(alloc, .{.{ .start = 0, .end = 3 }}); +// defer sa.deinit(alloc); +// const sb = try b.slice(alloc, .{.{ .start = 2, .end = 5 }}); +// defer sb.deinit(alloc); +// +// const r = try sa.add(alloc, sb); +// defer r.deinit(alloc); +// +// try std.testing.expectEqual(31, r.data[0]); // 1+30 +// try std.testing.expectEqual(42, r.data[1]); // 2+40 +// try std.testing.expectEqual(53, r.data[2]); // 3+50 +// } +// +// test "TensorAlloc | Slice then scale convert" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// const KiloMeter = Tensor(i64, .{ .L = 1 }, .{ .L = .k }, &.{4}); +// const Meter = Tensor(i64, .{ .L = 1 }, .{}, &.{2}); +// const v = try KiloMeter.load(alloc, &.{ 1, 2, 3, 4 }); +// defer v.deinit(alloc); +// +// const s = try v.slice(alloc, .{.{ .start = 1, .end = 3 }}); // {2, 3} km +// defer s.deinit(alloc); +// +// const converted = try s.to(alloc, Meter); +// defer converted.deinit(alloc); +// +// try std.testing.expectEqual(2000, converted.data[0]); +// try std.testing.expectEqual(3000, converted.data[1]); +// } +// +// test "TensorAlloc | Slice 1D negative start" { +// var arena = 
std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// const Vec = Tensor(i32, .{}, .{}, &.{5}); +// const v = try Vec.load(alloc, &.{ 10, 20, 30, 40, 50 }); +// defer v.deinit(alloc); +// +// const s = try v.slice(alloc, .{.{ .start = -3, .end = 5 }}); // [2,5) → 30,40,50 +// defer s.deinit(alloc); +// +// try std.testing.expectEqual(3, @TypeOf(s).total); +// try std.testing.expectEqual(30, s.data[0]); +// try std.testing.expectEqual(40, s.data[1]); +// try std.testing.expectEqual(50, s.data[2]); +// } +// +// test "TensorAlloc | Slice 1D negative end" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// const Vec = Tensor(i32, .{}, .{}, &.{5}); +// const v = try Vec.load(alloc, &.{ 10, 20, 30, 40, 50 }); +// defer v.deinit(alloc); +// +// const s = try v.slice(alloc, .{.{ .start = 1, .end = -1 }}); // [1,4) → 20,30,40 +// defer s.deinit(alloc); +// +// try std.testing.expectEqual(3, @TypeOf(s).total); +// try std.testing.expectEqual(20, s.data[0]); +// try std.testing.expectEqual(30, s.data[1]); +// try std.testing.expectEqual(40, s.data[2]); +// } +// +// test "TensorAlloc | Slice 1D both negative" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// const Vec = Tensor(i64, .{}, .{}, &.{6}); +// const v = try Vec.load(alloc, &.{ 5, 10, 15, 20, 25, 30 }); +// defer v.deinit(alloc); +// +// const s = try v.slice(alloc, .{.{ .start = -4, .end = -1 }}); // [2,5) → 15,20,25 +// defer s.deinit(alloc); +// +// try std.testing.expectEqual(3, @TypeOf(s).total); +// try std.testing.expectEqual(15, s.data[0]); +// try std.testing.expectEqual(20, s.data[1]); +// try std.testing.expectEqual(25, s.data[2]); +// } +// +// test "TensorAlloc | Slice 1D null start" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer 
arena.deinit(); +// const alloc = arena.allocator(); +// const Vec = Tensor(i32, .{}, .{}, &.{5}); +// const v = try Vec.load(alloc, &.{ 10, 20, 30, 40, 50 }); +// defer v.deinit(alloc); +// +// const s = try v.slice(alloc, .{.{ .end = -2 }}); // [:-2] → 10,20,30 +// defer s.deinit(alloc); +// +// try std.testing.expectEqual(3, @TypeOf(s).total); +// try std.testing.expectEqual(10, s.data[0]); +// try std.testing.expectEqual(20, s.data[1]); +// try std.testing.expectEqual(30, s.data[2]); +// } +// +// test "TensorAlloc | Slice 1D null end" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// const Vec = Tensor(i32, .{}, .{}, &.{5}); +// const v = try Vec.load(alloc, &.{ 10, 20, 30, 40, 50 }); +// defer v.deinit(alloc); +// +// const s = try v.slice(alloc, .{.{ .start = -3 }}); // [-3:] → 30,40,50 +// defer s.deinit(alloc); +// +// try std.testing.expectEqual(3, @TypeOf(s).total); +// try std.testing.expectEqual(30, s.data[0]); +// try std.testing.expectEqual(40, s.data[1]); +// try std.testing.expectEqual(50, s.data[2]); +// } +// +// test "TensorAlloc | Slice 2D negative & null indices" { +// var arena = std.heap.ArenaAllocator.init(std.testing.allocator); +// defer arena.deinit(); +// const alloc = arena.allocator(); +// const Mat = Tensor(i32, .{}, .{}, &.{ 4, 4 }); +// const m = try Mat.load(alloc, &.{ +// 1, 2, 3, 4, +// 5, 6, 7, 8, +// 9, 10, 11, 12, +// 13, 14, 15, 16, +// }); +// defer m.deinit(alloc); +// +// // last 2 rows, last 2 cols → same as subblock test [2,4)x[2,4) +// const s = try m.slice(alloc, .{ .{ .start = -2, .end = 4 }, .{ .start = -2 } }); +// defer s.deinit(alloc); +// +// try std.testing.expectEqual(4, @TypeOf(s).total); +// try std.testing.expectEqual(11, s.data[0]); +// try std.testing.expectEqual(12, s.data[1]); +// try std.testing.expectEqual(15, s.data[2]); +// try std.testing.expectEqual(16, s.data[3]); +// } diff --git a/src/lib.zig b/src/lib.zig 
index b380724..60ed1a5 100644 --- a/src/lib.zig +++ b/src/lib.zig @@ -2,6 +2,7 @@ const std = @import("std"); pub const TensorStatic = @import("TensorStatic.zig").Tensor; pub const TensorAlloc = @import("TensorAlloc.zig").Tensor; +pub const TensorGpu = @import("TensorGpu.zig").Tensor; pub const Dimensions = @import("Dimensions.zig"); pub const Scales = @import("Scales.zig"); pub const Base = @import("Base.zig"); diff --git a/src/test.zig b/src/test.zig index e6efb47..c6d7e44 100644 --- a/src/test.zig +++ b/src/test.zig @@ -1,6 +1,7 @@ test { _ = @import("TensorStatic.zig"); _ = @import("TensorAlloc.zig"); + _ = @import("TensorGpu.zig"); _ = @import("Dimensions.zig"); _ = @import("Scales.zig"); _ = @import("Base.zig");