diff --git a/src/Tensor.zig b/src/Tensor.zig
index 2c791f9..978ca78 100644
--- a/src/Tensor.zig
+++ b/src/Tensor.zig
@@ -11,12 +11,10 @@ pub fn Tensor(
     comptime s_opt: Scales.ArgOpts,
     comptime shape_: []const comptime_int,
 ) type {
-    comptime {
-        if (shape_.len == 0)
-            @compileError("Tensor shape must have at least 1 dimension (rank >= 1).");
-        for (shape_) |s|
-            if (s < 1) @compileError("Tensor shape dimensions must be strictly >= 1.");
-    }
+    if (shape_.len == 0)
+        @compileError("Tensor shape must have at least 1 dimension (rank >= 1).");
+    for (shape_) |s|
+        if (s < 1) @compileError("Tensor shape dimensions must be strictly >= 1.");
     @setEvalBranchQuota(100_000_000);
 
     const _total: usize = comptime sh.shapeTotal(shape_);
diff --git a/src/benchmark.zig b/src/benchmark.zig
index 9bd21b2..2d53818 100644
--- a/src/benchmark.zig
+++ b/src/benchmark.zig
@@ -10,23 +10,8 @@ pub fn main(init: std.process.Init) !void {
 
     io = init.io;
 
-    try vectorSIMDvsNative(f64, &stdout_writer.interface);
-    try stdout_writer.flush();
-    try vectorSIMDvsNative(f32, &stdout_writer.interface);
-    try stdout_writer.flush();
-    try vectorSIMDvsNative(i32, &stdout_writer.interface);
-    try stdout_writer.flush();
-    try vectorSIMDvsNative(i64, &stdout_writer.interface);
-    try stdout_writer.flush();
-    try vectorSIMDvsNative(i128, &stdout_writer.interface);
-    try stdout_writer.flush();
-
     try bench_Scalar(&stdout_writer.interface);
     try stdout_writer.flush();
-    try bench_vsNative(&stdout_writer.interface);
-    try stdout_writer.flush();
-    try bench_crossTypeVsNative(&stdout_writer.interface);
-    try stdout_writer.flush();
     try bench_Vector(&stdout_writer.interface);
     try stdout_writer.flush();
     try bench_HighDimTensor(&stdout_writer.interface);
@@ -169,245 +154,6 @@ fn bench_Scalar(writer: *std.Io.Writer) !void {
     try writer.print("└──────────────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┘\n", .{});
 }
 
-fn bench_vsNative(writer: *std.Io.Writer) !void {
-    const ITERS: usize = 100_000;
-    const SAMPLES: usize = 100;
-
-    const getValT = struct {
-        fn f(comptime TT: type, i: usize) TT {
-            const v = (i % 100) + 1;
-            return if (comptime @typeInfo(TT) == .float) @floatFromInt(v) else @intCast(v);
-        }
-    }.f;
-
-    const Types = .{ i32, i64, i128, f32, f64 };
-    const TNames = .{ "i32", "i64", "i128", "f32", "f64" };
-    // Expanded Ops to match bench_Scalar
-    const Ops = .{ "add", "sub", "mul", "div", "abs", "eq", "gt" };
-
-    try writer.print(
-        \\
-        \\ Scalar vs Native Overhead Analysis
-        \\
-        \\┌───────────┬──────┬───────────┬───────────┬───────────┬───────────────────────┐
-        \\│ Operation │ Type │ Native    │ @Vector   │ Tensor{{1}} │ Slowdown  Nat | Vec   │
-        \\├───────────┼──────┼───────────┼───────────┼───────────┼───────────────────────┤
-        \\
-    , .{});
-
-    inline for (Ops, 0..) |op_name, j| {
-        inline for (Types, 0..) |T, tidx| {
-            var native_total_ns: f64 = 0;
-            var vector_total_ns: f64 = 0;
-            var tensor_total_ns: f64 = 0;
-
-            const M = Tensor(T, .{}, .{}, &.{1});
-
-            for (0..SAMPLES) |_| {
-                // --- 1. Benchmark Native ---
-                const n_start = getTime();
-                const a = getValT(T, 10);
-                const b = getValT(T, 2);
-                for (0..ITERS) |_| {
-                    // Native logic branch
-                    _ = if (comptime std.mem.eql(u8, op_name, "add"))
-                        if (comptime @typeInfo(T) == .int) a +| b else a + b
-                    else if (comptime std.mem.eql(u8, op_name, "sub"))
-                        if (comptime @typeInfo(T) == .int) a -| b else a - b
-                    else if (comptime std.mem.eql(u8, op_name, "mul"))
-                        if (comptime @typeInfo(T) == .int) a *| b else a * b
-                    else if (comptime std.mem.eql(u8, op_name, "div"))
-                        if (comptime @typeInfo(T) == .int) @divTrunc(a, b) else a / b
-                    else if (comptime std.mem.eql(u8, op_name, "abs"))
-                        if (comptime @typeInfo(T) == .int) @abs(a) else @as(T, @abs(a))
-                    else if (comptime std.mem.eql(u8, op_name, "eq"))
-                        a == b
-                    else if (comptime std.mem.eql(u8, op_name, "gt"))
-                        a > b
-                    else
-                        unreachable;
-                }
-                const n_end = getTime();
-                native_total_ns += @as(f64, @floatFromInt(n_start.durationTo(n_end).toNanoseconds()));
-
-                const v_start = getTime();
-                const va = @Vector(1, T){getValT(T, 10)};
-                const vb = @Vector(1, T){getValT(T, 2)};
-                for (0..ITERS) |_| {
-                    // Native logic branch
-                    _ = if (comptime std.mem.eql(u8, op_name, "add"))
-                        if (comptime @typeInfo(T) == .int) va +| vb else va + vb
-                    else if (comptime std.mem.eql(u8, op_name, "sub"))
-                        if (comptime @typeInfo(T) == .int) va -| vb else va - vb
-                    else if (comptime std.mem.eql(u8, op_name, "mul"))
-                        if (comptime @typeInfo(T) == .int) va *| vb else va * vb
-                    else if (comptime std.mem.eql(u8, op_name, "div"))
-                        if (comptime @typeInfo(T) == .int) @divTrunc(va, vb) else va / vb
-                    else if (comptime std.mem.eql(u8, op_name, "abs"))
-                        if (comptime @typeInfo(T) == .int) @as(T, @intCast(@abs(va[0]))) else @abs(va)
-                    else if (comptime std.mem.eql(u8, op_name, "eq"))
-                        va == vb
-                    else if (comptime std.mem.eql(u8, op_name, "gt"))
-                        va > vb
-                    else
-                        unreachable;
-                }
-                const v_end = getTime();
-                vector_total_ns += @as(f64, @floatFromInt(v_start.durationTo(v_end).toNanoseconds()));
-
-                // --- 2. Benchmark Scalar ---
-                const q_start = getTime();
-                const qa = M.splat(getValT(T, 10));
-                const qb = M.splat(getValT(T, 2));
-                for (0..ITERS) |_| {
-                    // Scalar logic branch
-                    _ = if (comptime std.mem.eql(u8, op_name, "add"))
-                        qa.add(qb)
-                    else if (comptime std.mem.eql(u8, op_name, "sub"))
-                        qa.sub(qb)
-                    else if (comptime std.mem.eql(u8, op_name, "mul"))
-                        qa.mul(qb)
-                    else if (comptime std.mem.eql(u8, op_name, "div"))
-                        qa.div(qb)
-                    else if (comptime std.mem.eql(u8, op_name, "abs"))
-                        qa.abs()
-                    else if (comptime std.mem.eql(u8, op_name, "eq"))
-                        qa.eq(qb)
-                    else if (comptime std.mem.eql(u8, op_name, "gt"))
-                        qa.gt(qb)
-                    else
-                        unreachable;
-                }
-                const q_end = getTime();
-                tensor_total_ns += @as(f64, @floatFromInt(q_start.durationTo(q_end).toNanoseconds()));
-            }
-
-            const avg_n = (native_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
-            const avg_v = (vector_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
-            const avg_t = (tensor_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
-            const slowdown_nt = avg_t / avg_n;
-            const slowdown_vt = avg_t / avg_v;
-
-            try writer.print("│ {s:<9} │ {s:<4} │ {d:>7.2}ns │ {d:>7.2}ns │ {d:>7.2}ns │ {d:>8.2}x   {d:>8.2}x │\n", .{
-                op_name, TNames[tidx], avg_n, avg_v, avg_t, slowdown_nt, slowdown_vt,
-            });
-        }
-        if (j != Ops.len - 1) try writer.print("├───────────┼──────┼───────────┼───────────┼───────────┼───────────────────────┤\n", .{});
-    }
-
-    try writer.print("└───────────┴──────┴───────────┴───────────┴───────────┴───────────────────────┘\n", .{});
-}
-
-fn bench_crossTypeVsNative(writer: *std.Io.Writer) !void {
-    const ITERS: usize = 100_000;
-    const SAMPLES: usize = 5;
-
-    const getValT = struct {
-        fn f(comptime TT: type, i: usize) TT {
-            // Keep values safe and non-zero to avoid division by zero or overflows during cross-casting
-            const v = (i % 50) + 1;
-            return if (comptime @typeInfo(TT) == .float) @floatFromInt(v) else @intCast(v);
-        }
-    }.f;
-
-    // Helper for the Native baseline: explicitly casting T2 to T1 before the operation
-    const castTo = struct {
-        fn f(comptime DestT: type, comptime SrcT: type, val: SrcT) DestT {
-            if (comptime DestT == SrcT) return val;
-            const src_info = @typeInfo(SrcT);
-            const dest_info = @typeInfo(DestT);
-
-            if (dest_info == .int and src_info == .int) return @intCast(val);
-            if (dest_info == .float and src_info == .int) return @floatFromInt(val);
-            if (dest_info == .int and src_info == .float) return @intFromFloat(val);
-            if (dest_info == .float and src_info == .float) return @floatCast(val);
-            unreachable;
-        }
-    }.f;
-
-    const Types = .{ i16, i64, i128, f32, f64 };
-    const TNames = .{ "i16", "i64", "i128", "f32", "f64" };
-    const Ops = .{ "add", "mul", "div" };
-
-    try writer.print(
-        \\
-        \\ Cross-Type Overhead Analysis: Scalar vs Native
-        \\
-        \\┌─────────┬──────┬──────┬───────────┬───────────┬───────────┐
-        \\│ Op      │ T1   │ T2   │ Native    │ Scalar    │ Slowdown  │
-        \\├─────────┼──────┼──────┼───────────┼───────────┼───────────┤
-        \\
-    , .{});
-
-    inline for (Ops, 0..) |op_name, j| {
-        inline for (Types, 0..) |T1, t1_idx| {
-            inline for (Types, 0..) |T2, t2_idx| {
-                var native_total_ns: f64 = 0;
-                var quantity_total_ns: f64 = 0;
-
-                const M1 = Tensor(T1, .{ .L = 1 }, .{}, &.{1});
-                const M2 = Tensor(T2, .{ .L = 1 }, .{}, &.{1});
-                const S2 = Tensor(T2, .{ .T = 1 }, .{}, &.{1});
-
-                std.mem.doNotOptimizeAway({
-                    for (0..SAMPLES) |_| {
-                        // --- 1. Benchmark Native (Cast T2 to T1, then math) ---
-                        const n_start = getTime();
-                        for (0..ITERS) |i| {
-                            const a = getValT(T1, i);
-                            const b_raw = getValT(T2, 2);
-                            const b = castTo(T1, T2, b_raw);
-
-                            _ = if (comptime std.mem.eql(u8, op_name, "add"))
-                                a + b
-                            else if (comptime std.mem.eql(u8, op_name, "mul"))
-                                a * b
-                            else if (comptime @typeInfo(T1) == .int)
-                                @divTrunc(a, b)
-                            else
-                                a / b;
-                        }
-                        const n_end = getTime();
-                        native_total_ns += @as(f64, @floatFromInt(n_start.durationTo(n_end).toNanoseconds()));
-
-                        // --- 2. Benchmark Scalar ---
-                        const q_start = getTime();
-                        for (0..ITERS) |i| {
-                            const qa = M1.splat(getValT(T1, i));
-                            const qb = if (comptime std.mem.eql(u8, op_name, "div"))
-                                S2.splat(getValT(T2, 2))
-                            else
-                                M2.splat(getValT(T2, 2));
-
-                            _ = if (comptime std.mem.eql(u8, op_name, "add"))
-                                qa.add(qb)
-                            else if (comptime std.mem.eql(u8, op_name, "mul"))
-                                qa.mul(qb)
-                            else
-                                qa.div(qb);
-                        }
-                        const q_end = getTime();
-                        quantity_total_ns += @as(f64, @floatFromInt(q_start.durationTo(q_end).toNanoseconds()));
-                    }
-
-                    const avg_n = (native_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
-                    const avg_q = (quantity_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
-                    const slowdown = avg_q / avg_n;
-
-                    try writer.print("│ {s:<7} │ {s:<4} │ {s:<4} │ {d:>7.2}ns │ {d:>7.2}ns │ {d:>8.2}x │\n", .{
-                        op_name, TNames[t1_idx], TNames[t2_idx], avg_n, avg_q, slowdown,
-                    });
-                });
-            }
-        }
-        if (j != Ops.len - 1) {
-            try writer.print("├─────────┼──────┼──────┼───────────┼───────────┼───────────┤\n", .{});
-        }
-    }
-
-    try writer.print("└─────────┴──────┴──────┴───────────┴───────────┴───────────┘\n", .{});
-}
-
 fn bench_Vector(writer: *std.Io.Writer) !void {
     const ITERS: usize = 10_000;
     const SAMPLES: usize = 10;
@@ -446,7 +192,7 @@ fn bench_Vector(writer: *std.Io.Writer) !void {
     const TNames = .{ "i32", "i64", "i128", "f32", "f64" };
     const Lengths = .{ 1, 3, 4, 16, 100 };
-    // "cross" is only valid for len=3; other cells will show "  ---  "
+    // Operations benchmarked for every element type listed in Types.
-    const Ops = .{ "add", "div", "mulScalar", "dot", "cross", "product", "pow", "length" };
+    const Ops = .{ "add", "div", "mulScalar", "dot", "product", "pow", "length" };
 
     inline for (Ops, 0..) |op_name, o_idx| {
         inline for (Types, TNames) |T, tname| {
@@ -482,10 +228,6 @@ fn bench_Vector(writer: *std.Io.Writer) !void {
                             } else if (comptime std.mem.eql(u8, op_name, "dot")) {
                                 const v2 = V.splat(getVal(T, i +% 5, 63));
                                 _ = v1.contract(v2, 0, 0);
-                            } else if (comptime std.mem.eql(u8, op_name, "cross")) {
-                                // len == 3 guaranteed by the guard above
-                                const v2 = V.splat(getVal(T, i +% 5, 63));
-                                _ = v1.cross(v2);
                             } else if (comptime std.mem.eql(u8, op_name, "product")) {
                                 _ = v1.product();
                             } else if (comptime std.mem.eql(u8, op_name, "pow")) {
@@ -608,62 +350,3 @@ fn bench_HighDimTensor(writer: *std.Io.Writer) !void {
     }
     try writer.print("└─────────────────┴──────┴──────────────┴──────────────┴──────────────┴──────────────┘\n", .{});
 }
-
-fn vectorSIMDvsNative(comptime T: type, writer: *std.Io.Writer) !void {
-    const iterations: u64 = 10_000;
-    const lens = [_]u32{ 1, 2, 3, 4, 5, 10, 100, 1_000, 10_000 };
-
-    try writer.print("\nSIMD Speedup Analysis: {s}\n", .{@typeName(T)});
-    try writer.print("┌────────────┬────────────┬────────────┬────────────┐\n", .{});
-    try writer.print("│ Vector Len │ Scalar (us)│ Vector (us)│ Speedup    │\n", .{});
-    try writer.print("├────────────┼────────────┼────────────┼────────────┤\n", .{});
-
-    inline for (lens) |vector_len| {
-        // --- Scalar Test ---
-        var scalar_val: T = 10;
-        const start_scalar = getTime();
-
-        var i: u64 = 0;
-        while (i < iterations * vector_len) : (i += 1) {
-            if (comptime @typeInfo(T) == .int)
-                scalar_val = scalar_val +% 1
-            else
-                scalar_val = scalar_val + 1;
-        }
-        const scalar_time = start_scalar.durationTo(getTime()).toMicroseconds();
-
-        // --- Vector Test ---
-        var vector_val: @Vector(vector_len, T) = @splat(20);
-        const start_vector = getTime();
-
-        i = 0;
-        const increment: @Vector(vector_len, T) = @splat(1);
-        while (i < iterations) : (i += 1) {
-            if (comptime @typeInfo(T) == .int)
-                vector_val = vector_val +% increment
-            else
-                vector_val = vector_val + increment;
-        }
-        const vector_time = start_vector.durationTo(getTime()).toMicroseconds();
-
-        // --- Results ---
-        const s_float = @as(f64, @floatFromInt(scalar_time));
-        const v_float = @as(f64, @floatFromInt(vector_time));
-
-        // Speedup = ScalarTime / VectorTime.
-        // > 1.0 means SIMD is faster.
-        const speedup = if (vector_time > 0) s_float / v_float else 0;
-
-        try writer.print("│ {d:<10} │ {d:>10} │ {d:>10} │ {d:>9.2}x │\n", .{
-            vector_len,
-            scalar_time,
-            vector_time,
-            speedup,
-        });
-        try writer.flush();
-
-        std.mem.doNotOptimizeAway(scalar_val);
-        std.mem.doNotOptimizeAway(vector_val);
-    }
-    try writer.print("└────────────┴────────────┴────────────┴────────────┘\n", .{});
-}