Compare commits
44 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
22ffd4fc64 | ||
|
|
9ac3d4d699 | ||
|
|
2215a5d86d | ||
|
|
9deb25b825 | ||
|
|
ceff8ff1bd | ||
|
|
ff21f0ac8b | ||
|
|
5ac9968021 | ||
|
|
8028cf41a5 | ||
|
|
0ef19e18de | ||
|
|
7494595db4 | ||
|
|
91c5c41fc5 | ||
|
|
ba671ee486 | ||
|
|
09d6ca1ff5 | ||
|
|
bcd888d59e | ||
|
|
63e9b6b63d | ||
|
|
957f75243f | ||
|
|
5f833a5e58 | ||
|
|
00e0f5ab73 | ||
|
|
f67e9d709d | ||
|
|
e6d0f62929 | ||
|
|
f702c1e09a | ||
|
|
b959f5f28a | ||
|
|
6ba1e664c1 | ||
|
|
6559ed9f62 | ||
|
|
55906e0ab7 | ||
|
|
7d28de2028 | ||
|
|
eb3b0d4de3 | ||
|
|
5bdc78c065 | ||
|
|
18830c8b45 | ||
|
|
4595397e70 | ||
|
|
7844aacfce | ||
|
|
9b6cd4b377 | ||
|
|
4d275dca2d | ||
|
|
9635cfb481 | ||
|
|
f0029449f0 | ||
|
|
8816a65518 | ||
|
|
26ff02c50f | ||
|
|
bb6dd59b9a | ||
|
|
acb908a448 | ||
|
|
d4a1054fdc | ||
|
|
fb2a6f4806 | ||
|
|
8c77ab86f8 | ||
|
|
8565869919 | ||
|
|
1a69b3dbf2 |
@ -1,34 +0,0 @@
|
||||
name: Deploy MkDocs to Garage
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main # Adjust to your branch name
|
||||
|
||||
jobs:
|
||||
build-and-deploy:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.x'
|
||||
|
||||
- name: Install dependencies
|
||||
run: pip install mkdocs-material
|
||||
|
||||
- name: Build
|
||||
run: mkdocs build
|
||||
|
||||
- name: Sync to Garage S3
|
||||
uses: https://github.com/jakejarvis/s3-sync-action@master
|
||||
with:
|
||||
args: --endpoint-url https://s3.garage.bouvais.lu --acl public-read --delete
|
||||
env:
|
||||
AWS_S3_BUCKET: 'zig-dimal.bouvais.lu'
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.GARAGE_ACCESS_KEY }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.GARAGE_SECRET_KEY }}
|
||||
AWS_REGION: 'garage'
|
||||
SOURCE_DIR: 'site' # MkDocs defaults to 'site' folder for output
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,2 +1,4 @@
|
||||
zig-out
|
||||
.zig-cache
|
||||
mkdocs.yaml
|
||||
zig-pkg
|
||||
|
||||
16
build.zig
16
build.zig
@ -2,22 +2,29 @@ const std = @import("std");
|
||||
|
||||
pub fn build(b: *std.Build) void {
|
||||
const target = b.standardTargetOptions(.{});
|
||||
const optimize = b.standardOptimizeOption(.{ .preferred_optimize_mode = .ReleaseFast });
|
||||
const optimize = b.standardOptimizeOption(.{});
|
||||
|
||||
const zig_wgpu = b.dependency("zig_wgpu", .{
|
||||
.target = target,
|
||||
.optimize = optimize,
|
||||
});
|
||||
|
||||
// 1. Define the module so other projects can import it
|
||||
_ = b.addModule("dimal", .{
|
||||
.root_source_file = b.path("src/main.zig"),
|
||||
const mod = b.addModule("dimal", .{
|
||||
.root_source_file = b.path("src/lib.zig"),
|
||||
});
|
||||
mod.addImport("gpu", zig_wgpu.module("zig-wgpu"));
|
||||
|
||||
const exe_tests = b.addTest(.{
|
||||
.root_module = b.createModule(.{
|
||||
.root_source_file = b.path("src/main.zig"),
|
||||
.root_source_file = b.path("src/test.zig"),
|
||||
.target = target,
|
||||
.optimize = optimize,
|
||||
}),
|
||||
.test_runner = .{ .path = b.path("test_runner.zig"), .mode = .simple },
|
||||
});
|
||||
|
||||
exe_tests.root_module.addImport("gpu", zig_wgpu.module("zig-wgpu"));
|
||||
const run_exe_tests = b.addRunArtifact(exe_tests);
|
||||
const test_step = b.step("test", "Run tests");
|
||||
test_step.dependOn(&run_exe_tests.step);
|
||||
@ -30,6 +37,7 @@ pub fn build(b: *std.Build) void {
|
||||
.imports = &.{},
|
||||
}),
|
||||
});
|
||||
bench_exe.root_module.addImport("gpu", zig_wgpu.module("zig-wgpu"));
|
||||
|
||||
b.installArtifact(bench_exe);
|
||||
|
||||
|
||||
@ -1,9 +1,14 @@
|
||||
.{
|
||||
.name = .dimal,
|
||||
.version = "0.1.0",
|
||||
.version = "0.3.0",
|
||||
.fingerprint = 0x9453b1ff1e52d858,
|
||||
.minimum_zig_version = "0.16.0",
|
||||
.dependencies = .{},
|
||||
.dependencies = .{
|
||||
.zig_wgpu = .{
|
||||
.url = "git+https://git.bouvais.lu/adrien/zig-wgpu?ref=0.2.2#5f8da0940d77c40eacd39c268d09acbeaea0b2a5",
|
||||
.hash = "zig_wgpu-0.2.0-xsLAy2-s0QPNwR2QNd8ZX2kWiVfV5oB92N3ga1V1Uwpu",
|
||||
},
|
||||
},
|
||||
.paths = .{
|
||||
"build.zig",
|
||||
"build.zig.zon",
|
||||
|
||||
@ -1,11 +0,0 @@
|
||||
- Changed Quantity to Tensor that can use any shape and is a single @Vector.
|
||||
Point being to add WebGPU easily from this.
|
||||
Scalars suffer in performance though; I will work on that.
|
||||
|
||||
Maybe I can build a Jupyter-like web interface with cells to do dimensional analysis.
|
||||
I could:
|
||||
- Use cells with a toy language
|
||||
- A nice debugger to display current variables with dimensions, type and value
|
||||
- Real-time errors (try to compile on each change and display the error on the cell)
|
||||
- Integrate a small graphics API that uses a Raylib canvas
|
||||
- Could generate templates at comptime =o
|
||||
253
docs/index.md
253
docs/index.md
@ -1,253 +0,0 @@
|
||||
# dimal — Dimensional Analysis for Zig
|
||||
|
||||
A dimensional analysis library for Zig with a unified `Tensor` API for scalars, vectors, matrices, and higher-dimensional data. All dimension and unit tracking happens at compile time—zero runtime overhead—and all operations use SIMD intrinsics.
|
||||
|
||||
If you try to add meters to seconds, it won't compile. That's the point.
|
||||
|
||||
> **Source:** [git.bouvais.lu/adrien/zig-dimal](https://git.bouvais.lu/adrien/zig-dimal)
|
||||
> **Minimum Zig version:** `0.16.0`
|
||||
|
||||
---
|
||||
|
||||
## Background
|
||||
|
||||
Started because I needed `i128` positions for a space simulation to avoid floating-point precision loss far from the origin. Grew into a type system for tracking physical dimensions at compile time. It's been useful enough to share.
|
||||
|
||||
- **Compile-time dimension checking** — catch unit mismatches before runtime.
|
||||
- **Unified `Tensor` API** — same interface for scalars, vectors, matrices, and higher-rank tensors.
|
||||
- **SIMD operations** — vector and matrix code automatically uses SIMD instructions.
|
||||
- **Zero runtime cost** — all dimension and scale tracking is erased at compile time.
|
||||
- **Supports `i128`** — useful for high-precision fixed-point integer math.
|
||||
|
||||
---
|
||||
|
||||
## Features
|
||||
|
||||
- **Compile-time dimension checking** — all physical-unit tracking happens at compile time.
|
||||
- **Automatic unit conversion** — use `.to()` to convert between compatible units (e.g. `km/h` → `m/s`). Scale factors are resolved at comptime.
|
||||
- **Unified `Tensor` API** — one type for scalars `{1}`, vectors `{N}`, matrices `{M, N}`, and higher-rank tensors.
|
||||
- **SIMD operations** — vector and matrix code compiles to SIMD instructions automatically.
|
||||
- **Tensor contraction** — `.contract(other, axis_a, axis_b)` for dot products, matrix multiplication, and general tensor contractions.
|
||||
- **Full SI prefix support** — `pico` through `peta`, plus Imperial units and time scales.
|
||||
- **Physical constants** — Planck, Boltzmann, speed of light, gravitational constant, etc.
|
||||
- **Pre-built quantities** — `Velocity`, `Acceleration`, `Force`, `Energy`, `Pressure`, `Charge`, and more.
|
||||
- **Basic vector operations** — cross product, length/magnitude, element-wise arithmetic.
|
||||
- **Formatting** — values print with units: `9.81m.s⁻²`, `0.172km`.
|
||||
|
||||
### Current Limitations
|
||||
|
||||
- GPU support not implemented.
|
||||
- Performance on small tensors is limited by Zig's vector width.
|
||||
|
||||
---
|
||||
|
||||
## The 7 SI Base Dimensions
|
||||
|
||||
| Symbol | Dimension | SI Unit |
|
||||
|--------|----------------------|---------|
|
||||
| `L` | Length | `m` |
|
||||
| `M` | Mass | `g` |
|
||||
| `T` | Time | `s` |
|
||||
| `I` | Electric Current | `A` |
|
||||
| `Tp` | Temperature | `K` |
|
||||
| `N` | Amount of Substance | `mol` |
|
||||
| `J` | Luminous Intensity | `cd` |
|
||||
|
||||
---
|
||||
|
||||
## Installation
|
||||
|
||||
### 1. Add the dependency (Zig 0.16+)
|
||||
|
||||
```sh
|
||||
zig fetch --save git+https://git.bouvais.lu/adrien/zig-dimal#0.2.0
|
||||
```
|
||||
|
||||
### 2. Wire it up in `build.zig`
|
||||
|
||||
```zig
|
||||
const std = @import("std");
|
||||
|
||||
pub fn build(b: *std.Build) void {
|
||||
const target = b.standardTargetOptions(.{});
|
||||
const optimize = b.standardOptimizeOption(.{});
|
||||
|
||||
const dimal = b.dependency("dimal", .{
|
||||
.target = target,
|
||||
.optimize = optimize,
|
||||
}).module("dimal");
|
||||
|
||||
const exe = b.addExecutable(.{
|
||||
.name = "my_app",
|
||||
.root_source_file = b.path("src/main.zig"),
|
||||
.target = target,
|
||||
.optimize = optimize,
|
||||
});
|
||||
exe.root_module.addImport("dimal", dimal);
|
||||
b.installArtifact(exe);
|
||||
}
|
||||
```
|
||||
|
||||
### 3. Import and use
|
||||
|
||||
```zig
|
||||
const dma = @import("dimal");
|
||||
const Tensor = dma.Tensor;
|
||||
const Base = dma.Base;
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Quick Example: Lunar Descent
|
||||
|
||||
Simulate a spacecraft descending to the Moon with correct physics and type safety:
|
||||
|
||||
```zig
|
||||
const std = @import("std");
|
||||
const dma = @import("dimal");
|
||||
const Tensor = dma.Tensor;
|
||||
|
||||
pub fn main() void {
|
||||
// Define types: m/s² acceleration, m/s velocity, m distance
|
||||
const Acceleration = dma.Base.Acceleration.Of(f64);
|
||||
const Velocity = dma.Base.Velocity.Of(f64);
|
||||
const Distance = dma.Base.Meter.Of(f64);
|
||||
const Time = dma.Base.Second.Of(f64);
|
||||
|
||||
// Initial conditions
|
||||
const g_moon: Acceleration = .{ .data = @splat(1.62) };
|
||||
const v_initial: Velocity = .{ .data = @splat(100.0) };
|
||||
const h_initial: Distance = .{ .data = @splat(10000.0) };
|
||||
const dt: Time = .{ .data = @splat(1.0) };
|
||||
|
||||
var h = h_initial;
|
||||
var v = v_initial;
|
||||
var t: f64 = 0;
|
||||
|
||||
// Simulate descent
|
||||
while (h.data[0] > 0 and t < 1000) : (t += 1.0) {
|
||||
// a = -g (gravity pulls down)
|
||||
const a = g_moon.mul(-1.0);
|
||||
|
||||
// Update: v = v₀ + at
|
||||
v = v.add(a.mul(dt));
|
||||
|
||||
// Update: h = h₀ + vt
|
||||
h = h.add(v.mul(dt));
|
||||
|
||||
if (@mod(t, 100.0) == 0) {
|
||||
std.debug.print("t={d:.0}s | h={d:.1} | v={d:.1}\n", .{
|
||||
t,
|
||||
h,
|
||||
v,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
std.debug.print("Landed in {d:.1}s at h={d:.1}\n", .{ t, h });
|
||||
}
|
||||
```
|
||||
|
||||
**Output:**
|
||||
```
|
||||
t=0s | h=10000m | v=100m.s⁻¹
|
||||
t=100s | h=8019m | v=-61.8m.s⁻¹
|
||||
t=200s | h=4174.4m | v=-223.6m.s⁻¹
|
||||
...
|
||||
Landed in 323.5s at h=-0.01m
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## API Overview
|
||||
|
||||
### Tensors
|
||||
|
||||
A **`Tensor`** is parameterized by:
|
||||
- **`T`** — numeric type: `f32`, `f64`, `i128`, etc.
|
||||
- **`dims`** — physical dimensions (struct literal): `.{.L = 1, .T = -1}` means length/time (velocity).
|
||||
- **`scales`** — SI prefixes or custom scales: `.{.L = .k, .T = .hour}` means km/h.
|
||||
- **`shape`** — array shape: `&.{1}` is a scalar, `&.{3}` is a 3-vector, `&.{3, 3}` is a 3×3 matrix.
|
||||
|
||||
```zig
|
||||
// Scalar: 1-element tensor
|
||||
const Meter = Tensor(f64, .{.L = 1}, .{}, &.{1});
|
||||
const m = Meter{ .data = @splat(5.0) };
|
||||
|
||||
// Vector: N-element tensor (SIMD)
|
||||
const Vec3Meter = Tensor(f64, .{.L = 1}, .{}, &.{3});
|
||||
const v = Vec3Meter{ .data = @shuffle(f64, [_]f64{1, 2, 3}, [_]f64 undefined, [_]i32{0, 1, 2, 0, 0, 0}) };
|
||||
|
||||
// Matrix: M×N tensor (SIMD-accelerated)
|
||||
const Mat3x3Velocity = Tensor(f32, .{.L = 1, .T = -1}, .{}, &.{3, 3});
|
||||
const m_vel = Mat3x3Velocity{ .data = @splat(10.0) };
|
||||
|
||||
// Higher-rank tensor
|
||||
const Rank4 = Tensor(f64, .{.M = 1}, .{}, &.{2, 3, 4, 5});
|
||||
```
|
||||
|
||||
### Common Operations
|
||||
|
||||
| Operation | Description |
|
||||
|-----------|-------------|
|
||||
| `.add(rhs)` | Element-wise addition. Auto-converts scales. |
|
||||
| `.sub(rhs)` | Element-wise subtraction. |
|
||||
| `.mul(rhs)` | Multiply; dimensions are summed. `rhs` can be a tensor or bare number. |
|
||||
| `.div(rhs)` | Divide; dimensions are subtracted. |
|
||||
| `.contract(other, axis_a, axis_b)` | Tensor contraction: dot product, matrix multiply, or general N-D contraction. |
|
||||
| `.cross(rhs)` | Cross product (3-vectors only). Returns a 3-vector. |
|
||||
| `.length()` / `.lengthSqr()` | Euclidean length (or squared length) of a vector. Returns a scalar `T`. |
|
||||
| `.product()` | Multiply all elements. Returns a scalar with combined dimensions. |
|
||||
| `.abs()` | Element-wise absolute value. Dimensions unchanged. |
|
||||
| `.pow(exp)` | Raise to comptime exponent. Dimension exponents multiplied by `exp`. |
|
||||
| `.sqrt()` | Element-wise square root. Compile error if any dimension exponent is odd. |
|
||||
| `.to(DestType)` | Convert to another unit of the same dimension. Comptime error on mismatch. |
|
||||
| `.eq(rhs)` / `.ne(rhs)` | Element-wise equality/inequality. |
|
||||
| `.gt(rhs)` / `.gte(rhs)` | Greater-than comparisons. |
|
||||
| `.lt(rhs)` / `.lte(rhs)` | Less-than comparisons. |
|
||||
|
||||
### Pre-built Types (via `dma.Base`)
|
||||
|
||||
Use `.Of(T)` for base units, `.Scaled(T, scales)` for custom scales:
|
||||
|
||||
```zig
|
||||
const Velocity = dma.Base.Velocity.Of(f64);
|
||||
const Kmh = dma.Base.Velocity.Scaled(f64, .{.L = .k, .T = .hour});
|
||||
const Force = dma.Base.Force.Of(f32);
|
||||
const Energy = dma.Base.Energy.Of(f64);
|
||||
```
|
||||
|
||||
Also available: `Acceleration`, `Inertia`, `Pressure`, `Power`, `Area`, `Volume`, `Density`, `Frequency`, `Viscosity`, `Charge`, `Potential`, `Resistance`, `MagneticFlux`, `ThermalCapacity`, `ThermalConductivity`, and many more.
|
||||
|
||||
---
|
||||
|
||||
## SIMD Performance
|
||||
|
||||
Operations on vectors and matrices use Zig's `@Vector` intrinsics, which compile to SIMD instructions on most platforms. This makes vector operations faster than equivalent scalar loops, but don't expect miracles—SIMD is still limited by memory bandwidth and CPU cache.
|
||||
|
||||
Run the included benchmarks to see what you get on your hardware:
|
||||
```sh
|
||||
zig build benchmark
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- **GPU support** — eventually, for large tensor operations. WebGPU is a target.
|
||||
- **Toy physics language** — I've been sketching ideas for a language optimized for numerical physics (tentatively called Éclat). It would use dimal as the foundation. No timeline yet; this is a long-term experiment.
|
||||
|
||||
---
|
||||
|
||||
## Testing & Benchmarks
|
||||
|
||||
```sh
|
||||
zig build test # Run all unit tests
|
||||
zig build benchmark # Run performance benchmarks
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
See the repository for license details.
|
||||
@ -1,3 +0,0 @@
|
||||
## GPU support with WebGPU
|
||||
|
||||
Example: https://github.com/seyhajin/webgpu-wasm-zig
|
||||
48
mkdocs.yml
48
mkdocs.yml
@ -1,48 +0,0 @@
|
||||
site_name: Bouvais Docs
|
||||
site_url: https://zig-dimal.bouvais.lu
|
||||
site_description: A minimal technical documentation site.
|
||||
site_author: Adrien Bouvais
|
||||
|
||||
theme:
|
||||
name: material
|
||||
language: en
|
||||
# Color palette with auto light/dark mode
|
||||
palette:
|
||||
- media: "(prefers-color-scheme: light)"
|
||||
scheme: default
|
||||
primary: indigo
|
||||
accent: indigo
|
||||
toggle:
|
||||
icon: material/brightness-7
|
||||
name: Switch to dark mode
|
||||
- media: "(prefers-color-scheme: dark)"
|
||||
scheme: slate
|
||||
primary: indigo
|
||||
accent: indigo
|
||||
toggle:
|
||||
icon: material/brightness-4
|
||||
name: Switch to light mode
|
||||
|
||||
features:
|
||||
- navigation.sections
|
||||
- navigation.top
|
||||
- content.code.copy
|
||||
- content.code.annotate
|
||||
|
||||
# Minimal plugins
|
||||
plugins:
|
||||
- search
|
||||
|
||||
# Your single page
|
||||
nav:
|
||||
- Home: index.md
|
||||
|
||||
# Extensions to make your markdown look better
|
||||
markdown_extensions:
|
||||
- admonition
|
||||
- pymdownx.details
|
||||
- pymdownx.superfences
|
||||
- pymdownx.highlight:
|
||||
anchor_linenums: true
|
||||
- pymdownx.inlinehilite
|
||||
- attr_list
|
||||
@ -3,7 +3,7 @@ const std = @import("std");
|
||||
// Adjust these imports to match your actual file names
|
||||
const Dimensions = @import("Dimensions.zig");
|
||||
const Scales = @import("Scales.zig");
|
||||
const Tensor = @import("Tensor.zig").Tensor;
|
||||
const Tensor = @import("TensorStatic.zig").Tensor;
|
||||
|
||||
fn PhysicalConstant(comptime d: Dimensions.ArgOpts, comptime val: f64, comptime s: Scales.ArgOpts) type {
|
||||
return struct {
|
||||
@ -60,7 +60,7 @@ pub const Constants = struct {
|
||||
/// Newtonian constant of gravitation (G) [m³⋅kg⁻¹⋅s⁻²]
|
||||
pub const Gravitational = PhysicalConstant(.{ .M = -1, .L = 3, .T = -2 }, 6.67430e-11, .{ .M = .k });
|
||||
|
||||
/// Stefan–Boltzmann constant (σ) [W⋅m⁻²⋅K⁻⁴ = kg⋅s⁻³⋅K⁻⁴]
|
||||
/// Stefan-Boltzmann constant (σ) [W⋅m⁻²⋅K⁻⁴ = kg⋅s⁻³⋅K⁻⁴]
|
||||
pub const StefanBoltzmann = PhysicalConstant(.{ .M = 1, .T = -3, .Tp = -4 }, 5.670374419e-8, .{ .M = .k });
|
||||
|
||||
/// Elementary charge (e) [C = A⋅s]
|
||||
@ -81,7 +81,7 @@ pub const Constants = struct {
|
||||
/// Neutron mass (m_n) [kg]
|
||||
pub const NeutronMass = PhysicalConstant(.{ .M = 1 }, 1.67492750056e-27, .{ .M = .k });
|
||||
|
||||
/// Fine-structure constant (α) [Dimensionless]
|
||||
/// Fine-structure constant (α) [Dimensionless]
|
||||
pub const FineStructure = PhysicalConstant(.{}, 0.0072973525643, .{});
|
||||
|
||||
/// Avogadro constant (N_A) [mol⁻¹]
|
||||
|
||||
@ -1,13 +1,13 @@
|
||||
const std = @import("std");
|
||||
|
||||
pub const ArgOpts = struct {
|
||||
L: comptime_int = 0,
|
||||
M: comptime_int = 0,
|
||||
T: comptime_int = 0,
|
||||
I: comptime_int = 0,
|
||||
Tp: comptime_int = 0,
|
||||
N: comptime_int = 0,
|
||||
J: comptime_int = 0,
|
||||
L: isize = 0,
|
||||
M: isize = 0,
|
||||
T: isize = 0,
|
||||
I: isize = 0,
|
||||
Tp: isize = 0,
|
||||
N: isize = 0,
|
||||
J: isize = 0,
|
||||
};
|
||||
|
||||
pub const Dimension = enum {
|
||||
|
||||
@ -59,7 +59,8 @@ pub const UnitScale = enum(isize) {
|
||||
var buf: [16]u8 = undefined;
|
||||
return switch (self) {
|
||||
.none => "",
|
||||
.P, .T, .G, .M, .k, .h, .da, .d, .c, .m, .u, .n, .p, .f, .min, .hour, .year, .inch, .ft, .yd, .mi, .oz, .lb, .st => @tagName(self),
|
||||
.P, .T, .G, .M, .k, .h, .da, .d, .c, .m, .u, .n, .p, .f, .min, .year, .inch, .ft, .yd, .mi, .oz, .lb, .st => @tagName(self),
|
||||
.hour => "h",
|
||||
else => std.fmt.bufPrint(&buf, "[{d}]", .{@intFromEnum(self)}) catch "[]", // This cannot be inline because of non exhaustive enum, but that's ok, it is just str, not calculation
|
||||
};
|
||||
}
|
||||
|
||||
1745
src/TensorAlloc.zig
Normal file
1745
src/TensorAlloc.zig
Normal file
File diff suppressed because it is too large
Load Diff
1775
src/TensorGpu.zig
Normal file
1775
src/TensorGpu.zig
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
145
src/UnitParser.zig
Normal file
145
src/UnitParser.zig
Normal file
@ -0,0 +1,145 @@
|
||||
const std = @import("std");
|
||||
const Dimensions = @import("Dimensions.zig");
|
||||
const Scales = @import("Scales.zig");
|
||||
|
||||
/// A container returning the separated arguments needed to construct a Tensor.
|
||||
pub const ParsedUnit = struct {
|
||||
dims: Dimensions.ArgOpts = .{},
|
||||
scales: Scales.ArgOpts = .{},
|
||||
};
|
||||
|
||||
pub const UnitParseError = error{
|
||||
UnknownBaseUnit,
|
||||
UnknownPrefix,
|
||||
InvalidExponent,
|
||||
EmptyStr,
|
||||
};
|
||||
|
||||
/// Parses strings like "km/s^2", "m", "kg*m/s^2", "1/min".
|
||||
/// Evaluates entirely at comptime.
|
||||
pub fn parseUnit(str: []const u8) !ParsedUnit {
|
||||
if (str.len == 0) return UnitParseError.EmptyStr;
|
||||
|
||||
var parsed: ParsedUnit = .{ .dims = .{}, .scales = .{} };
|
||||
|
||||
// We need to track if we are after a '/' to flip exponents to negative
|
||||
var is_denominator = false;
|
||||
|
||||
// Manual iteration to handle '/' properly
|
||||
var cursor: usize = 0;
|
||||
while (cursor < str.len) {
|
||||
// Find the next segment
|
||||
const segment_start = cursor;
|
||||
while (cursor < str.len and str[cursor] != '/' and str[cursor] != '.' and str[cursor] != '*') : (cursor += 1) {}
|
||||
const segment = str[segment_start..cursor];
|
||||
|
||||
if (segment.len > 0) {
|
||||
try parseSegment(segment, &parsed, is_denominator);
|
||||
}
|
||||
|
||||
if (cursor < str.len) {
|
||||
if (str[cursor] == '/') {
|
||||
is_denominator = true;
|
||||
}
|
||||
cursor += 1; // skip the separator
|
||||
}
|
||||
}
|
||||
|
||||
return parsed;
|
||||
}
|
||||
|
||||
fn parseSegment(segment: []const u8, parsed: *ParsedUnit, is_denominator: bool) !void {
|
||||
var scale: Scales.UnitScale = .none;
|
||||
var found_scale = false;
|
||||
var active_dim: ?Dimensions.Dimension = null;
|
||||
|
||||
// 1. Try to find a Scale + Dimension pair (e.g., "mm", "km")
|
||||
inline for (std.enums.values(Scales.UnitScale)) |sca| {
|
||||
const s_str = sca.str();
|
||||
if (s_str.len > 0 and std.mem.startsWith(u8, segment, s_str)) {
|
||||
// Check if it's a "Unit-as-Scale" (hour, min) or a prefix (k, m, c)
|
||||
switch (sca) {
|
||||
.hour, .min, .year => {
|
||||
// These are dimensions themselves (Time)
|
||||
if (segment.len == s_str.len or (segment.len > s_str.len and (segment[s_str.len] == '^' or (segment[s_str.len] >= '0' and segment[s_str.len] <= '9')))) {
|
||||
scale = sca;
|
||||
active_dim = .T;
|
||||
found_scale = true;
|
||||
}
|
||||
},
|
||||
else => {
|
||||
// Standard prefixes: Must be followed by a valid dimension unit
|
||||
inline for (std.enums.values(Dimensions.Dimension)) |dim| {
|
||||
if (std.mem.startsWith(u8, segment[s_str.len..], dim.unit())) {
|
||||
scale = sca;
|
||||
active_dim = dim;
|
||||
found_scale = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
if (found_scale) break;
|
||||
}
|
||||
|
||||
// 2. If no scale prefix was found, try identifying as a pure Dimension (e.g., "m", "s")
|
||||
if (!found_scale) {
|
||||
inline for (std.enums.values(Dimensions.Dimension)) |dim| {
|
||||
if (std.mem.startsWith(u8, segment, dim.unit())) {
|
||||
active_dim = dim;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const dimen = active_dim orelse return UnitParseError.UnknownBaseUnit;
|
||||
|
||||
// 3. Determine where the exponent starts
|
||||
// If it was a Time Scale (like 'h'), the exponent starts after 'h'
|
||||
// If it was a Prefix + Dim (like 'km'), it starts after 'km'
|
||||
const unit_part_len = if (found_scale)
|
||||
(if (scale == .hour or scale == .min or scale == .year) scale.str().len else scale.str().len + dimen.unit().len)
|
||||
else
|
||||
dimen.unit().len;
|
||||
|
||||
const expo_str = segment[unit_part_len..];
|
||||
|
||||
// 4. Parse Exponent
|
||||
var expo: i32 = 1;
|
||||
if (expo_str.len > 0) {
|
||||
const cleaned_expo = if (expo_str[0] == '^') expo_str[1..] else expo_str;
|
||||
expo = std.fmt.parseInt(i32, cleaned_expo, 10) catch return UnitParseError.InvalidExponent;
|
||||
}
|
||||
|
||||
if (is_denominator) expo *= -1;
|
||||
|
||||
// 5. Assign to struct
|
||||
inline for (std.meta.fields(Dimensions.ArgOpts)) |f| {
|
||||
if (std.mem.eql(u8, f.name, @tagName(dimen))) {
|
||||
@field(parsed.dims, f.name) += expo;
|
||||
@field(parsed.scales, f.name) = scale;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline fn testParser(
|
||||
comptime str: []const u8,
|
||||
comptime expected_dims: Dimensions.ArgOpts,
|
||||
comptime expected_scales: Scales.ArgOpts,
|
||||
) !void {
|
||||
const unit = comptime try parseUnit(str);
|
||||
if (comptime !Dimensions.init(expected_dims).eql(Dimensions.init(unit.dims))) return error.WrongDims;
|
||||
if (comptime !Scales.init(expected_scales).eql(Scales.init(unit.scales))) return error.WrongScales;
|
||||
}
|
||||
|
||||
test "parseUnit" {
|
||||
@setEvalBranchQuota(10000);
|
||||
try testParser("m", .{ .L = 1 }, .{});
|
||||
try testParser("s", .{ .T = 1 }, .{});
|
||||
try testParser("mm", .{ .L = 1 }, .{ .L = .m });
|
||||
try testParser("m/s", .{ .L = 1, .T = -1 }, .{});
|
||||
try testParser("m1/s2/kg", .{ .L = 1, .T = -2, .M = -1 }, .{ .M = .k });
|
||||
try testParser("km/h", .{ .L = 1, .T = -1 }, .{ .L = .k, .T = .hour });
|
||||
try testParser("m.s^-1", .{ .L = 1, .T = -1 }, .{});
|
||||
}
|
||||
@ -10,22 +10,7 @@ pub fn main(init: std.process.Init) !void {
|
||||
|
||||
io = init.io;
|
||||
|
||||
// try vectorSIMDvsNative(f64, &stdout_writer.interface);
|
||||
// try stdout_writer.flush();
|
||||
// try vectorSIMDvsNative(f32, &stdout_writer.interface);
|
||||
// try stdout_writer.flush();
|
||||
// try vectorSIMDvsNative(i32, &stdout_writer.interface);
|
||||
// try stdout_writer.flush();
|
||||
// try vectorSIMDvsNative(i64, &stdout_writer.interface);
|
||||
// try stdout_writer.flush();
|
||||
// try vectorSIMDvsNative(i128, &stdout_writer.interface);
|
||||
// try stdout_writer.flush();
|
||||
//
|
||||
// try bench_Scalar(&stdout_writer.interface);
|
||||
// try stdout_writer.flush();
|
||||
try bench_vsNative(&stdout_writer.interface);
|
||||
try stdout_writer.flush();
|
||||
// try bench_crossTypeVsNative(&stdout_writer.interface);
|
||||
try bench_Scalar(&stdout_writer.interface);
|
||||
try stdout_writer.flush();
|
||||
try bench_Vector(&stdout_writer.interface);
|
||||
try stdout_writer.flush();
|
||||
@ -128,7 +113,7 @@ fn bench_Scalar(writer: *std.Io.Writer) !void {
|
||||
else if (comptime std.mem.eql(u8, op_name, "gt"))
|
||||
(M.splat(getVal(T, i, 63))).gt(M.splat(getVal(T, i +% 3, 63)))
|
||||
else
|
||||
(M.splat(getVal(T, i, 63))).mul(3);
|
||||
(M.splat(getVal(T, i, 63))).mul(M.splat(3));
|
||||
},
|
||||
);
|
||||
}
|
||||
@ -169,247 +154,6 @@ fn bench_Scalar(writer: *std.Io.Writer) !void {
|
||||
try writer.print("└──────────────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┘\n", .{});
|
||||
}
|
||||
|
||||
fn bench_vsNative(writer: *std.Io.Writer) !void {
|
||||
const ITERS: usize = 100_000;
|
||||
const SAMPLES: usize = 100;
|
||||
|
||||
const getValT = struct {
|
||||
fn f(comptime TT: type, i: usize) TT {
|
||||
const v = (i % 100) + 1;
|
||||
return if (comptime @typeInfo(TT) == .float) @floatFromInt(v) else @intCast(v);
|
||||
}
|
||||
}.f;
|
||||
|
||||
const Types = .{ f64, i64, i128, f32, f64 };
|
||||
const TNames = .{ "f64", "i64", "i128", "f32", "f64" };
|
||||
// Expanded Ops to match bench_Scalar
|
||||
const Ops = .{ "add", "sub", "mul", "div", "abs", "eq", "gt" };
|
||||
|
||||
try writer.print(
|
||||
\\
|
||||
\\ Scalar vs Native Overhead Analysis
|
||||
\\
|
||||
\\┌───────────┬──────┬───────────┬───────────┬───────────┬───────────────────────┐
|
||||
\\│ Operation │ Type │ Native │ @Vector │ Tensor{{1}} │ Slowdown Nat | Vec │
|
||||
\\├───────────┼──────┼───────────┼───────────┼───────────┼───────────────────────┤
|
||||
\\
|
||||
, .{});
|
||||
|
||||
inline for (Ops, 0..) |op_name, j| {
|
||||
inline for (Types, 0..) |T, tidx| {
|
||||
var native_total_ns: f64 = 0;
|
||||
var vector_total_ns: f64 = 0;
|
||||
var tensor_total_ns: f64 = 0;
|
||||
|
||||
const M = Tensor(T, .{}, .{}, &.{1});
|
||||
|
||||
std.mem.doNotOptimizeAway({
|
||||
for (0..SAMPLES) |_| {
|
||||
// --- 1. Benchmark Native ---
|
||||
const n_start = getTime();
|
||||
const a = getValT(T, 10);
|
||||
const b = getValT(T, 2);
|
||||
for (0..ITERS) |_| {
|
||||
// Native logic branch
|
||||
_ = if (comptime std.mem.eql(u8, op_name, "add"))
|
||||
if (comptime @typeInfo(T) == .int) a +| b else a + b
|
||||
else if (comptime std.mem.eql(u8, op_name, "sub"))
|
||||
if (comptime @typeInfo(T) == .int) a -| b else a - b
|
||||
else if (comptime std.mem.eql(u8, op_name, "mul"))
|
||||
if (comptime @typeInfo(T) == .int) a *| b else a * b
|
||||
else if (comptime std.mem.eql(u8, op_name, "div"))
|
||||
if (comptime @typeInfo(T) == .int) @divTrunc(a, b) else a / b
|
||||
else if (comptime std.mem.eql(u8, op_name, "abs"))
|
||||
if (comptime @typeInfo(T) == .int) @abs(a) else @as(T, @abs(a))
|
||||
else if (comptime std.mem.eql(u8, op_name, "eq"))
|
||||
a == b
|
||||
else if (comptime std.mem.eql(u8, op_name, "gt"))
|
||||
a > b
|
||||
else
|
||||
unreachable;
|
||||
}
|
||||
const n_end = getTime();
|
||||
native_total_ns += @as(f64, @floatFromInt(n_start.durationTo(n_end).toNanoseconds()));
|
||||
|
||||
const v_start = getTime();
|
||||
const va = getValT(T, 10);
|
||||
const vb = getValT(T, 2);
|
||||
for (0..ITERS) |_| {
|
||||
// Native logic branch
|
||||
_ = if (comptime std.mem.eql(u8, op_name, "add"))
|
||||
if (comptime @typeInfo(T) == .int) va +| vb else va + vb
|
||||
else if (comptime std.mem.eql(u8, op_name, "sub"))
|
||||
if (comptime @typeInfo(T) == .int) va -| vb else va - vb
|
||||
else if (comptime std.mem.eql(u8, op_name, "mul"))
|
||||
if (comptime @typeInfo(T) == .int) va *| vb else va * vb
|
||||
else if (comptime std.mem.eql(u8, op_name, "div"))
|
||||
if (comptime @typeInfo(T) == .int) @divTrunc(va, vb) else va / vb
|
||||
else if (comptime std.mem.eql(u8, op_name, "abs"))
|
||||
if (comptime @typeInfo(T) == .int) @abs(va) else @as(T, @abs(va))
|
||||
else if (comptime std.mem.eql(u8, op_name, "eq"))
|
||||
va == vb
|
||||
else if (comptime std.mem.eql(u8, op_name, "gt"))
|
||||
va > vb
|
||||
else
|
||||
unreachable;
|
||||
}
|
||||
const v_end = getTime();
|
||||
vector_total_ns += @as(f64, @floatFromInt(v_start.durationTo(v_end).toNanoseconds()));
|
||||
|
||||
// --- 2. Benchmark Scalar ---
|
||||
const q_start = getTime();
|
||||
const qa = M.splat(getValT(T, 10));
|
||||
const qb = M.splat(getValT(T, 2));
|
||||
for (0..ITERS) |_| {
|
||||
// Scalar logic branch
|
||||
_ = if (comptime std.mem.eql(u8, op_name, "add"))
|
||||
qa.add(qb)
|
||||
else if (comptime std.mem.eql(u8, op_name, "sub"))
|
||||
qa.sub(qb)
|
||||
else if (comptime std.mem.eql(u8, op_name, "mul"))
|
||||
qa.mul(qb)
|
||||
else if (comptime std.mem.eql(u8, op_name, "div"))
|
||||
qa.div(qb)
|
||||
else if (comptime std.mem.eql(u8, op_name, "abs"))
|
||||
qa.abs()
|
||||
else if (comptime std.mem.eql(u8, op_name, "eq"))
|
||||
qa.eq(qb)
|
||||
else if (comptime std.mem.eql(u8, op_name, "gt"))
|
||||
qa.gt(qb)
|
||||
else
|
||||
unreachable;
|
||||
}
|
||||
const q_end = getTime();
|
||||
tensor_total_ns += @as(f64, @floatFromInt(q_start.durationTo(q_end).toNanoseconds()));
|
||||
}
|
||||
});
|
||||
|
||||
const avg_n = (native_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
|
||||
const avg_v = (vector_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
|
||||
const avg_t = (tensor_total_ns / SAMPLES) / @as(f64, @floatFromInt(ITERS));
|
||||
const slowdown_nt = avg_t / avg_n;
|
||||
const slowdown_vt = avg_t / avg_v;
|
||||
|
||||
try writer.print("│ {s:<9} │ {s:<4} │ {d:>7.2}ns │ {d:>7.2}ns │ {d:>7.2}ns │ {d:>8.2}x {d:>8.2}x │\n", .{
|
||||
op_name, TNames[tidx], avg_n, avg_v, avg_t, slowdown_nt, slowdown_vt,
|
||||
});
|
||||
}
|
||||
if (j != Ops.len - 1) try writer.print("├───────────┼──────┼───────────┼───────────┼───────────┼───────────────────────┤\n", .{});
|
||||
}
|
||||
|
||||
try writer.print("└───────────┴──────┴───────────┴───────────┴───────────┴───────────────────────┘\n", .{});
|
||||
}
|
||||
|
||||
/// Benchmarks mixed-element-type arithmetic on single-element `Tensor` values
/// against the equivalent native arithmetic, and prints one table row per
/// (op, T1, T2) combination to `writer`.
///
/// For each op in {add, mul, div} and each ordered pair of the listed element
/// types, two loops of `iters` iterations are timed over `samples` rounds:
///   1. native: the T2 operand is cast to T1, then the plain operator is applied;
///   2. tensor: the operands are wrapped with `splat` and the tensor method is called.
/// The reported figures are average nanoseconds per iteration and the ratio
/// tensor / native.
///
/// Errors: whatever `writer` returns.
fn bench_crossTypeVsNative(writer: *std.Io.Writer) !void {
    const iters: usize = 100_000;
    const samples: usize = 5;
    const iters_f = @as(f64, @floatFromInt(iters));

    // Produces a small non-zero operand in 1..=50 so that neither division by
    // zero nor overflow can occur once values are cast between element types.
    const makeOperand = struct {
        fn make(comptime V: type, seed: usize) V {
            const small = (seed % 50) + 1;
            if (comptime @typeInfo(V) == .float) return @floatFromInt(small);
            return @intCast(small);
        }
    }.make;

    // Native baseline helper: explicit Src -> Dest numeric conversion.
    const convert = struct {
        fn to(comptime Dest: type, comptime Src: type, value: Src) Dest {
            if (comptime Dest == Src) return value;
            return switch (@typeInfo(Dest)) {
                .int => switch (@typeInfo(Src)) {
                    .int => @intCast(value),
                    .float => @intFromFloat(value),
                    else => unreachable,
                },
                .float => switch (@typeInfo(Src)) {
                    .int => @floatFromInt(value),
                    .float => @floatCast(value),
                    else => unreachable,
                },
                else => unreachable,
            };
        }
    }.to;

    const elem_types = .{ i16, i64, i128, f32, f64 };
    const elem_names = .{ "i16", "i64", "i128", "f32", "f64" };
    const op_names = .{ "add", "mul", "div" };

    try writer.writeAll(
        \\
        \\ Cross-Type Overhead Analysis: Scalar vs Native
        \\
        \\┌─────────┬──────┬──────┬───────────┬───────────┬───────────┐
        \\│ Op │ T1 │ T2 │ Native │ Scalar │ Slowdown │
        \\├─────────┼──────┼──────┼───────────┼───────────┼───────────┤
        \\
    );

    inline for (op_names, 0..) |op, op_idx| {
        // Resolve the op once per unrolled iteration; all three are comptime-known.
        const is_add = comptime std.mem.eql(u8, op, "add");
        const is_mul = comptime std.mem.eql(u8, op, "mul");
        const is_div = comptime std.mem.eql(u8, op, "div");

        inline for (elem_types, elem_names) |Lhs, lhs_name| {
            const lhs_is_int = comptime @typeInfo(Lhs) == .int;

            inline for (elem_types, elem_names) |Rhs, rhs_name| {
                const LhsLength = Tensor(Lhs, .{ .L = 1 }, .{}, &.{1});
                const RhsLength = Tensor(Rhs, .{ .L = 1 }, .{}, &.{1});
                // Division uses a right-hand operand with a different dimension (T instead of L).
                const RhsTime = Tensor(Rhs, .{ .T = 1 }, .{}, &.{1});

                var native_ns: f64 = 0;
                var tensor_ns: f64 = 0;

                std.mem.doNotOptimizeAway({
                    for (0..samples) |_| {
                        // --- 1. Native baseline: cast Rhs to Lhs, then operate ---
                        const native_begin = getTime();
                        for (0..iters) |k| {
                            const x = makeOperand(Lhs, k);
                            const y = convert(Lhs, Rhs, makeOperand(Rhs, 2));

                            _ = if (is_add)
                                x + y
                            else if (is_mul)
                                x * y
                            else if (lhs_is_int)
                                @divTrunc(x, y)
                            else
                                x / y;
                        }
                        const native_finish = getTime();
                        native_ns += @as(f64, @floatFromInt(native_begin.durationTo(native_finish).toNanoseconds()));

                        // --- 2. Tensor path ---
                        const tensor_begin = getTime();
                        for (0..iters) |k| {
                            const lhs = LhsLength.splat(makeOperand(Lhs, k));
                            const rhs = if (is_div)
                                RhsTime.splat(makeOperand(Rhs, 2))
                            else
                                RhsLength.splat(makeOperand(Rhs, 2));

                            _ = if (is_add)
                                lhs.add(rhs)
                            else if (is_mul)
                                lhs.mul(rhs)
                            else
                                lhs.div(rhs);
                        }
                        const tensor_finish = getTime();
                        tensor_ns += @as(f64, @floatFromInt(tensor_begin.durationTo(tensor_finish).toNanoseconds()));
                    }
                });

                // Average nanoseconds per single operation.
                const native_avg = (native_ns / samples) / iters_f;
                const tensor_avg = (tensor_ns / samples) / iters_f;
                const ratio = tensor_avg / native_avg;

                try writer.print("│ {s:<7} │ {s:<4} │ {s:<4} │ {d:>7.2}ns │ {d:>7.2}ns │ {d:>8.2}x │\n", .{
                    op, lhs_name, rhs_name, native_avg, tensor_avg, ratio,
                });
            }
        }

        // Separator between op groups, but not after the last one.
        if (op_idx != op_names.len - 1) {
            try writer.writeAll("├─────────┼──────┼──────┼───────────┼───────────┼───────────┤\n");
        }
    }

    try writer.writeAll("└─────────┴──────┴──────┴───────────┴───────────┴───────────┘\n");
}
|
||||
|
||||
fn bench_Vector(writer: *std.Io.Writer) !void {
|
||||
const ITERS: usize = 10_000;
|
||||
const SAMPLES: usize = 10;
|
||||
@ -448,7 +192,7 @@ fn bench_Vector(writer: *std.Io.Writer) !void {
|
||||
const TNames = .{ "i32", "i64", "i128", "f32", "f64" };
|
||||
const Lengths = .{ 1, 3, 4, 16, 100 };
|
||||
// "cross" is only valid for len=3; other cells will show " --- "
|
||||
const Ops = .{ "add", "div", "mulScalar", "dot", "cross", "product", "pow", "length" };
|
||||
const Ops = .{ "add", "div", "mulScalar", "dot", "product", "pow", "length" };
|
||||
|
||||
inline for (Ops, 0..) |op_name, o_idx| {
|
||||
inline for (Types, TNames) |T, tname| {
|
||||
@ -484,10 +228,6 @@ fn bench_Vector(writer: *std.Io.Writer) !void {
|
||||
} else if (comptime std.mem.eql(u8, op_name, "dot")) {
|
||||
const v2 = V.splat(getVal(T, i +% 5, 63));
|
||||
_ = v1.contract(v2, 0, 0);
|
||||
} else if (comptime std.mem.eql(u8, op_name, "cross")) {
|
||||
// len == 3 guaranteed by the guard above
|
||||
const v2 = V.splat(getVal(T, i +% 5, 63));
|
||||
_ = v1.cross(v2);
|
||||
} else if (comptime std.mem.eql(u8, op_name, "product")) {
|
||||
_ = v1.product();
|
||||
} else if (comptime std.mem.eql(u8, op_name, "pow")) {
|
||||
@ -610,62 +350,3 @@ fn bench_HighDimTensor(writer: *std.Io.Writer) !void {
|
||||
}
|
||||
try writer.print("└─────────────────┴──────┴──────────────┴──────────────┴──────────────┴──────────────┘\n", .{});
|
||||
}
|
||||
|
||||
/// Compares repeated scalar increments against `@Vector` increments for
/// element type `T` and prints a table of timings to `writer`.
///
/// For each lane count the scalar loop performs `rounds * lanes` increments
/// of a single value, while the vector loop performs `rounds` increments of a
/// `lanes`-wide vector. Integer types use wrapping addition. Times are
/// reported in whole microseconds; speedup is scalar time / vector time
/// (printed as 0 when the vector time rounds down to zero).
///
/// Errors: whatever `writer` returns.
fn vectorSIMDvsNative(comptime T: type, writer: *std.Io.Writer) !void {
    const rounds: u64 = 10_000;
    const lane_counts = [_]u32{ 1, 2, 3, 4, 5, 10, 100, 1_000, 10_000 };
    const wrapping = comptime @typeInfo(T) == .int;

    try writer.print("\nSIMD Speedup Analysis: {s}\n", .{@typeName(T)});
    try writer.writeAll("┌────────────┬────────────┬────────────┬────────────┐\n");
    try writer.writeAll("│ Vector Len │ Scalar (us)│ Vector (us)│ Speedup │\n");
    try writer.writeAll("├────────────┼────────────┼────────────┼────────────┤\n");

    inline for (lane_counts) |lanes| {
        const Lanes = @Vector(lanes, T);

        // --- Scalar pass ---
        var acc: T = 10;
        const scalar_begin = getTime();

        var step: u64 = 0;
        while (step < rounds * lanes) : (step += 1) {
            acc = if (wrapping) acc +% 1 else acc + 1;
        }
        const scalar_us = scalar_begin.durationTo(getTime()).toMicroseconds();

        // --- Vector pass ---
        var lanes_acc: Lanes = @splat(20);
        const vector_begin = getTime();

        step = 0;
        const ones: Lanes = @splat(1);
        while (step < rounds) : (step += 1) {
            lanes_acc = if (wrapping) lanes_acc +% ones else lanes_acc + ones;
        }
        const vector_us = vector_begin.durationTo(getTime()).toMicroseconds();

        // --- Report ---
        // Speedup > 1.0 means the vector loop was faster.
        const speedup = if (vector_us > 0)
            @as(f64, @floatFromInt(scalar_us)) / @as(f64, @floatFromInt(vector_us))
        else
            0;

        try writer.print("│ {d:<10} │ {d:>10} │ {d:>10} │ {d:>9.2}x │\n", .{
            lanes,
            scalar_us,
            vector_us,
            speedup,
        });
        try writer.flush();

        // Keep both accumulators observable.
        std.mem.doNotOptimizeAway(acc);
        std.mem.doNotOptimizeAway(lanes_acc);
    }

    try writer.writeAll("└────────────┴────────────┴────────────┴────────────┘\n");
}
|
||||
|
||||
9
src/lib.zig
Normal file
9
src/lib.zig
Normal file
@ -0,0 +1,9 @@
|
||||
const std = @import("std");
|
||||
|
||||
pub const TensorStatic = @import("TensorStatic.zig").Tensor;
|
||||
pub const TensorAlloc = @import("TensorAlloc.zig").Tensor;
|
||||
pub const TensorGpu = @import("TensorGpu.zig").Tensor;
|
||||
pub const Dimensions = @import("Dimensions.zig");
|
||||
pub const Scales = @import("Scales.zig");
|
||||
pub const Base = @import("Base.zig");
|
||||
pub const UnitParser = @import("UnitParser.zig");
|
||||
13
src/main.zig
13
src/main.zig
@ -1,13 +0,0 @@
|
||||
const std = @import("std");
|
||||
|
||||
pub const Tensor = @import("Tensor.zig").Tensor;
|
||||
pub const Dimensions = @import("Dimensions.zig");
|
||||
pub const Scales = @import("Scales.zig");
|
||||
pub const Base = @import("Base.zig");
|
||||
|
||||
test {
|
||||
_ = @import("Tensor.zig");
|
||||
_ = @import("Dimensions.zig");
|
||||
_ = @import("Scales.zig");
|
||||
_ = @import("Base.zig");
|
||||
}
|
||||
155
src/shared.zig
Normal file
155
src/shared.zig
Normal file
@ -0,0 +1,155 @@
|
||||
const std = @import("std");
|
||||
const Scales = @import("Scales.zig");
|
||||
const UnitScale = Scales.UnitScale;
|
||||
const Dimensions = @import("Dimensions.zig");
|
||||
const Dimension = Dimensions.Dimension;
|
||||
|
||||
/// Tag with three variants: `static`, `alloc`, and `gpu`.
/// NOTE(review): presumably identifies which tensor implementation
/// (TensorStatic / TensorAlloc / TensorGpu) a type belongs to — confirm against its users.
pub const TensorKind = enum { static, alloc, gpu };
|
||||
|
||||
/// Reports whether `T` is a struct type that declares `ISTENSOR`.
/// Any non-struct type yields `false` without inspecting declarations.
pub fn isTensor(comptime T: type) bool {
    return comptime switch (@typeInfo(T)) {
        .@"struct" => @hasDecl(T, "ISTENSOR"),
        else => false,
    };
}
|
||||
|
||||
/// Returns the number of elements described by `shape`: the product of all
/// its extents. An empty shape yields 1.
///
/// `shape` holds `comptime_int` values and must therefore be comptime-known;
/// the product is folded at compile time and then coerced to `usize`, so a
/// negative product is a compile error.
pub fn shapeTotal(comptime shape: []const comptime_int) usize {
    // Fold inside an explicit comptime block so the function can be called
    // from runtime code without the caller writing `comptime`.
    const total = comptime blk: {
        var product: comptime_int = 1;
        for (shape) |extent| product *= extent;
        break :blk product;
    };
    return total;
}
|
||||
|
||||
/// Reports whether two shapes are strictly identical: same rank and the same
/// extent on every axis.
///
/// Both shapes hold `comptime_int` values and must be comptime-known; the
/// comparison is evaluated entirely at compile time.
pub fn shapeEql(comptime a: []const comptime_int, comptime b: []const comptime_int) bool {
    return comptime blk: {
        if (a.len != b.len) break :blk false;
        for (a, b) |lhs, rhs| {
            if (lhs != rhs) break :blk false;
        }
        break :blk true;
    };
}
|
||||
|
||||
/// Row-major (C-order) strides: `strides[i] = product(shape[i+1..])`.
///   e.g. shape {3, 4}    → strides {4, 1}
///        shape {2, 3, 4} → strides {12, 4, 1}
/// An empty shape yields an empty array.
///
/// `shape` must be comptime-known; the result is an array of `comptime_int`.
pub fn shapeStrides(comptime shape: []const comptime_int) [shape.len]comptime_int {
    var strides: [shape.len]comptime_int = undefined;
    // Walk from the innermost axis outwards, carrying the running product of
    // the extents already visited.
    var running: comptime_int = 1;
    var axis: usize = shape.len;
    while (axis > 0) {
        axis -= 1;
        strides[axis] = running;
        running *= shape[axis];
    }
    return strides;
}
|
||||
|
||||
/// Returns a copy of `shape` with the element at `axis` removed.
///
/// Both arguments must be comptime-known. `axis` must satisfy
/// `0 <= axis < shape.len`; anything else is rejected with a compile error.
pub fn shapeRemoveAxis(comptime shape: []const comptime_int, comptime axis: comptime_int) [shape.len - 1]comptime_int {
    if (axis < 0 or axis >= shape.len) {
        @compileError("shapeRemoveAxis: axis is out of range for the given shape");
    }
    var kept: [shape.len - 1]comptime_int = undefined;
    // Extents before `axis` keep their position; extents after it shift down by one.
    for (shape[0..axis], 0..) |extent, at| kept[at] = extent;
    for (shape[axis + 1 ..], axis..) |extent, at| kept[at] = extent;
    return kept;
}
|
||||
|
||||
/// Concatenates two compile-time shapes: the result holds every extent of `a`
/// followed by every extent of `b`.
///
/// Both arguments must be comptime-known.
pub fn shapeCat(comptime a: []const comptime_int, comptime b: []const comptime_int) [a.len + b.len]comptime_int {
    var joined: [a.len + b.len]comptime_int = undefined;
    for (a, 0..) |extent, at| joined[at] = extent;
    for (b, a.len..) |extent, at| joined[at] = extent;
    return joined;
}
|
||||
|
||||
/// Decodes a flat row-major index into N-D coordinates.
///
/// `strd` holds the stride of each of the `n` axes, outermost first. For each
/// axis the coordinate is the quotient of the remaining index by that stride
/// and the remainder is carried to the next axis. A stride of 0 yields
/// coordinate 0 and resets the remainder to 0.
///
/// All arguments must be comptime-known; the result is a `[n]usize`.
pub fn decodeFlatCoords(comptime flat: comptime_int, comptime n: comptime_int, comptime strd: [n]comptime_int) [n]usize {
    return comptime blk: {
        // Built directly as `usize`: an array of `comptime_int` does not
        // coerce element-wise to the declared `[n]usize` return type.
        var coords: [n]usize = undefined;
        var rest = flat;
        for (strd, 0..) |stride, axis| {
            if (stride == 0) {
                coords[axis] = 0;
                rest = 0;
            } else {
                coords[axis] = @divTrunc(rest, stride);
                rest = @rem(rest, stride);
            }
        }
        break :blk coords;
    };
}
|
||||
|
||||
/// Encodes N-D coordinates into a flat row-major index: the sum of
/// `coords[i] * strd[i]` over the first `n` axes.
///
/// `n` must be comptime-known because it sizes the `strd` array type.
/// `coords` must hold at least `n` entries.
pub fn encodeFlatCoords(coords: []const usize, comptime n: usize, strd: [n]usize) usize {
    var flat: usize = 0;
    for (coords[0..n], strd) |coord, stride| flat += coord * stride;
    return flat;
}
|
||||
|
||||
/// Builds an `n`-element coordinate array that has `val` at position `axis`
/// and the entries of `free`, in order, at every other position.
/// `free` is expected to supply one entry for each position other than `axis`.
pub fn insertAxis(
    comptime n: usize,
    comptime axis: usize,
    comptime val: usize,
    comptime free: []const usize,
) [n]usize {
    var result: [n]usize = undefined;
    var next_free: usize = 0;
    for (&result, 0..) |*slot, pos| {
        if (pos != axis) {
            slot.* = free[next_free];
            next_free += 1;
            continue;
        }
        slot.* = val;
    }
    return result;
}
|
||||
|
||||
/// Reports whether `T` is an integer type, counting `comptime_int` as one.
pub inline fn isInt(comptime T: type) bool {
    return switch (@typeInfo(T)) {
        .int, .comptime_int => true,
        else => false,
    };
}
|
||||
|
||||
/// Combines the `scales` of two types into one `Scales` value, choosing a
/// scale per dimension from `T1.dims` / `T2.dims` and `T1.scales` / `T2.scales`:
///   - dimension exponent is 0 in both → `.none`;
///   - exponent is 0 in only one      → the other type's scale;
///   - otherwise                      → the scale whose `getFactor()` is not larger
///                                      (ties keep `T1`'s scale).
pub fn finerScales(comptime T1: type, comptime T2: type) Scales {
    const lhs_dims: Dimensions = T1.dims;
    const rhs_dims: Dimensions = T2.dims;
    const lhs_scales: Scales = T1.scales;
    const rhs_scales: Scales = T2.scales;

    comptime var merged = Scales.initFill(.none);
    for (std.enums.values(Dimension)) |dim| {
        const lhs_used = comptime lhs_dims.get(dim) != 0;
        const rhs_used = comptime rhs_dims.get(dim) != 0;
        const lhs_scale = comptime lhs_scales.get(dim);
        const rhs_scale = comptime rhs_scales.get(dim);

        if (!lhs_used and !rhs_used) {
            merged.set(dim, .none);
        } else if (!lhs_used) {
            merged.set(dim, rhs_scale);
        } else if (!rhs_used) {
            merged.set(dim, lhs_scale);
        } else if (comptime lhs_scale.getFactor() > rhs_scale.getFactor()) {
            merged.set(dim, rhs_scale);
        } else {
            merged.set(dim, lhs_scale);
        }
    }
    return merged;
}
|
||||
|
||||
/// Writes `n` to `writer` as Unicode superscript characters.
///
/// Nothing is written when `n` is 0. A negative value is prefixed with
/// U+207B (superscript minus), followed by the superscript form of each
/// decimal digit of its magnitude. Every `i32` value is supported, including
/// the minimum.
///
/// Errors: whatever `writer.writeAll` returns.
pub fn printSuperscript(writer: *std.Io.Writer, n: i32) !void {
    if (n == 0) return;

    // Superscript glyphs indexed by decimal digit value.
    const superscript_digits = [10][]const u8{
        "\u{2070}", "\u{00B9}", "\u{00B2}", "\u{00B3}", "\u{2074}",
        "\u{2075}", "\u{2076}", "\u{2077}", "\u{2078}", "\u{2079}",
    };

    if (n < 0) try writer.writeAll("\u{207B}");

    // `@abs` yields a u32, so negating the minimum i32 cannot overflow.
    var remaining: u32 = @abs(n);

    // Collect digits least-significant first; a u32 has at most 10 decimal digits.
    var reversed: [10]u8 = undefined;
    var count: usize = 0;
    while (remaining != 0) : (remaining /= 10) {
        reversed[count] = @intCast(remaining % 10);
        count += 1;
    }

    // Emit most-significant digit first.
    while (count > 0) {
        count -= 1;
        try writer.writeAll(superscript_digits[reversed[count]]);
    }
}
|
||||
9
src/test.zig
Normal file
9
src/test.zig
Normal file
@ -0,0 +1,9 @@
|
||||
test {
|
||||
_ = @import("TensorStatic.zig");
|
||||
_ = @import("TensorAlloc.zig");
|
||||
_ = @import("TensorGpu.zig");
|
||||
_ = @import("Dimensions.zig");
|
||||
_ = @import("Scales.zig");
|
||||
_ = @import("Base.zig");
|
||||
_ = @import("UnitParser.zig");
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user