diff --git a/README.md b/README.md index 807d3fb..8159bdc 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ The library exports five primary components: * **`GpuDevice`**: Initializes the WebGPU instance, adapter, device, and queue. It is configured to prioritize high performance and automatically requests the `ShaderF16` feature if the adapter supports it. By default, it enforces a 2 GB VRAM limit. * **`GpuArena` / `GpuAllocator`**: A memory management layer that tracks allocated VRAM bytes to prevent exceeding the device budget. The arena automatically destroys and releases all tracked WebGPU buffers when deinitialized. * **`GpuBuffer`**: Wraps native WebGPU buffers. It automatically aligns buffer sizes forward to a multiple of 4 bytes. It provides a `.load()` method for CPU-to-GPU data transfers (handling both aligned and unaligned lengths smoothly) and a `.read()` method that utilizes a staging buffer to map GPU data back to the CPU. -* **`GpuProcess`**: Compiles WGSL source code into a compute pipeline. When running a process, it automatically splits the work into manageable chunks (up to 1 GB at a time) and dispatches workgroups of size 256. +* **`GpuCompute`**: Compiles WGSL source code into a compute pipeline. When running, it automatically splits the work into manageable chunks (up to 1 GB at a time) and dispatches workgroups of size 256. ## Quick Start Example @@ -22,7 +22,7 @@ const gpu = @import("gpu"); const GpuDevice = gpu.GpuDevice; const GpuArena = gpu.GpuArena; const GpuBuffer = gpu.GpuBuffer; -const GpuProcess = gpu.GpuProcess; +const GpuCompute = gpu.GpuCompute; pub fn main(init: std.process.Init) !void { const allocator = init.gpa; @@ -37,7 +37,7 @@ pub fn main(init: std.process.Init) !void { const gloc = grena.gpuAllocator(); // 3. Load the WGSL compute pipeline - const add_process = try GpuProcess.init( + const add_cp = try GpuCompute.init( device, @embedFile("shaders/add.wgsl"), .{ .bindings = &.{ @@ -46,7 +46,7 @@ pub fn main(init: std.process.Init) !void { .{ .element_size = @sizeOf(f16) }, } }, ); - defer add_process.deinit(); + defer add_cp.deinit(); // 4. Setup CPU data const len: usize = 16; @@ -74,7 +74,7 @@ pub fn main(init: std.process.Init) !void { try buf_b.load(f16, data_b); // 7. Dispatch the Compute Process - try add_process.run(gloc, .{ buf_a, buf_b, buf_out }); + try add_cp.run(gloc, .{ buf_a, buf_b, buf_out }); // 8. Map and copy the resulting buffer back to the CPU const out = try buf_out.read(allocator, f16); @@ -82,7 +82,6 @@ pub fn main(init: std.process.Init) !void { std.debug.print("Result: {any}\n", .{out}); } - ``` ## Dependencies diff --git a/examples/add.zig b/examples/add.zig index 6d78b74..e3c4b84 100644 --- a/examples/add.zig +++ b/examples/add.zig @@ -3,7 +3,7 @@ const gpu = @import("gpu"); const GpuDevice = gpu.GpuDevice; const GpuArena = gpu.GpuArena; const GpuBuffer = gpu.GpuBuffer; -const GpuProcess = gpu.GpuProcess; +const GpuCompute = gpu.GpuCompute; pub fn main(init: std.process.Init) !void { const allocator = init.gpa; @@ -18,7 +18,7 @@ pub fn main(init: std.process.Init) !void { const gloc = grena.gpuAllocator(); // 3. Load the WGSL compute pipeline - const add_process = try GpuProcess.init( + const add_cp = try GpuCompute.init( device, @embedFile("shaders/add.wgsl"), .{ .bindings = &.{ @@ -27,7 +27,7 @@ pub fn main(init: std.process.Init) !void { .{ .element_size = @sizeOf(f16) }, } }, ); - defer add_process.deinit(); + defer add_cp.deinit(); // 4. Setup CPU data const len: usize = 16; @@ -55,7 +55,7 @@ pub fn main(init: std.process.Init) !void { try buf_b.load(f16, data_b); // 7. Dispatch the Compute Process - try add_process.run(gloc, .{ buf_a, buf_b, buf_out }); + try add_cp.run(gloc, .{ buf_a, buf_b, buf_out }); // 8. Map and copy the resulting buffer back to the CPU const out = try buf_out.read(allocator, f16); diff --git a/examples/bench.zig b/examples/bench.zig index 4138f47..635b14e 100644 --- a/examples/bench.zig +++ b/examples/bench.zig @@ -4,7 +4,7 @@ const GpuDevice = gpu.GpuDevice; const GpuArena = gpu.GpuArena; const GpuAllocator = gpu.GpuAllocator; const GpuBuffer = gpu.GpuBuffer; -const GpuProcess = gpu.GpuProcess; +const GpuCompute = gpu.GpuCompute; /// Minimal implementation of a f16 Vector const Vec = struct { @@ -40,13 +40,13 @@ const Vec = struct { } // Changed: gloc is passed by value instead of *GpuAllocator - pub fn run(self: Vec, gloc: GpuAllocator, other: Vec, process: GpuProcess) !Vec { + pub fn run(self: Vec, gloc: GpuAllocator, other: Vec, process: GpuCompute) !Vec { std.debug.assert(self.len == other.len); const result = try Vec.initZero(gloc, self.len); errdefer result.deinit(); - try process.run(gloc, f16, self.buf, other.buf, result.buf); + try process.run(gloc, .{ self.buf, other.buf, result.buf }); return result; } @@ -65,7 +65,11 @@ pub fn main(init: std.process.Init) !void { const gloc = grena.gpuAllocator(); - const add_pip = try GpuProcess.init(device, @embedFile("shaders/add.wgsl")); + const add_pip = try GpuCompute.init(device, @embedFile("shaders/add.wgsl"), .{ .bindings = &.{ + .{ .element_size = @sizeOf(f16) }, + .{ .element_size = @sizeOf(f16) }, + .{ .element_size = @sizeOf(f16) }, + } }); defer add_pip.deinit(); const allocator = init.gpa; diff --git a/src/GpuProcess.zig b/src/GpuCompute.zig similarity index 100% rename from src/GpuProcess.zig rename to src/GpuCompute.zig diff --git a/src/lib.zig b/src/lib.zig index 0ae3081..d85f2c8 100644 --- a/src/lib.zig +++ b/src/lib.zig @@ -2,4 +2,4 @@ pub const GpuAllocator = @import("GpuAllocator.zig"); pub const GpuArena = @import("GpuArena.zig"); pub const GpuBuffer = @import("GpuBuffer.zig"); pub const GpuDevice = @import("GpuDevice.zig"); -pub const GpuProcess = @import("GpuProcess.zig"); +pub const GpuCompute = @import("GpuCompute.zig");