Better example (no Vec)

2026-05-18 16:38:30 +02:00 · 2026-05-18 16:38:30 +02:00 · c62bead6e9
commit c62bead6e9
parent 9dbd668de9
1 changed files with 29 additions and 63 deletions
--- a/src/example.zig
+++ b/src/example.zig
@ -1,92 +1,58 @@
 /// This is a fully self-contained example.
 /// It sets up simple f16 vector data and performs an add operation on it.
 const std = @import("std");
 const GpuDevice = @import("GpuDevice.zig");
 const GpuArena = @import("GpuArena.zig");
 const GpuAllocator = @import("GpuAllocator.zig");
 const GpuBuffer = @import("GpuBuffer.zig");
 const GpuProcess = @import("GpuProcess.zig");
 pub fn main(init: std.process.Init) !void {
    const allocator = init.gpa;
-    // Open GPU Device
+    // 1. Open GPU Device
    const device = try GpuDevice.init(.{});
    defer device.deinit();
-    // Create a GPU Arena to hold GPU memory
+    // 2. Create a GPU Arena to manage VRAM
    var grena = GpuArena.init(allocator, device);
    defer grena.deinit();
    const gloc = grena.gpuAllocator();
-    // Create a GPU process that load the pipeline/shader
+    // 3. Load the WGSL compute pipeline
-    const add = try GpuProcess.init(device, @embedFile("shaders/add.wgsl"));
+    const add_process = try GpuProcess.init(device, @embedFile("shaders/add.wgsl"));
-    defer add.deinit();
+    defer add_process.deinit();
-    // Allocate CPU memory
+    // 4. Setup CPU data
-    const data_a = try allocator.alloc(f16, 16);
+    const len: usize = 16;
    const data_a = try allocator.alloc(f16, len);
    defer allocator.free(data_a);
-    const data_b = try allocator.alloc(f16, 16);
+    const data_b = try allocator.alloc(f16, len);
    defer allocator.free(data_b);
-    for (0..16) |i| {
+    for (0..len) |i| {
        data_a[i] = @floatFromInt(i);
-        data_b[i] = @floatFromInt(16 - 1 - i);
+        data_b[i] = @floatFromInt(len - 1 - i);
    }
-    // Allocate GPU memory (Vec.deinit isn't necessary because grena will do it when deinit)
+    // 5. Initialize raw GPU Buffers
-    const a = try Vec.initZero(gloc, 16);
+    // We pass the EnumSet inline using `.initMany` since the Enum itself isn't exported
-    const b = try Vec.initZero(gloc, 16);
+    const byte_size = len * @sizeOf(f16);
    const buf_a = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc }));
    const buf_b = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc }));
    const buf_out = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc }));
-    // Load CPU -> GPU
+    // Note: The buffers are safely tied to the GpuArena which will automatically
-    try a.load(data_a);
+    // release them at the end. You can also manually call buf_x.deinit() if desired.
    try b.load(data_b);
-    // Run GPU Pipeline
+    // 6. Transfer data from CPU slices to GPU Buffers
-    const sum = try a.run(gloc, b, add);
+    try buf_a.load(f16, data_a);
    try buf_b.load(f16, data_b);
-    // Read GPU -> CPU
+    // 7. Dispatch the Compute Process
-    const out = try sum.read(allocator);
+    // We pass the data type (f16) to allow GpuProcess to calculate chunks correctly
    try add_process.run(gloc, f16, buf_a, buf_b, buf_out);
    // 8. Map and copy the resulting buffer back to the CPU
    const out = try buf_out.read(allocator, f16);
    defer allocator.free(out);
-    std.debug.print("{any}\n", .{out});
+    std.debug.print("Result: {any}\n", .{out});
 }
 /// Minimal implementation of an f16 vector.
 /// Thin wrapper that pairs a `GpuBuffer` with its element count and forwards
 /// load/read/run calls to the buffer and to a `GpuProcess`.
 const Vec = struct {
    /// GPU buffer backing the vector; `initZero` sizes it to `len * @sizeOf(f16)` bytes.
    buf: GpuBuffer,
    /// Number of f16 elements the vector was created with.
    len: usize,
    /// Creates a vector of `len` f16 elements by initializing a `GpuBuffer`
    /// of `len * @sizeOf(f16)` bytes through `gloc`, with the
    /// `.Storage`, `.CopyDst` and `.CopySrc` usages.
    /// Returns any error produced by `GpuBuffer.init`.
    /// NOTE(review): the name implies zero-filled contents; that depends on
    /// `GpuBuffer.init`, which is not visible here — confirm.
    pub fn initZero(gloc: GpuAllocator, len: usize) !Vec {
        return .{
            .buf = try GpuBuffer.init(
                gloc,
                len * @sizeOf(f16),
                .initMany(&.{ .Storage, .CopyDst, .CopySrc }),
            ),
            .len = len,
        };
    }
    /// Releases the underlying buffer by calling `GpuBuffer.deinit`.
    pub fn deinit(self: Vec) void {
        self.buf.deinit();
    }
    /// Forwards `data` to `GpuBuffer.load` as f16 elements.
    /// `data.len` is not checked against `self.len` in this wrapper.
    pub fn load(self: Vec, data: []const f16) !void {
        try self.buf.load(f16, data);
    }
    /// Returns the result of `GpuBuffer.read(alloc, f16)` as an f16 slice.
    /// NOTE(review): presumably the slice is allocated with `alloc` and owned
    /// by the caller — confirm against `GpuBuffer.read`.
    pub fn read(self: Vec, alloc: std.mem.Allocator) ![]f16 {
        return self.buf.read(alloc, f16);
    }
    /// Runs `process` on `self` and `other` and returns a newly created vector
    /// of the same length that was passed to the process as its third buffer.
    /// Both inputs must have the same `len`; this is checked with an assert only.
    pub fn run(self: Vec, gloc: GpuAllocator, other: Vec, process: GpuProcess) !Vec {
        std.debug.assert(self.len == other.len);
        const result = try Vec.initZero(gloc, self.len);
        // Release the freshly created result if the process call below fails.
        errdefer result.deinit();
        try process.run(gloc, f16, self.buf, other.buf, result.buf);
        return result;
    }
 };