Removed GpuPipeline for a GpuProcess

2026-05-18 15:28:02 +02:00 · 2026-05-18 15:28:02 +02:00 · 97d5f9001f
commit 97d5f9001f
parent d5e7f60926
4 changed files with 126 additions and 118 deletions
--- a/src/GpuPipeline.zig
+++ b/src/GpuPipeline.zig
@ -1,25 +0,0 @@
-const std = @import("std");
-const GpuDevice = @import("GpuDevice.zig");
-const c = @import("utils.zig").c;
-const sv = @import("utils.zig").sv;
-
-raw: c.WGPUComputePipeline,
-
-pub fn init(device: GpuDevice, wgsl: []const u8) !@This() {
-    var wgsl_src = c.WGPUShaderSourceWGSL{
-        .chain = .{ .sType = c.WGPUSType_ShaderSourceWGSL },
-        .code = sv(wgsl),
-    };
-    const shader = c.wgpuDeviceCreateShaderModule(device.device, &.{
-        .nextInChain = @ptrCast(&wgsl_src),
-    }) orelse return error.Shader;
-    defer c.wgpuShaderModuleRelease(shader);
-
-    return .{ .raw = c.wgpuDeviceCreateComputePipeline(device.device, &.{
-        .compute = .{ .module = shader, .entryPoint = sv("main") },
-    }) orelse return error.Pipeline };
-}
-
-pub fn deinit(self: @This()) void {
-    c.wgpuComputePipelineRelease(self.raw);
-}
--- a/src/GpuProcess.zig
+++ b/src/GpuProcess.zig
@ -0,0 +1,118 @@
+const std = @import("std");
+const c = @import("utils.zig").c;
+const sv = @import("utils.zig").sv;
+const GpuAllocator = @import("GpuAllocator.zig");
+const GpuBuffer = @import("GpuBuffer.zig");
+const GpuDevice = @import("GpuDevice.zig");
+
+pip: c.WGPUComputePipeline,
+
+/// Builds a compute pipeline on `device` from the WGSL source in `wgsl`.
+///
+/// A shader module is created from the source and a compute pipeline is
+/// created from that module's `main` entry point.
+///
+/// Returns `error.Shader` if the shader module cannot be created and
+/// `error.Pipeline` if the pipeline cannot be created. Release the result
+/// with `deinit`.
+pub fn init(device: GpuDevice, wgsl: []const u8) !@This() {
+    // WGSL source descriptor, chained into the shader module descriptor below.
+    var source = c.WGPUShaderSourceWGSL{
+        .chain = .{ .sType = c.WGPUSType_ShaderSourceWGSL },
+        .code = sv(wgsl),
+    };
+    const module = c.wgpuDeviceCreateShaderModule(device.device, &.{
+        .nextInChain = @ptrCast(&source),
+    }) orelse return error.Shader;
+    // Released on every exit path, including the successful one.
+    defer c.wgpuShaderModuleRelease(module);
+
+    const pipeline = c.wgpuDeviceCreateComputePipeline(device.device, &.{
+        .compute = .{ .module = module, .entryPoint = sv("main") },
+    }) orelse return error.Pipeline;
+
+    return .{ .pip = pipeline };
+}
+
+/// Releases the compute pipeline handle stored in `pip`.
+pub fn deinit(self: @This()) void {
+    const pipeline = self.pip;
+    c.wgpuComputePipelineRelease(pipeline);
+}
+
+// NOTE(review): nothing in the visible part of this file references this
+// callback; confirm whether it is still needed here.
+/// C callback that records whether `status` equals
+/// `WGPUMapAsyncStatus_Success` in the `bool` that `userdata1` points to.
+///
+/// `userdata1` must be a non-null pointer to a `bool`. The string view and
+/// the second userdata argument are ignored.
+fn onMapped(
+    status: c.WGPUMapAsyncStatus,
+    _: c.WGPUStringView,
+    userdata1: ?*anyopaque,
+    _: ?*anyopaque,
+) callconv(.c) void {
+    const result: *bool = @ptrCast(@alignCast(userdata1.?));
+    const succeeded = status == c.WGPUMapAsyncStatus_Success;
+    result.* = succeeded;
+}
+
+/// Dispatches this process's pipeline over `buf_a`, `buf_b` and `buf_out`.
+///
+/// The byte range `[0, buf_a.size)` is walked in chunks of at most 1 GiB.
+/// For each chunk:
+///   - a `@sizeOf(u32)`-byte uniform buffer is allocated from `gloc` and
+///     filled with the chunk's element count (chunk bytes / `@sizeOf(f16)`);
+///   - `buf_a`, `buf_b` and `buf_out` are bound at the chunk's byte offset on
+///     bindings 0, 1 and 2, and the uniform buffer on binding 3;
+///   - one compute pass is submitted through `submitPass`.
+///
+/// `gloc` is taken by value (not as `*GpuAllocator`).
+///
+/// `buf_b` and `buf_out` must be at least `buf_a.size` bytes, because every
+/// chunk binds the same byte range of all three buffers; this is asserted
+/// before any GPU work is issued.
+///
+/// Errors from `GpuBuffer.init` and `submitPass` are propagated.
+pub fn run(
+    self: @This(),
+    gloc: GpuAllocator,
+    buf_a: GpuBuffer,
+    buf_b: GpuBuffer,
+    buf_out: GpuBuffer,
+) !void {
+    const max_chunk_bytes: u64 = 1024 * 1024 * 1024; // 1 GiB
+
+    const bytes = buf_a.size;
+
+    // The loop is driven by `buf_a` alone, so fail early if the other two
+    // buffers cannot cover the ranges that will be bound below.
+    std.debug.assert(buf_b.size >= bytes);
+    std.debug.assert(buf_out.size >= bytes);
+
+    var offset: u64 = 0;
+    while (offset < bytes) {
+        const current_chunk_bytes = @min(max_chunk_bytes, bytes - offset);
+        // At most 1 GiB / 2 elements per chunk, which fits in a u32.
+        const current_chunk_elements: u32 = @intCast(current_chunk_bytes / @sizeOf(f16));
+
+        // Per-chunk uniform carrying the element count; freed at the end of
+        // this iteration.
+        const info_buf = try GpuBuffer.init(
+            gloc,
+            @sizeOf(u32),
+            .initMany(&.{ .Uniform, .CopyDst }),
+        );
+        defer info_buf.deinit();
+
+        c.wgpuQueueWriteBuffer(gloc.device.queue, info_buf.raw, 0, &current_chunk_elements, @sizeOf(u32));
+
+        const entries = [_]c.WGPUBindGroupEntry{
+            .{ .binding = 0, .buffer = buf_a.raw, .offset = offset, .size = current_chunk_bytes },
+            .{ .binding = 1, .buffer = buf_b.raw, .offset = offset, .size = current_chunk_bytes },
+            .{ .binding = 2, .buffer = buf_out.raw, .offset = offset, .size = current_chunk_bytes },
+            .{ .binding = 3, .buffer = info_buf.raw, .offset = 0, .size = @sizeOf(u32) },
+        };
+
+        try submitPass(gloc, self.pip, &entries, current_chunk_elements);
+
+        offset += current_chunk_bytes;
+    }
+}
+
+/// Records and submits a single compute pass for `pipeline`.
+///
+/// A bind group is built from `entries` against the pipeline's bind group
+/// layout at index 0, bound at group 0, and the pass is dispatched with
+/// `min(ceilDiv(n, 256), 65535)` workgroups along x (1 along y and z). The
+/// resulting command buffer is submitted to `gloc.device.queue`.
+///
+/// `gloc` is taken by value (not as `*GpuAllocator`). `n` is the number of
+/// elements the pass is meant to cover.
+///
+/// Returns `error.BindGroupLayout`, `error.BindGroup`, `error.Encoder`,
+/// `error.ComputePass` or `error.CommandBuffer` when the corresponding
+/// handle comes back null. Every handle created here is released before
+/// returning, on both the success and the error paths.
+fn submitPass(
+    gloc: GpuAllocator,
+    pipeline: c.WGPUComputePipeline,
+    entries: []const c.WGPUBindGroupEntry,
+    n: usize,
+) !void {
+    const bgl = c.wgpuComputePipelineGetBindGroupLayout(pipeline, 0) orelse
+        return error.BindGroupLayout;
+    defer c.wgpuBindGroupLayoutRelease(bgl);
+
+    const bg = c.wgpuDeviceCreateBindGroup(gloc.device.device, &.{
+        .layout = bgl,
+        .entries = entries.ptr,
+        .entryCount = entries.len,
+    }) orelse return error.BindGroup;
+    defer c.wgpuBindGroupRelease(bg);
+
+    const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device.device, null) orelse
+        return error.Encoder;
+    // Registered right after creation so the early returns below cannot leak
+    // the encoder.
+    defer c.wgpuCommandEncoderRelease(enc);
+
+    const pass = c.wgpuCommandEncoderBeginComputePass(enc, null) orelse
+        return error.ComputePass;
+    c.wgpuComputePassEncoderSetPipeline(pass, pipeline);
+    c.wgpuComputePassEncoderSetBindGroup(pass, 0, bg, 0, null);
+
+    const WORKGROUP_SIZE = 256;
+    const MAX_WORKGROUPS = 65535;
+
+    // NOTE(review): when `n` exceeds 65535 * 256 the dispatch is capped and
+    // fewer than one invocation per element is launched. This presumably
+    // relies on the shader looping over the remaining elements; confirm
+    // against the WGSL source and its workgroup size.
+    const desired_workgroups = ceilDiv(n, WORKGROUP_SIZE);
+    const dispatch_count = @min(desired_workgroups, MAX_WORKGROUPS);
+
+    c.wgpuComputePassEncoderDispatchWorkgroups(pass, @intCast(dispatch_count), 1, 1);
+    c.wgpuComputePassEncoderEnd(pass);
+    c.wgpuComputePassEncoderRelease(pass);
+
+    const cmd = c.wgpuCommandEncoderFinish(enc, null) orelse
+        return error.CommandBuffer;
+    defer c.wgpuCommandBufferRelease(cmd);
+
+    // The C API takes a pointer to an array of (nullable) handles.
+    const cmds = [_]c.WGPUCommandBuffer{cmd};
+    c.wgpuQueueSubmit(gloc.device.queue, cmds.len, &cmds);
+}
+
+/// Returns `n / d` rounded up to the next integer. `d` must be non-zero.
+///
+/// Computed as the truncated quotient plus one when there is a remainder, so
+/// no intermediate value can overflow `usize` (the `(n + d - 1) / d` form
+/// overflows when `n` is close to the maximum).
+fn ceilDiv(n: usize, d: usize) usize {
+    return n / d + @intFromBool(n % d != 0);
+}
--- a/src/Vec.zig
+++ b/src/Vec.zig
@ -3,7 +3,7 @@ const c = @import("utils.zig").c;
 const GpuAllocator = @import("GpuAllocator.zig");
 const GpuBuffer = @import("GpuBuffer.zig");
 const GpuDevice = @import("GpuDevice.zig");
-const GpuPipeline = @import("GpuPipeline.zig");
+const GpuProcess = @import("GpuProcess.zig");

 const Vec = @This();

@ -34,10 +34,7 @@ pub fn deinit(self: Vec) void {
 }

 /// CPU to GPU.
-pub fn load(
-    self: Vec,
-    data: []const f16,
-) !void {
+pub fn load(self: Vec, data: []const f16) !void {
    try self.buf.load(data);
 }

@ -46,14 +43,13 @@ pub fn byteSize(self: Vec) u64 {
 }

 // Changed: gloc is passed by value instead of *GpuAllocator
-pub fn run(self: Vec, gloc: GpuAllocator, other: Vec, pip: GpuPipeline) !Vec {
+pub fn run(self: Vec, gloc: GpuAllocator, other: Vec, process: GpuProcess) !Vec {
    std.debug.assert(self.len == other.len);

    const result = try Vec.initZero(gloc, self.len);
    errdefer result.deinit();

-    try dispatch2in1out(gloc, pip.raw, self.buf, other.buf, result.buf, self.byteSize());
-
+    try process.run(gloc, self.buf, other.buf, result.buf);
    return result;
 }

@ -71,84 +67,3 @@ fn onMapped(
    const flag: *bool = @ptrCast(@alignCast(userdata1.?));
    flag.* = (status == c.WGPUMapAsyncStatus_Success);
 }
-
-// Changed: gloc is passed by value instead of *GpuAllocator
-fn dispatch2in1out(
-    gloc: GpuAllocator,
-    pipeline: c.WGPUComputePipeline,
-    buf_a: GpuBuffer,
-    buf_b: GpuBuffer,
-    buf_out: GpuBuffer,
-    bytes: u64,
-) !void {
-    const max_chunk_bytes: u64 = 1024 * 1024 * 1024; // 1 GB
-
-    var offset: u64 = 0;
-    while (offset < bytes) {
-        const current_chunk_bytes = @min(max_chunk_bytes, bytes - offset);
-        const current_chunk_elements: u32 = @intCast(current_chunk_bytes / @sizeOf(f16));
-
-        const info_buf = try GpuBuffer.init(
-            gloc,
-            @sizeOf(u32),
-            .initMany(&.{ .Uniform, .CopyDst }),
-        );
-        defer info_buf.deinit();
-
-        c.wgpuQueueWriteBuffer(gloc.device.queue, info_buf.raw, 0, &current_chunk_elements, @sizeOf(u32));
-
-        const entries = [_]c.WGPUBindGroupEntry{
-            .{ .binding = 0, .buffer = buf_a.raw, .offset = offset, .size = current_chunk_bytes },
-            .{ .binding = 1, .buffer = buf_b.raw, .offset = offset, .size = current_chunk_bytes },
-            .{ .binding = 2, .buffer = buf_out.raw, .offset = offset, .size = current_chunk_bytes },
-            .{ .binding = 3, .buffer = info_buf.raw, .offset = 0, .size = @sizeOf(u32) },
-        };
-
-        try submitPass(gloc, pipeline, &entries, current_chunk_elements);
-
-        offset += current_chunk_bytes;
-    }
-}
-
-// Changed: gloc is passed by value instead of *GpuAllocator
-fn submitPass(
-    gloc: GpuAllocator,
-    pipeline: c.WGPUComputePipeline,
-    entries: []const c.WGPUBindGroupEntry,
-    n: usize,
-) !void {
-    const bgl = c.wgpuComputePipelineGetBindGroupLayout(pipeline, 0);
-    defer c.wgpuBindGroupLayoutRelease(bgl);
-
-    const bg = c.wgpuDeviceCreateBindGroup(gloc.device.device, &.{
-        .layout = bgl,
-        .entries = entries.ptr,
-        .entryCount = entries.len,
-    }) orelse return error.BindGroup;
-    defer c.wgpuBindGroupRelease(bg);
-
-    const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device.device, null) orelse
-        return error.Encoder;
-    const pass = c.wgpuCommandEncoderBeginComputePass(enc, null);
-    c.wgpuComputePassEncoderSetPipeline(pass, pipeline);
-    c.wgpuComputePassEncoderSetBindGroup(pass, 0, bg, 0, null);
-
-    const WORKGROUP_SIZE = 256;
-    const MAX_WORKGROUPS = 65535;
-
-    const desired_workgroups = ceilDiv(n, WORKGROUP_SIZE);
-    const dispatch_count = @min(desired_workgroups, MAX_WORKGROUPS);
-
-    c.wgpuComputePassEncoderDispatchWorkgroups(pass, @intCast(dispatch_count), 1, 1);
-    c.wgpuComputePassEncoderEnd(pass);
-    c.wgpuComputePassEncoderRelease(pass);
-
-    const cmd = c.wgpuCommandEncoderFinish(enc, null);
-    defer c.wgpuCommandEncoderRelease(enc);
-    defer c.wgpuCommandBufferRelease(cmd);
-    c.wgpuQueueSubmit(gloc.device.queue, 1, &cmd);
-}
-
-fn ceilDiv(n: usize, d: usize) usize {
-    return (n + d - 1) / d;
-}
--- a/src/example.zig
+++ b/src/example.zig
@ -2,7 +2,7 @@ const std = @import("std");
 const GpuDevice = @import("GpuDevice.zig");
 const GpuAllocator = @import("GpuAllocator.zig");
 const GpuArena = @import("GpuArena.zig");
-const GpuPipeline = @import("GpuPipeline.zig");
+const GpuProcess = @import("GpuProcess.zig");
 const Vec = @import("Vec.zig");

 const c = @import("utils.zig").c;
@ -18,8 +18,8 @@ pub fn main(init: std.process.Init) !void {

    const gloc = grena.gpuAllocator();

-    const add_pip = try GpuPipeline.init(device, @embedFile("shaders/add.wgsl"));
-    defer add_pip.deinit();
+    const add = try GpuProcess.init(device, @embedFile("shaders/add.wgsl"));
+    defer add.deinit();

    const data_a = try allocator.alloc(f16, 16);
    defer allocator.free(data_a);
@ -36,7 +36,7 @@ pub fn main(init: std.process.Init) !void {
    const b = try Vec.initLoad(gloc, data_b);
    defer b.deinit();

-    const sum = try a.run(gloc, b, add_pip);
+    const sum = try a.run(gloc, b, add);
    // Don't need `sum.deinit()` because grena will deallocate everything when deinit

    std.debug.print("Bytes used: {d} (3 * {d})\n", .{ grena.allocated_vram_bytes, a.byteSize() });