Removed GpuPipeline for a GpuProcess

2026-05-18 15:28:02 +02:00 · 2026-05-18 15:28:02 +02:00 · 97d5f9001f
commit 97d5f9001f
parent d5e7f60926
4 changed files with 126 additions and 118 deletions
--- a/src/GpuPipeline.zig
+++ b/src/GpuPipeline.zig
@ -1,25 +0,0 @@
 const std = @import("std");
 const GpuDevice = @import("GpuDevice.zig");
 const c = @import("utils.zig").c;
 const sv = @import("utils.zig").sv;
 /// Compute pipeline handle: assigned by `init` from
 /// `wgpuDeviceCreateComputePipeline` and released by `deinit`.
 raw: c.WGPUComputePipeline,
 /// Builds a compute pipeline on `device` from the WGSL source `wgsl`,
 /// using `main` as the compute entry point.
 ///
 /// Returns `error.Shader` when the shader module comes back null and
 /// `error.Pipeline` when the pipeline comes back null. Once created, the
 /// shader module is released when this function returns.
 pub fn init(device: GpuDevice, wgsl: []const u8) !@This() {
    // The WGSL text is attached to the module descriptor through its chain.
    var source: c.WGPUShaderSourceWGSL = .{
        .chain = .{ .sType = c.WGPUSType_ShaderSourceWGSL },
        .code = sv(wgsl),
    };
    const module_desc: c.WGPUShaderModuleDescriptor = .{
        .nextInChain = @ptrCast(&source),
    };
    const module = c.wgpuDeviceCreateShaderModule(device.device, &module_desc) orelse
        return error.Shader;
    defer c.wgpuShaderModuleRelease(module);
    const pipeline_desc: c.WGPUComputePipelineDescriptor = .{
        .compute = .{ .module = module, .entryPoint = sv("main") },
    };
    const pipeline = c.wgpuDeviceCreateComputePipeline(device.device, &pipeline_desc) orelse
        return error.Pipeline;
    return .{ .raw = pipeline };
 }
 /// Releases the compute pipeline handle stored in `raw`.
 pub fn deinit(self: @This()) void {
    const pipeline = self.raw;
    c.wgpuComputePipelineRelease(pipeline);
 }
--- a/src/GpuProcess.zig
+++ b/src/GpuProcess.zig
@ -0,0 +1,118 @@
 const std = @import("std");
 const c = @import("utils.zig").c;
 const sv = @import("utils.zig").sv;
 const GpuAllocator = @import("GpuAllocator.zig");
 const GpuBuffer = @import("GpuBuffer.zig");
 const GpuDevice = @import("GpuDevice.zig");
 /// Compute pipeline handle: assigned by `init` from
 /// `wgpuDeviceCreateComputePipeline`, used by `run`, released by `deinit`.
 pip: c.WGPUComputePipeline,
 /// Creates a process whose compute pipeline is compiled on `device` from
 /// the WGSL source `wgsl`, with `main` as the compute entry point.
 ///
 /// Returns `error.Shader` when the shader module comes back null and
 /// `error.Pipeline` when the pipeline comes back null. Once created, the
 /// shader module is released when this function returns.
 pub fn init(device: GpuDevice, wgsl: []const u8) !@This() {
    // The WGSL text travels in a chained struct hung off the descriptor.
    var wgsl_chain: c.WGPUShaderSourceWGSL = .{
        .chain = .{ .sType = c.WGPUSType_ShaderSourceWGSL },
        .code = sv(wgsl),
    };
    const shader_desc: c.WGPUShaderModuleDescriptor = .{
        .nextInChain = @ptrCast(&wgsl_chain),
    };
    const shader_module = c.wgpuDeviceCreateShaderModule(device.device, &shader_desc) orelse
        return error.Shader;
    defer c.wgpuShaderModuleRelease(shader_module);
    const pip_desc: c.WGPUComputePipelineDescriptor = .{
        .compute = .{ .module = shader_module, .entryPoint = sv("main") },
    };
    const compute_pipeline = c.wgpuDeviceCreateComputePipeline(device.device, &pip_desc) orelse
        return error.Pipeline;
    return .{ .pip = compute_pipeline };
 }
 /// Releases the compute pipeline handle stored in `pip`.
 pub fn deinit(self: @This()) void {
    const pipeline = self.pip;
    c.wgpuComputePipelineRelease(pipeline);
 }
 /// C-calling-convention callback that records a map-async status.
 ///
 /// `userdata1` must be a non-null pointer to a `bool`; the flag is set to
 /// `true` when `status` equals `WGPUMapAsyncStatus_Success` and to `false`
 /// otherwise. The string-view and second userdata arguments are ignored.
 fn onMapped(
    status: c.WGPUMapAsyncStatus,
    _: c.WGPUStringView,
    userdata1: ?*anyopaque,
    _: ?*anyopaque,
 ) callconv(.c) void {
    // NOTE(review): nothing in the visible part of this file references this
    // callback — confirm whether it is still needed here.
    const succeeded = status == c.WGPUMapAsyncStatus_Success;
    const done: *bool = @ptrCast(@alignCast(userdata1.?));
    done.* = succeeded;
 }
 /// Runs this process's compute pipeline over `buf_a` and `buf_b`, with
 /// `buf_out` bound as the third buffer.
 ///
 /// `buf_a.size` bytes are processed in chunks of at most 1 GiB. Each chunk
 /// binds the same byte range of the three buffers at bindings 0-2 and a
 /// freshly allocated 4-byte uniform buffer at binding 3 that holds the
 /// chunk's element count as a `u32` (one element is `@sizeOf(f16)` bytes).
 /// The uniform buffer is deinitialized at the end of each iteration.
 ///
 /// `gloc` is taken by value (it used to be `*GpuAllocator`).
 ///
 /// Returns `error.BufferTooSmall` when `buf_b` or `buf_out` is smaller than
 /// `buf_a`, and propagates errors from `GpuBuffer.init` and `submitPass`.
 pub fn run(
    self: @This(),
    gloc: GpuAllocator,
    buf_a: GpuBuffer,
    buf_b: GpuBuffer,
    buf_out: GpuBuffer,
 ) !void {
    const max_chunk_bytes: u64 = 1024 * 1024 * 1024; // 1 GiB per dispatch
    const bytes = buf_a.size;
    // Every chunk binds [offset, offset + size) of all three buffers, so the
    // other two must be at least as large as `buf_a`.
    if (buf_b.size < bytes or buf_out.size < bytes) return error.BufferTooSmall;
    var offset: u64 = 0;
    while (offset < bytes) {
        const current_chunk_bytes = @min(max_chunk_bytes, bytes - offset);
        const current_chunk_elements: u32 = @intCast(current_chunk_bytes / @sizeOf(f16));
        // Per-chunk uniform carrying the element count for the shader.
        const info_buf = try GpuBuffer.init(
            gloc,
            @sizeOf(u32),
            .initMany(&.{ .Uniform, .CopyDst }),
        );
        defer info_buf.deinit();
        c.wgpuQueueWriteBuffer(gloc.device.queue, info_buf.raw, 0, &current_chunk_elements, @sizeOf(u32));
        const entries = [_]c.WGPUBindGroupEntry{
            .{ .binding = 0, .buffer = buf_a.raw, .offset = offset, .size = current_chunk_bytes },
            .{ .binding = 1, .buffer = buf_b.raw, .offset = offset, .size = current_chunk_bytes },
            .{ .binding = 2, .buffer = buf_out.raw, .offset = offset, .size = current_chunk_bytes },
            .{ .binding = 3, .buffer = info_buf.raw, .offset = 0, .size = @sizeOf(u32) },
        };
        try submitPass(gloc, self.pip, &entries, current_chunk_elements);
        offset += current_chunk_bytes;
    }
 }
 /// Records and submits one compute pass that runs `pipeline` with `entries`
 /// bound as bind group 0.
 ///
 /// `n` is the element count to cover: the pass dispatches
 /// `ceilDiv(n, 256)` workgroups along x, capped at 65535.
 /// `gloc` is taken by value (it used to be `*GpuAllocator`).
 ///
 /// Returns `error.BindGroupLayout`, `error.BindGroup`, `error.Encoder`,
 /// `error.ComputePass` or `error.CommandBuffer` when the corresponding
 /// WebGPU call returns null. Objects created before a failure are released.
 fn submitPass(
    gloc: GpuAllocator,
    pipeline: c.WGPUComputePipeline,
    entries: []const c.WGPUBindGroupEntry,
    n: usize,
 ) !void {
    const bgl = c.wgpuComputePipelineGetBindGroupLayout(pipeline, 0) orelse
        return error.BindGroupLayout;
    defer c.wgpuBindGroupLayoutRelease(bgl);
    const bg = c.wgpuDeviceCreateBindGroup(gloc.device.device, &.{
        .layout = bgl,
        .entries = entries.ptr,
        .entryCount = entries.len,
    }) orelse return error.BindGroup;
    defer c.wgpuBindGroupRelease(bg);
    const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device.device, null) orelse
        return error.Encoder;
    // Registered immediately so the error returns below cannot leak the encoder.
    defer c.wgpuCommandEncoderRelease(enc);
    const pass = c.wgpuCommandEncoderBeginComputePass(enc, null) orelse
        return error.ComputePass;
    c.wgpuComputePassEncoderSetPipeline(pass, pipeline);
    c.wgpuComputePassEncoderSetBindGroup(pass, 0, bg, 0, null);
    const workgroup_size = 256;
    const max_workgroups = 65535;
    // NOTE(review): the dispatch is capped at `max_workgroups`; for `n` above
    // 256 * 65535 this presumably relies on the shader striding over the
    // remaining elements — confirm against the WGSL source.
    const desired_workgroups = ceilDiv(n, workgroup_size);
    const dispatch_count = @min(desired_workgroups, max_workgroups);
    c.wgpuComputePassEncoderDispatchWorkgroups(pass, @intCast(dispatch_count), 1, 1);
    c.wgpuComputePassEncoderEnd(pass);
    c.wgpuComputePassEncoderRelease(pass);
    // Kept as the optional C handle type so `&cmd` matches the submit signature.
    const cmd: c.WGPUCommandBuffer = c.wgpuCommandEncoderFinish(enc, null) orelse
        return error.CommandBuffer;
    defer c.wgpuCommandBufferRelease(cmd);
    c.wgpuQueueSubmit(gloc.device.queue, 1, &cmd);
 }
 /// Returns `n / d` rounded up to the next integer.
 ///
 /// Computed from the quotient and remainder so that, unlike
 /// `(n + d - 1) / d`, no intermediate value can overflow when `n` is close
 /// to `maxInt(usize)`. `d` must be non-zero.
 fn ceilDiv(n: usize, d: usize) usize {
    return n / d + @intFromBool(n % d != 0);
 }
--- a/src/Vec.zig
+++ b/src/Vec.zig
@ -3,7 +3,7 @@ const c = @import("utils.zig").c;
 const GpuAllocator = @import("GpuAllocator.zig");
 const GpuBuffer = @import("GpuBuffer.zig");
 const GpuDevice = @import("GpuDevice.zig");
-const GpuPipeline = @import("GpuPipeline.zig");
+const GpuProcess = @import("GpuProcess.zig");
 const Vec = @This();
@ -34,10 +34,7 @@ pub fn deinit(self: Vec) void {
 }
 /// CPU to GPU: uploads `data` into this vector's buffer by calling
 /// `self.buf.load`, propagating any error that call returns.
-pub fn load(
+pub fn load(self: Vec, data: []const f16) !void {
    self: Vec,
    data: []const f16,
 ) !void {
    try self.buf.load(data);
 }
@ -46,14 +43,13 @@ pub fn byteSize(self: Vec) u64 {
 }
 // Changed: gloc is passed by value instead of *GpuAllocator
 /// Combines `self` with `other` on the GPU and returns the result as a new `Vec`.
 ///
 /// Asserts that both operands have the same `len`. The result vector is
 /// created with `Vec.initZero(gloc, self.len)` and is deinitialized again
 /// if the dispatch returns an error.
-pub fn run(self: Vec, gloc: GpuAllocator, other: Vec, pip: GpuPipeline) !Vec {
+pub fn run(self: Vec, gloc: GpuAllocator, other: Vec, process: GpuProcess) !Vec {
    std.debug.assert(self.len == other.len);
    const result = try Vec.initZero(gloc, self.len);
    // Only runs on the error path; on success the caller receives `result`.
    errdefer result.deinit();
-    try dispatch2in1out(gloc, pip.raw, self.buf, other.buf, result.buf, self.byteSize());
+    try process.run(gloc, self.buf, other.buf, result.buf);
    return result;
 }
@ -71,84 +67,3 @@ fn onMapped(
    const flag: *bool = @ptrCast(@alignCast(userdata1.?));
    flag.* = (status == c.WGPUMapAsyncStatus_Success);
 }
 /// Runs `pipeline` over the first `bytes` bytes of `buf_a` and `buf_b`, with
 /// `buf_out` bound as the third buffer.
 ///
 /// The range is walked in chunks of at most 1 GiB. For every chunk a 4-byte
 /// uniform buffer is allocated from `gloc`, filled with the chunk's element
 /// count (`@sizeOf(f16)` bytes per element) and bound at binding 3 next to
 /// the three data buffers (bindings 0-2, all at the same byte offset); it is
 /// deinitialized at the end of the iteration.
 /// `gloc` is taken by value (it used to be `*GpuAllocator`).
 /// Errors from `GpuBuffer.init` and `submitPass` are propagated.
 fn dispatch2in1out(
    gloc: GpuAllocator,
    pipeline: c.WGPUComputePipeline,
    buf_a: GpuBuffer,
    buf_b: GpuBuffer,
    buf_out: GpuBuffer,
    bytes: u64,
 ) !void {
    const chunk_limit: u64 = 1 << 30; // 1 GiB
    var done: u64 = 0;
    var remaining: u64 = bytes;
    while (remaining != 0) {
        const chunk_bytes = @min(chunk_limit, remaining);
        const chunk_len: u32 = @intCast(chunk_bytes / @sizeOf(f16));
        const info = try GpuBuffer.init(gloc, @sizeOf(u32), .initMany(&.{ .Uniform, .CopyDst }));
        defer info.deinit();
        c.wgpuQueueWriteBuffer(gloc.device.queue, info.raw, 0, &chunk_len, @sizeOf(u32));
        // Bindings 0-2 share the chunk window; binding 3 is the element count.
        var entries: [4]c.WGPUBindGroupEntry = undefined;
        for ([_]GpuBuffer{ buf_a, buf_b, buf_out }, 0..) |buf, slot| {
            entries[slot] = .{
                .binding = @intCast(slot),
                .buffer = buf.raw,
                .offset = done,
                .size = chunk_bytes,
            };
        }
        entries[3] = .{ .binding = 3, .buffer = info.raw, .offset = 0, .size = @sizeOf(u32) };
        try submitPass(gloc, pipeline, &entries, chunk_len);
        done += chunk_bytes;
        remaining -= chunk_bytes;
    }
 }
 /// Records and submits one compute pass that runs `pipeline` with `entries`
 /// bound as bind group 0.
 ///
 /// `n` is the element count to cover: the pass dispatches
 /// `ceilDiv(n, 256)` workgroups along x, capped at 65535.
 /// `gloc` is taken by value (it used to be `*GpuAllocator`).
 ///
 /// Returns `error.BindGroupLayout`, `error.BindGroup`, `error.Encoder`,
 /// `error.ComputePass` or `error.CommandBuffer` when the corresponding
 /// WebGPU call returns null. Objects created before a failure are released.
 fn submitPass(
    gloc: GpuAllocator,
    pipeline: c.WGPUComputePipeline,
    entries: []const c.WGPUBindGroupEntry,
    n: usize,
 ) !void {
    const bgl = c.wgpuComputePipelineGetBindGroupLayout(pipeline, 0) orelse
        return error.BindGroupLayout;
    defer c.wgpuBindGroupLayoutRelease(bgl);
    const bg = c.wgpuDeviceCreateBindGroup(gloc.device.device, &.{
        .layout = bgl,
        .entries = entries.ptr,
        .entryCount = entries.len,
    }) orelse return error.BindGroup;
    defer c.wgpuBindGroupRelease(bg);
    const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device.device, null) orelse
        return error.Encoder;
    // Registered immediately so the error returns below cannot leak the encoder.
    defer c.wgpuCommandEncoderRelease(enc);
    const pass = c.wgpuCommandEncoderBeginComputePass(enc, null) orelse
        return error.ComputePass;
    c.wgpuComputePassEncoderSetPipeline(pass, pipeline);
    c.wgpuComputePassEncoderSetBindGroup(pass, 0, bg, 0, null);
    const workgroup_size = 256;
    const max_workgroups = 65535;
    // NOTE(review): the dispatch is capped at `max_workgroups`; for `n` above
    // 256 * 65535 this presumably relies on the shader striding over the
    // remaining elements — confirm against the WGSL source.
    const desired_workgroups = ceilDiv(n, workgroup_size);
    const dispatch_count = @min(desired_workgroups, max_workgroups);
    c.wgpuComputePassEncoderDispatchWorkgroups(pass, @intCast(dispatch_count), 1, 1);
    c.wgpuComputePassEncoderEnd(pass);
    c.wgpuComputePassEncoderRelease(pass);
    // Kept as the optional C handle type so `&cmd` matches the submit signature.
    const cmd: c.WGPUCommandBuffer = c.wgpuCommandEncoderFinish(enc, null) orelse
        return error.CommandBuffer;
    defer c.wgpuCommandBufferRelease(cmd);
    c.wgpuQueueSubmit(gloc.device.queue, 1, &cmd);
 }
 /// Returns `n / d` rounded up to the next integer.
 ///
 /// Computed from the quotient and remainder so that, unlike
 /// `(n + d - 1) / d`, no intermediate value can overflow when `n` is close
 /// to `maxInt(usize)`. `d` must be non-zero.
 fn ceilDiv(n: usize, d: usize) usize {
    return n / d + @intFromBool(n % d != 0);
 }
--- a/src/example.zig
+++ b/src/example.zig
@ -2,7 +2,7 @@ const std = @import("std");
 const GpuDevice = @import("GpuDevice.zig");
 const GpuAllocator = @import("GpuAllocator.zig");
 const GpuArena = @import("GpuArena.zig");
-const GpuPipeline = @import("GpuPipeline.zig");
+const GpuProcess = @import("GpuProcess.zig");
 const Vec = @import("Vec.zig");
 const c = @import("utils.zig").c;
@ -18,8 +18,8 @@ pub fn main(init: std.process.Init) !void {
    const gloc = grena.gpuAllocator();
-    const add_pip = try GpuPipeline.init(device, @embedFile("shaders/add.wgsl"));
+    const add = try GpuProcess.init(device, @embedFile("shaders/add.wgsl"));
-    defer add_pip.deinit();
+    defer add.deinit();
    const data_a = try allocator.alloc(f16, 16);
    defer allocator.free(data_a);
@ -36,7 +36,7 @@ pub fn main(init: std.process.Init) !void {
    const b = try Vec.initLoad(gloc, data_b);
    defer b.deinit();
-    const sum = try a.run(gloc, b, add_pip);
+    const sum = try a.run(gloc, b, add);
    // Don't need `sum.deinit()` because grena will deallocate everything when deinit
    std.debug.print("Bytes used: {d} (3 * {d})\n", .{ grena.allocated_vram_bytes, a.byteSize() });