From 0da02f60c45f84988eb3ee3bbc53b85e39039c59 Mon Sep 17 00:00:00 2001 From: adrien Date: Fri, 15 May 2026 16:03:40 +0200 Subject: [PATCH] Created a GpuBuffer + an array in GpuAlloc to deinit all when deinit the alloc --- build.zig.zon | 40 +--------------------- src/GpuAllocator.zig | 37 +++++++++++++++++++- src/GpuBuffer.zig | 47 ++++++++++++++++++++++++++ src/Mat.zig | 80 +++++++++++++++++++------------------------- src/main.zig | 4 +-- 5 files changed, 121 insertions(+), 87 deletions(-) create mode 100644 src/GpuBuffer.zig diff --git a/build.zig.zon b/build.zig.zon index 14a8524..45b4fed 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -31,45 +31,7 @@ // `zig build --fetch` can be used to fetch all dependencies of a package, recursively. // Once all dependencies are fetched, `zig build` no longer requires // internet connectivity. - .dependencies = .{ - // See `zig fetch --save ` for a command-line interface for adding dependencies. - //.example = .{ - // // When updating this field to a new URL, be sure to delete the corresponding - // // `hash`, otherwise you are communicating that you expect to find the old hash at - // // the new URL. If the contents of a URL change this will result in a hash mismatch - // // which will prevent zig from using it. - // .url = "https://example.com/foo.tar.gz", - // - // // This is computed from the file contents of the directory of files that is - // // obtained after fetching `url` and applying the inclusion rules given by - // // `paths`. - // // - // // This field is the source of truth; packages do not come from a `url`; they - // // come from a `hash`. `url` is just one of many possible mirrors for how to - // // obtain a package matching this `hash`. - // // - // // Uses the [multihash](https://multiformats.io/multihash/) format. - // .hash = "...", - // - // // When this is provided, the package is found in a directory relative to the - // // build root.
In this case the package's hash is irrelevant and therefore not - // // computed. This field and `url` are mutually exclusive. - // .path = "foo", - // - // // When this is set to `true`, a package is declared to be lazily - // // fetched. This makes the dependency only get fetched if it is - // // actually used. - // .lazy = false, - //}, - }, - // Specifies the set of files and directories that are included in this package. - // Only files and directories listed here are included in the `hash` that - // is computed for this package. Only files listed here will remain on disk - // when using the zig package manager. As a rule of thumb, one should list - // files required for compilation plus any license(s). - // Paths are relative to the build root. Use the empty string (`""`) to refer to - // the build root itself. - // A directory listed here means that all files within, recursively, are included. + .dependencies = .{}, .paths = .{ "build.zig", "build.zig.zon", diff --git a/src/GpuAllocator.zig b/src/GpuAllocator.zig index 29bf741..3ace41c 100644 --- a/src/GpuAllocator.zig +++ b/src/GpuAllocator.zig @@ -4,16 +4,19 @@ const c = @import("c.zig").c; const GpuAllocator = @This(); +cpu_allocator: std.mem.Allocator, instance: c.WGPUInstance, adapter: c.WGPUAdapter, device: c.WGPUDevice, queue: c.WGPUQueue, +tracked_buffers: std.AutoHashMap(c.WGPUBuffer, void), + // Lazily created, cached for lifetime of allocator _pip_add: c.WGPUComputePipeline = null, _pip_scale: c.WGPUComputePipeline = null, -pub fn init() !GpuAllocator { +pub fn init(cpu_allocator: std.mem.Allocator) !GpuAllocator { const instance = c.wgpuCreateInstance( &std.mem.zeroes(c.WGPUInstanceDescriptor), ) orelse return error.NoInstance; @@ -38,22 +41,54 @@ pub fn init() !GpuAllocator { const device = ctx.device orelse return error.NoDevice; return .{ + .cpu_allocator = cpu_allocator, .instance = instance, .adapter = adapter, .device = device, .queue = c.wgpuDeviceGetQueue(device), + .tracked_buffers = 
.init(cpu_allocator), }; } pub fn deinit(self: *GpuAllocator) void { if (self._pip_add) |p| c.wgpuComputePipelineRelease(p); if (self._pip_scale) |p| c.wgpuComputePipelineRelease(p); + + var it = self.tracked_buffers.keyIterator(); + while (it.next()) |buf_ptr| { + const buf = buf_ptr.*; + c.wgpuBufferDestroy(buf); + c.wgpuBufferRelease(buf); + } + self.tracked_buffers.deinit(); + c.wgpuQueueRelease(self.queue); c.wgpuDeviceRelease(self.device); c.wgpuAdapterRelease(self.adapter); c.wgpuInstanceRelease(self.instance); } +pub fn registerBuffer( + self: *GpuAllocator, + bytes: u64, + usage: c.WGPUBufferUsage, +) !c.WGPUBuffer { + const buf = c.wgpuDeviceCreateBuffer(self.device, &.{ + .usage = usage, + .size = bytes, + }) orelse return error.BufferAlloc; + + try self.tracked_buffers.put(buf, {}); + return buf; +} + +pub fn unregisterAndDestroyBuffer(self: *GpuAllocator, buf: c.WGPUBuffer) void { + if (self.tracked_buffers.remove(buf)) { + c.wgpuBufferDestroy(buf); + c.wgpuBufferRelease(buf); + } +} + // ── Internal ───────────────────────────────────────────────────────────── pub fn makeBuffer( diff --git a/src/GpuBuffer.zig b/src/GpuBuffer.zig new file mode 100644 index 0000000..90aeae7 --- /dev/null +++ b/src/GpuBuffer.zig @@ -0,0 +1,47 @@ +const std = @import("std"); +const c = @import("c.zig").c; +const GpuAllocator = @import("GpuAllocator.zig"); + +const GpuBuffer = @This(); + +raw: c.WGPUBuffer, +size: u64, +usage: c.WGPUBufferUsage, +gloc: *GpuAllocator, + +/// Allocates the underlying WebGPU handle and registers it to the parent GpuAllocator +pub fn init(gloc: *GpuAllocator, bytes: u64, usage: c.WGPUBufferUsage) !GpuBuffer { + const raw_handle = try gloc.registerBuffer(bytes, usage); + return .{ + .raw = raw_handle, + .size = bytes, + .usage = usage, + .gloc = gloc, + }; +} + +/// Unregisters from the parent GpuAllocator and cleanly destroys GPU resources +pub fn deinit(self: GpuBuffer) void { + self.gloc.unregisterAndDestroyBuffer(self.raw); +} + +/// Native 
mapAsync wrapper +pub fn mapAsync( + self: GpuBuffer, + mode: c.WGPUMapMode, + offset: u64, + size: u64, + callback_info: c.WGPUBufferMapCallbackInfo, +) void { + _ = c.wgpuBufferMapAsync(self.raw, mode, offset, size, callback_info); +} + +/// Native getConstMappedRange wrapper +pub fn getConstMappedRange(self: GpuBuffer, offset: u64, size: u64) ?*const anyopaque { + return c.wgpuBufferGetConstMappedRange(self.raw, offset, size); +} + +/// Native unmap wrapper +pub fn unmap(self: GpuBuffer) void { + c.wgpuBufferUnmap(self.raw); +} diff --git a/src/Mat.zig b/src/Mat.zig index 36e2b5a..0e186ca 100644 --- a/src/Mat.zig +++ b/src/Mat.zig @@ -1,16 +1,14 @@ const std = @import("std"); const c = @import("c.zig").c; const GpuAllocator = @import("GpuAllocator.zig"); +const GpuBuffer = @import("GpuBuffer.zig"); const Mat = @This(); -buf: c.WGPUBuffer, +buf: GpuBuffer, rows: u32, cols: u32, -// ── Lifecycle ───────────────────────────────────────────────────────────── - -/// Allocate GPU buffer and upload `data`. `data.len` must equal rows*cols. pub fn load( gloc: *GpuAllocator, data: []const f32, @@ -19,30 +17,30 @@ pub fn load( ) !Mat { std.debug.assert(data.len == @as(usize, rows) * cols); const bytes = data.len * @sizeOf(f32); - const buf = try gloc.makeBuffer( + + // Uses structural constructor initialization + const buf = try GpuBuffer.init( + gloc, bytes, - c.WGPUBufferUsage_Storage | - c.WGPUBufferUsage_CopyDst | - c.WGPUBufferUsage_CopySrc, + c.WGPUBufferUsage_Storage | c.WGPUBufferUsage_CopyDst | c.WGPUBufferUsage_CopySrc, ); - c.wgpuQueueWriteBuffer(gloc.queue, buf, 0, data.ptr, bytes); + + c.wgpuQueueWriteBuffer(gloc.queue, buf.raw, 0, data.ptr, bytes); return .{ .buf = buf, .rows = rows, .cols = cols }; } -/// Allocate zeroed GPU buffer (no upload). 
pub fn zeros(gloc: *GpuAllocator, rows: u32, cols: u32) !Mat { const bytes: u64 = @as(u64, rows) * cols * @sizeOf(f32); - const buf = try gloc.makeBuffer( + const buf = try GpuBuffer.init( + gloc, bytes, - c.WGPUBufferUsage_Storage | - c.WGPUBufferUsage_CopyDst | - c.WGPUBufferUsage_CopySrc, + c.WGPUBufferUsage_Storage | c.WGPUBufferUsage_CopyDst | c.WGPUBufferUsage_CopySrc, ); return .{ .buf = buf, .rows = rows, .cols = cols }; } pub fn deinit(self: Mat) void { - c.wgpuBufferRelease(self.buf); + self.buf.deinit(); // Automatically cleans tracking map & releases GPU memory } pub fn len(self: Mat) u32 { @@ -53,7 +51,6 @@ pub fn byteSize(self: Mat) u64 { return @as(u64, self.len()) * @sizeOf(f32); } -/// Element-wise add. Shapes must match. Returns new Mat (caller owns). pub fn add(self: Mat, gloc: *GpuAllocator, other: Mat) !Mat { std.debug.assert(self.rows == other.rows and self.cols == other.cols); @@ -66,7 +63,6 @@ pub fn add(self: Mat, gloc: *GpuAllocator, other: Mat) !Mat { return result; } -/// Element-wise multiply by scalar. Returns new Mat (caller owns). 
pub fn scale(self: Mat, gloc: *GpuAllocator, scalar: f32) !Mat { const result = try Mat.zeros(gloc, self.rows, self.cols); errdefer result.deinit(); @@ -74,52 +70,46 @@ pub fn scale(self: Mat, gloc: *GpuAllocator, scalar: f32) !Mat { const bytes = self.byteSize(); const n = self.len(); - // Upload scalar as uniform buffer - const uni_buf = try gloc.makeBuffer( + const uni_buf = try GpuBuffer.init( + gloc, @sizeOf(f32), c.WGPUBufferUsage_Uniform | c.WGPUBufferUsage_CopyDst, ); - defer c.wgpuBufferRelease(uni_buf); - c.wgpuQueueWriteBuffer(gloc.queue, uni_buf, 0, &scalar, @sizeOf(f32)); + defer uni_buf.deinit(); // Gracefully deinitializes locally + + c.wgpuQueueWriteBuffer(gloc.queue, uni_buf.raw, 0, &scalar, @sizeOf(f32)); const pipeline = try gloc.pipScale(); - const bgl = c.wgpuComputePipelineGetBindGroupLayout(pipeline, 0); - defer c.wgpuBindGroupLayoutRelease(bgl); - const entries = [_]c.WGPUBindGroupEntry{ - .{ .binding = 0, .buffer = self.buf, .offset = 0, .size = bytes }, - .{ .binding = 1, .buffer = result.buf, .offset = 0, .size = bytes }, - .{ .binding = 2, .buffer = uni_buf, .offset = 0, .size = @sizeOf(f32) }, + .{ .binding = 0, .buffer = self.buf.raw, .offset = 0, .size = bytes }, + .{ .binding = 1, .buffer = result.buf.raw, .offset = 0, .size = bytes }, + .{ .binding = 2, .buffer = uni_buf.raw, .offset = 0, .size = @sizeOf(f32) }, }; try submitPass(gloc, pipeline, &entries, n); return result; } -/// Read GPU buffer back to CPU. `out.len` must be >= rows*cols. 
pub fn read(self: Mat, gloc: *GpuAllocator, out: []f32) !void { std.debug.assert(out.len >= self.len()); const bytes = self.byteSize(); - const staging = try gloc.makeBuffer( + const staging = try GpuBuffer.init( + gloc, bytes, c.WGPUBufferUsage_MapRead | c.WGPUBufferUsage_CopyDst, ); - defer c.wgpuBufferRelease(staging); + defer staging.deinit(); - // Copy result → staging - const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device, null) orelse - return error.Encoder; - c.wgpuCommandEncoderCopyBufferToBuffer(enc, self.buf, 0, staging, 0, bytes); + const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device, null) orelse return error.Encoder; + c.wgpuCommandEncoderCopyBufferToBuffer(enc, self.buf.raw, 0, staging.raw, 0, bytes); const cmd = c.wgpuCommandEncoderFinish(enc, null); defer c.wgpuCommandEncoderRelease(enc); defer c.wgpuCommandBufferRelease(cmd); c.wgpuQueueSubmit(gloc.queue, 1, &cmd); - // Map and copy to slice var mapped = false; - _ = c.wgpuBufferMapAsync( - staging, + staging.mapAsync( c.WGPUMapMode_Read, 0, bytes, @@ -128,10 +118,10 @@ pub fn read(self: Mat, gloc: *GpuAllocator, out: []f32) !void { while (!mapped) gloc.poll(); const ptr: [*]const f32 = @ptrCast(@alignCast( - c.wgpuBufferGetConstMappedRange(staging, 0, bytes), + staging.getConstMappedRange(0, bytes), )); @memcpy(out[0..self.len()], ptr[0..self.len()]); - c.wgpuBufferUnmap(staging); + staging.unmap(); } fn onMapped( @@ -150,9 +140,9 @@ fn onMapped( fn dispatch2in1out( gloc: *GpuAllocator, pipeline: c.WGPUComputePipeline, - buf_a: c.WGPUBuffer, - buf_b: c.WGPUBuffer, - buf_out: c.WGPUBuffer, + buf_a: GpuBuffer, + buf_b: GpuBuffer, + buf_out: GpuBuffer, bytes: u64, n: u32, ) !void { @@ -160,9 +150,9 @@ fn dispatch2in1out( defer c.wgpuBindGroupLayoutRelease(bgl); const entries = [_]c.WGPUBindGroupEntry{ - .{ .binding = 0, .buffer = buf_a, .offset = 0, .size = bytes }, - .{ .binding = 1, .buffer = buf_b, .offset = 0, .size = bytes }, - .{ .binding = 2, .buffer = buf_out, .offset = 0, .size = 
bytes }, + .{ .binding = 0, .buffer = buf_a.raw, .offset = 0, .size = bytes }, + .{ .binding = 1, .buffer = buf_b.raw, .offset = 0, .size = bytes }, + .{ .binding = 2, .buffer = buf_out.raw, .offset = 0, .size = bytes }, }; try submitPass(gloc, pipeline, &entries, n); } diff --git a/src/main.zig b/src/main.zig index dff596c..0fdd20e 100644 --- a/src/main.zig +++ b/src/main.zig @@ -2,8 +2,8 @@ const std = @import("std"); const GpuAllocator = @import("GpuAllocator.zig"); const Mat = @import("Mat.zig"); -pub fn main() !void { - var gloc = try GpuAllocator.init(); +pub fn main(init: std.process.Init) !void { + var gloc = try GpuAllocator.init(init.gpa); defer gloc.deinit(); // Input data: a[i] = i, b[i] = 15 - i → add should give all 15s