From 0da02f60c45f84988eb3ee3bbc53b85e39039c59 Mon Sep 17 00:00:00 2001
From: adrien <adrien@bouvais.lu>
Date: Fri, 15 May 2026 16:03:40 +0200
Subject: [PATCH] Created a GpuBuffer + an array in GpuAlloc to deinit all
 buffers when the alloc is deinitialized

---
 build.zig.zon        | 40 +---------------------
 src/GpuAllocator.zig | 37 +++++++++++++++++++-
 src/GpuBuffer.zig    | 47 ++++++++++++++++++++++++++
 src/Mat.zig          | 80 +++++++++++++++++++-------------------------
 src/main.zig         |  4 +--
 5 files changed, 121 insertions(+), 87 deletions(-)
 create mode 100644 src/GpuBuffer.zig
diff --git a/build.zig.zon b/build.zig.zon
index 14a8524..45b4fed 100644
--- a/build.zig.zon
+++ b/build.zig.zon
@@ -31,45 +31,7 @@
     // `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
     // Once all dependencies are fetched, `zig build` no longer requires
     // internet connectivity.
-    .dependencies = .{
-        // See `zig fetch --save <url>` for a command-line interface for adding dependencies.
-        //.example = .{
-        //    // When updating this field to a new URL, be sure to delete the corresponding
-        //    // `hash`, otherwise you are communicating that you expect to find the old hash at
-        //    // the new URL. If the contents of a URL change this will result in a hash mismatch
-        //    // which will prevent zig from using it.
-        //    .url = "https://example.com/foo.tar.gz",
-        //
-        //    // This is computed from the file contents of the directory of files that is
-        //    // obtained after fetching `url` and applying the inclusion rules given by
-        //    // `paths`.
-        //    //
-        //    // This field is the source of truth; packages do not come from a `url`; they
-        //    // come from a `hash`. `url` is just one of many possible mirrors for how to
-        //    // obtain a package matching this `hash`.
-        //    //
-        //    // Uses the [multihash](https://multiformats.io/multihash/) format.
-        //    .hash = "...",
-        //
-        //    // When this is provided, the package is found in a directory relative to the
-        //    // build root. In this case the package's hash is irrelevant and therefore not
-        //    // computed. This field and `url` are mutually exclusive.
-        //    .path = "foo",
-        //
-        //    // When this is set to `true`, a package is declared to be lazily
-        //    // fetched. This makes the dependency only get fetched if it is
-        //    // actually used.
-        //    .lazy = false,
-        //},
-    },
-    // Specifies the set of files and directories that are included in this package.
-    // Only files and directories listed here are included in the `hash` that
-    // is computed for this package. Only files listed here will remain on disk
-    // when using the zig package manager. As a rule of thumb, one should list
-    // files required for compilation plus any license(s).
-    // Paths are relative to the build root. Use the empty string (`""`) to refer to
-    // the build root itself.
-    // A directory listed here means that all files within, recursively, are included.
+    .dependencies = .{},
     .paths = .{
         "build.zig",
         "build.zig.zon",
diff --git a/src/GpuAllocator.zig b/src/GpuAllocator.zig
index 29bf741..3ace41c 100644
--- a/src/GpuAllocator.zig
+++ b/src/GpuAllocator.zig
@@ -4,16 +4,19 @@ const c = @import("c.zig").c;
 
 const GpuAllocator = @This();
 
+cpu_allocator: std.mem.Allocator,
 instance: c.WGPUInstance,
 adapter: c.WGPUAdapter,
 device: c.WGPUDevice,
 queue: c.WGPUQueue,
 
+tracked_buffers: std.AutoHashMap(c.WGPUBuffer, void),
+
 // Lazily created, cached for lifetime of allocator
 _pip_add: c.WGPUComputePipeline = null,
 _pip_scale: c.WGPUComputePipeline = null,
 
-pub fn init() !GpuAllocator {
+pub fn init(cpu_allocator: std.mem.Allocator) !GpuAllocator {
     const instance = c.wgpuCreateInstance(
         &std.mem.zeroes(c.WGPUInstanceDescriptor),
     ) orelse return error.NoInstance;
@@ -38,22 +41,54 @@ pub fn init() !GpuAllocator {
     const device = ctx.device orelse return error.NoDevice;
 
     return .{
+        .cpu_allocator = cpu_allocator,
         .instance = instance,
         .adapter = adapter,
         .device = device,
         .queue = c.wgpuDeviceGetQueue(device),
+        .tracked_buffers = .init(cpu_allocator),
     };
 }
 
 pub fn deinit(self: *GpuAllocator) void {
     if (self._pip_add) |p| c.wgpuComputePipelineRelease(p);
     if (self._pip_scale) |p| c.wgpuComputePipelineRelease(p);
+
+    var it = self.tracked_buffers.keyIterator();
+    while (it.next()) |buf_ptr| {
+        const buf = buf_ptr.*;
+        c.wgpuBufferDestroy(buf);
+        c.wgpuBufferRelease(buf);
+    }
+    self.tracked_buffers.deinit();
+
     c.wgpuQueueRelease(self.queue);
     c.wgpuDeviceRelease(self.device);
     c.wgpuAdapterRelease(self.adapter);
     c.wgpuInstanceRelease(self.instance);
 }
 
+/// Creates a `bytes`-sized device buffer with `usage` and records it in `tracked_buffers`.
+/// Fails with `error.BufferAlloc` if creation fails, or with the map's allocation error.
+pub fn registerBuffer(self: *GpuAllocator, bytes: u64, usage: c.WGPUBufferUsage) !c.WGPUBuffer {
+    const buf = c.wgpuDeviceCreateBuffer(self.device, &.{
+        .usage = usage,
+        .size = bytes,
+    }) orelse return error.BufferAlloc;
+    // If tracking fails below, release the handle so the buffer is not leaked.
+    errdefer c.wgpuBufferRelease(buf);
+
+    try self.tracked_buffers.put(buf, {});
+    return buf;
+}
+
+/// Destroys and releases `buf` if it is in `tracked_buffers`; untracked handles are ignored.
+pub fn unregisterAndDestroyBuffer(self: *GpuAllocator, buf: c.WGPUBuffer) void {
+    if (!self.tracked_buffers.remove(buf)) return;
+    c.wgpuBufferDestroy(buf);
+    c.wgpuBufferRelease(buf);
+}
+
 // ── Internal ─────────────────────────────────────────────────────────────
 
 pub fn makeBuffer(
diff --git a/src/GpuBuffer.zig b/src/GpuBuffer.zig
new file mode 100644
index 0000000..90aeae7
--- /dev/null
+++ b/src/GpuBuffer.zig
@@ -0,0 +1,47 @@
+const std = @import("std");
+const c = @import("c.zig").c;
+const GpuAllocator = @import("GpuAllocator.zig");
+
+const GpuBuffer = @This();
+
+raw: c.WGPUBuffer,
+size: u64,
+usage: c.WGPUBufferUsage,
+gloc: *GpuAllocator,
+
+/// Creates a `bytes`-sized buffer with `usage` through `gloc.registerBuffer` and
+/// wraps the returned handle; any error from `registerBuffer` is propagated.
+pub fn init(gloc: *GpuAllocator, bytes: u64, usage: c.WGPUBufferUsage) !GpuBuffer {
+    return .{
+        .gloc = gloc,
+        .usage = usage,
+        .size = bytes,
+        .raw = try gloc.registerBuffer(bytes, usage),
+    };
+}
+
+/// Passes `raw` to `gloc.unregisterAndDestroyBuffer`, which destroys and releases it if still tracked.
+pub fn deinit(self: GpuBuffer) void {
+    self.gloc.unregisterAndDestroyBuffer(self.raw);
+}
+
+/// Forwards to `wgpuBufferMapAsync` on `raw`; the value returned by that call is discarded.
+pub fn mapAsync(
+    self: GpuBuffer,
+    mode: c.WGPUMapMode,
+    offset: u64,
+    size: u64,
+    callback_info: c.WGPUBufferMapCallbackInfo,
+) void {
+    _ = c.wgpuBufferMapAsync(self.raw, mode, offset, size, callback_info);
+}
+
+/// Forwards to `wgpuBufferGetConstMappedRange` on `raw`; the returned pointer is optional.
+pub fn getConstMappedRange(self: GpuBuffer, offset: u64, size: u64) ?*const anyopaque {
+    return c.wgpuBufferGetConstMappedRange(self.raw, offset, size);
+}
+
+/// Forwards to `wgpuBufferUnmap` on `raw`.
+pub fn unmap(self: GpuBuffer) void {
+    c.wgpuBufferUnmap(self.raw);
+}
diff --git a/src/Mat.zig b/src/Mat.zig
index 36e2b5a..0e186ca 100644
--- a/src/Mat.zig
+++ b/src/Mat.zig
@@ -1,16 +1,14 @@
 const std = @import("std");
 const c = @import("c.zig").c;
 const GpuAllocator = @import("GpuAllocator.zig");
+const GpuBuffer = @import("GpuBuffer.zig");
 
 const Mat = @This();
 
-buf: c.WGPUBuffer,
+buf: GpuBuffer,
 rows: u32,
 cols: u32,
 
-// ── Lifecycle ─────────────────────────────────────────────────────────────
-
-/// Allocate GPU buffer and upload `data`. `data.len` must equal rows*cols.
 pub fn load(
     gloc: *GpuAllocator,
     data: []const f32,
@@ -19,30 +17,30 @@ pub fn load(
 ) !Mat {
     std.debug.assert(data.len == @as(usize, rows) * cols);
     const bytes = data.len * @sizeOf(f32);
-    const buf = try gloc.makeBuffer(
+
+    // Uses structural constructor initialization
+    const buf = try GpuBuffer.init(
+        gloc,
         bytes,
-        c.WGPUBufferUsage_Storage |
-            c.WGPUBufferUsage_CopyDst |
-            c.WGPUBufferUsage_CopySrc,
+        c.WGPUBufferUsage_Storage | c.WGPUBufferUsage_CopyDst | c.WGPUBufferUsage_CopySrc,
     );
-    c.wgpuQueueWriteBuffer(gloc.queue, buf, 0, data.ptr, bytes);
+
+    c.wgpuQueueWriteBuffer(gloc.queue, buf.raw, 0, data.ptr, bytes);
     return .{ .buf = buf, .rows = rows, .cols = cols };
 }
 
-/// Allocate zeroed GPU buffer (no upload).
 pub fn zeros(gloc: *GpuAllocator, rows: u32, cols: u32) !Mat {
     const bytes: u64 = @as(u64, rows) * cols * @sizeOf(f32);
-    const buf = try gloc.makeBuffer(
+    const buf = try GpuBuffer.init(
+        gloc,
         bytes,
-        c.WGPUBufferUsage_Storage |
-            c.WGPUBufferUsage_CopyDst |
-            c.WGPUBufferUsage_CopySrc,
+        c.WGPUBufferUsage_Storage | c.WGPUBufferUsage_CopyDst | c.WGPUBufferUsage_CopySrc,
     );
     return .{ .buf = buf, .rows = rows, .cols = cols };
 }
 
 pub fn deinit(self: Mat) void {
-    c.wgpuBufferRelease(self.buf);
+    self.buf.deinit(); // Automatically cleans tracking map & releases GPU memory
 }
 
 pub fn len(self: Mat) u32 {
@@ -53,7 +51,6 @@ pub fn byteSize(self: Mat) u64 {
     return @as(u64, self.len()) * @sizeOf(f32);
 }
 
-/// Element-wise add. Shapes must match. Returns new Mat (caller owns).
 pub fn add(self: Mat, gloc: *GpuAllocator, other: Mat) !Mat {
     std.debug.assert(self.rows == other.rows and self.cols == other.cols);
 
@@ -66,7 +63,6 @@ pub fn add(self: Mat, gloc: *GpuAllocator, other: Mat) !Mat {
     return result;
 }
 
-/// Element-wise multiply by scalar. Returns new Mat (caller owns).
 pub fn scale(self: Mat, gloc: *GpuAllocator, scalar: f32) !Mat {
     const result = try Mat.zeros(gloc, self.rows, self.cols);
     errdefer result.deinit();
@@ -74,52 +70,46 @@ pub fn scale(self: Mat, gloc: *GpuAllocator, scalar: f32) !Mat {
     const bytes = self.byteSize();
     const n = self.len();
 
-    // Upload scalar as uniform buffer
-    const uni_buf = try gloc.makeBuffer(
+    const uni_buf = try GpuBuffer.init(
+        gloc,
         @sizeOf(f32),
         c.WGPUBufferUsage_Uniform | c.WGPUBufferUsage_CopyDst,
     );
-    defer c.wgpuBufferRelease(uni_buf);
-    c.wgpuQueueWriteBuffer(gloc.queue, uni_buf, 0, &scalar, @sizeOf(f32));
+    defer uni_buf.deinit(); // Gracefully deinitializes locally
+
+    c.wgpuQueueWriteBuffer(gloc.queue, uni_buf.raw, 0, &scalar, @sizeOf(f32));
 
     const pipeline = try gloc.pipScale();
-    const bgl = c.wgpuComputePipelineGetBindGroupLayout(pipeline, 0);
-    defer c.wgpuBindGroupLayoutRelease(bgl);
-
     const entries = [_]c.WGPUBindGroupEntry{
-        .{ .binding = 0, .buffer = self.buf, .offset = 0, .size = bytes },
-        .{ .binding = 1, .buffer = result.buf, .offset = 0, .size = bytes },
-        .{ .binding = 2, .buffer = uni_buf, .offset = 0, .size = @sizeOf(f32) },
+        .{ .binding = 0, .buffer = self.buf.raw, .offset = 0, .size = bytes },
+        .{ .binding = 1, .buffer = result.buf.raw, .offset = 0, .size = bytes },
+        .{ .binding = 2, .buffer = uni_buf.raw, .offset = 0, .size = @sizeOf(f32) },
     };
     try submitPass(gloc, pipeline, &entries, n);
 
     return result;
 }
 
-/// Read GPU buffer back to CPU. `out.len` must be >= rows*cols.
 pub fn read(self: Mat, gloc: *GpuAllocator, out: []f32) !void {
     std.debug.assert(out.len >= self.len());
     const bytes = self.byteSize();
 
-    const staging = try gloc.makeBuffer(
+    const staging = try GpuBuffer.init(
+        gloc,
         bytes,
         c.WGPUBufferUsage_MapRead | c.WGPUBufferUsage_CopyDst,
     );
-    defer c.wgpuBufferRelease(staging);
+    defer staging.deinit();
 
-    // Copy result → staging
-    const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device, null) orelse
-        return error.Encoder;
-    c.wgpuCommandEncoderCopyBufferToBuffer(enc, self.buf, 0, staging, 0, bytes);
+    const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device, null) orelse return error.Encoder;
+    c.wgpuCommandEncoderCopyBufferToBuffer(enc, self.buf.raw, 0, staging.raw, 0, bytes);
     const cmd = c.wgpuCommandEncoderFinish(enc, null);
     defer c.wgpuCommandEncoderRelease(enc);
     defer c.wgpuCommandBufferRelease(cmd);
     c.wgpuQueueSubmit(gloc.queue, 1, &cmd);
 
-    // Map and copy to slice
     var mapped = false;
-    _ = c.wgpuBufferMapAsync(
-        staging,
+    staging.mapAsync(
         c.WGPUMapMode_Read,
         0,
         bytes,
@@ -128,10 +118,10 @@ pub fn read(self: Mat, gloc: *GpuAllocator, out: []f32) !void {
     while (!mapped) gloc.poll();
 
     const ptr: [*]const f32 = @ptrCast(@alignCast(
-        c.wgpuBufferGetConstMappedRange(staging, 0, bytes),
+        staging.getConstMappedRange(0, bytes),
     ));
     @memcpy(out[0..self.len()], ptr[0..self.len()]);
-    c.wgpuBufferUnmap(staging);
+    staging.unmap();
 }
 
 fn onMapped(
@@ -150,9 +140,9 @@ fn onMapped(
 fn dispatch2in1out(
     gloc: *GpuAllocator,
     pipeline: c.WGPUComputePipeline,
-    buf_a: c.WGPUBuffer,
-    buf_b: c.WGPUBuffer,
-    buf_out: c.WGPUBuffer,
+    buf_a: GpuBuffer,
+    buf_b: GpuBuffer,
+    buf_out: GpuBuffer,
     bytes: u64,
     n: u32,
 ) !void {
@@ -160,9 +150,9 @@ fn dispatch2in1out(
     defer c.wgpuBindGroupLayoutRelease(bgl);
 
     const entries = [_]c.WGPUBindGroupEntry{
-        .{ .binding = 0, .buffer = buf_a, .offset = 0, .size = bytes },
-        .{ .binding = 1, .buffer = buf_b, .offset = 0, .size = bytes },
-        .{ .binding = 2, .buffer = buf_out, .offset = 0, .size = bytes },
+        .{ .binding = 0, .buffer = buf_a.raw, .offset = 0, .size = bytes },
+        .{ .binding = 1, .buffer = buf_b.raw, .offset = 0, .size = bytes },
+        .{ .binding = 2, .buffer = buf_out.raw, .offset = 0, .size = bytes },
     };
     try submitPass(gloc, pipeline, &entries, n);
 }
diff --git a/src/main.zig b/src/main.zig
index dff596c..0fdd20e 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -2,8 +2,8 @@ const std = @import("std");
 const GpuAllocator = @import("GpuAllocator.zig");
 const Mat = @import("Mat.zig");
 
-pub fn main() !void {
-    var gloc = try GpuAllocator.init();
+pub fn main(init: std.process.Init) !void {
+    var gloc = try GpuAllocator.init(init.gpa);
     defer gloc.deinit();
 
     // Input data: a[i] = i, b[i] = 15 - i  →  add should give all 15s