Created a GpuBuffer + an arry in GpuAlloc to deinit all when deinit the alloc

This commit is contained in:
adrien 2026-05-15 16:03:40 +02:00
parent 4725723d42
commit 0da02f60c4
5 changed files with 121 additions and 87 deletions

View File

@ -31,45 +31,7 @@
// `zig build --fetch` can be used to fetch all dependencies of a package, recursively. // `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
// Once all dependencies are fetched, `zig build` no longer requires // Once all dependencies are fetched, `zig build` no longer requires
// internet connectivity. // internet connectivity.
.dependencies = .{ .dependencies = .{},
// See `zig fetch --save <url>` for a command-line interface for adding dependencies.
//.example = .{
// // When updating this field to a new URL, be sure to delete the corresponding
// // `hash`, otherwise you are communicating that you expect to find the old hash at
// // the new URL. If the contents of a URL change this will result in a hash mismatch
// // which will prevent zig from using it.
// .url = "https://example.com/foo.tar.gz",
//
// // This is computed from the file contents of the directory of files that is
// // obtained after fetching `url` and applying the inclusion rules given by
// // `paths`.
// //
// // This field is the source of truth; packages do not come from a `url`; they
// // come from a `hash`. `url` is just one of many possible mirrors for how to
// // obtain a package matching this `hash`.
// //
// // Uses the [multihash](https://multiformats.io/multihash/) format.
// .hash = "...",
//
// // When this is provided, the package is found in a directory relative to the
// // build root. In this case the package's hash is irrelevant and therefore not
// // computed. This field and `url` are mutually exclusive.
// .path = "foo",
//
// // When this is set to `true`, a package is declared to be lazily
// // fetched. This makes the dependency only get fetched if it is
// // actually used.
// .lazy = false,
//},
},
// Specifies the set of files and directories that are included in this package.
// Only files and directories listed here are included in the `hash` that
// is computed for this package. Only files listed here will remain on disk
// when using the zig package manager. As a rule of thumb, one should list
// files required for compilation plus any license(s).
// Paths are relative to the build root. Use the empty string (`""`) to refer to
// the build root itself.
// A directory listed here means that all files within, recursively, are included.
.paths = .{ .paths = .{
"build.zig", "build.zig",
"build.zig.zon", "build.zig.zon",

View File

@ -4,16 +4,19 @@ const c = @import("c.zig").c;
const GpuAllocator = @This(); const GpuAllocator = @This();
cpu_allocator: std.mem.Allocator,
instance: c.WGPUInstance, instance: c.WGPUInstance,
adapter: c.WGPUAdapter, adapter: c.WGPUAdapter,
device: c.WGPUDevice, device: c.WGPUDevice,
queue: c.WGPUQueue, queue: c.WGPUQueue,
tracked_buffers: std.AutoHashMap(c.WGPUBuffer, void),
// Lazily created, cached for lifetime of allocator // Lazily created, cached for lifetime of allocator
_pip_add: c.WGPUComputePipeline = null, _pip_add: c.WGPUComputePipeline = null,
_pip_scale: c.WGPUComputePipeline = null, _pip_scale: c.WGPUComputePipeline = null,
pub fn init() !GpuAllocator { pub fn init(cpu_allocator: std.mem.Allocator) !GpuAllocator {
const instance = c.wgpuCreateInstance( const instance = c.wgpuCreateInstance(
&std.mem.zeroes(c.WGPUInstanceDescriptor), &std.mem.zeroes(c.WGPUInstanceDescriptor),
) orelse return error.NoInstance; ) orelse return error.NoInstance;
@ -38,22 +41,54 @@ pub fn init() !GpuAllocator {
const device = ctx.device orelse return error.NoDevice; const device = ctx.device orelse return error.NoDevice;
return .{ return .{
.cpu_allocator = cpu_allocator,
.instance = instance, .instance = instance,
.adapter = adapter, .adapter = adapter,
.device = device, .device = device,
.queue = c.wgpuDeviceGetQueue(device), .queue = c.wgpuDeviceGetQueue(device),
.tracked_buffers = .init(cpu_allocator),
}; };
} }
pub fn deinit(self: *GpuAllocator) void { pub fn deinit(self: *GpuAllocator) void {
if (self._pip_add) |p| c.wgpuComputePipelineRelease(p); if (self._pip_add) |p| c.wgpuComputePipelineRelease(p);
if (self._pip_scale) |p| c.wgpuComputePipelineRelease(p); if (self._pip_scale) |p| c.wgpuComputePipelineRelease(p);
var it = self.tracked_buffers.keyIterator();
while (it.next()) |buf_ptr| {
const buf = buf_ptr.*;
c.wgpuBufferDestroy(buf);
c.wgpuBufferRelease(buf);
}
self.tracked_buffers.deinit();
c.wgpuQueueRelease(self.queue); c.wgpuQueueRelease(self.queue);
c.wgpuDeviceRelease(self.device); c.wgpuDeviceRelease(self.device);
c.wgpuAdapterRelease(self.adapter); c.wgpuAdapterRelease(self.adapter);
c.wgpuInstanceRelease(self.instance); c.wgpuInstanceRelease(self.instance);
} }
pub fn registerBuffer(
self: *GpuAllocator,
bytes: u64,
usage: c.WGPUBufferUsage,
) !c.WGPUBuffer {
const buf = c.wgpuDeviceCreateBuffer(self.device, &.{
.usage = usage,
.size = bytes,
}) orelse return error.BufferAlloc;
try self.tracked_buffers.put(buf, {});
return buf;
}
pub fn unregisterAndDestroyBuffer(self: *GpuAllocator, buf: c.WGPUBuffer) void {
if (self.tracked_buffers.remove(buf)) {
c.wgpuBufferDestroy(buf);
c.wgpuBufferRelease(buf);
}
}
// Internal // Internal
pub fn makeBuffer( pub fn makeBuffer(

47
src/GpuBuffer.zig Normal file
View File

@ -0,0 +1,47 @@
const std = @import("std");
const c = @import("c.zig").c;
const GpuAllocator = @import("GpuAllocator.zig");
const GpuBuffer = @This();
raw: c.WGPUBuffer,
size: u64,
usage: c.WGPUBufferUsage,
gloc: *GpuAllocator,
/// Allocates the underlying WebGPU handle and registers it to the parent GpuAllocator
pub fn init(gloc: *GpuAllocator, bytes: u64, usage: c.WGPUBufferUsage) !GpuBuffer {
const raw_handle = try gloc.registerBuffer(bytes, usage);
return .{
.raw = raw_handle,
.size = bytes,
.usage = usage,
.gloc = gloc,
};
}
/// Unregisters from the parent GpuAllocator and cleanly destroys GPU resources
pub fn deinit(self: GpuBuffer) void {
self.gloc.unregisterAndDestroyBuffer(self.raw);
}
/// Native mapAsync wrapper
pub fn mapAsync(
self: GpuBuffer,
mode: c.WGPUMapMode,
offset: u64,
size: u64,
callback_info: c.WGPUBufferMapCallbackInfo,
) void {
_ = c.wgpuBufferMapAsync(self.raw, mode, offset, size, callback_info);
}
/// Native getConstMappedRange wrapper
pub fn getConstMappedRange(self: GpuBuffer, offset: u64, size: u64) ?*const anyopaque {
return c.wgpuBufferGetConstMappedRange(self.raw, offset, size);
}
/// Native unmap wrapper
pub fn unmap(self: GpuBuffer) void {
c.wgpuBufferUnmap(self.raw);
}

View File

@ -1,16 +1,14 @@
const std = @import("std"); const std = @import("std");
const c = @import("c.zig").c; const c = @import("c.zig").c;
const GpuAllocator = @import("GpuAllocator.zig"); const GpuAllocator = @import("GpuAllocator.zig");
const GpuBuffer = @import("GpuBuffer.zig");
const Mat = @This(); const Mat = @This();
buf: c.WGPUBuffer, buf: GpuBuffer,
rows: u32, rows: u32,
cols: u32, cols: u32,
// Lifecycle
/// Allocate GPU buffer and upload `data`. `data.len` must equal rows*cols.
pub fn load( pub fn load(
gloc: *GpuAllocator, gloc: *GpuAllocator,
data: []const f32, data: []const f32,
@ -19,30 +17,30 @@ pub fn load(
) !Mat { ) !Mat {
std.debug.assert(data.len == @as(usize, rows) * cols); std.debug.assert(data.len == @as(usize, rows) * cols);
const bytes = data.len * @sizeOf(f32); const bytes = data.len * @sizeOf(f32);
const buf = try gloc.makeBuffer(
// Uses structural constructor initialization
const buf = try GpuBuffer.init(
gloc,
bytes, bytes,
c.WGPUBufferUsage_Storage | c.WGPUBufferUsage_Storage | c.WGPUBufferUsage_CopyDst | c.WGPUBufferUsage_CopySrc,
c.WGPUBufferUsage_CopyDst |
c.WGPUBufferUsage_CopySrc,
); );
c.wgpuQueueWriteBuffer(gloc.queue, buf, 0, data.ptr, bytes);
c.wgpuQueueWriteBuffer(gloc.queue, buf.raw, 0, data.ptr, bytes);
return .{ .buf = buf, .rows = rows, .cols = cols }; return .{ .buf = buf, .rows = rows, .cols = cols };
} }
/// Allocate zeroed GPU buffer (no upload).
pub fn zeros(gloc: *GpuAllocator, rows: u32, cols: u32) !Mat { pub fn zeros(gloc: *GpuAllocator, rows: u32, cols: u32) !Mat {
const bytes: u64 = @as(u64, rows) * cols * @sizeOf(f32); const bytes: u64 = @as(u64, rows) * cols * @sizeOf(f32);
const buf = try gloc.makeBuffer( const buf = try GpuBuffer.init(
gloc,
bytes, bytes,
c.WGPUBufferUsage_Storage | c.WGPUBufferUsage_Storage | c.WGPUBufferUsage_CopyDst | c.WGPUBufferUsage_CopySrc,
c.WGPUBufferUsage_CopyDst |
c.WGPUBufferUsage_CopySrc,
); );
return .{ .buf = buf, .rows = rows, .cols = cols }; return .{ .buf = buf, .rows = rows, .cols = cols };
} }
pub fn deinit(self: Mat) void { pub fn deinit(self: Mat) void {
c.wgpuBufferRelease(self.buf); self.buf.deinit(); // Automatically cleans tracking map & releases GPU memory
} }
pub fn len(self: Mat) u32 { pub fn len(self: Mat) u32 {
@ -53,7 +51,6 @@ pub fn byteSize(self: Mat) u64 {
return @as(u64, self.len()) * @sizeOf(f32); return @as(u64, self.len()) * @sizeOf(f32);
} }
/// Element-wise add. Shapes must match. Returns new Mat (caller owns).
pub fn add(self: Mat, gloc: *GpuAllocator, other: Mat) !Mat { pub fn add(self: Mat, gloc: *GpuAllocator, other: Mat) !Mat {
std.debug.assert(self.rows == other.rows and self.cols == other.cols); std.debug.assert(self.rows == other.rows and self.cols == other.cols);
@ -66,7 +63,6 @@ pub fn add(self: Mat, gloc: *GpuAllocator, other: Mat) !Mat {
return result; return result;
} }
/// Element-wise multiply by scalar. Returns new Mat (caller owns).
pub fn scale(self: Mat, gloc: *GpuAllocator, scalar: f32) !Mat { pub fn scale(self: Mat, gloc: *GpuAllocator, scalar: f32) !Mat {
const result = try Mat.zeros(gloc, self.rows, self.cols); const result = try Mat.zeros(gloc, self.rows, self.cols);
errdefer result.deinit(); errdefer result.deinit();
@ -74,52 +70,46 @@ pub fn scale(self: Mat, gloc: *GpuAllocator, scalar: f32) !Mat {
const bytes = self.byteSize(); const bytes = self.byteSize();
const n = self.len(); const n = self.len();
// Upload scalar as uniform buffer const uni_buf = try GpuBuffer.init(
const uni_buf = try gloc.makeBuffer( gloc,
@sizeOf(f32), @sizeOf(f32),
c.WGPUBufferUsage_Uniform | c.WGPUBufferUsage_CopyDst, c.WGPUBufferUsage_Uniform | c.WGPUBufferUsage_CopyDst,
); );
defer c.wgpuBufferRelease(uni_buf); defer uni_buf.deinit(); // Gracefully deinitializes locally
c.wgpuQueueWriteBuffer(gloc.queue, uni_buf, 0, &scalar, @sizeOf(f32));
c.wgpuQueueWriteBuffer(gloc.queue, uni_buf.raw, 0, &scalar, @sizeOf(f32));
const pipeline = try gloc.pipScale(); const pipeline = try gloc.pipScale();
const bgl = c.wgpuComputePipelineGetBindGroupLayout(pipeline, 0);
defer c.wgpuBindGroupLayoutRelease(bgl);
const entries = [_]c.WGPUBindGroupEntry{ const entries = [_]c.WGPUBindGroupEntry{
.{ .binding = 0, .buffer = self.buf, .offset = 0, .size = bytes }, .{ .binding = 0, .buffer = self.buf.raw, .offset = 0, .size = bytes },
.{ .binding = 1, .buffer = result.buf, .offset = 0, .size = bytes }, .{ .binding = 1, .buffer = result.buf.raw, .offset = 0, .size = bytes },
.{ .binding = 2, .buffer = uni_buf, .offset = 0, .size = @sizeOf(f32) }, .{ .binding = 2, .buffer = uni_buf.raw, .offset = 0, .size = @sizeOf(f32) },
}; };
try submitPass(gloc, pipeline, &entries, n); try submitPass(gloc, pipeline, &entries, n);
return result; return result;
} }
/// Read GPU buffer back to CPU. `out.len` must be >= rows*cols.
pub fn read(self: Mat, gloc: *GpuAllocator, out: []f32) !void { pub fn read(self: Mat, gloc: *GpuAllocator, out: []f32) !void {
std.debug.assert(out.len >= self.len()); std.debug.assert(out.len >= self.len());
const bytes = self.byteSize(); const bytes = self.byteSize();
const staging = try gloc.makeBuffer( const staging = try GpuBuffer.init(
gloc,
bytes, bytes,
c.WGPUBufferUsage_MapRead | c.WGPUBufferUsage_CopyDst, c.WGPUBufferUsage_MapRead | c.WGPUBufferUsage_CopyDst,
); );
defer c.wgpuBufferRelease(staging); defer staging.deinit();
// Copy result staging const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device, null) orelse return error.Encoder;
const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device, null) orelse c.wgpuCommandEncoderCopyBufferToBuffer(enc, self.buf.raw, 0, staging.raw, 0, bytes);
return error.Encoder;
c.wgpuCommandEncoderCopyBufferToBuffer(enc, self.buf, 0, staging, 0, bytes);
const cmd = c.wgpuCommandEncoderFinish(enc, null); const cmd = c.wgpuCommandEncoderFinish(enc, null);
defer c.wgpuCommandEncoderRelease(enc); defer c.wgpuCommandEncoderRelease(enc);
defer c.wgpuCommandBufferRelease(cmd); defer c.wgpuCommandBufferRelease(cmd);
c.wgpuQueueSubmit(gloc.queue, 1, &cmd); c.wgpuQueueSubmit(gloc.queue, 1, &cmd);
// Map and copy to slice
var mapped = false; var mapped = false;
_ = c.wgpuBufferMapAsync( staging.mapAsync(
staging,
c.WGPUMapMode_Read, c.WGPUMapMode_Read,
0, 0,
bytes, bytes,
@ -128,10 +118,10 @@ pub fn read(self: Mat, gloc: *GpuAllocator, out: []f32) !void {
while (!mapped) gloc.poll(); while (!mapped) gloc.poll();
const ptr: [*]const f32 = @ptrCast(@alignCast( const ptr: [*]const f32 = @ptrCast(@alignCast(
c.wgpuBufferGetConstMappedRange(staging, 0, bytes), staging.getConstMappedRange(0, bytes),
)); ));
@memcpy(out[0..self.len()], ptr[0..self.len()]); @memcpy(out[0..self.len()], ptr[0..self.len()]);
c.wgpuBufferUnmap(staging); staging.unmap();
} }
fn onMapped( fn onMapped(
@ -150,9 +140,9 @@ fn onMapped(
fn dispatch2in1out( fn dispatch2in1out(
gloc: *GpuAllocator, gloc: *GpuAllocator,
pipeline: c.WGPUComputePipeline, pipeline: c.WGPUComputePipeline,
buf_a: c.WGPUBuffer, buf_a: GpuBuffer,
buf_b: c.WGPUBuffer, buf_b: GpuBuffer,
buf_out: c.WGPUBuffer, buf_out: GpuBuffer,
bytes: u64, bytes: u64,
n: u32, n: u32,
) !void { ) !void {
@ -160,9 +150,9 @@ fn dispatch2in1out(
defer c.wgpuBindGroupLayoutRelease(bgl); defer c.wgpuBindGroupLayoutRelease(bgl);
const entries = [_]c.WGPUBindGroupEntry{ const entries = [_]c.WGPUBindGroupEntry{
.{ .binding = 0, .buffer = buf_a, .offset = 0, .size = bytes }, .{ .binding = 0, .buffer = buf_a.raw, .offset = 0, .size = bytes },
.{ .binding = 1, .buffer = buf_b, .offset = 0, .size = bytes }, .{ .binding = 1, .buffer = buf_b.raw, .offset = 0, .size = bytes },
.{ .binding = 2, .buffer = buf_out, .offset = 0, .size = bytes }, .{ .binding = 2, .buffer = buf_out.raw, .offset = 0, .size = bytes },
}; };
try submitPass(gloc, pipeline, &entries, n); try submitPass(gloc, pipeline, &entries, n);
} }

View File

@ -2,8 +2,8 @@ const std = @import("std");
const GpuAllocator = @import("GpuAllocator.zig"); const GpuAllocator = @import("GpuAllocator.zig");
const Mat = @import("Mat.zig"); const Mat = @import("Mat.zig");
pub fn main() !void { pub fn main(init: std.process.Init) !void {
var gloc = try GpuAllocator.init(); var gloc = try GpuAllocator.init(init.gpa);
defer gloc.deinit(); defer gloc.deinit();
// Input data: a[i] = i, b[i] = 15 - i add should give all 15s // Input data: a[i] = i, b[i] = 15 - i add should give all 15s