From f5daf66784f1bd3a5867866b192e56ef08e07dd9 Mon Sep 17 00:00:00 2001 From: adrien Date: Mon, 18 May 2026 14:07:28 +0200 Subject: [PATCH] Change GpuAllocator to work like std.mem.Allocator GpuAllocator is now a type-erased interface made of two pointers (a context pointer and a vtable pointer) plus the device handle, and buffer tracking moves into a new GpuArena. The point is to follow the Zig convention: a const allocator interface passed by value, backed by a var arena that tracks everything. --- src/GpuAllocator.zig | 64 +++++++++------------------------------ src/GpuArena.zig | 72 ++++++++++++++++++++++++++++++++++++++++++++ src/GpuBuffer.zig | 8 ++--- src/GpuDevice.zig | 2 +- src/Vec.zig | 29 ++++++++---------- src/example.zig | 25 ++++++++------- src/lib.zig | 1 + 7 files changed, 120 insertions(+), 81 deletions(-) create mode 100644 src/GpuArena.zig diff --git a/src/GpuAllocator.zig b/src/GpuAllocator.zig index 017288f..e1678d1 100644 --- a/src/GpuAllocator.zig +++ b/src/GpuAllocator.zig @@ -1,58 +1,24 @@ +// GpuAllocator.zig const std = @import("std"); const GpuDevice = @import("GpuDevice.zig"); -const GpuBuffer = @import("GpuBuffer.zig"); const c = @import("utils.zig").c; +const GpuAllocator = @This(); + +/// The function definitions our underlying implementations must satisfy +pub const VTable = struct { + alloc: *const fn (ctx: *anyopaque, bytes: u64, usage: c.WGPUBufferUsage) anyerror!c.WGPUBuffer, + free: *const fn (ctx: *anyopaque, buf_raw: c.WGPUBuffer, size: u64) void, +}; + device: GpuDevice, -tracked_buffers: std.AutoHashMap(c.WGPUBuffer, void), -allocated_vram_bytes: u64 = 0, +ptr: *anyopaque, +vtable: *const VTable, -pub fn init(cpu_allocator: std.mem.Allocator, device: GpuDevice) !@This() { - return .{ - .device = device, - .tracked_buffers = .init(cpu_allocator), - }; +pub fn allocBuffer(self: GpuAllocator, bytes: u64, usage: c.WGPUBufferUsage) !c.WGPUBuffer { + return self.vtable.alloc(self.ptr, bytes, usage); } -pub fn deinit(self: *@This()) void { - var it = self.tracked_buffers.keyIterator(); - while (it.next()) |buf_ptr| { - const buf = buf_ptr.*; - c.wgpuBufferDestroy(buf); - c.wgpuBufferRelease(buf); - } -
self.tracked_buffers.deinit(); -} - -pub fn registerBuffer( - self: *@This(), - bytes: u64, - usage: c.WGPUBufferUsage, -) !c.WGPUBuffer { - if (bytes > self.device.limits.maxBufferSize) - return error.SingleBufferExceedsLimit; - - if (bytes + self.allocated_vram_bytes > self.device.config.vram_bytes_limit) - return error.ExceedsVramBudget; - - const buf = c.wgpuDeviceCreateBuffer(self.device.device, &.{ - .usage = usage, - .size = bytes, - }) orelse return error.BufferAlloc; - errdefer { - c.wgpuBufferDestroy(buf); - c.wgpuBufferRelease(buf); - } - - try self.tracked_buffers.put(buf, {}); - self.allocated_vram_bytes += bytes; - return buf; -} - -pub fn unregisterAndDestroyBuffer(self: *@This(), buf: GpuBuffer) void { - if (self.tracked_buffers.remove(buf.raw)) { - c.wgpuBufferDestroy(buf.raw); - c.wgpuBufferRelease(buf.raw); - self.allocated_vram_bytes -= buf.size; - } +pub fn freeBuffer(self: GpuAllocator, buf_raw: c.WGPUBuffer, size: u64) void { + self.vtable.free(self.ptr, buf_raw, size); } diff --git a/src/GpuArena.zig b/src/GpuArena.zig new file mode 100644 index 0000000..2043206 --- /dev/null +++ b/src/GpuArena.zig @@ -0,0 +1,72 @@ +// GpuArena.zig +const std = @import("std"); +const GpuDevice = @import("GpuDevice.zig"); +const GpuAllocator = @import("GpuAllocator.zig"); +const c = @import("utils.zig").c; + +const GpuArena = @This(); + +device: GpuDevice, +tracked_buffers: std.AutoHashMap(c.WGPUBuffer, void), +allocated_vram_bytes: u64 = 0, + +pub fn init(cpu_allocator: std.mem.Allocator, device: GpuDevice) GpuArena { + return .{ + .device = device, + .tracked_buffers = .init(cpu_allocator), + }; +} + +pub fn deinit(self: *GpuArena) void { + var it = self.tracked_buffers.keyIterator(); + while (it.next()) |buf_ptr| { + c.wgpuBufferDestroy(buf_ptr.*); + c.wgpuBufferRelease(buf_ptr.*); + } + self.tracked_buffers.deinit(); +} + +/// Returns the type-erased immutable interface wrapper +pub fn gpuAllocator(self: *GpuArena) GpuAllocator { + return .{ + .device = 
self.device, + .ptr = self, + .vtable = &.{ + .alloc = alloc, + .free = free, + }, + }; +} + +fn alloc(ctx: *anyopaque, bytes: u64, usage: c.WGPUBufferUsage) anyerror!c.WGPUBuffer { + const self: *GpuArena = @ptrCast(@alignCast(ctx)); + + if (bytes > self.device.limits.maxBufferSize) + return error.SingleBufferExceedsLimit; + + if (bytes + self.allocated_vram_bytes > self.device.config.vram_bytes_limit) + return error.ExceedsVramBudget; + + const buf = c.wgpuDeviceCreateBuffer(self.device.device, &.{ + .usage = usage, + .size = bytes, + }) orelse return error.BufferAlloc; + errdefer { + c.wgpuBufferDestroy(buf); + c.wgpuBufferRelease(buf); + } + + try self.tracked_buffers.put(buf, {}); + self.allocated_vram_bytes += bytes; + return buf; +} + +fn free(ctx: *anyopaque, buf_raw: c.WGPUBuffer, size: u64) void { + const self: *GpuArena = @ptrCast(@alignCast(ctx)); + + if (self.tracked_buffers.remove(buf_raw)) { + c.wgpuBufferDestroy(buf_raw); + c.wgpuBufferRelease(buf_raw); + self.allocated_vram_bytes -= size; + } +} diff --git a/src/GpuBuffer.zig b/src/GpuBuffer.zig index c52c51d..094507e 100644 --- a/src/GpuBuffer.zig +++ b/src/GpuBuffer.zig @@ -5,7 +5,7 @@ const GpuAllocator = @import("GpuAllocator.zig"); raw: c.WGPUBuffer, size: u64, usage: c.WGPUBufferUsage, -gloc: *GpuAllocator, +gloc: GpuAllocator, const BufferUsage = enum(u64) { None = 0x0000000000000000, @@ -22,7 +22,7 @@ const BufferUsage = enum(u64) { }; /// Allocates the underlying WebGPU handle and registers it to the parent GpuAllocator -pub fn init(gloc: *GpuAllocator, T: type, len: usize, usage: std.EnumSet(BufferUsage)) !@This() { +pub fn init(gloc: GpuAllocator, T: type, len: usize, usage: std.EnumSet(BufferUsage)) !@This() { switch (@typeInfo(T)) { .int, .float => {}, else => @compileError("GpuBuffer can only use int and float type"), @@ -33,7 +33,7 @@ pub fn init(gloc: *GpuAllocator, T: type, len: usize, usage: std.EnumSet(BufferU while (iter.next()) |flag| use |= @intFromEnum(flag); const bytes = 
@sizeOf(T) * len; - const raw_handle = try gloc.registerBuffer(bytes, use); + const raw_handle = try gloc.allocBuffer(bytes, use); return .{ .raw = raw_handle, @@ -45,7 +45,7 @@ pub fn init(gloc: *GpuAllocator, T: type, len: usize, usage: std.EnumSet(BufferU /// Unregisters from the parent GpuAllocator and cleanly destroys GPU resources pub fn deinit(self: @This()) void { - self.gloc.unregisterAndDestroyBuffer(self); + self.gloc.freeBuffer(self.raw, self.size); } /// Native mapAsync wrapper diff --git a/src/GpuDevice.zig b/src/GpuDevice.zig index b1da5c4..4f94fa6 100644 --- a/src/GpuDevice.zig +++ b/src/GpuDevice.zig @@ -87,7 +87,7 @@ pub fn deinit(self: @This()) void { c.wgpuInstanceRelease(self.instance); } -pub fn poll(self: *@This()) void { +pub fn poll(self: @This()) void { _ = c.wgpuDevicePoll(self.device, 1, null); } diff --git a/src/Vec.zig b/src/Vec.zig index c05ff9e..4b725b0 100644 --- a/src/Vec.zig +++ b/src/Vec.zig @@ -1,4 +1,3 @@ -/// Dummy const std = @import("std"); const c = @import("utils.zig").c; const GpuAllocator = @import("GpuAllocator.zig"); @@ -11,7 +10,8 @@ const Vec = @This(); buf: GpuBuffer, len: usize, -pub fn initZero(gloc: *GpuAllocator, len: usize) !Vec { +// Changed: gloc is passed by value (const) +pub fn initZero(gloc: GpuAllocator, len: usize) !Vec { return .{ .buf = try GpuBuffer.init( gloc, @@ -23,9 +23,10 @@ pub fn initZero(gloc: *GpuAllocator, len: usize) !Vec { }; } -pub fn initLoad(gloc: *GpuAllocator, data: []const f16) !Vec { +// Changed: gloc is passed by value +pub fn initLoad(gloc: GpuAllocator, data: []const f16) !Vec { var self = try initZero(gloc, data.len); - try self.load(gloc.device, data); + try self.load(gloc.device, data); // Direct access via the interface copy return self; } @@ -48,7 +49,8 @@ pub fn byteSize(self: Vec) u64 { return @as(u64, self.len) * @sizeOf(f16); } -pub fn run(self: Vec, gloc: *GpuAllocator, other: Vec, pip: GpuPipeline) !Vec { +// Changed: gloc is passed by value instead of *GpuAllocator 
+pub fn run(self: Vec, gloc: GpuAllocator, other: Vec, pip: GpuPipeline) !Vec { std.debug.assert(self.len == other.len); const result = try Vec.initZero(gloc, self.len); @@ -59,8 +61,8 @@ pub fn run(self: Vec, gloc: *GpuAllocator, other: Vec, pip: GpuPipeline) !Vec { return result; } -/// GPU to CPU. -pub fn read(self: Vec, gloc: *GpuAllocator, alloc: std.mem.Allocator) ![]f16 { +// Changed: gloc is passed by value instead of *GpuAllocator +pub fn read(self: Vec, gloc: GpuAllocator, alloc: std.mem.Allocator) ![]f16 { const out = try alloc.alloc(f16, self.len); const bytes = self.byteSize(); @@ -107,9 +109,9 @@ fn onMapped( flag.* = (status == c.WGPUMapAsyncStatus_Success); } -/// Encode + submit a 2-input, 1-output compute pass (used by add). +// Changed: gloc is passed by value instead of *GpuAllocator fn dispatch2in1out( - gloc: *GpuAllocator, + gloc: GpuAllocator, pipeline: c.WGPUComputePipeline, buf_a: GpuBuffer, buf_b: GpuBuffer, @@ -120,11 +122,9 @@ fn dispatch2in1out( var offset: u64 = 0; while (offset < bytes) { - // Calculate bounds for the current chunk const current_chunk_bytes = @min(max_chunk_bytes, bytes - offset); const current_chunk_elements: u32 = @intCast(current_chunk_bytes / @sizeOf(f16)); - // Create uniform buffer for this specific chunk's size const info_buf = try GpuBuffer.init( gloc, u32, @@ -133,10 +133,8 @@ fn dispatch2in1out( ); defer info_buf.deinit(); - // Write the number of elements *in this chunk* to the uniform buffer c.wgpuQueueWriteBuffer(gloc.device.queue, info_buf.raw, 0, &current_chunk_elements, @sizeOf(u32)); - // Bind only the sub-slice for this chunk using `.offset` and `.size` const entries = [_]c.WGPUBindGroupEntry{ .{ .binding = 0, .buffer = buf_a.raw, .offset = offset, .size = current_chunk_bytes }, .{ .binding = 1, .buffer = buf_b.raw, .offset = offset, .size = current_chunk_bytes }, @@ -144,16 +142,15 @@ fn dispatch2in1out( .{ .binding = 3, .buffer = info_buf.raw, .offset = 0, .size = @sizeOf(u32) }, }; - // Submit the pass
for this specific chunk try submitPass(gloc, pipeline, &entries, current_chunk_elements); offset += current_chunk_bytes; } } -/// Create bind group, encode pass, submit. +// Changed: gloc is passed by value instead of *GpuAllocator fn submitPass( - gloc: *GpuAllocator, + gloc: GpuAllocator, pipeline: c.WGPUComputePipeline, entries: []const c.WGPUBindGroupEntry, n: usize, diff --git a/src/example.zig b/src/example.zig index a27ae7e..75e60ef 100644 --- a/src/example.zig +++ b/src/example.zig @@ -1,6 +1,7 @@ const std = @import("std"); const GpuDevice = @import("GpuDevice.zig"); const GpuAllocator = @import("GpuAllocator.zig"); +const GpuArena = @import("GpuArena.zig"); const GpuPipeline = @import("GpuPipeline.zig"); const Vec = @import("Vec.zig"); @@ -9,34 +10,36 @@ const c = @import("utils.zig").c; pub fn main(init: std.process.Init) !void { const allocator = init.gpa; - const device = try GpuDevice.init(.{ .vram_bytes_limit = 4 * 1024 * 1024 * 1024 }); + const device = try GpuDevice.init(.{}); defer device.deinit(); - var gloc = try GpuAllocator.init(allocator, device); - defer gloc.deinit(); + var grena = GpuArena.init(allocator, device); + defer grena.deinit(); + + const gloc = grena.gpuAllocator(); const add_pip = try GpuPipeline.init(device, @embedFile("shaders/add.wgsl")); defer add_pip.deinit(); - const data_a = try allocator.alloc(f16, 1024); + const data_a = try allocator.alloc(f16, 16); defer allocator.free(data_a); - const data_b = try allocator.alloc(f16, 1024); + const data_b = try allocator.alloc(f16, 16); defer allocator.free(data_b); - for (0..1024) |i| { + for (0..16) |i| { data_a[i] = @floatFromInt(i); - data_b[i] = @floatFromInt(1024 - 1 - i); + data_b[i] = @floatFromInt(16 - 1 - i); } - const a = try Vec.initLoad(&gloc, data_a); + const a = try Vec.initLoad(gloc, data_a); defer a.deinit(); - const b = try Vec.initLoad(&gloc, data_b); + const b = try Vec.initLoad(gloc, data_b); defer b.deinit(); - const sum = try a.run(&gloc, b, add_pip); + const 
sum = try a.run(gloc, b, add_pip); defer sum.deinit(); - const out = try sum.read(&gloc, allocator); + const out = try sum.read(gloc, allocator); defer allocator.free(out); std.debug.print("{any}\n", .{out}); diff --git a/src/lib.zig b/src/lib.zig index e0641cf..6560570 100644 --- a/src/lib.zig +++ b/src/lib.zig @@ -1,4 +1,5 @@ pub const GpuAllocator = @import("GpuAllocator.zig"); +pub const GpuArena = @import("GpuArena.zig"); pub const GpuBuffer = @import("GpuBuffer.zig"); pub const GpuDevice = @import("GpuDevice.zig"); pub const GpuPipeline = @import("GpuPipeline.zig");