Changed GpuAllocator to be like std.mem.Allocator
Now it is 2 pointers, and I created a GpuArena. The point is to be like Zig: a const allocator and a var arena that tracks everything.
This commit is contained in:
parent
c3166e552b
commit
f5daf66784
@ -1,58 +1,24 @@
|
|||||||
|
// GpuAllocator.zig
|
||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const GpuDevice = @import("GpuDevice.zig");
|
const GpuDevice = @import("GpuDevice.zig");
|
||||||
const GpuBuffer = @import("GpuBuffer.zig");
|
|
||||||
const c = @import("utils.zig").c;
|
const c = @import("utils.zig").c;
|
||||||
|
|
||||||
|
const GpuAllocator = @This();
|
||||||
|
|
||||||
|
/// The function definitions our underlying implementations must satisfy.
/// `allocBuffer` and `freeBuffer` forward to these entries, passing the
/// interface's `ptr` field as `ctx`.
|
||||||
|
pub const VTable = struct {
|
||||||
|
/// Invoked by `allocBuffer` with the requested size in `bytes` and the raw
/// WebGPU `usage` flags; yields a buffer handle or an error.
alloc: *const fn (ctx: *anyopaque, bytes: u64, usage: c.WGPUBufferUsage) anyerror!c.WGPUBuffer,
|
||||||
|
/// Invoked by `freeBuffer` with the handle to give back and the `size`
/// the caller associates with it. Cannot fail.
free: *const fn (ctx: *anyopaque, buf_raw: c.WGPUBuffer, size: u64) void,
|
||||||
|
};
|
||||||
|
|
||||||
device: GpuDevice,
|
device: GpuDevice,
|
||||||
tracked_buffers: std.AutoHashMap(c.WGPUBuffer, void),
|
ptr: *anyopaque,
|
||||||
allocated_vram_bytes: u64 = 0,
|
vtable: *const VTable,
|
||||||
|
|
||||||
pub fn init(cpu_allocator: std.mem.Allocator, device: GpuDevice) !@This() {
|
pub fn allocBuffer(self: GpuAllocator, bytes: u64, usage: c.WGPUBufferUsage) !c.WGPUBuffer {
|
||||||
return .{
|
return self.vtable.alloc(self.ptr, bytes, usage);
|
||||||
.device = device,
|
|
||||||
.tracked_buffers = .init(cpu_allocator),
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn deinit(self: *@This()) void {
|
pub fn freeBuffer(self: GpuAllocator, buf_raw: c.WGPUBuffer, size: u64) void {
|
||||||
var it = self.tracked_buffers.keyIterator();
|
self.vtable.free(self.ptr, buf_raw, size);
|
||||||
while (it.next()) |buf_ptr| {
|
|
||||||
const buf = buf_ptr.*;
|
|
||||||
c.wgpuBufferDestroy(buf);
|
|
||||||
c.wgpuBufferRelease(buf);
|
|
||||||
}
|
|
||||||
self.tracked_buffers.deinit();
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn registerBuffer(
|
|
||||||
self: *@This(),
|
|
||||||
bytes: u64,
|
|
||||||
usage: c.WGPUBufferUsage,
|
|
||||||
) !c.WGPUBuffer {
|
|
||||||
if (bytes > self.device.limits.maxBufferSize)
|
|
||||||
return error.SingleBufferExceedsLimit;
|
|
||||||
|
|
||||||
if (bytes + self.allocated_vram_bytes > self.device.config.vram_bytes_limit)
|
|
||||||
return error.ExceedsVramBudget;
|
|
||||||
|
|
||||||
const buf = c.wgpuDeviceCreateBuffer(self.device.device, &.{
|
|
||||||
.usage = usage,
|
|
||||||
.size = bytes,
|
|
||||||
}) orelse return error.BufferAlloc;
|
|
||||||
errdefer {
|
|
||||||
c.wgpuBufferDestroy(buf);
|
|
||||||
c.wgpuBufferRelease(buf);
|
|
||||||
}
|
|
||||||
|
|
||||||
try self.tracked_buffers.put(buf, {});
|
|
||||||
self.allocated_vram_bytes += bytes;
|
|
||||||
return buf;
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn unregisterAndDestroyBuffer(self: *@This(), buf: GpuBuffer) void {
|
|
||||||
if (self.tracked_buffers.remove(buf.raw)) {
|
|
||||||
c.wgpuBufferDestroy(buf.raw);
|
|
||||||
c.wgpuBufferRelease(buf.raw);
|
|
||||||
self.allocated_vram_bytes -= buf.size;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
72
src/GpuArena.zig
Normal file
72
src/GpuArena.zig
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
// GpuArena.zig
|
||||||
|
const std = @import("std");
|
||||||
|
const GpuDevice = @import("GpuDevice.zig");
|
||||||
|
const GpuAllocator = @import("GpuAllocator.zig");
|
||||||
|
const c = @import("utils.zig").c;
|
||||||
|
|
||||||
|
const GpuArena = @This();
|
||||||
|
|
||||||
|
device: GpuDevice,
|
||||||
|
tracked_buffers: std.AutoHashMap(c.WGPUBuffer, void),
|
||||||
|
allocated_vram_bytes: u64 = 0,
|
||||||
|
|
||||||
|
/// Creates an empty arena bound to `device`.
///
/// `cpu_allocator` backs the host-side set of tracked buffer handles; no GPU
/// buffer is created here, and `allocated_vram_bytes` keeps its default of 0.
pub fn init(cpu_allocator: std.mem.Allocator, device: GpuDevice) GpuArena {
    const BufferSet = std.AutoHashMap(c.WGPUBuffer, void);
    return GpuArena{
        .device = device,
        .tracked_buffers = BufferSet.init(cpu_allocator),
    };
}
|
||||||
|
|
||||||
|
/// Destroys and releases every buffer the arena still tracks, then frees the
/// host-side tracking map.
pub fn deinit(self: *GpuArena) void {
    var handles = self.tracked_buffers.keyIterator();
    while (handles.next()) |handle| {
        const buf = handle.*;
        c.wgpuBufferDestroy(buf);
        c.wgpuBufferRelease(buf);
    }
    self.tracked_buffers.deinit();
}
|
||||||
|
|
||||||
|
/// Wraps this arena in the type-erased `GpuAllocator` interface.
///
/// The result copies `device` and stores `self` as its context pointer, so the
/// arena must remain at this address while the returned allocator is in use.
pub fn gpuAllocator(self: *GpuArena) GpuAllocator {
    return GpuAllocator{
        .device = self.device,
        .ptr = self,
        // Comptime-known table: the pointer refers to constant data, not the stack.
        .vtable = &GpuAllocator.VTable{
            .alloc = alloc,
            .free = free,
        },
    };
}
|
||||||
|
|
||||||
|
/// `GpuAllocator.VTable.alloc` implementation backed by a `GpuArena`.
///
/// `ctx` must be the `*GpuArena` installed by `gpuAllocator`. Creates a WebGPU
/// buffer of `bytes` bytes with the given `usage`, records it in
/// `tracked_buffers`, and adds `bytes` to `allocated_vram_bytes`.
///
/// Errors:
/// - `error.SingleBufferExceedsLimit`: `bytes` is above the device's `maxBufferSize`.
/// - `error.ExceedsVramBudget`: the new total would exceed `vram_bytes_limit`,
///   or would not fit in a `u64` at all.
/// - `error.BufferAlloc`: `wgpuDeviceCreateBuffer` returned null.
/// - any error from inserting into the tracking map (the buffer is destroyed
///   and released before the error propagates).
fn alloc(ctx: *anyopaque, bytes: u64, usage: c.WGPUBufferUsage) anyerror!c.WGPUBuffer {
    const self: *GpuArena = @ptrCast(@alignCast(ctx));

    if (bytes > self.device.limits.maxBufferSize)
        return error.SingleBufferExceedsLimit;

    // Checked addition: a plain `+` would trap (safe builds) or be undefined
    // (ReleaseFast) on overflow. A sum that overflows is over budget by definition.
    const new_total = std.math.add(u64, self.allocated_vram_bytes, bytes) catch
        return error.ExceedsVramBudget;
    if (new_total > self.device.config.vram_bytes_limit)
        return error.ExceedsVramBudget;

    const buf = c.wgpuDeviceCreateBuffer(self.device.device, &.{
        .usage = usage,
        .size = bytes,
    }) orelse return error.BufferAlloc;
    // If tracking fails below, do not leak the freshly created GPU buffer.
    errdefer {
        c.wgpuBufferDestroy(buf);
        c.wgpuBufferRelease(buf);
    }

    try self.tracked_buffers.put(buf, {});
    self.allocated_vram_bytes = new_total;
    return buf;
}
|
||||||
|
|
||||||
|
/// `GpuAllocator.VTable.free` implementation backed by a `GpuArena`.
///
/// `ctx` must be the `*GpuArena` installed by `gpuAllocator`. When `buf_raw`
/// is tracked by the arena it is untracked, destroyed, and released, and
/// `size` is subtracted from `allocated_vram_bytes`. Untracked handles are
/// ignored.
fn free(ctx: *anyopaque, buf_raw: c.WGPUBuffer, size: u64) void {
    const arena: *GpuArena = @ptrCast(@alignCast(ctx));

    // Nothing to do for a handle this arena does not own.
    if (!arena.tracked_buffers.remove(buf_raw)) return;

    c.wgpuBufferDestroy(buf_raw);
    c.wgpuBufferRelease(buf_raw);
    arena.allocated_vram_bytes -= size;
}
|
||||||
@ -5,7 +5,7 @@ const GpuAllocator = @import("GpuAllocator.zig");
|
|||||||
raw: c.WGPUBuffer,
|
raw: c.WGPUBuffer,
|
||||||
size: u64,
|
size: u64,
|
||||||
usage: c.WGPUBufferUsage,
|
usage: c.WGPUBufferUsage,
|
||||||
gloc: *GpuAllocator,
|
gloc: GpuAllocator,
|
||||||
|
|
||||||
const BufferUsage = enum(u64) {
|
const BufferUsage = enum(u64) {
|
||||||
None = 0x0000000000000000,
|
None = 0x0000000000000000,
|
||||||
@ -22,7 +22,7 @@ const BufferUsage = enum(u64) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
/// Allocates the underlying WebGPU handle and registers it to the parent GpuAllocator
|
/// Allocates the underlying WebGPU handle and registers it to the parent GpuAllocator
|
||||||
pub fn init(gloc: *GpuAllocator, T: type, len: usize, usage: std.EnumSet(BufferUsage)) !@This() {
|
pub fn init(gloc: GpuAllocator, T: type, len: usize, usage: std.EnumSet(BufferUsage)) !@This() {
|
||||||
switch (@typeInfo(T)) {
|
switch (@typeInfo(T)) {
|
||||||
.int, .float => {},
|
.int, .float => {},
|
||||||
else => @compileError("GpuBuffer can only use int and float type"),
|
else => @compileError("GpuBuffer can only use int and float type"),
|
||||||
@ -33,7 +33,7 @@ pub fn init(gloc: *GpuAllocator, T: type, len: usize, usage: std.EnumSet(BufferU
|
|||||||
while (iter.next()) |flag| use |= @intFromEnum(flag);
|
while (iter.next()) |flag| use |= @intFromEnum(flag);
|
||||||
|
|
||||||
const bytes = @sizeOf(T) * len;
|
const bytes = @sizeOf(T) * len;
|
||||||
const raw_handle = try gloc.registerBuffer(bytes, use);
|
const raw_handle = try gloc.allocBuffer(bytes, use);
|
||||||
|
|
||||||
return .{
|
return .{
|
||||||
.raw = raw_handle,
|
.raw = raw_handle,
|
||||||
@ -45,7 +45,7 @@ pub fn init(gloc: *GpuAllocator, T: type, len: usize, usage: std.EnumSet(BufferU
|
|||||||
|
|
||||||
/// Unregisters from the parent GpuAllocator and cleanly destroys GPU resources
|
/// Unregisters from the parent GpuAllocator and cleanly destroys GPU resources
|
||||||
pub fn deinit(self: @This()) void {
|
pub fn deinit(self: @This()) void {
|
||||||
self.gloc.unregisterAndDestroyBuffer(self);
|
self.gloc.freeBuffer(self.raw, self.size);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Native mapAsync wrapper
|
/// Native mapAsync wrapper
|
||||||
|
|||||||
@ -87,7 +87,7 @@ pub fn deinit(self: @This()) void {
|
|||||||
c.wgpuInstanceRelease(self.instance);
|
c.wgpuInstanceRelease(self.instance);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn poll(self: *@This()) void {
|
pub fn poll(self: @This()) void {
|
||||||
_ = c.wgpuDevicePoll(self.device, 1, null);
|
_ = c.wgpuDevicePoll(self.device, 1, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
29
src/Vec.zig
29
src/Vec.zig
@ -1,4 +1,3 @@
|
|||||||
/// Dummy
|
|
||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const c = @import("utils.zig").c;
|
const c = @import("utils.zig").c;
|
||||||
const GpuAllocator = @import("GpuAllocator.zig");
|
const GpuAllocator = @import("GpuAllocator.zig");
|
||||||
@ -11,7 +10,8 @@ const Vec = @This();
|
|||||||
buf: GpuBuffer,
|
buf: GpuBuffer,
|
||||||
len: usize,
|
len: usize,
|
||||||
|
|
||||||
pub fn initZero(gloc: *GpuAllocator, len: usize) !Vec {
|
// Changed: gloc is passed by value (const)
|
||||||
|
pub fn initZero(gloc: GpuAllocator, len: usize) !Vec {
|
||||||
return .{
|
return .{
|
||||||
.buf = try GpuBuffer.init(
|
.buf = try GpuBuffer.init(
|
||||||
gloc,
|
gloc,
|
||||||
@ -23,9 +23,10 @@ pub fn initZero(gloc: *GpuAllocator, len: usize) !Vec {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn initLoad(gloc: *GpuAllocator, data: []const f16) !Vec {
|
// Changed: gloc is passed by value
|
||||||
|
pub fn initLoad(gloc: GpuAllocator, data: []const f16) !Vec {
|
||||||
var self = try initZero(gloc, data.len);
|
var self = try initZero(gloc, data.len);
|
||||||
try self.load(gloc.device, data);
|
try self.load(gloc.device, data); // Direct access via the interface copy
|
||||||
return self;
|
return self;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -48,7 +49,8 @@ pub fn byteSize(self: Vec) u64 {
|
|||||||
return @as(u64, self.len) * @sizeOf(f16);
|
return @as(u64, self.len) * @sizeOf(f16);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn run(self: Vec, gloc: *GpuAllocator, other: Vec, pip: GpuPipeline) !Vec {
|
// Changed: gloc is passed by value instead of *GpuAllocator
|
||||||
|
pub fn run(self: Vec, gloc: GpuAllocator, other: Vec, pip: GpuPipeline) !Vec {
|
||||||
std.debug.assert(self.len == other.len);
|
std.debug.assert(self.len == other.len);
|
||||||
|
|
||||||
const result = try Vec.initZero(gloc, self.len);
|
const result = try Vec.initZero(gloc, self.len);
|
||||||
@ -59,8 +61,8 @@ pub fn run(self: Vec, gloc: *GpuAllocator, other: Vec, pip: GpuPipeline) !Vec {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// GPU to CPU.
|
// Changed: gloc is passed by value instead of *GpuAllocator
|
||||||
pub fn read(self: Vec, gloc: *GpuAllocator, alloc: std.mem.Allocator) ![]f16 {
|
pub fn read(self: Vec, gloc: GpuAllocator, alloc: std.mem.Allocator) ![]f16 {
|
||||||
const out = try alloc.alloc(f16, self.len);
|
const out = try alloc.alloc(f16, self.len);
|
||||||
const bytes = self.byteSize();
|
const bytes = self.byteSize();
|
||||||
|
|
||||||
@ -107,9 +109,9 @@ fn onMapped(
|
|||||||
flag.* = (status == c.WGPUMapAsyncStatus_Success);
|
flag.* = (status == c.WGPUMapAsyncStatus_Success);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Encode + submit a 2-input, 1-output compute pass (used by add).
|
// Changed: gloc is passed by value instead of *GpuAllocator
|
||||||
fn dispatch2in1out(
|
fn dispatch2in1out(
|
||||||
gloc: *GpuAllocator,
|
gloc: GpuAllocator,
|
||||||
pipeline: c.WGPUComputePipeline,
|
pipeline: c.WGPUComputePipeline,
|
||||||
buf_a: GpuBuffer,
|
buf_a: GpuBuffer,
|
||||||
buf_b: GpuBuffer,
|
buf_b: GpuBuffer,
|
||||||
@ -120,11 +122,9 @@ fn dispatch2in1out(
|
|||||||
|
|
||||||
var offset: u64 = 0;
|
var offset: u64 = 0;
|
||||||
while (offset < bytes) {
|
while (offset < bytes) {
|
||||||
// Calculate bounds for the current chunk
|
|
||||||
const current_chunk_bytes = @min(max_chunk_bytes, bytes - offset);
|
const current_chunk_bytes = @min(max_chunk_bytes, bytes - offset);
|
||||||
const current_chunk_elements: u32 = @intCast(current_chunk_bytes / @sizeOf(f16));
|
const current_chunk_elements: u32 = @intCast(current_chunk_bytes / @sizeOf(f16));
|
||||||
|
|
||||||
// Create uniform buffer for this specific chunk's size
|
|
||||||
const info_buf = try GpuBuffer.init(
|
const info_buf = try GpuBuffer.init(
|
||||||
gloc,
|
gloc,
|
||||||
u32,
|
u32,
|
||||||
@ -133,10 +133,8 @@ fn dispatch2in1out(
|
|||||||
);
|
);
|
||||||
defer info_buf.deinit();
|
defer info_buf.deinit();
|
||||||
|
|
||||||
// Write the number of elements *in this chunk* to the uniform buffer
|
|
||||||
c.wgpuQueueWriteBuffer(gloc.device.queue, info_buf.raw, 0, &current_chunk_elements, @sizeOf(u32));
|
c.wgpuQueueWriteBuffer(gloc.device.queue, info_buf.raw, 0, &current_chunk_elements, @sizeOf(u32));
|
||||||
|
|
||||||
// Bind only the sub-slice for this chunk using `.offset` and `.size`
|
|
||||||
const entries = [_]c.WGPUBindGroupEntry{
|
const entries = [_]c.WGPUBindGroupEntry{
|
||||||
.{ .binding = 0, .buffer = buf_a.raw, .offset = offset, .size = current_chunk_bytes },
|
.{ .binding = 0, .buffer = buf_a.raw, .offset = offset, .size = current_chunk_bytes },
|
||||||
.{ .binding = 1, .buffer = buf_b.raw, .offset = offset, .size = current_chunk_bytes },
|
.{ .binding = 1, .buffer = buf_b.raw, .offset = offset, .size = current_chunk_bytes },
|
||||||
@ -144,16 +142,15 @@ fn dispatch2in1out(
|
|||||||
.{ .binding = 3, .buffer = info_buf.raw, .offset = 0, .size = @sizeOf(u32) },
|
.{ .binding = 3, .buffer = info_buf.raw, .offset = 0, .size = @sizeOf(u32) },
|
||||||
};
|
};
|
||||||
|
|
||||||
// Submit the pass for this specific chunk
|
|
||||||
try submitPass(gloc, pipeline, &entries, current_chunk_elements);
|
try submitPass(gloc, pipeline, &entries, current_chunk_elements);
|
||||||
|
|
||||||
offset += current_chunk_bytes;
|
offset += current_chunk_bytes;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Create bind group, encode pass, submit.
|
// Changed: gloc is passed by value instead of *GpuAllocator
|
||||||
fn submitPass(
|
fn submitPass(
|
||||||
gloc: *GpuAllocator,
|
gloc: GpuAllocator,
|
||||||
pipeline: c.WGPUComputePipeline,
|
pipeline: c.WGPUComputePipeline,
|
||||||
entries: []const c.WGPUBindGroupEntry,
|
entries: []const c.WGPUBindGroupEntry,
|
||||||
n: usize,
|
n: usize,
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const GpuDevice = @import("GpuDevice.zig");
|
const GpuDevice = @import("GpuDevice.zig");
|
||||||
const GpuAllocator = @import("GpuAllocator.zig");
|
const GpuAllocator = @import("GpuAllocator.zig");
|
||||||
|
const GpuArena = @import("GpuArena.zig");
|
||||||
const GpuPipeline = @import("GpuPipeline.zig");
|
const GpuPipeline = @import("GpuPipeline.zig");
|
||||||
const Vec = @import("Vec.zig");
|
const Vec = @import("Vec.zig");
|
||||||
|
|
||||||
@ -9,34 +10,36 @@ const c = @import("utils.zig").c;
|
|||||||
pub fn main(init: std.process.Init) !void {
|
pub fn main(init: std.process.Init) !void {
|
||||||
const allocator = init.gpa;
|
const allocator = init.gpa;
|
||||||
|
|
||||||
const device = try GpuDevice.init(.{ .vram_bytes_limit = 4 * 1024 * 1024 * 1024 });
|
const device = try GpuDevice.init(.{});
|
||||||
defer device.deinit();
|
defer device.deinit();
|
||||||
|
|
||||||
var gloc = try GpuAllocator.init(allocator, device);
|
var grena = GpuArena.init(allocator, device);
|
||||||
defer gloc.deinit();
|
defer grena.deinit();
|
||||||
|
|
||||||
|
const gloc = grena.gpuAllocator();
|
||||||
|
|
||||||
const add_pip = try GpuPipeline.init(device, @embedFile("shaders/add.wgsl"));
|
const add_pip = try GpuPipeline.init(device, @embedFile("shaders/add.wgsl"));
|
||||||
defer add_pip.deinit();
|
defer add_pip.deinit();
|
||||||
|
|
||||||
const data_a = try allocator.alloc(f16, 1024);
|
const data_a = try allocator.alloc(f16, 16);
|
||||||
defer allocator.free(data_a);
|
defer allocator.free(data_a);
|
||||||
const data_b = try allocator.alloc(f16, 1024);
|
const data_b = try allocator.alloc(f16, 16);
|
||||||
defer allocator.free(data_b);
|
defer allocator.free(data_b);
|
||||||
|
|
||||||
for (0..1024) |i| {
|
for (0..16) |i| {
|
||||||
data_a[i] = @floatFromInt(i);
|
data_a[i] = @floatFromInt(i);
|
||||||
data_b[i] = @floatFromInt(1024 - 1 - i);
|
data_b[i] = @floatFromInt(16 - 1 - i);
|
||||||
}
|
}
|
||||||
|
|
||||||
const a = try Vec.initLoad(&gloc, data_a);
|
const a = try Vec.initLoad(gloc, data_a);
|
||||||
defer a.deinit();
|
defer a.deinit();
|
||||||
const b = try Vec.initLoad(&gloc, data_b);
|
const b = try Vec.initLoad(gloc, data_b);
|
||||||
defer b.deinit();
|
defer b.deinit();
|
||||||
|
|
||||||
const sum = try a.run(&gloc, b, add_pip);
|
const sum = try a.run(gloc, b, add_pip);
|
||||||
defer sum.deinit();
|
defer sum.deinit();
|
||||||
|
|
||||||
const out = try sum.read(&gloc, allocator);
|
const out = try sum.read(gloc, allocator);
|
||||||
defer allocator.free(out);
|
defer allocator.free(out);
|
||||||
|
|
||||||
std.debug.print("{any}\n", .{out});
|
std.debug.print("{any}\n", .{out});
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
pub const GpuAllocator = @import("GpuAllocator.zig");
|
pub const GpuAllocator = @import("GpuAllocator.zig");
|
||||||
|
pub const GpuArena = @import("GpuArena.zig");
|
||||||
pub const GpuBuffer = @import("GpuBuffer.zig");
|
pub const GpuBuffer = @import("GpuBuffer.zig");
|
||||||
pub const GpuDevice = @import("GpuDevice.zig");
|
pub const GpuDevice = @import("GpuDevice.zig");
|
||||||
pub const GpuPipeline = @import("GpuPipeline.zig");
|
pub const GpuPipeline = @import("GpuPipeline.zig");
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user