diff --git a/src/GpuBuffer.zig b/src/GpuBuffer.zig
index 56588b6..970a2ba 100644
--- a/src/GpuBuffer.zig
+++ b/src/GpuBuffer.zig
@@ -22,22 +22,16 @@ const BufferUsage = enum(u64) {
 };
 
 /// Allocates the underlying WebGPU handle and registers it to the parent GpuAllocator
-pub fn init(gloc: GpuAllocator, T: type, len: usize, usage: std.EnumSet(BufferUsage)) !@This() {
-    switch (@typeInfo(T)) {
-        .int, .float => {},
-        else => @compileError("GpuBuffer can only use int and float type"),
-    }
-
+pub fn init(gloc: GpuAllocator, size: u64, usage: std.EnumSet(BufferUsage)) !@This() {
     var use: u64 = 0;
     var iter = usage.iterator();
     while (iter.next()) |flag| use |= @intFromEnum(flag);
 
-    const bytes = @sizeOf(T) * len;
-    const raw_handle = try gloc.allocBuffer(bytes, use);
+    const raw_handle = try gloc.allocBuffer(size, use);
 
     return .{
         .raw = raw_handle,
-        .size = bytes,
+        .size = size,
         .usage = use,
         .gloc = gloc,
     };
@@ -48,6 +42,11 @@ pub fn deinit(self: @This()) void {
     self.gloc.freeBuffer(self.raw, self.size);
 }
 
+/// Native getConstMappedRange wrapper
+pub fn getConstMappedRange(self: @This(), offset: u64, size: u64) ?*const anyopaque {
+    return c.wgpuBufferGetConstMappedRange(self.raw, offset, size);
+}
+
 /// Native mapAsync wrapper
 pub fn mapAsync(
     self: @This(),
@@ -59,11 +58,6 @@ pub fn mapAsync(
     _ = c.wgpuBufferMapAsync(self.raw, mode, offset, size, callback_info);
 }
 
-/// Native getConstMappedRange wrapper
-pub fn getConstMappedRange(self: @This(), offset: u64, size: u64) ?*const anyopaque {
-    return c.wgpuBufferGetConstMappedRange(self.raw, offset, size);
-}
-
 /// Native unmap wrapper
 pub fn unmap(self: @This()) void {
     c.wgpuBufferUnmap(self.raw);
@@ -76,3 +70,65 @@ pub fn load(
 ) !void {
     c.wgpuQueueWriteBuffer(self.gloc.device.queue, self.raw, 0, data.ptr, self.size);
 }
+
+/// Copies this buffer's contents back to the host as a freshly allocated slice of `T`.
+///
+/// Stages the data through a temporary MapRead | CopyDst buffer, then polls the
+/// device until the map callback has fired. Caller owns the returned slice and
+/// must free it with `alloc`.
+///
+/// Errors: allocation failures from `alloc` or the GpuAllocator, `error.SizeMismatch`
+/// when the buffer size is not a multiple of `@sizeOf(T)`, `error.Encoder`,
+/// `error.CommandBuffer`, and `error.MapFailed`.
+pub fn read(self: @This(), alloc: std.mem.Allocator, T: type) ![]T {
+    if (self.size % @sizeOf(T) != 0) return error.SizeMismatch;
+    const out = try alloc.alloc(T, @divExact(self.size, @sizeOf(T)));
+    errdefer alloc.free(out);
+
+    const staging = try init(
+        self.gloc,
+        self.size,
+        .initMany(&.{ .MapRead, .CopyDst }),
+    );
+    defer staging.deinit();
+
+    const enc = c.wgpuDeviceCreateCommandEncoder(self.gloc.device.device, null) orelse return error.Encoder;
+    defer c.wgpuCommandEncoderRelease(enc);
+    c.wgpuCommandEncoderCopyBufferToBuffer(enc, self.raw, 0, staging.raw, 0, self.size);
+    const cmd = c.wgpuCommandEncoderFinish(enc, null);
+    if (cmd == null) return error.CommandBuffer;
+    defer c.wgpuCommandBufferRelease(cmd);
+    c.wgpuQueueSubmit(self.gloc.device.queue, 1, &cmd);
+
+    // Tri-state so a failed map ends the wait instead of spinning forever.
+    var map_state: MapState = .pending;
+    staging.mapAsync(
+        c.WGPUMapMode_Read,
+        0,
+        self.size,
+        .{ .callback = onMapped, .userdata1 = &map_state },
+    );
+    while (map_state == .pending) self.gloc.device.poll();
+    if (map_state != .ready) return error.MapFailed;
+    defer staging.unmap();
+
+    const range = staging.getConstMappedRange(0, self.size) orelse return error.MapFailed;
+    const ptr: [*]const T = @ptrCast(@alignCast(range));
+    @memcpy(out, ptr[0..out.len]);
+
+    return out;
+}
+
+/// Outcome of the staging buffer's mapAsync request, written by `onMapped`.
+const MapState = enum { pending, ready, failed };
+
+/// mapAsync callback: records success or failure in the `MapState` passed as `userdata1`.
+fn onMapped(
+    status: c.WGPUMapAsyncStatus,
+    _: c.WGPUStringView,
+    userdata1: ?*anyopaque,
+    _: ?*anyopaque,
+) callconv(.c) void {
+    const state: *MapState = @ptrCast(@alignCast(userdata1.?));
+    state.* = if (status == c.WGPUMapAsyncStatus_Success) .ready else .failed;
+}
diff --git a/src/Vec.zig b/src/Vec.zig
index a6f09e0..f75dfef 100644
--- a/src/Vec.zig
+++ b/src/Vec.zig
@@ -15,8 +15,7 @@ pub fn initZero(gloc: GpuAllocator, len: usize) !Vec {
     return .{
         .buf = try GpuBuffer.init(
             gloc,
-            f16,
-            len,
+            len * @sizeOf(f16),
             .initMany(&.{ .Storage, .CopyDst, .CopySrc }),
         ),
         .len = len,
@@ -59,41 +58,8 @@ pub fn run(self: Vec, gloc: GpuAllocator, other: Vec, pip: GpuPipeline) !Vec {
 }
 
-// Changed: gloc is passed by value instead of *GpuAllocator
-pub fn read(self: Vec, gloc: GpuAllocator, alloc: std.mem.Allocator) ![]f16 {
-    const out = try alloc.alloc(f16, self.len);
-    const bytes = self.byteSize();
-
-    const staging = try GpuBuffer.init(
-        gloc,
-        f16,
-        self.len,
-        .initMany(&.{ .MapRead, .CopyDst }),
-    );
-    defer staging.deinit();
-
-    const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device.device, null) orelse return error.Encoder;
-    c.wgpuCommandEncoderCopyBufferToBuffer(enc, self.buf.raw, 0, staging.raw, 0, bytes);
-    const cmd = c.wgpuCommandEncoderFinish(enc, null);
-    defer c.wgpuCommandEncoderRelease(enc);
-    defer c.wgpuCommandBufferRelease(cmd);
-    c.wgpuQueueSubmit(gloc.device.queue, 1, &cmd);
-
-    var mapped = false;
-    staging.mapAsync(
-        c.WGPUMapMode_Read,
-        0,
-        bytes,
-        .{ .callback = onMapped, .userdata1 = &mapped },
-    );
-    while (!mapped) gloc.device.poll();
-
-    const ptr: [*]const f16 = @ptrCast(@alignCast(
-        staging.getConstMappedRange(0, bytes),
-    ));
-    @memcpy(out[0..self.len], ptr[0..self.len]);
-    staging.unmap();
-
-    return out;
+/// Reads the vector's contents back from the GPU; caller owns the returned slice.
+pub fn read(self: Vec, alloc: std.mem.Allocator) ![]f16 {
+    return self.buf.read(alloc, f16);
 }
 
 fn onMapped(
@@ -124,8 +90,7 @@ fn dispatch2in1out(
 
     const info_buf = try GpuBuffer.init(
         gloc,
-        u32,
-        1,
+        @sizeOf(u32),
         .initMany(&.{ .Uniform, .CopyDst }),
     );
     defer info_buf.deinit();
diff --git a/src/example.zig b/src/example.zig
index c81da3c..fbbb4e7 100644
--- a/src/example.zig
+++ b/src/example.zig
@@ -41,7 +41,7 @@ pub fn main(init: std.process.Init) !void {
 
     std.debug.print("Bytes used: {d} (3 * {d})\n", .{ grena.allocated_vram_bytes, a.byteSize() });
 
-    const out = try sum.read(gloc, allocator);
+    const out = try sum.read(allocator);
     defer allocator.free(out);
 
     std.debug.print("{any}\n", .{out});