diff --git a/examples/compute.zig b/examples/compute.zig index 14f2312..8ed489c 100644 --- a/examples/compute.zig +++ b/examples/compute.zig @@ -32,7 +32,7 @@ pub fn main(init: std.process.Init) !void { ); // 4. Setup CPU data - const len: usize = 16; + const len: usize = 1024; const data_a = try allocator.alloc(f16, len); defer allocator.free(data_a); const data_b = try allocator.alloc(f16, len); @@ -61,8 +61,15 @@ pub fn main(init: std.process.Init) !void { try add_cp.run(gloc, .{ buf_a, buf_b, buf_out }); // 8. Map and copy the resulting buffer back to the CPU - const out = try buf_out.read(allocator, f16); + const staging = try GpuBuffer.init(gloc, .{ + .size = byte_size, + .usage = .initMany(&.{ .MapRead, .CopyDst }), + }); + defer staging.deinit(); + + try buf_out.copy(staging); + const out = try staging.read(allocator, f16); defer allocator.free(out); - std.debug.print("Result: {any}\n", .{out}); + std.debug.print("Result: {any}\n", .{out[0..@min(6, len)]}); } diff --git a/src/GpuBuffer.zig b/src/GpuBuffer.zig index 157c4b7..7c8f599 100644 --- a/src/GpuBuffer.zig +++ b/src/GpuBuffer.zig @@ -19,6 +19,13 @@ pub const GpuBufferUsage = enum(u64) { Storage = 0x0000000000000080, Indirect = 0x0000000000000100, QueryResolve = 0x0000000000000200, + + fn enumSetToWGPUBufferUsage(set: std.EnumSet(GpuBufferUsage)) c.WGPUBufferUsage { + var use: u64 = 0; + var iter = set.iterator(); + while (iter.next()) |flag| use |= @intFromEnum(flag); + return use; + } }; pub const GpuBufferDef = struct { @@ -28,16 +35,13 @@ pub const GpuBufferDef = struct { }; pub fn init(gloc: GpuAllocator, def: GpuBufferDef) !@This() { - var use: u64 = 0; - var iter = def.usage.iterator(); - while (iter.next()) |flag| use |= @intFromEnum(flag); // Automatically align the buffer size forward to a multiple of 4 bytes under the hood const aligned_size = std.mem.alignForward(u64, def.size, 4); const raw_handle = try gloc.allocBuffer(.{ .size = aligned_size, - .usage = use, + .usage = 
GpuBufferUsage.enumSetToWGPUBufferUsage(def.usage), .label = svOpt(def.label), }); return .{ @@ -97,25 +101,14 @@ pub fn load( } /// GPU to CPU +/// Buffer must have MapRead usage or returns error.BufferNotMappable. pub fn read(self: @This(), alloc: std.mem.Allocator, T: type) ![]T { + if (!self.def.usage.contains(.MapRead)) return error.BufferNotMappable; + const out = try alloc.alloc(T, @divExact(self.def.size, @sizeOf(T))); - const staging = try init(self.gloc, .{ - .size = self.def.size, - .usage = .initMany(&.{ .MapRead, .CopyDst }), - .label = "staging_read_buffer", - }); - defer staging.deinit(); - - const enc = c.wgpuDeviceCreateCommandEncoder(self.gloc.device.device, null) orelse return error.Encoder; - c.wgpuCommandEncoderCopyBufferToBuffer(enc, self.raw, 0, staging.raw, 0, self.def.size); - const cmd = c.wgpuCommandEncoderFinish(enc, null); - defer c.wgpuCommandEncoderRelease(enc); - defer c.wgpuCommandBufferRelease(cmd); - c.wgpuQueueSubmit(self.gloc.device.queue, 1, &cmd); - var mapped = false; - staging.mapAsync( + self.mapAsync( c.WGPUMapMode_Read, 0, self.def.size, @@ -124,10 +117,10 @@ pub fn read(self: @This(), alloc: std.mem.Allocator, T: type) ![]T { while (!mapped) self.gloc.device.poll(); const ptr: [*]const T = @ptrCast(@alignCast( - staging.getConstMappedRange(0, self.def.size), + self.getConstMappedRange(0, self.def.size), )); @memcpy(out[0..out.len], ptr[0..out.len]); - staging.unmap(); + self.unmap(); return out; } @@ -141,3 +134,20 @@ fn onMapped( const flag: *bool = @ptrCast(@alignCast(userdata1.?)); flag.* = (status == c.WGPUMapAsyncStatus_Success); } + +/// GPU to GPU. Both buffers must be same size, src needs CopySrc, dst needs CopyDst. 
/// Records and submits a GPU-side copy of the contents of `src` into `dst`.
///
/// Errors:
/// - `error.SizeMismatch` when the two buffers' logical sizes differ.
/// - `error.SrcNotCopyable` when `src` was created without `CopySrc`.
/// - `error.DstNotWritable` when `dst` was created without `CopyDst`.
/// - `error.Encoder` when no command encoder could be created.
///
/// The copy is only submitted to the queue here; this function does not
/// wait for it to complete.
pub fn copy(src: @This(), dst: @This()) !void {
    if (src.def.size != dst.def.size) return error.SizeMismatch;

    // Query the usage sets directly, the same way `read` checks `.MapRead`.
    if (!src.def.usage.contains(.CopySrc)) return error.SrcNotCopyable;
    if (!dst.def.usage.contains(.CopyDst)) return error.DstNotWritable;

    // WebGPU rejects buffer-to-buffer copies whose byte count is not a
    // multiple of 4. `init` pads every allocation to that same boundary, so
    // the rounded-up count still fits inside both buffers.
    const copy_size = std.mem.alignForward(u64, src.def.size, 4);

    const enc = c.wgpuDeviceCreateCommandEncoder(src.gloc.device.device, null) orelse return error.Encoder;
    // Register the release right after acquisition; defers run LIFO, so the
    // command buffer below is still released before the encoder.
    defer c.wgpuCommandEncoderRelease(enc);

    c.wgpuCommandEncoderCopyBufferToBuffer(enc, src.raw, 0, dst.raw, 0, copy_size);
    const cmd = c.wgpuCommandEncoderFinish(enc, null);
    defer c.wgpuCommandBufferRelease(cmd);

    c.wgpuQueueSubmit(src.gloc.device.queue, 1, &cmd);
}