From d503ce7deabd155a332c29b969833d91d1df63d6 Mon Sep 17 00:00:00 2001 From: AdrienBouvais Date: Thu, 21 May 2026 10:26:20 +0200 Subject: [PATCH] Added optional label in definitions and added logs when alloc, free and device info --- examples/bench_cp.zig | 11 ++++--- examples/circle.zig | 4 ++- examples/compute.zig | 19 ++++++----- src/GpuArenaAllocator.zig | 69 ++++++++++++++++++++++++++++++++++----- src/GpuBuffer.zig | 29 +++++++++++----- src/GpuCompute.zig | 17 ++++++---- src/GpuDevice.zig | 38 ++++++++++++++------- src/GpuRender.zig | 3 ++ src/GpuTexture.zig | 69 +++++++++++++++++++++++++-------------- src/GpuTextureView.zig | 3 ++ src/utils.zig | 14 ++++++++ 11 files changed, 203 insertions(+), 73 deletions(-) diff --git a/examples/bench_cp.zig b/examples/bench_cp.zig index 3c506d0..a3a8dba 100644 --- a/examples/bench_cp.zig +++ b/examples/bench_cp.zig @@ -6,6 +6,8 @@ const GpuAllocator = gpu.GpuAllocator; const GpuBuffer = gpu.GpuBuffer; const GpuCompute = gpu.GpuCompute; +pub const std_options = std.Options{ .log_level = .info }; + /// Minimal implementation of a f16 Vector const Vec = struct { buf: GpuBuffer, @@ -14,11 +16,10 @@ const Vec = struct { // Changed: gloc is passed by value (const) pub fn initZero(gloc: GpuAllocator, len: usize) !Vec { return .{ - .buf = try GpuBuffer.init( - gloc, - len * @sizeOf(f16), - .initMany(&.{ .Storage, .CopyDst, .CopySrc }), - ), + .buf = try GpuBuffer.init(gloc, .{ + .size = len * @sizeOf(f16), + .usage = .initMany(&.{ .Storage, .CopyDst, .CopySrc }), + }), .len = len, }; } diff --git a/examples/circle.zig b/examples/circle.zig index 60b00ab..26a60c3 100644 --- a/examples/circle.zig +++ b/examples/circle.zig @@ -18,7 +18,9 @@ pub fn main(init: std.process.Init) !void { defer device.deinit(); // 2. Init VRAM Arena - const gloc = device.gpuAllocator(); + var grena = GpuArenaAllocator.init(allocator, device.gpuAllocator()); + defer grena.deinit(); + const gloc = grena.gpuAllocator(); // 3. Load Render Pipeline const circle_rp = try GpuRender.init( diff --git a/examples/compute.zig b/examples/compute.zig index e67286c..14f2312 100644 --- a/examples/compute.zig +++ b/examples/compute.zig @@ -21,11 +21,14 @@ pub fn main(init: std.process.Init) !void { const add_cp = try GpuCompute.init( gloc, @embedFile("shaders/add.wgsl"), - .{ .bindings = &.{ - .{ .element_size = @sizeOf(f16) }, - .{ .element_size = @sizeOf(f16) }, - .{ .element_size = @sizeOf(f16) }, - } }, + .{ + .label = "add", + .bindings = &.{ + .{ .element_size = @sizeOf(f16) }, + .{ .element_size = @sizeOf(f16) }, + .{ .element_size = @sizeOf(f16) }, + }, + }, ); // 4. Setup CPU data @@ -42,9 +45,9 @@ pub fn main(init: std.process.Init) !void { // 5. Initialize raw GPU Buffers const byte_size = len * @sizeOf(f16); - const buf_a = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc })); - const buf_b = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc })); - const buf_out = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc })); + const buf_a = try GpuBuffer.init(gloc, .{ .label = "a", .size = byte_size, .usage = .initMany(&.{ .Storage, .CopyDst, .CopySrc }) }); + const buf_b = try GpuBuffer.init(gloc, .{ .label = "b", .size = byte_size, .usage = .initMany(&.{ .Storage, .CopyDst, .CopySrc }) }); + const buf_out = try GpuBuffer.init(gloc, .{ .label = "out", .size = byte_size, .usage = .initMany(&.{ .Storage, .CopyDst, .CopySrc }) }); // Note: Buffers are safely tied to the GpuArenaAllocator which will automatically // release them at the end. You can also manually call buf_x.deinit() if desired. diff --git a/src/GpuArenaAllocator.zig b/src/GpuArenaAllocator.zig index 36da463..87a9335 100644 --- a/src/GpuArenaAllocator.zig +++ b/src/GpuArenaAllocator.zig @@ -3,6 +3,7 @@ const GpuDevice = @import("GpuDevice.zig"); const GpuAllocator = @import("GpuAllocator.zig"); const GpuTextureFormat = @import("lib.zig").GpuTextureFormat; const c = @import("utils.zig").c; +const viewStr = @import("utils.zig").viewStr; child_allocator: GpuAllocator, // I use Zig naming child_allocator, but that should be a parent for me. Likely something idk tracked_buffers: std.AutoHashMap(c.WGPUBuffer, c.WGPUBufferDescriptor), @@ -24,30 +25,40 @@ pub fn init(cpu_allocator: std.mem.Allocator, child_allocator: GpuAllocator) @Th } pub fn deinit(self: *@This()) void { + std.log.debug("Freeing GpuArenaAllocator (Used VRAM: {}/{} MB)", .{ + self.allocated_vram_bytes / 1024 / 1024, + self.child_allocator.device.def.vram_bytes_limit / 1024 / 1024, + }); + var it_buffer = self.tracked_buffers.keyIterator(); while (it_buffer.next()) |buf_ptr| - self.child_allocator.freeBuffer(buf_ptr.*); + freeBuffer(self, buf_ptr.*); self.tracked_buffers.deinit(); var it_tex = self.tracked_textures.keyIterator(); while (it_tex.next()) |buf_ptr| - self.child_allocator.freeTexture(buf_ptr.*); + freeTexture(self, buf_ptr.*); self.tracked_textures.deinit(); var it_view = self.tracked_views.keyIterator(); while (it_view.next()) |buf_ptr| - self.child_allocator.freeTextureView(buf_ptr.*); + freeTextureView(self, buf_ptr.*); self.tracked_views.deinit(); var it_render = self.tracked_renders.keyIterator(); while (it_render.next()) |buf_ptr| - self.child_allocator.freeRenderPipeline(buf_ptr.*); + freeRenderPipeline(self, buf_ptr.*); self.tracked_renders.deinit(); var it_compute = self.tracked_computes.keyIterator(); while (it_compute.next()) |buf_ptr| - self.child_allocator.freeComputePipeline(buf_ptr.*); + freeComputePipeline(self, buf_ptr.*); self.tracked_computes.deinit(); + + std.log.debug("Freed GpuArenaAllocator (Used VRAM: {}/{} MB)", .{ + self.allocated_vram_bytes / 1024 / 1024, + self.child_allocator.device.def.vram_bytes_limit / 1024 / 1024, + }); } /// Returns the type-erased immutable interface wrapper @@ -79,6 +90,14 @@ fn allocBuffer(ctx: *anyopaque, desc: c.WGPUBufferDescriptor) anyerror!c.WGPUBuf const raw = try self.child_allocator.allocBuffer(desc); self.tracked_buffers.putAssumeCapacity(raw, desc); self.allocated_vram_bytes += desc.size; + + std.log.debug("Allocated Buffer '{s}': {d} B (Total VRAM: {}/{} MB)", .{ + viewStr(desc.label), + desc.size, + self.allocated_vram_bytes / 1024 / 1024, + self.child_allocator.device.def.vram_bytes_limit / 1024 / 1024, + }); + return raw; } @@ -87,6 +106,12 @@ fn freeBuffer(ctx: *anyopaque, raw: c.WGPUBuffer) void { if (self.tracked_buffers.fetchRemove(raw)) |kv| { self.child_allocator.freeBuffer(raw); self.allocated_vram_bytes -= kv.value.size; + + std.log.debug("Freed Buffer '{s}' (Total VRAM: {}/{} MB)", .{ + viewStr(kv.value.label), + self.allocated_vram_bytes / 1024 / 1024, + self.child_allocator.device.def.vram_bytes_limit / 1024 / 1024, + }); } } @@ -97,13 +122,21 @@ fn allocTexture(ctx: *anyopaque, desc: c.WGPUTextureDescriptor) anyerror!c.WGPUT const format: GpuTextureFormat = @enumFromInt(desc.format); const bytes_size = desc.size.width * desc.size.height * format.bytesPerPixel(); - if (bytes_size + self.allocated_vram_bytes > self.child_allocator.device.config.vram_bytes_limit) + if (bytes_size + self.allocated_vram_bytes > self.child_allocator.device.def.vram_bytes_limit) return error.ExceedsVramBudget; const raw = try self.child_allocator.allocTexture(desc); self.tracked_textures.putAssumeCapacity(raw, desc); self.allocated_vram_bytes += bytes_size; + + std.log.debug("Allocated Texture '{s}': {d} B (Total VRAM: {}/{} MB)", .{ + viewStr(desc.label), + bytes_size, + self.allocated_vram_bytes / 1024 / 1024, + self.child_allocator.device.def.vram_bytes_limit / 1024 / 1024, + }); + return raw; } @@ -117,6 +150,12 @@ fn freeTexture(ctx: *anyopaque, raw: c.WGPUTexture) void { const format: GpuTextureFormat = @enumFromInt(desc.format); const bytes_size = desc.size.width * desc.size.height * format.bytesPerPixel(); self.allocated_vram_bytes -= bytes_size; + + std.log.debug("Freed Texture '{s}' (Total VRAM: {}/{} MB)", .{ + viewStr(desc.label), + self.allocated_vram_bytes / 1024 / 1024, + self.child_allocator.device.def.vram_bytes_limit / 1024 / 1024, + }); } } @@ -125,13 +164,17 @@ fn allocTextureView(ctx: *anyopaque, texture: c.WGPUTexture, desc: c.WGPUTexture try self.tracked_views.ensureTotalCapacity(self.tracked_views.count() + 1); const raw = try self.child_allocator.allocTextureView(texture, desc); self.tracked_views.putAssumeCapacity(raw, desc); + std.log.debug("Allocated Texture View '{s}'", .{viewStr(desc.label)}); return raw; } fn freeTextureView(ctx: *anyopaque, raw: c.WGPUTextureView) void { const self: *@This() = @ptrCast(@alignCast(ctx)); - if (self.tracked_views.remove(raw)) + if (self.tracked_views.fetchRemove(raw)) |kv| { self.child_allocator.freeTextureView(raw); + const desc = kv.value; + std.log.debug("Freed Texture View '{s}'", .{viewStr(desc.label)}); + } } fn allocRenderPipeline(ctx: *anyopaque, desc: c.WGPURenderPipelineDescriptor) anyerror!c.WGPURenderPipeline { @@ -139,13 +182,17 @@ fn allocRenderPipeline(ctx: *anyopaque, desc: c.WGPURenderPipelineDescriptor) an try self.tracked_renders.ensureTotalCapacity(self.tracked_renders.count() + 1); const raw = try self.child_allocator.allocRenderPipeline(desc); self.tracked_renders.putAssumeCapacity(raw, desc); + std.log.debug("Allocated Render Pipeline '{s}'", .{viewStr(desc.label)}); return raw; } fn freeRenderPipeline(ctx: *anyopaque, raw: c.WGPURenderPipeline) void { const self: *@This() = @ptrCast(@alignCast(ctx)); - if (self.tracked_renders.remove(raw)) + if (self.tracked_renders.fetchRemove(raw)) |kv| { self.child_allocator.freeRenderPipeline(raw); + const desc = kv.value; + std.log.debug("Freed Render Pipeline '{s}'", .{viewStr(desc.label)}); + } } fn allocComputePipeline(ctx: *anyopaque, desc: c.WGPUComputePipelineDescriptor) anyerror!c.WGPUComputePipeline { @@ -153,11 +200,15 @@ fn allocComputePipeline(ctx: *anyopaque, desc: c.WGPUComputePipelineDescriptor) try self.tracked_computes.ensureTotalCapacity(self.tracked_computes.count() + 1); const raw = try self.child_allocator.allocComputePipeline(desc); self.tracked_computes.putAssumeCapacity(raw, desc); + std.log.debug("Allocated Compute Pipeline '{s}'", .{viewStr(desc.label)}); return raw; } fn freeComputePipeline(ctx: *anyopaque, raw: c.WGPUComputePipeline) void { const self: *@This() = @ptrCast(@alignCast(ctx)); - if (self.tracked_computes.remove(raw)) + if (self.tracked_computes.fetchRemove(raw)) |kv| { self.child_allocator.freeComputePipeline(raw); + const desc = kv.value; + std.log.debug("Freed Compute Pipeline '{s}'", .{viewStr(desc.label)}); + } } diff --git a/src/GpuBuffer.zig b/src/GpuBuffer.zig index 6632462..9640229 100644 --- a/src/GpuBuffer.zig +++ b/src/GpuBuffer.zig @@ -1,6 +1,7 @@ const std = @import("std"); const c = @import("utils.zig").c; const GpuAllocator = @import("GpuAllocator.zig"); +const svOpt = @import("utils.zig").svOpt; raw: c.WGPUBuffer, size: u64, @@ -21,15 +22,25 @@ const BufferUsage = enum(u64) { QueryResolve = 0x0000000000000200, }; -pub fn init(gloc: GpuAllocator, size: u64, usage: std.EnumSet(BufferUsage)) !@This() { +const GpuBufferDef = struct { + label: ?[]const u8 = null, + size: u64, + usage: std.EnumSet(BufferUsage), +}; + +pub fn init(gloc: GpuAllocator, def: GpuBufferDef) !@This() { var use: u64 = 0; - var iter = usage.iterator(); + var iter = def.usage.iterator(); while (iter.next()) |flag| use |= @intFromEnum(flag); // Automatically align the buffer size forward to a multiple of 4 bytes under the hood - const aligned_size = std.mem.alignForward(u64, size, 4); + const aligned_size = std.mem.alignForward(u64, def.size, 4); - const raw_handle = try gloc.allocBuffer(.{ .size = aligned_size, .usage = use }); + const raw_handle = try gloc.allocBuffer(.{ + .size = aligned_size, + .usage = use, + .label = svOpt(def.label), + }); return .{ .raw = raw_handle, .size = aligned_size, @@ -91,11 +102,11 @@ pub fn load( pub fn read(self: @This(), alloc: std.mem.Allocator, T: type) ![]T { const out = try alloc.alloc(T, @divExact(self.size, @sizeOf(T))); - const staging = try init( - self.gloc, - self.size, - .initMany(&.{ .MapRead, .CopyDst }), - ); + const staging = try init(self.gloc, .{ + .size = self.size, + .usage = .initMany(&.{ .MapRead, .CopyDst }), + .label = "staging_read_buffer", + }); defer staging.deinit(); const enc = c.wgpuDeviceCreateCommandEncoder(self.gloc.device.device, null) orelse return error.Encoder; diff --git a/src/GpuCompute.zig b/src/GpuCompute.zig index c0fc52e..c678059 100644 --- a/src/GpuCompute.zig +++ b/src/GpuCompute.zig @@ -1,5 +1,6 @@ const c = @import("utils.zig").c; const sv = @import("utils.zig").sv; +const svOpt = @import("utils.zig").svOpt; const GpuAllocator = @import("GpuAllocator.zig"); const GpuBuffer = @import("GpuBuffer.zig"); const GpuDevice = @import("GpuDevice.zig"); @@ -11,6 +12,7 @@ pub const Binding = struct { }; pub const ComputeDef = struct { + label: ?[]const u8 = null, bindings: []const Binding, workgroup_size: u32 = 256, max_workgroups: u32 = 65535, @@ -33,7 +35,10 @@ pub fn init(gloc: GpuAllocator, wgsl: []const u8, def: ComputeDef) !@This() { }) orelse return error.Shader; defer c.wgpuShaderModuleRelease(shader); - const pip = try gloc.allocComputePipeline(.{ .compute = .{ .module = shader, .entryPoint = sv("main") } }); + const pip = try gloc.allocComputePipeline(.{ + .label = svOpt(def.label), + .compute = .{ .module = shader, .entryPoint = sv("main") }, + }); return .{ .gloc = gloc, @@ -108,11 +113,11 @@ pub fn run( defer if (info_buf) |b| b.deinit(); if (self.def.append_info_buffer) { - info_buf = try GpuBuffer.init( - gloc, - @sizeOf(u32), - .initMany(&.{ .Uniform, .CopyDst }), - ); + info_buf = try GpuBuffer.init(gloc, .{ + .size = @sizeOf(u32), + .usage = .initMany(&.{ .Uniform, .CopyDst }), + .label = "compute_info_buffer", + }); c.wgpuQueueWriteBuffer(gloc.device.queue, info_buf.?.raw, 0, &elements_count, @sizeOf(u32)); entries_buf[entry_count] = .{ diff --git a/src/GpuDevice.zig b/src/GpuDevice.zig index 8463aa2..6d54695 100644 --- a/src/GpuDevice.zig +++ b/src/GpuDevice.zig @@ -1,20 +1,17 @@ const std = @import("std"); const c = @import("utils.zig").c; const sv = @import("utils.zig").sv; +const svOpt = @import("utils.zig").svOpt; const GpuAllocator = @import("GpuAllocator.zig"); const GpuTextureFormat = @import("lib.zig").GpuTextureFormat; -// TODO: Make Allocator more zig like -// - GpuDevice can return a GpuAllocator that just allocate and nothing else -// - From this GpuAllocator, can create a GpuArena like std.heap.ArenaAllocator.init(allocator) -// - Rename GpuArenaAllocator too - const Ctx = struct { adapter: c.WGPUAdapter = null, device: c.WGPUDevice = null, }; -const GpuDeviceConfig = struct { +const GpuDeviceDef = struct { + label: ?[]const u8 = null, /// VRAM limit. Default 2 GB vram_bytes_limit: u64 = 2 * 1024 * 1024 * 1024, power_preference: enum(c_uint) { @@ -31,9 +28,9 @@ device: c.WGPUDevice, queue: c.WGPUQueue, limits: c.WGPULimits, -config: GpuDeviceConfig, +def: GpuDeviceDef, -pub fn init(config: GpuDeviceConfig) !@This() { +pub fn init(def: GpuDeviceDef) !@This() { const instance = c.wgpuCreateInstance( &std.mem.zeroes(c.WGPUInstanceDescriptor), ) orelse return error.NoInstance; @@ -42,13 +39,25 @@ pub fn init(config: GpuDeviceConfig) !@This() { var ctx = Ctx{}; _ = c.wgpuInstanceRequestAdapter( instance, - &.{ .powerPreference = @intFromEnum(config.power_preference) }, + &.{ .powerPreference = @intFromEnum(def.power_preference) }, .{ .callback = onAdapter, .userdata1 = &ctx }, ); c.wgpuInstanceProcessEvents(instance); const adapter = ctx.adapter orelse return error.NoAdapter; errdefer c.wgpuAdapterRelease(adapter); + var adapter_info = std.mem.zeroes(c.WGPUAdapterInfo); + _ = c.wgpuAdapterGetInfo(adapter, &adapter_info); + + std.log.info("=== WebGPU Device Initialized ===", .{}); + if (adapter_info.device.length > 0 and adapter_info.device.data != null) { + std.log.info(" Device Name : {s}", .{adapter_info.device.data[0..adapter_info.device.length]}); + } + if (adapter_info.architecture.length > 0 and adapter_info.architecture.data != null) { + std.log.info(" Architecture : {s}", .{adapter_info.architecture.data[0..adapter_info.architecture.length]}); + } + std.log.info(" Backend Type : {d}", .{adapter_info.backendType}); + var supported_features = std.mem.zeroes(c.WGPUSupportedFeatures); c.wgpuAdapterGetFeatures(adapter, &supported_features); @@ -56,6 +65,11 @@ pub fn init(config: GpuDeviceConfig) !@This() { supported_limits.nextInChain = null; if (c.wgpuAdapterGetLimits(adapter, &supported_limits) != 1) return error.FailedToGetAdapterLimits; + std.log.info(" Max Buf Size : {d} MB", .{supported_limits.maxBufferSize / 1024 / 1024}); + std.log.info(" Max Storage : {d} MB", .{supported_limits.maxStorageBufferBindingSize / 1024 / 1024}); + std.log.info(" Max Workgroup: X: {d}, Y: {d}, Z: {d}", .{ supported_limits.maxComputeWorkgroupSizeX, supported_limits.maxComputeWorkgroupSizeY, supported_limits.maxComputeWorkgroupSizeZ }); + std.log.info(" VRAM Budget : {d} MB", .{def.vram_bytes_limit / 1024 / 1024}); + var has_f16 = false; for (0..supported_features.featureCount) |i| { if (supported_features.features[i] == c.WGPUFeatureName_ShaderF16) { @@ -63,6 +77,8 @@ pub fn init(config: GpuDeviceConfig) !@This() { break; } } + std.log.info(" Shader F16 : {}", .{has_f16}); + std.log.info("=================================", .{}); var feature_buf = [_]c.WGPUFeatureName{c.WGPUFeatureName_ShaderF16}; const required_features: []const c.WGPUFeatureName = @@ -70,7 +86,7 @@ pub fn init(config: GpuDeviceConfig) !@This() { const device_descriptor = c.WGPUDeviceDescriptor{ .nextInChain = null, - .label = sv("TensorCompilerDevice"), + .label = svOpt(def.label), .requiredFeatureCount = required_features.len, .requiredFeatures = if (required_features.len > 0) required_features.ptr else null, .requiredLimits = &supported_limits, @@ -89,7 +105,7 @@ pub fn init(config: GpuDeviceConfig) !@This() { .device = device, .queue = c.wgpuDeviceGetQueue(device), .limits = supported_limits, - .config = config, + .def = def, }; } diff --git a/src/GpuRender.zig b/src/GpuRender.zig index 4f26d5a..71d7e1f 100644 --- a/src/GpuRender.zig +++ b/src/GpuRender.zig @@ -1,6 +1,7 @@ const std = @import("std"); const c = @import("utils.zig").c; const sv = @import("utils.zig").sv; +const svOpt = @import("utils.zig").svOpt; const GpuAllocator = @import("GpuAllocator.zig"); const GpuBuffer = @import("GpuBuffer.zig"); const GpuDevice = @import("GpuDevice.zig"); @@ -12,6 +13,7 @@ pub const Binding = struct { }; pub const GpuRenderDef = struct { + label: ?[]const u8 = null, bindings: []const Binding = &.{}, /// The surface texture format we are rendering to (e.g., BGRA8Unorm) texture_format: GpuTextureFormat, @@ -68,6 +70,7 @@ pub fn init(gloc: GpuAllocator, wgsl: []const u8, def: GpuRenderDef) !@This() { // 3. Compile the Complete Render Pipeline const pip = try gloc.allocRenderPipeline(.{ + .label = svOpt(def.label), .vertex = .{ .module = shader, .entryPoint = sv(def.vertex_entry), diff --git a/src/GpuTexture.zig b/src/GpuTexture.zig index b819ff2..cbc727a 100644 --- a/src/GpuTexture.zig +++ b/src/GpuTexture.zig @@ -1,11 +1,13 @@ const std = @import("std"); const c = @import("utils.zig").c; +const svOpt = @import("utils.zig").svOpt; const GpuAllocator = @import("GpuAllocator.zig"); const GpuBuffer = @import("GpuBuffer.zig"); const GpuTextureFormat = @import("lib.zig").GpuTextureFormat; const GpuTextureUsage = @import("lib.zig").GpuTextureUsage; pub const GpuTextureDef = struct { + label: ?[]const u8 = null, size: c.WGPUExtent3D, usage: std.EnumSet(GpuTextureUsage), format: GpuTextureFormat, @@ -21,6 +23,7 @@ pub fn init(gloc: GpuAllocator, def: GpuTextureDef) !@This() { while (iter.next()) |flag| use |= @intFromEnum(flag); const desc = c.WGPUTextureDescriptor{ + .label = svOpt(def.label), .usage = use, .dimension = c.WGPUTextureDimension_2D, .size = def.size, @@ -51,7 +54,11 @@ pub fn bytesSizeRow(self: @This()) u32 { /// Return a GpuBuffer containing a copy of the texture. pub fn buffCopy(self: @This(), gloc: GpuAllocator) !GpuBuffer { - const buf = try GpuBuffer.init(gloc, self.bytesSize(), .initMany(&.{ .CopyDst, .CopySrc })); + const buf = try GpuBuffer.init(gloc, .{ + .size = self.bytesSize(), + .usage = .initMany(&.{ .CopyDst, .CopySrc }), + .label = "texture_copy_buffer", + }); const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device.device, null) orelse return error.Encoder; defer c.wgpuCommandEncoderRelease(enc); @@ -102,38 +109,52 @@ pub fn load( ) !void { const bytes = data.len * @sizeOf(T); - if (bytes == self.size) { - // Aligned path: direct download - c.wgpuQueueWriteBuffer(self.gloc.device.queue, self.raw, 0, data.ptr, self.size); - } else { - // Unaligned path: Split the write into an aligned chunk and a padded remainder - // to support arbitrary lengths without any allocations or large stack arrays. - const aligned_part = (bytes / 4) * 4; - if (aligned_part > 0) { - c.wgpuQueueWriteBuffer(self.gloc.device.queue, self.raw, 0, data.ptr, aligned_part); - } - - var remainder_buf: [4]u8 = .{ 0, 0, 0, 0 }; - const data_bytes = std.mem.sliceAsBytes(data); - @memcpy(remainder_buf[0 .. bytes - aligned_part], data_bytes[aligned_part..bytes]); - - c.wgpuQueueWriteBuffer(self.gloc.device.queue, self.raw, aligned_part, &remainder_buf, 4); - } + c.wgpuQueueWriteTexture( + self.gloc.device.queue, + &.{ + .texture = self.raw, + .mipLevel = 0, + .origin = .{ .x = 0, .y = 0, .z = 0 }, + .aspect = c.WGPUTextureAspect_All, + }, + data.ptr, + bytes, + &.{ + .offset = 0, + .bytesPerRow = self.bytesSizeRow(), + .rowsPerImage = self.def.size.height, + }, + &self.def.size, + ); } // GPU to CPU pub fn read(self: @This(), alloc: std.mem.Allocator, T: type) ![]T { const out = try alloc.alloc(T, @divExact(self.size, @sizeOf(T))); - const staging = try init( - self.gloc, - self.size, - .initMany(&.{ .MapRead, .CopyDst }), - ); + const staging = try init(self.gloc, .{ + .size = self.size, + .usage = .initMany(&.{ .MapRead, .CopyDst }), + .label = "texture_read_staging", + }); defer staging.deinit(); const enc = c.wgpuDeviceCreateCommandEncoder(self.gloc.device.device, null) orelse return error.Encoder; - c.wgpuCommandEncoderCopyBufferToBuffer(enc, self.raw, 0, staging.raw, 0, self.size); + const src_copy = c.WGPUTexelCopyTextureInfo{ + .texture = self.raw, + .mipLevel = 0, + .origin = .{ .x = 0, .y = 0, .z = 0 }, + .aspect = c.WGPUTextureAspect_All, + }; + const dst_copy = c.WGPUTexelCopyBufferInfo{ + .buffer = staging.raw, + .layout = .{ + .offset = 0, + .bytesPerRow = self.bytesSizeRow(), + .rowsPerImage = self.def.size.height, + }, + }; + c.wgpuCommandEncoderCopyTextureToBuffer(enc, &src_copy, &dst_copy, &self.def.size); const cmd = c.wgpuCommandEncoderFinish(enc, null); defer c.wgpuCommandEncoderRelease(enc); defer c.wgpuCommandBufferRelease(cmd); diff --git a/src/GpuTextureView.zig b/src/GpuTextureView.zig index 9475b9b..11b0f96 100644 --- a/src/GpuTextureView.zig +++ b/src/GpuTextureView.zig @@ -1,11 +1,13 @@ const std = @import("std"); const c = @import("utils.zig").c; +const svOpt = @import("utils.zig").svOpt; const GpuAllocator = @import("GpuAllocator.zig"); const GpuTexture = @import("lib.zig").GpuTexture; const GpuTextureFormat = @import("lib.zig").GpuTextureFormat; const GpuTextureUsage = @import("lib.zig").GpuTextureUsage; pub const GpuViewDef = struct { + label: ?[]const u8 = null, usage: std.EnumSet(GpuTextureUsage) = .empty, format: GpuTextureFormat = .Undefined, }; @@ -19,6 +21,7 @@ pub fn init(gloc: GpuAllocator, texture: GpuTexture, def: GpuViewDef) !@This() { while (iter.next()) |flag| use |= @intFromEnum(flag); const raw = try gloc.allocTextureView(texture.raw, .{ + .label = svOpt(def.label), .format = @intFromEnum(def.format), .usage = use, .mipLevelCount = 1, diff --git a/src/utils.zig b/src/utils.zig index f4d62b4..ebddfc6 100644 --- a/src/utils.zig +++ b/src/utils.zig @@ -3,3 +3,17 @@ pub const c = @cImport(@cInclude("wgpu.h")); pub fn sv(s: []const u8) c.WGPUStringView { return .{ .data = s.ptr, .length = s.len }; } + +/// Allows safely passing an optional Zig string to a WebGPU string view. +pub fn svOpt(s: ?[]const u8) c.WGPUStringView { + if (s) |str| return sv(str); + return .{ .data = null, .length = 0 }; +} + +/// Helper to print a WGPUStringView in your logs. +pub fn viewStr(view: c.WGPUStringView) []const u8 { + if (view.data != null and view.length > 0) { + return view.data[0..view.length]; + } + return "unnamed"; +}