From d503ce7deabd155a332c29b969833d91d1df63d6 Mon Sep 17 00:00:00 2001
From: AdrienBouvais <adrien.bouvais@clearstream.com>
Date: Thu, 21 May 2026 10:26:20 +0200
Subject: [PATCH] Added optional label in definitions and added logs when
 alloc, free and device info

---
 examples/bench_cp.zig     | 11 ++++---
 examples/circle.zig       |  4 ++-
 examples/compute.zig      | 19 ++++++-----
 src/GpuArenaAllocator.zig | 69 ++++++++++++++++++++++++++++++++++-----
 src/GpuBuffer.zig         | 29 +++++++++++-----
 src/GpuCompute.zig        | 17 ++++++----
 src/GpuDevice.zig         | 38 ++++++++++++++-------
 src/GpuRender.zig         |  3 ++
 src/GpuTexture.zig        | 69 +++++++++++++++++++++++++--------------
 src/GpuTextureView.zig    |  3 ++
 src/utils.zig             | 14 ++++++++
 11 files changed, 203 insertions(+), 73 deletions(-)

diff --git a/examples/bench_cp.zig b/examples/bench_cp.zig
index 3c506d0..a3a8dba 100644
--- a/examples/bench_cp.zig
+++ b/examples/bench_cp.zig
@@ -6,6 +6,8 @@ const GpuAllocator = gpu.GpuAllocator;
 const GpuBuffer = gpu.GpuBuffer;
 const GpuCompute = gpu.GpuCompute;
 
+pub const std_options = std.Options{ .log_level = .info };
+
 /// Minimal implementation of a f16 Vector
 const Vec = struct {
     buf: GpuBuffer,
@@ -14,11 +16,10 @@ const Vec = struct {
     // Changed: gloc is passed by value (const)
     pub fn initZero(gloc: GpuAllocator, len: usize) !Vec {
         return .{
-            .buf = try GpuBuffer.init(
-                gloc,
-                len * @sizeOf(f16),
-                .initMany(&.{ .Storage, .CopyDst, .CopySrc }),
-            ),
+            .buf = try GpuBuffer.init(gloc, .{
+                .size = len * @sizeOf(f16),
+                .usage = .initMany(&.{ .Storage, .CopyDst, .CopySrc }),
+            }),
             .len = len,
         };
     }
diff --git a/examples/circle.zig b/examples/circle.zig
index 60b00ab..26a60c3 100644
--- a/examples/circle.zig
+++ b/examples/circle.zig
@@ -18,7 +18,9 @@ pub fn main(init: std.process.Init) !void {
     defer device.deinit();
 
     // 2. Init VRAM Arena
-    const gloc = device.gpuAllocator();
+    var grena = GpuArenaAllocator.init(allocator, device.gpuAllocator());
+    defer grena.deinit();
+    const gloc = grena.gpuAllocator();
 
     // 3. Load Render Pipeline
     const circle_rp = try GpuRender.init(
diff --git a/examples/compute.zig b/examples/compute.zig
index e67286c..14f2312 100644
--- a/examples/compute.zig
+++ b/examples/compute.zig
@@ -21,11 +21,14 @@ pub fn main(init: std.process.Init) !void {
     const add_cp = try GpuCompute.init(
         gloc,
         @embedFile("shaders/add.wgsl"),
-        .{ .bindings = &.{
-            .{ .element_size = @sizeOf(f16) },
-            .{ .element_size = @sizeOf(f16) },
-            .{ .element_size = @sizeOf(f16) },
-        } },
+        .{
+            .label = "add",
+            .bindings = &.{
+                .{ .element_size = @sizeOf(f16) },
+                .{ .element_size = @sizeOf(f16) },
+                .{ .element_size = @sizeOf(f16) },
+            },
+        },
     );
 
     // 4. Setup CPU data
@@ -42,9 +45,9 @@ pub fn main(init: std.process.Init) !void {
 
     // 5. Initialize raw GPU Buffers
     const byte_size = len * @sizeOf(f16);
-    const buf_a = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc }));
-    const buf_b = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc }));
-    const buf_out = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc }));
+    const buf_a = try GpuBuffer.init(gloc, .{ .label = "a", .size = byte_size, .usage = .initMany(&.{ .Storage, .CopyDst, .CopySrc }) });
+    const buf_b = try GpuBuffer.init(gloc, .{ .label = "b", .size = byte_size, .usage = .initMany(&.{ .Storage, .CopyDst, .CopySrc }) });
+    const buf_out = try GpuBuffer.init(gloc, .{ .label = "out", .size = byte_size, .usage = .initMany(&.{ .Storage, .CopyDst, .CopySrc }) });
 
     // Note: Buffers are safely tied to the GpuArenaAllocator which will automatically
     // release them at the end. You can also manually call buf_x.deinit() if desired.
diff --git a/src/GpuArenaAllocator.zig b/src/GpuArenaAllocator.zig
index 36da463..87a9335 100644
--- a/src/GpuArenaAllocator.zig
+++ b/src/GpuArenaAllocator.zig
@@ -3,6 +3,7 @@ const GpuDevice = @import("GpuDevice.zig");
 const GpuAllocator = @import("GpuAllocator.zig");
 const GpuTextureFormat = @import("lib.zig").GpuTextureFormat;
 const c = @import("utils.zig").c;
+const viewStr = @import("utils.zig").viewStr;
 
 child_allocator: GpuAllocator, // I use Zig naming child_allocator, but that should be a parent for me. Likely something idk
 tracked_buffers: std.AutoHashMap(c.WGPUBuffer, c.WGPUBufferDescriptor),
@@ -24,30 +25,40 @@ pub fn init(cpu_allocator: std.mem.Allocator, child_allocator: GpuAllocator) @Th
 }
 
 pub fn deinit(self: *@This()) void {
+    std.log.debug("Freeing GpuArenaAllocator (Used VRAM: {}/{} MB)", .{
+        self.allocated_vram_bytes / 1024 / 1024,
+        self.child_allocator.device.def.vram_bytes_limit / 1024 / 1024,
+    });
+
     var it_buffer = self.tracked_buffers.keyIterator();
     while (it_buffer.next()) |buf_ptr|
-        self.child_allocator.freeBuffer(buf_ptr.*);
+        freeBuffer(self, buf_ptr.*);
     self.tracked_buffers.deinit();
 
     var it_tex = self.tracked_textures.keyIterator();
     while (it_tex.next()) |buf_ptr|
-        self.child_allocator.freeTexture(buf_ptr.*);
+        freeTexture(self, buf_ptr.*);
     self.tracked_textures.deinit();
 
     var it_view = self.tracked_views.keyIterator();
     while (it_view.next()) |buf_ptr|
-        self.child_allocator.freeTextureView(buf_ptr.*);
+        freeTextureView(self, buf_ptr.*);
     self.tracked_views.deinit();
 
     var it_render = self.tracked_renders.keyIterator();
     while (it_render.next()) |buf_ptr|
-        self.child_allocator.freeRenderPipeline(buf_ptr.*);
+        freeRenderPipeline(self, buf_ptr.*);
     self.tracked_renders.deinit();
 
     var it_compute = self.tracked_computes.keyIterator();
     while (it_compute.next()) |buf_ptr|
-        self.child_allocator.freeComputePipeline(buf_ptr.*);
+        freeComputePipeline(self, buf_ptr.*);
     self.tracked_computes.deinit();
+
+    std.log.debug("Freed GpuArenaAllocator (Used VRAM: {}/{} MB)", .{
+        self.allocated_vram_bytes / 1024 / 1024,
+        self.child_allocator.device.def.vram_bytes_limit / 1024 / 1024,
+    });
 }
 
 /// Returns the type-erased immutable interface wrapper
@@ -79,6 +90,14 @@ fn allocBuffer(ctx: *anyopaque, desc: c.WGPUBufferDescriptor) anyerror!c.WGPUBuf
     const raw = try self.child_allocator.allocBuffer(desc);
     self.tracked_buffers.putAssumeCapacity(raw, desc);
     self.allocated_vram_bytes += desc.size;
+
+    std.log.debug("Allocated Buffer '{s}': {d} B (Total VRAM: {}/{} MB)", .{
+        viewStr(desc.label),
+        desc.size,
+        self.allocated_vram_bytes / 1024 / 1024,
+        self.child_allocator.device.def.vram_bytes_limit / 1024 / 1024,
+    });
+
     return raw;
 }
 
@@ -87,6 +106,12 @@ fn freeBuffer(ctx: *anyopaque, raw: c.WGPUBuffer) void {
     if (self.tracked_buffers.fetchRemove(raw)) |kv| {
         self.child_allocator.freeBuffer(raw);
         self.allocated_vram_bytes -= kv.value.size;
+
+        std.log.debug("Freed Buffer '{s}' (Total VRAM: {}/{} MB)", .{
+            viewStr(kv.value.label),
+            self.allocated_vram_bytes / 1024 / 1024,
+            self.child_allocator.device.def.vram_bytes_limit / 1024 / 1024,
+        });
     }
 }
 
@@ -97,13 +122,21 @@ fn allocTexture(ctx: *anyopaque, desc: c.WGPUTextureDescriptor) anyerror!c.WGPUT
     const format: GpuTextureFormat = @enumFromInt(desc.format);
     const bytes_size = desc.size.width * desc.size.height * format.bytesPerPixel();
 
-    if (bytes_size + self.allocated_vram_bytes > self.child_allocator.device.config.vram_bytes_limit)
+    if (bytes_size + self.allocated_vram_bytes > self.child_allocator.device.def.vram_bytes_limit)
         return error.ExceedsVramBudget;
 
     const raw = try self.child_allocator.allocTexture(desc);
 
     self.tracked_textures.putAssumeCapacity(raw, desc);
     self.allocated_vram_bytes += bytes_size;
+
+    std.log.debug("Allocated Texture '{s}': {d} B (Total VRAM: {}/{} MB)", .{
+        viewStr(desc.label),
+        bytes_size,
+        self.allocated_vram_bytes / 1024 / 1024,
+        self.child_allocator.device.def.vram_bytes_limit / 1024 / 1024,
+    });
+
     return raw;
 }
 
@@ -117,6 +150,12 @@ fn freeTexture(ctx: *anyopaque, raw: c.WGPUTexture) void {
         const format: GpuTextureFormat = @enumFromInt(desc.format);
         const bytes_size = desc.size.width * desc.size.height * format.bytesPerPixel();
         self.allocated_vram_bytes -= bytes_size;
+
+        std.log.debug("Freed Texture '{s}' (Total VRAM: {}/{} MB)", .{
+            viewStr(desc.label),
+            self.allocated_vram_bytes / 1024 / 1024,
+            self.child_allocator.device.def.vram_bytes_limit / 1024 / 1024,
+        });
     }
 }
 
@@ -125,13 +164,17 @@ fn allocTextureView(ctx: *anyopaque, texture: c.WGPUTexture, desc: c.WGPUTexture
     try self.tracked_views.ensureTotalCapacity(self.tracked_views.count() + 1);
     const raw = try self.child_allocator.allocTextureView(texture, desc);
     self.tracked_views.putAssumeCapacity(raw, desc);
+    std.log.debug("Allocated Texture View '{s}'", .{viewStr(desc.label)});
     return raw;
 }
 
 fn freeTextureView(ctx: *anyopaque, raw: c.WGPUTextureView) void {
     const self: *@This() = @ptrCast(@alignCast(ctx));
-    if (self.tracked_views.remove(raw))
+    if (self.tracked_views.fetchRemove(raw)) |kv| {
         self.child_allocator.freeTextureView(raw);
+        const desc = kv.value;
+        std.log.debug("Freed Texture View '{s}'", .{viewStr(desc.label)});
+    }
 }
 
 fn allocRenderPipeline(ctx: *anyopaque, desc: c.WGPURenderPipelineDescriptor) anyerror!c.WGPURenderPipeline {
@@ -139,13 +182,17 @@ fn allocRenderPipeline(ctx: *anyopaque, desc: c.WGPURenderPipelineDescriptor) an
     try self.tracked_renders.ensureTotalCapacity(self.tracked_renders.count() + 1);
     const raw = try self.child_allocator.allocRenderPipeline(desc);
     self.tracked_renders.putAssumeCapacity(raw, desc);
+    std.log.debug("Allocated Render Pipeline '{s}'", .{viewStr(desc.label)});
     return raw;
 }
 
 fn freeRenderPipeline(ctx: *anyopaque, raw: c.WGPURenderPipeline) void {
     const self: *@This() = @ptrCast(@alignCast(ctx));
-    if (self.tracked_renders.remove(raw))
+    if (self.tracked_renders.fetchRemove(raw)) |kv| {
         self.child_allocator.freeRenderPipeline(raw);
+        const desc = kv.value;
+        std.log.debug("Freed Render Pipeline '{s}'", .{viewStr(desc.label)});
+    }
 }
 
 fn allocComputePipeline(ctx: *anyopaque, desc: c.WGPUComputePipelineDescriptor) anyerror!c.WGPUComputePipeline {
@@ -153,11 +200,15 @@ fn allocComputePipeline(ctx: *anyopaque, desc: c.WGPUComputePipelineDescriptor)
     try self.tracked_computes.ensureTotalCapacity(self.tracked_computes.count() + 1);
     const raw = try self.child_allocator.allocComputePipeline(desc);
     self.tracked_computes.putAssumeCapacity(raw, desc);
+    std.log.debug("Allocated Compute Pipeline '{s}'", .{viewStr(desc.label)});
     return raw;
 }
 
 fn freeComputePipeline(ctx: *anyopaque, raw: c.WGPUComputePipeline) void {
     const self: *@This() = @ptrCast(@alignCast(ctx));
-    if (self.tracked_computes.remove(raw))
+    if (self.tracked_computes.fetchRemove(raw)) |kv| {
         self.child_allocator.freeComputePipeline(raw);
+        const desc = kv.value;
+        std.log.debug("Freed Compute Pipeline '{s}'", .{viewStr(desc.label)});
+    }
 }
diff --git a/src/GpuBuffer.zig b/src/GpuBuffer.zig
index 6632462..9640229 100644
--- a/src/GpuBuffer.zig
+++ b/src/GpuBuffer.zig
@@ -1,6 +1,7 @@
 const std = @import("std");
 const c = @import("utils.zig").c;
 const GpuAllocator = @import("GpuAllocator.zig");
+const svOpt = @import("utils.zig").svOpt;
 
 raw: c.WGPUBuffer,
 size: u64,
@@ -21,15 +22,25 @@ const BufferUsage = enum(u64) {
     QueryResolve = 0x0000000000000200,
 };
 
-pub fn init(gloc: GpuAllocator, size: u64, usage: std.EnumSet(BufferUsage)) !@This() {
+const GpuBufferDef = struct {
+    label: ?[]const u8 = null,
+    size: u64,
+    usage: std.EnumSet(BufferUsage),
+};
+
+pub fn init(gloc: GpuAllocator, def: GpuBufferDef) !@This() {
     var use: u64 = 0;
-    var iter = usage.iterator();
+    var iter = def.usage.iterator();
     while (iter.next()) |flag| use |= @intFromEnum(flag);
 
     // Automatically align the buffer size forward to a multiple of 4 bytes under the hood
-    const aligned_size = std.mem.alignForward(u64, size, 4);
+    const aligned_size = std.mem.alignForward(u64, def.size, 4);
 
-    const raw_handle = try gloc.allocBuffer(.{ .size = aligned_size, .usage = use });
+    const raw_handle = try gloc.allocBuffer(.{
+        .size = aligned_size,
+        .usage = use,
+        .label = svOpt(def.label),
+    });
     return .{
         .raw = raw_handle,
         .size = aligned_size,
@@ -91,11 +102,11 @@ pub fn load(
 pub fn read(self: @This(), alloc: std.mem.Allocator, T: type) ![]T {
     const out = try alloc.alloc(T, @divExact(self.size, @sizeOf(T)));
 
-    const staging = try init(
-        self.gloc,
-        self.size,
-        .initMany(&.{ .MapRead, .CopyDst }),
-    );
+    const staging = try init(self.gloc, .{
+        .size = self.size,
+        .usage = .initMany(&.{ .MapRead, .CopyDst }),
+        .label = "staging_read_buffer",
+    });
     defer staging.deinit();
 
     const enc = c.wgpuDeviceCreateCommandEncoder(self.gloc.device.device, null) orelse return error.Encoder;
diff --git a/src/GpuCompute.zig b/src/GpuCompute.zig
index c0fc52e..c678059 100644
--- a/src/GpuCompute.zig
+++ b/src/GpuCompute.zig
@@ -1,5 +1,6 @@
 const c = @import("utils.zig").c;
 const sv = @import("utils.zig").sv;
+const svOpt = @import("utils.zig").svOpt;
 const GpuAllocator = @import("GpuAllocator.zig");
 const GpuBuffer = @import("GpuBuffer.zig");
 const GpuDevice = @import("GpuDevice.zig");
@@ -11,6 +12,7 @@ pub const Binding = struct {
 };
 
 pub const ComputeDef = struct {
+    label: ?[]const u8 = null,
     bindings: []const Binding,
     workgroup_size: u32 = 256,
     max_workgroups: u32 = 65535,
@@ -33,7 +35,10 @@ pub fn init(gloc: GpuAllocator, wgsl: []const u8, def: ComputeDef) !@This() {
     }) orelse return error.Shader;
     defer c.wgpuShaderModuleRelease(shader);
 
-    const pip = try gloc.allocComputePipeline(.{ .compute = .{ .module = shader, .entryPoint = sv("main") } });
+    const pip = try gloc.allocComputePipeline(.{
+        .label = svOpt(def.label),
+        .compute = .{ .module = shader, .entryPoint = sv("main") },
+    });
 
     return .{
         .gloc = gloc,
@@ -108,11 +113,11 @@ pub fn run(
     defer if (info_buf) |b| b.deinit();
 
     if (self.def.append_info_buffer) {
-        info_buf = try GpuBuffer.init(
-            gloc,
-            @sizeOf(u32),
-            .initMany(&.{ .Uniform, .CopyDst }),
-        );
+        info_buf = try GpuBuffer.init(gloc, .{
+            .size = @sizeOf(u32),
+            .usage = .initMany(&.{ .Uniform, .CopyDst }),
+            .label = "compute_info_buffer",
+        });
         c.wgpuQueueWriteBuffer(gloc.device.queue, info_buf.?.raw, 0, &elements_count, @sizeOf(u32));
 
         entries_buf[entry_count] = .{
diff --git a/src/GpuDevice.zig b/src/GpuDevice.zig
index 8463aa2..6d54695 100644
--- a/src/GpuDevice.zig
+++ b/src/GpuDevice.zig
@@ -1,20 +1,17 @@
 const std = @import("std");
 const c = @import("utils.zig").c;
 const sv = @import("utils.zig").sv;
+const svOpt = @import("utils.zig").svOpt;
 const GpuAllocator = @import("GpuAllocator.zig");
 const GpuTextureFormat = @import("lib.zig").GpuTextureFormat;
 
-// TODO: Make Allocator more zig like
-//  - GpuDevice can return a GpuAllocator that just allocate and nothing else
-//  - From this GpuAllocator, can create a GpuArena like std.heap.ArenaAllocator.init(allocator)
-//  - Rename GpuArenaAllocator too
-
 const Ctx = struct {
     adapter: c.WGPUAdapter = null,
     device: c.WGPUDevice = null,
 };
 
-const GpuDeviceConfig = struct {
+const GpuDeviceDef = struct {
+    label: ?[]const u8 = null,
     /// VRAM limit. Default 2 GB
     vram_bytes_limit: u64 = 2 * 1024 * 1024 * 1024,
     power_preference: enum(c_uint) {
@@ -31,9 +28,9 @@ device: c.WGPUDevice,
 queue: c.WGPUQueue,
 limits: c.WGPULimits,
 
-config: GpuDeviceConfig,
+def: GpuDeviceDef,
 
-pub fn init(config: GpuDeviceConfig) !@This() {
+pub fn init(def: GpuDeviceDef) !@This() {
     const instance = c.wgpuCreateInstance(
         &std.mem.zeroes(c.WGPUInstanceDescriptor),
     ) orelse return error.NoInstance;
@@ -42,13 +39,25 @@ pub fn init(config: GpuDeviceConfig) !@This() {
     var ctx = Ctx{};
     _ = c.wgpuInstanceRequestAdapter(
         instance,
-        &.{ .powerPreference = @intFromEnum(config.power_preference) },
+        &.{ .powerPreference = @intFromEnum(def.power_preference) },
         .{ .callback = onAdapter, .userdata1 = &ctx },
     );
     c.wgpuInstanceProcessEvents(instance);
     const adapter = ctx.adapter orelse return error.NoAdapter;
     errdefer c.wgpuAdapterRelease(adapter);
 
+    var adapter_info = std.mem.zeroes(c.WGPUAdapterInfo);
+    _ = c.wgpuAdapterGetInfo(adapter, &adapter_info);
+
+    std.log.info("=== WebGPU Device Initialized ===", .{});
+    if (adapter_info.device.length > 0 and adapter_info.device.data != null) {
+        std.log.info("  Device Name  : {s}", .{adapter_info.device.data[0..adapter_info.device.length]});
+    }
+    if (adapter_info.architecture.length > 0 and adapter_info.architecture.data != null) {
+        std.log.info("  Architecture : {s}", .{adapter_info.architecture.data[0..adapter_info.architecture.length]});
+    }
+    std.log.info("  Backend Type : {d}", .{adapter_info.backendType});
+
     var supported_features = std.mem.zeroes(c.WGPUSupportedFeatures);
     c.wgpuAdapterGetFeatures(adapter, &supported_features);
 
@@ -56,6 +65,11 @@ pub fn init(config: GpuDeviceConfig) !@This() {
     supported_limits.nextInChain = null;
     if (c.wgpuAdapterGetLimits(adapter, &supported_limits) != 1) return error.FailedToGetAdapterLimits;
 
+    std.log.info("  Max Buf Size : {d} MB", .{supported_limits.maxBufferSize / 1024 / 1024});
+    std.log.info("  Max Storage  : {d} MB", .{supported_limits.maxStorageBufferBindingSize / 1024 / 1024});
+    std.log.info("  Max Workgroup: X: {d}, Y: {d}, Z: {d}", .{ supported_limits.maxComputeWorkgroupSizeX, supported_limits.maxComputeWorkgroupSizeY, supported_limits.maxComputeWorkgroupSizeZ });
+    std.log.info("  VRAM Budget  : {d} MB", .{def.vram_bytes_limit / 1024 / 1024});
+
     var has_f16 = false;
     for (0..supported_features.featureCount) |i| {
         if (supported_features.features[i] == c.WGPUFeatureName_ShaderF16) {
@@ -63,6 +77,8 @@ pub fn init(config: GpuDeviceConfig) !@This() {
             break;
         }
     }
+    std.log.info("  Shader F16   : {}", .{has_f16});
+    std.log.info("=================================", .{});
 
     var feature_buf = [_]c.WGPUFeatureName{c.WGPUFeatureName_ShaderF16};
     const required_features: []const c.WGPUFeatureName =
@@ -70,7 +86,7 @@ pub fn init(config: GpuDeviceConfig) !@This() {
 
     const device_descriptor = c.WGPUDeviceDescriptor{
         .nextInChain = null,
-        .label = sv("TensorCompilerDevice"),
+        .label = svOpt(def.label),
         .requiredFeatureCount = required_features.len,
         .requiredFeatures = if (required_features.len > 0) required_features.ptr else null,
         .requiredLimits = &supported_limits,
@@ -89,7 +105,7 @@ pub fn init(config: GpuDeviceConfig) !@This() {
         .device = device,
         .queue = c.wgpuDeviceGetQueue(device),
         .limits = supported_limits,
-        .config = config,
+        .def = def,
     };
 }
 
diff --git a/src/GpuRender.zig b/src/GpuRender.zig
index 4f26d5a..71d7e1f 100644
--- a/src/GpuRender.zig
+++ b/src/GpuRender.zig
@@ -1,6 +1,7 @@
 const std = @import("std");
 const c = @import("utils.zig").c;
 const sv = @import("utils.zig").sv;
+const svOpt = @import("utils.zig").svOpt;
 const GpuAllocator = @import("GpuAllocator.zig");
 const GpuBuffer = @import("GpuBuffer.zig");
 const GpuDevice = @import("GpuDevice.zig");
@@ -12,6 +13,7 @@ pub const Binding = struct {
 };
 
 pub const GpuRenderDef = struct {
+    label: ?[]const u8 = null,
     bindings: []const Binding = &.{},
     /// The surface texture format we are rendering to (e.g., BGRA8Unorm)
     texture_format: GpuTextureFormat,
@@ -68,6 +70,7 @@ pub fn init(gloc: GpuAllocator, wgsl: []const u8, def: GpuRenderDef) !@This() {
 
     // 3. Compile the Complete Render Pipeline
     const pip = try gloc.allocRenderPipeline(.{
+        .label = svOpt(def.label),
         .vertex = .{
             .module = shader,
             .entryPoint = sv(def.vertex_entry),
diff --git a/src/GpuTexture.zig b/src/GpuTexture.zig
index b819ff2..cbc727a 100644
--- a/src/GpuTexture.zig
+++ b/src/GpuTexture.zig
@@ -1,11 +1,13 @@
 const std = @import("std");
 const c = @import("utils.zig").c;
+const svOpt = @import("utils.zig").svOpt;
 const GpuAllocator = @import("GpuAllocator.zig");
 const GpuBuffer = @import("GpuBuffer.zig");
 const GpuTextureFormat = @import("lib.zig").GpuTextureFormat;
 const GpuTextureUsage = @import("lib.zig").GpuTextureUsage;
 
 pub const GpuTextureDef = struct {
+    label: ?[]const u8 = null,
     size: c.WGPUExtent3D,
     usage: std.EnumSet(GpuTextureUsage),
     format: GpuTextureFormat,
@@ -21,6 +23,7 @@ pub fn init(gloc: GpuAllocator, def: GpuTextureDef) !@This() {
     while (iter.next()) |flag| use |= @intFromEnum(flag);
 
     const desc = c.WGPUTextureDescriptor{
+        .label = svOpt(def.label),
         .usage = use,
         .dimension = c.WGPUTextureDimension_2D,
         .size = def.size,
@@ -51,7 +54,11 @@ pub fn bytesSizeRow(self: @This()) u32 {
 
 /// Return a GpuBuffer containing a copy of the texture.
 pub fn buffCopy(self: @This(), gloc: GpuAllocator) !GpuBuffer {
-    const buf = try GpuBuffer.init(gloc, self.bytesSize(), .initMany(&.{ .CopyDst, .CopySrc }));
+    const buf = try GpuBuffer.init(gloc, .{
+        .size = self.bytesSize(),
+        .usage = .initMany(&.{ .CopyDst, .CopySrc }),
+        .label = "texture_copy_buffer",
+    });
 
     const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device.device, null) orelse return error.Encoder;
     defer c.wgpuCommandEncoderRelease(enc);
@@ -102,38 +109,52 @@ pub fn load(
 ) !void {
     const bytes = data.len * @sizeOf(T);
 
-    if (bytes == self.size) {
-        // Aligned path: direct download
-        c.wgpuQueueWriteBuffer(self.gloc.device.queue, self.raw, 0, data.ptr, self.size);
-    } else {
-        // Unaligned path: Split the write into an aligned chunk and a padded remainder
-        // to support arbitrary lengths without any allocations or large stack arrays.
-        const aligned_part = (bytes / 4) * 4;
-        if (aligned_part > 0) {
-            c.wgpuQueueWriteBuffer(self.gloc.device.queue, self.raw, 0, data.ptr, aligned_part);
-        }
-
-        var remainder_buf: [4]u8 = .{ 0, 0, 0, 0 };
-        const data_bytes = std.mem.sliceAsBytes(data);
-        @memcpy(remainder_buf[0 .. bytes - aligned_part], data_bytes[aligned_part..bytes]);
-
-        c.wgpuQueueWriteBuffer(self.gloc.device.queue, self.raw, aligned_part, &remainder_buf, 4);
-    }
+    c.wgpuQueueWriteTexture(
+        self.gloc.device.queue,
+        &.{
+            .texture = self.raw,
+            .mipLevel = 0,
+            .origin = .{ .x = 0, .y = 0, .z = 0 },
+            .aspect = c.WGPUTextureAspect_All,
+        },
+        data.ptr,
+        bytes,
+        &.{
+            .offset = 0,
+            .bytesPerRow = self.bytesSizeRow(),
+            .rowsPerImage = self.def.size.height,
+        },
+        &self.def.size,
+    );
 }
 
 // GPU to CPU
 pub fn read(self: @This(), alloc: std.mem.Allocator, T: type) ![]T {
     const out = try alloc.alloc(T, @divExact(self.size, @sizeOf(T)));
 
-    const staging = try init(
-        self.gloc,
-        self.size,
-        .initMany(&.{ .MapRead, .CopyDst }),
-    );
+    const staging = try init(self.gloc, .{
+        .size = self.size,
+        .usage = .initMany(&.{ .MapRead, .CopyDst }),
+        .label = "texture_read_staging",
+    });
     defer staging.deinit();
 
     const enc = c.wgpuDeviceCreateCommandEncoder(self.gloc.device.device, null) orelse return error.Encoder;
-    c.wgpuCommandEncoderCopyBufferToBuffer(enc, self.raw, 0, staging.raw, 0, self.size);
+    const src_copy = c.WGPUTexelCopyTextureInfo{
+        .texture = self.raw,
+        .mipLevel = 0,
+        .origin = .{ .x = 0, .y = 0, .z = 0 },
+        .aspect = c.WGPUTextureAspect_All,
+    };
+    const dst_copy = c.WGPUTexelCopyBufferInfo{
+        .buffer = staging.raw,
+        .layout = .{
+            .offset = 0,
+            .bytesPerRow = self.bytesSizeRow(),
+            .rowsPerImage = self.def.size.height,
+        },
+    };
+    c.wgpuCommandEncoderCopyTextureToBuffer(enc, &src_copy, &dst_copy, &self.def.size);
     const cmd = c.wgpuCommandEncoderFinish(enc, null);
     defer c.wgpuCommandEncoderRelease(enc);
     defer c.wgpuCommandBufferRelease(cmd);
diff --git a/src/GpuTextureView.zig b/src/GpuTextureView.zig
index 9475b9b..11b0f96 100644
--- a/src/GpuTextureView.zig
+++ b/src/GpuTextureView.zig
@@ -1,11 +1,13 @@
 const std = @import("std");
 const c = @import("utils.zig").c;
+const svOpt = @import("utils.zig").svOpt;
 const GpuAllocator = @import("GpuAllocator.zig");
 const GpuTexture = @import("lib.zig").GpuTexture;
 const GpuTextureFormat = @import("lib.zig").GpuTextureFormat;
 const GpuTextureUsage = @import("lib.zig").GpuTextureUsage;
 
 pub const GpuViewDef = struct {
+    label: ?[]const u8 = null,
     usage: std.EnumSet(GpuTextureUsage) = .empty,
     format: GpuTextureFormat = .Undefined,
 };
@@ -19,6 +21,7 @@ pub fn init(gloc: GpuAllocator, texture: GpuTexture, def: GpuViewDef) !@This() {
     while (iter.next()) |flag| use |= @intFromEnum(flag);
 
     const raw = try gloc.allocTextureView(texture.raw, .{
+        .label = svOpt(def.label),
         .format = @intFromEnum(def.format),
         .usage = use,
         .mipLevelCount = 1,
diff --git a/src/utils.zig b/src/utils.zig
index f4d62b4..ebddfc6 100644
--- a/src/utils.zig
+++ b/src/utils.zig
@@ -3,3 +3,17 @@ pub const c = @cImport(@cInclude("wgpu.h"));
 pub fn sv(s: []const u8) c.WGPUStringView {
     return .{ .data = s.ptr, .length = s.len };
 }
+
+/// Allows safely passing an optional Zig string to a WebGPU string view.
+pub fn svOpt(s: ?[]const u8) c.WGPUStringView {
+    if (s) |str| return sv(str);
+    return .{ .data = null, .length = 0 };
+}
+
+/// Helper to print a WGPUStringView in your logs.
+pub fn viewStr(view: c.WGPUStringView) []const u8 {
+    if (view.data != null and view.length > 0) {
+        return view.data[0..view.length];
+    }
+    return "unnamed";
+}