diff --git a/examples/bench.zig b/examples/bench_cp.zig similarity index 96% rename from examples/bench.zig rename to examples/bench_cp.zig index 635b14e..3c506d0 100644 --- a/examples/bench.zig +++ b/examples/bench_cp.zig @@ -1,7 +1,7 @@ const std = @import("std"); const gpu = @import("gpu"); const GpuDevice = gpu.GpuDevice; -const GpuArena = gpu.GpuArena; +const GpuArenaAllocator = gpu.GpuArenaAllocator; const GpuAllocator = gpu.GpuAllocator; const GpuBuffer = gpu.GpuBuffer; const GpuCompute = gpu.GpuCompute; @@ -60,12 +60,11 @@ pub fn main(init: std.process.Init) !void { const device = try GpuDevice.init(.{ .vram_bytes_limit = 4 * 1024 * 1024 * 1024 }); defer device.deinit(); - var grena = GpuArena.init(init.gpa, device); + var grena = GpuArenaAllocator.init(init.gpa, device.gpuAllocator()); defer grena.deinit(); - const gloc = grena.gpuAllocator(); - const add_pip = try GpuCompute.init(device, @embedFile("shaders/add.wgsl"), .{ .bindings = &.{ + const add_pip = try GpuCompute.init(gloc, @embedFile("shaders/add.wgsl"), .{ .bindings = &.{ .{ .element_size = @sizeOf(f16) }, .{ .element_size = @sizeOf(f16) }, .{ .element_size = @sizeOf(f16) }, diff --git a/examples/circle.zig b/examples/circle.zig new file mode 100644 index 0000000..60b00ab --- /dev/null +++ b/examples/circle.zig @@ -0,0 +1,74 @@ +const std = @import("std"); +const gpu = @import("gpu"); +const GpuDevice = gpu.GpuDevice; +const GpuArenaAllocator = gpu.GpuArenaAllocator; +const GpuBuffer = gpu.GpuBuffer; +const GpuRender = gpu.GpuRender; +const GpuTexture = gpu.GpuTexture; +const GpuTextureView = gpu.GpuTextureView; + +const width: u32 = 512; +const height: u32 = 512; + +pub fn main(init: std.process.Init) !void { + const allocator = init.gpa; + + // 1. Open GPU Device + const device = try GpuDevice.init(.{}); + defer device.deinit(); + + // 2. Init VRAM Arena + const gloc = device.gpuAllocator(); + + // 3. 
Load Render Pipeline + const circle_rp = try GpuRender.init( + gloc, + @embedFile("shaders/circle.wgsl"), + .{ .bindings = &.{}, .texture_format = .RGBA8Unorm, .topology = .TriangleStrip }, + ); + defer circle_rp.deinit(); + + // 4. Create VRAM texture to render into + const texture = try GpuTexture.init(gloc, .{ + .format = .RGBA8Unorm, + .size = .{ .width = width, .height = height, .depthOrArrayLayers = 1 }, + .usage = .initMany(&.{ .RenderAttachment, .CopySrc }), + }); + defer texture.deinit(); + + // 5. Create a view from texture + const view = try GpuTextureView.init(gloc, texture, .{}); + defer view.deinit(); + + // 6. Run the rendering pipeline + try circle_rp.draw(gloc, view, 4, .{}); + + // 7. Load Texture into GpuBuffer + const cpu_staging_cpu = try texture.buffCopy(gloc); + defer cpu_staging_cpu.deinit(); + + // 8. Read GpuBuffer to CPU + // This needs to be freed manually because it is CPU memory + const pixels = try cpu_staging_cpu.read(allocator, u8); + defer allocator.free(pixels); + + // 9. Write a simple ppm image + try savePpm(init.io, "circle.ppm", width, height, pixels); +} + +fn savePpm(io: std.Io, filename: []const u8, w: u32, h: u32, rgba_pixels: []const u8) !void { + const file = try std.Io.Dir.cwd().createFile(io, filename, .{}); + defer file.close(io); + + var buf: [255]u8 = undefined; + var writer = file.writer(io, &buf); + + // PPM Header: P6 format means raw RGB bytes + try writer.interface.print("P6\n{d} {d}\n255\n", .{ w, h }); + + // Strip Alpha channel when writing out to standard RGB PPM format + var i: usize = 0; + while (i < rgba_pixels.len) : (i += 4) { + try writer.interface.writeAll(rgba_pixels[i .. 
i + 3]); + } +} diff --git a/examples/add.zig b/examples/compute.zig similarity index 85% rename from examples/add.zig rename to examples/compute.zig index 46c0176..e67286c 100644 --- a/examples/add.zig +++ b/examples/compute.zig @@ -1,7 +1,7 @@ const std = @import("std"); const gpu = @import("gpu"); const GpuDevice = gpu.GpuDevice; -const GpuArena = gpu.GpuArena; +const GpuArenaAllocator = gpu.GpuArenaAllocator; const GpuBuffer = gpu.GpuBuffer; const GpuCompute = gpu.GpuCompute; @@ -13,13 +13,13 @@ pub fn main(init: std.process.Init) !void { defer device.deinit(); // 2. Create a GPU Arena to manage VRAM - var grena = GpuArena.init(allocator, device); + var grena = GpuArenaAllocator.init(allocator, device.gpuAllocator()); defer grena.deinit(); const gloc = grena.gpuAllocator(); // 3. Load the WGSL compute pipeline const add_cp = try GpuCompute.init( - device, + gloc, @embedFile("shaders/add.wgsl"), .{ .bindings = &.{ .{ .element_size = @sizeOf(f16) }, @@ -27,7 +27,6 @@ pub fn main(init: std.process.Init) !void { .{ .element_size = @sizeOf(f16) }, } }, ); - defer add_cp.deinit(); // 4. Setup CPU data const len: usize = 16; @@ -47,8 +46,9 @@ pub fn main(init: std.process.Init) !void { const buf_b = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc })); const buf_out = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc })); - // Note: The buffers are safely tied to the GpuArena which will automatically + // Note: Buffers are safely tied to the GpuArenaAllocator which will automatically // release them at the end. You can also manually call buf_x.deinit() if desired. + // This will also release pipelines, textures, etc.: everything initialized through a GpuAllocator. // 6. 
Transfer data from CPU slices to GPU Buffers try buf_a.load(f16, data_a); diff --git a/examples/digit.zig b/examples/digit.zig deleted file mode 100644 index 20f35a6..0000000 --- a/examples/digit.zig +++ /dev/null @@ -1,77 +0,0 @@ -// I am using this mnist reduced dataset https://www.kaggle.com/datasets/mohamedgamal07/reduced-mnist - -const std = @import("std"); -const gpu = @import("gpu"); -const GpuDevice = gpu.GpuDevice; -const GpuArena = gpu.GpuArena; -const GpuBuffer = gpu.GpuBuffer; -const GpuProcess = gpu.GpuProcess; - -const BATCHSIZE = 10; -const EPOCH = 10; - -pub fn main(init: std.process.Init) !void { - const allocator = init.gpa; - const io = init.io; - - // 1. Open GPU Device - const device = try GpuDevice.init(.{}); - defer device.deinit(); - - // 2. Create a GPU Arena to manage VRAM - var grena = GpuArena.init(allocator, device); - defer grena.deinit(); - const gloc = grena.gpuAllocator(); - - // 3. Load the WGSL compute pipeline - const add_process = try GpuProcess.init(device, @embedFile("shaders/add.wgsl")); - defer add_process.deinit(); - - var train_dir = try std.Io.Dir.cwd().openDir(io, "mnist/train", .{}); - - var images: [BATCHSIZE * 28 * 28]f16 = undefined; - for (EPOCH) |epoch| { - // Load random images from train dir - train_dir.openDir(io, "0", .{}); - for (BATCHSIZE) |i| { - const file = try train_dir.openFile(io, "0.jpg", .{}); - images[28 * 28 * i .. 28 * 28 * (i + 1)] = file.read - } - } - - // 4. Setup CPU data - const len: usize = 16; - const data_a = try allocator.alloc(f16, len); - defer allocator.free(data_a); - const data_b = try allocator.alloc(f16, len); - defer allocator.free(data_b); - - for (0..len) |i| { - data_a[i] = @floatFromInt(i); - data_b[i] = @floatFromInt(len - 1 - i); - } - - // 5. 
Initialize raw GPU Buffers - // We pass the EnumSet inline using `.initMany` since the Enum itself isn't exported - const byte_size = len * @sizeOf(f16); - const buf_a = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc })); - const buf_b = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc })); - const buf_out = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc })); - - // Note: The buffers are safely tied to the GpuArena which will automatically - // release them at the end. You can also manually call buf_x.deinit() if desired. - - // 6. Transfer data from CPU slices to GPU Buffers - try buf_a.load(f16, data_a); - try buf_b.load(f16, data_b); - - // 7. Dispatch the Compute Process - // We pass the data type (f16) to allow GpuProcess to calculate chunks correctly - try add_process.run(gloc, f16, buf_a, buf_b, buf_out); - - // 8. Map and copy the resulting buffer back to the CPU - const out = try buf_out.read(allocator, f16); - defer allocator.free(out); - - std.debug.print("Result: {any}\n", .{out}); -} diff --git a/examples/shaders/circle.wgsl b/examples/shaders/circle.wgsl new file mode 100644 index 0000000..1a99608 --- /dev/null +++ b/examples/shaders/circle.wgsl @@ -0,0 +1,39 @@ +struct VertexOutput { + @builtin(position) position: vec4f, + @location(0) uv: vec2f, +}; + +@vertex +fn vs_main(@builtin(vertex_index) vertex_index: u32) -> VertexOutput { + var output: VertexOutput; + // Hardcoded fullscreen quad layout using 4 vertices (Triangle Strip) + // Indexes: 0: Top-Left, 1: Bottom-Left, 2: Top-Right, 3: Bottom-Right + var pos = array( + vec2f(-1.0, 1.0), + vec2f(-1.0, -1.0), + vec2f( 1.0, 1.0), + vec2f( 1.0, -1.0) + ); + + output.position = vec4f(pos[vertex_index], 0.0, 1.0); + output.uv = pos[vertex_index]; // Ranges cleanly from -1.0 to 1.0 + return output; +} + +@fragment +fn fs_main(input: VertexOutput) -> @location(0) vec4f { + // Distance from the center (0,0) + let 
distance = length(input.uv); + let radius = 0.5; + + // Smooth out pixel edges (anti-aliasing) + let edge_softness = 0.005; + let alpha = 1.0 - smoothstep(radius - edge_softness, radius + edge_softness, distance); + + if (alpha <= 0.0) { + discard; + } + + // Draw a sharp/smooth red circle + return vec4f(1.0, 0.3, 0.3, alpha); +} diff --git a/src/GpuAllocator.zig b/src/GpuAllocator.zig index 48f4fe7..977b327 100644 --- a/src/GpuAllocator.zig +++ b/src/GpuAllocator.zig @@ -2,18 +2,58 @@ const GpuDevice = @import("GpuDevice.zig"); const c = @import("utils.zig").c; pub const VTable = struct { - alloc: *const fn (ctx: *anyopaque, bytes: u64, usage: c.WGPUBufferUsage) anyerror!c.WGPUBuffer, - free: *const fn (ctx: *anyopaque, buf_raw: c.WGPUBuffer, size: u64) void, + allocBuffer: *const fn (ctx: *anyopaque, desc: c.WGPUBufferDescriptor) anyerror!c.WGPUBuffer, + freeBuffer: *const fn (ctx: *anyopaque, buf_raw: c.WGPUBuffer) void, + allocTexture: *const fn (ctx: *anyopaque, desc: c.WGPUTextureDescriptor) anyerror!c.WGPUTexture, + freeTexture: *const fn (ctx: *anyopaque, buf_raw: c.WGPUTexture) void, + allocTextureView: *const fn (ctx: *anyopaque, texture: c.WGPUTexture, desc: c.WGPUTextureViewDescriptor) anyerror!c.WGPUTextureView, + freeTextureView: *const fn (ctx: *anyopaque, buf_raw: c.WGPUTextureView) void, + allocRenderPipeline: *const fn (ctx: *anyopaque, desc: c.WGPURenderPipelineDescriptor) anyerror!c.WGPURenderPipeline, + freeRenderPipeline: *const fn (ctx: *anyopaque, buf_raw: c.WGPURenderPipeline) void, + allocComputePipeline: *const fn (ctx: *anyopaque, desc: c.WGPUComputePipelineDescriptor) anyerror!c.WGPUComputePipeline, + freeComputePipeline: *const fn (ctx: *anyopaque, buf_raw: c.WGPUComputePipeline) void, }; device: GpuDevice, ptr: *anyopaque, vtable: *const VTable, -pub fn allocBuffer(self: @This(), bytes: u64, usage: c.WGPUBufferUsage) !c.WGPUBuffer { - return self.vtable.alloc(self.ptr, bytes, usage); +pub fn allocBuffer(self: @This(), desc: 
c.WGPUBufferDescriptor) !c.WGPUBuffer { + return self.vtable.allocBuffer(self.ptr, desc); } -pub fn freeBuffer(self: @This(), buf_raw: c.WGPUBuffer, size: u64) void { - self.vtable.free(self.ptr, buf_raw, size); +pub fn freeBuffer(self: @This(), raw: c.WGPUBuffer) void { + self.vtable.freeBuffer(self.ptr, raw); +} + +pub fn allocTexture(self: @This(), desc: c.WGPUTextureDescriptor) !c.WGPUTexture { + return self.vtable.allocTexture(self.ptr, desc); +} + +pub fn freeTexture(self: @This(), raw: c.WGPUTexture) void { + self.vtable.freeTexture(self.ptr, raw); +} + +pub fn allocTextureView(self: @This(), texture: c.WGPUTexture, desc: c.WGPUTextureViewDescriptor) !c.WGPUTextureView { + return self.vtable.allocTextureView(self.ptr, texture, desc); +} + +pub fn freeTextureView(self: @This(), raw: c.WGPUTextureView) void { + self.vtable.freeTextureView(self.ptr, raw); +} + +pub fn allocRenderPipeline(self: @This(), desc: c.WGPURenderPipelineDescriptor) !c.WGPURenderPipeline { + return self.vtable.allocRenderPipeline(self.ptr, desc); +} + +pub fn freeRenderPipeline(self: @This(), raw: c.WGPURenderPipeline) void { + self.vtable.freeRenderPipeline(self.ptr, raw); +} + +pub fn allocComputePipeline(self: @This(), desc: c.WGPUComputePipelineDescriptor) !c.WGPUComputePipeline { + return self.vtable.allocComputePipeline(self.ptr, desc); +} + +pub fn freeComputePipeline(self: @This(), raw: c.WGPUComputePipeline) void { + self.vtable.freeComputePipeline(self.ptr, raw); } diff --git a/src/GpuArena.zig b/src/GpuArena.zig deleted file mode 100644 index 890d71c..0000000 --- a/src/GpuArena.zig +++ /dev/null @@ -1,69 +0,0 @@ -const std = @import("std"); -const GpuDevice = @import("GpuDevice.zig"); -const GpuAllocator = @import("GpuAllocator.zig"); -const c = @import("utils.zig").c; - -device: GpuDevice, -tracked_buffers: std.AutoHashMap(c.WGPUBuffer, void), -allocated_vram_bytes: u64 = 0, - -pub fn init(cpu_allocator: std.mem.Allocator, device: GpuDevice) @This() { - return .{ - .device = 
device, - .tracked_buffers = .init(cpu_allocator), - }; -} - -pub fn deinit(self: *@This()) void { - var it = self.tracked_buffers.keyIterator(); - while (it.next()) |buf_ptr| { - c.wgpuBufferDestroy(buf_ptr.*); - c.wgpuBufferRelease(buf_ptr.*); - } - self.tracked_buffers.deinit(); -} - -/// Returns the type-erased immutable interface wrapper -pub fn gpuAllocator(self: *@This()) GpuAllocator { - return .{ - .device = self.device, - .ptr = self, - .vtable = &.{ - .alloc = alloc, - .free = free, - }, - }; -} - -fn alloc(ctx: *anyopaque, bytes: u64, usage: c.WGPUBufferUsage) anyerror!c.WGPUBuffer { - const self: *@This() = @ptrCast(@alignCast(ctx)); - - if (bytes > self.device.limits.maxBufferSize) - return error.SingleBufferExceedsLimit; - - if (bytes + self.allocated_vram_bytes > self.device.config.vram_bytes_limit) - return error.ExceedsVramBudget; - - const buf = c.wgpuDeviceCreateBuffer(self.device.device, &.{ - .usage = usage, - .size = bytes, - }) orelse return error.BufferAlloc; - errdefer { - c.wgpuBufferDestroy(buf); - c.wgpuBufferRelease(buf); - } - - try self.tracked_buffers.put(buf, {}); - self.allocated_vram_bytes += bytes; - return buf; -} - -fn free(ctx: *anyopaque, buf_raw: c.WGPUBuffer, size: u64) void { - const self: *@This() = @ptrCast(@alignCast(ctx)); - - if (self.tracked_buffers.remove(buf_raw)) { - c.wgpuBufferDestroy(buf_raw); - c.wgpuBufferRelease(buf_raw); - self.allocated_vram_bytes -= size; - } -} diff --git a/src/GpuArenaAllocator.zig b/src/GpuArenaAllocator.zig new file mode 100644 index 0000000..36da463 --- /dev/null +++ b/src/GpuArenaAllocator.zig @@ -0,0 +1,163 @@ +const std = @import("std"); +const GpuDevice = @import("GpuDevice.zig"); +const GpuAllocator = @import("GpuAllocator.zig"); +const GpuTextureFormat = @import("lib.zig").GpuTextureFormat; +const c = @import("utils.zig").c; + +child_allocator: GpuAllocator, // I use Zig naming child_allocator, but that should be a parent for me. 
Likely something idk +tracked_buffers: std.AutoHashMap(c.WGPUBuffer, c.WGPUBufferDescriptor), +tracked_textures: std.AutoHashMap(c.WGPUTexture, c.WGPUTextureDescriptor), +tracked_views: std.AutoHashMap(c.WGPUTextureView, c.WGPUTextureViewDescriptor), +tracked_renders: std.AutoHashMap(c.WGPURenderPipeline, c.WGPURenderPipelineDescriptor), +tracked_computes: std.AutoHashMap(c.WGPUComputePipeline, c.WGPUComputePipelineDescriptor), +allocated_vram_bytes: u64 = 0, + +pub fn init(cpu_allocator: std.mem.Allocator, child_allocator: GpuAllocator) @This() { + return .{ + .child_allocator = child_allocator, + .tracked_buffers = .init(cpu_allocator), + .tracked_textures = .init(cpu_allocator), + .tracked_views = .init(cpu_allocator), + .tracked_computes = .init(cpu_allocator), + .tracked_renders = .init(cpu_allocator), + }; +} + +pub fn deinit(self: *@This()) void { + var it_buffer = self.tracked_buffers.keyIterator(); + while (it_buffer.next()) |buf_ptr| + self.child_allocator.freeBuffer(buf_ptr.*); + self.tracked_buffers.deinit(); + + var it_tex = self.tracked_textures.keyIterator(); + while (it_tex.next()) |buf_ptr| + self.child_allocator.freeTexture(buf_ptr.*); + self.tracked_textures.deinit(); + + var it_view = self.tracked_views.keyIterator(); + while (it_view.next()) |buf_ptr| + self.child_allocator.freeTextureView(buf_ptr.*); + self.tracked_views.deinit(); + + var it_render = self.tracked_renders.keyIterator(); + while (it_render.next()) |buf_ptr| + self.child_allocator.freeRenderPipeline(buf_ptr.*); + self.tracked_renders.deinit(); + + var it_compute = self.tracked_computes.keyIterator(); + while (it_compute.next()) |buf_ptr| + self.child_allocator.freeComputePipeline(buf_ptr.*); + self.tracked_computes.deinit(); +} + +/// Returns the type-erased immutable interface wrapper +pub fn gpuAllocator(self: *@This()) GpuAllocator { + return .{ + .device = self.child_allocator.device, + .ptr = self, + .vtable = &.{ + .allocBuffer = allocBuffer, + .freeBuffer = freeBuffer, + 
.allocTexture = allocTexture, + .freeTexture = freeTexture, + .allocTextureView = allocTextureView, + .freeTextureView = freeTextureView, + .allocRenderPipeline = allocRenderPipeline, + .freeRenderPipeline = freeRenderPipeline, + .allocComputePipeline = allocComputePipeline, + .freeComputePipeline = freeComputePipeline, + }, + }; +} + +// NOTE: Capacity is reserved with ensureTotalCapacity before the child allocation, so the following putAssumeCapacity cannot fail. +// That way I don't need an errdefer to release what I just allocated in VRAM. + +fn allocBuffer(ctx: *anyopaque, desc: c.WGPUBufferDescriptor) anyerror!c.WGPUBuffer { + const self: *@This() = @ptrCast(@alignCast(ctx)); + try self.tracked_buffers.ensureTotalCapacity(self.tracked_buffers.count() + 1); + const raw = try self.child_allocator.allocBuffer(desc); + self.tracked_buffers.putAssumeCapacity(raw, desc); + self.allocated_vram_bytes += desc.size; + return raw; +} + +fn freeBuffer(ctx: *anyopaque, raw: c.WGPUBuffer) void { + const self: *@This() = @ptrCast(@alignCast(ctx)); + if (self.tracked_buffers.fetchRemove(raw)) |kv| { + self.child_allocator.freeBuffer(raw); + self.allocated_vram_bytes -= kv.value.size; + } +} + +fn allocTexture(ctx: *anyopaque, desc: c.WGPUTextureDescriptor) anyerror!c.WGPUTexture { + const self: *@This() = @ptrCast(@alignCast(ctx)); + try self.tracked_textures.ensureTotalCapacity(self.tracked_textures.count() + 1); + + const format: GpuTextureFormat = @enumFromInt(desc.format); + const bytes_size = desc.size.width * desc.size.height * format.bytesPerPixel(); + + if (bytes_size + self.allocated_vram_bytes > self.child_allocator.device.config.vram_bytes_limit) + return error.ExceedsVramBudget; + + const raw = try self.child_allocator.allocTexture(desc); + + self.tracked_textures.putAssumeCapacity(raw, desc); + self.allocated_vram_bytes += bytes_size; + return raw; +} + +fn freeTexture(ctx: *anyopaque, raw: c.WGPUTexture) void { + const self: *@This() = @ptrCast(@alignCast(ctx)); + + if 
(self.tracked_textures.fetchRemove(raw)) |kv| { + self.child_allocator.freeTexture(raw); + + const desc = kv.value; + const format: GpuTextureFormat = @enumFromInt(desc.format); + const bytes_size = desc.size.width * desc.size.height * format.bytesPerPixel(); + self.allocated_vram_bytes -= bytes_size; + } +} + +fn allocTextureView(ctx: *anyopaque, texture: c.WGPUTexture, desc: c.WGPUTextureViewDescriptor) anyerror!c.WGPUTextureView { + const self: *@This() = @ptrCast(@alignCast(ctx)); + try self.tracked_views.ensureTotalCapacity(self.tracked_views.count() + 1); + const raw = try self.child_allocator.allocTextureView(texture, desc); + self.tracked_views.putAssumeCapacity(raw, desc); + return raw; +} + +fn freeTextureView(ctx: *anyopaque, raw: c.WGPUTextureView) void { + const self: *@This() = @ptrCast(@alignCast(ctx)); + if (self.tracked_views.remove(raw)) + self.child_allocator.freeTextureView(raw); +} + +fn allocRenderPipeline(ctx: *anyopaque, desc: c.WGPURenderPipelineDescriptor) anyerror!c.WGPURenderPipeline { + const self: *@This() = @ptrCast(@alignCast(ctx)); + try self.tracked_renders.ensureTotalCapacity(self.tracked_renders.count() + 1); + const raw = try self.child_allocator.allocRenderPipeline(desc); + self.tracked_renders.putAssumeCapacity(raw, desc); + return raw; +} + +fn freeRenderPipeline(ctx: *anyopaque, raw: c.WGPURenderPipeline) void { + const self: *@This() = @ptrCast(@alignCast(ctx)); + if (self.tracked_renders.remove(raw)) + self.child_allocator.freeRenderPipeline(raw); +} + +fn allocComputePipeline(ctx: *anyopaque, desc: c.WGPUComputePipelineDescriptor) anyerror!c.WGPUComputePipeline { + const self: *@This() = @ptrCast(@alignCast(ctx)); + try self.tracked_computes.ensureTotalCapacity(self.tracked_computes.count() + 1); + const raw = try self.child_allocator.allocComputePipeline(desc); + self.tracked_computes.putAssumeCapacity(raw, desc); + return raw; +} + +fn freeComputePipeline(ctx: *anyopaque, raw: c.WGPUComputePipeline) void { + const self: 
*@This() = @ptrCast(@alignCast(ctx)); + if (self.tracked_computes.remove(raw)) + self.child_allocator.freeComputePipeline(raw); +} diff --git a/src/GpuBuffer.zig b/src/GpuBuffer.zig index 155e233..6632462 100644 --- a/src/GpuBuffer.zig +++ b/src/GpuBuffer.zig @@ -21,7 +21,6 @@ const BufferUsage = enum(u64) { QueryResolve = 0x0000000000000200, }; -/// Allocates the underlying WebGPU handle and registers it to the parent GpuAllocator pub fn init(gloc: GpuAllocator, size: u64, usage: std.EnumSet(BufferUsage)) !@This() { var use: u64 = 0; var iter = usage.iterator(); @@ -30,7 +29,7 @@ pub fn init(gloc: GpuAllocator, size: u64, usage: std.EnumSet(BufferUsage)) !@Th // Automatically align the buffer size forward to a multiple of 4 bytes under the hood const aligned_size = std.mem.alignForward(u64, size, 4); - const raw_handle = try gloc.allocBuffer(aligned_size, use); + const raw_handle = try gloc.allocBuffer(.{ .size = aligned_size, .usage = use }); return .{ .raw = raw_handle, .size = aligned_size, @@ -39,17 +38,14 @@ pub fn init(gloc: GpuAllocator, size: u64, usage: std.EnumSet(BufferUsage)) !@Th }; } -/// Unregisters from the parent GpuAllocator and cleanly destroys GPU resources pub fn deinit(self: @This()) void { - self.gloc.freeBuffer(self.raw, self.size); + self.gloc.freeBuffer(self.raw); } -/// Native getConstMappedRange wrapper pub fn getConstMappedRange(self: @This(), offset: u64, size: u64) ?*const anyopaque { return c.wgpuBufferGetConstMappedRange(self.raw, offset, size); } -/// Native mapAsync wrapper pub fn mapAsync( self: @This(), mode: c.WGPUMapMode, @@ -60,12 +56,11 @@ pub fn mapAsync( _ = c.wgpuBufferMapAsync(self.raw, mode, offset, size, callback_info); } -/// Native unmap wrapper pub fn unmap(self: @This()) void { c.wgpuBufferUnmap(self.raw); } -/// CPU to GPU. 
+/// CPU to GPU pub fn load( self: @This(), T: type, @@ -92,6 +87,7 @@ pub fn load( } } +/// GPU to CPU pub fn read(self: @This(), alloc: std.mem.Allocator, T: type) ![]T { const out = try alloc.alloc(T, @divExact(self.size, @sizeOf(T))); diff --git a/src/GpuCompute.zig b/src/GpuCompute.zig index 6838fa9..c0fc52e 100644 --- a/src/GpuCompute.zig +++ b/src/GpuCompute.zig @@ -20,30 +20,30 @@ pub const ComputeDef = struct { }; pip: c.WGPUComputePipeline, +gloc: GpuAllocator, def: ComputeDef, -pub fn init(device: GpuDevice, wgsl: []const u8, def: ComputeDef) !@This() { +pub fn init(gloc: GpuAllocator, wgsl: []const u8, def: ComputeDef) !@This() { var wgsl_src = c.WGPUShaderSourceWGSL{ .chain = .{ .sType = c.WGPUSType_ShaderSourceWGSL }, .code = sv(wgsl), }; - const shader = c.wgpuDeviceCreateShaderModule(device.device, &.{ + const shader = c.wgpuDeviceCreateShaderModule(gloc.device.device, &.{ .nextInChain = @ptrCast(&wgsl_src), }) orelse return error.Shader; defer c.wgpuShaderModuleRelease(shader); - const pip = c.wgpuDeviceCreateComputePipeline(device.device, &.{ - .compute = .{ .module = shader, .entryPoint = sv("main") }, - }) orelse return error.Pipeline; + const pip = try gloc.allocComputePipeline(.{ .compute = .{ .module = shader, .entryPoint = sv("main") } }); return .{ + .gloc = gloc, .pip = pip, .def = def, }; } pub fn deinit(self: @This()) void { - c.wgpuComputePipelineRelease(self.pip); + self.gloc.freeComputePipeline(self.pip); } /// Execute the compute pass with arbitrary buffer bindings via a tuple. 
diff --git a/src/GpuDevice.zig b/src/GpuDevice.zig index 60c137f..8463aa2 100644 --- a/src/GpuDevice.zig +++ b/src/GpuDevice.zig @@ -1,6 +1,13 @@ const std = @import("std"); const c = @import("utils.zig").c; const sv = @import("utils.zig").sv; +const GpuAllocator = @import("GpuAllocator.zig"); +const GpuTextureFormat = @import("lib.zig").GpuTextureFormat; + +// TODO: Make the allocator more Zig-like +// - GpuDevice can return a GpuAllocator that just allocates and nothing else +// - From this GpuAllocator, one can create a GpuArena like std.heap.ArenaAllocator.init(allocator) +// - Rename GpuArenaAllocator too const Ctx = struct { adapter: c.WGPUAdapter = null, @@ -127,3 +134,75 @@ fn onDevice( const ctx: *Ctx = @ptrCast(@alignCast(userdata1.?)); ctx.device = device; } + +// Allocation stuff + +/// Returns the type-erased immutable interface wrapper +pub fn gpuAllocator(self: *const @This()) GpuAllocator { + return .{ + .device = self.*, + .ptr = @ptrCast(@constCast(self)), + .vtable = &.{ + .allocBuffer = allocBuffer, + .freeBuffer = freeBuffer, + .allocTexture = allocTexture, + .freeTexture = freeTexture, + .allocTextureView = allocTextureView, + .freeTextureView = freeTextureView, + .allocRenderPipeline = allocRenderPipeline, + .freeRenderPipeline = freeRenderPipeline, + .allocComputePipeline = allocComputePipeline, + .freeComputePipeline = freeComputePipeline, + }, + }; +} + +fn allocBuffer(ctx: *anyopaque, desc: c.WGPUBufferDescriptor) anyerror!c.WGPUBuffer { + const self: *@This() = @ptrCast(@alignCast(ctx)); + if (desc.size > self.limits.maxBufferSize) + return error.SingleBufferExceedsLimit; + return c.wgpuDeviceCreateBuffer(self.device, &desc) orelse return error.BufferAlloc; +} + +fn freeBuffer(_: *anyopaque, raw: c.WGPUBuffer) void { + c.wgpuBufferDestroy(raw); + c.wgpuBufferRelease(raw); +} + +fn allocTexture(ctx: *anyopaque, desc: c.WGPUTextureDescriptor) anyerror!c.WGPUTexture { + const self: *@This() = @ptrCast(@alignCast(ctx)); + const format: 
GpuTextureFormat = @enumFromInt(desc.format); + if (desc.size.width * desc.size.height * format.bytesPerPixel() > self.limits.maxBufferSize) + return error.SingleBufferExceedsLimit; + return c.wgpuDeviceCreateTexture(self.device, &desc) orelse return error.Texture; +} + +fn freeTexture(_: *anyopaque, raw: c.WGPUTexture) void { + c.wgpuTextureRelease(raw); +} + +fn allocTextureView(_: *anyopaque, texture: c.WGPUTexture, desc: c.WGPUTextureViewDescriptor) anyerror!c.WGPUTextureView { + return c.wgpuTextureCreateView(texture, &desc) orelse return error.View; +} + +fn freeTextureView(_: *anyopaque, raw: c.WGPUTextureView) void { + c.wgpuTextureViewRelease(raw); +} + +fn allocRenderPipeline(ctx: *anyopaque, desc: c.WGPURenderPipelineDescriptor) anyerror!c.WGPURenderPipeline { + const self: *@This() = @ptrCast(@alignCast(ctx)); + return c.wgpuDeviceCreateRenderPipeline(self.device, &desc) orelse return error.Pipeline; +} + +fn freeRenderPipeline(_: *anyopaque, raw: c.WGPURenderPipeline) void { + c.wgpuRenderPipelineRelease(raw); +} + +fn allocComputePipeline(ctx: *anyopaque, desc: c.WGPUComputePipelineDescriptor) anyerror!c.WGPUComputePipeline { + const self: *@This() = @ptrCast(@alignCast(ctx)); + return c.wgpuDeviceCreateComputePipeline(self.device, &desc) orelse return error.Pipeline; +} + +fn freeComputePipeline(_: *anyopaque, raw: c.WGPUComputePipeline) void { + c.wgpuComputePipelineRelease(raw); +} diff --git a/src/GpuRender.zig b/src/GpuRender.zig new file mode 100644 index 0000000..4f26d5a --- /dev/null +++ b/src/GpuRender.zig @@ -0,0 +1,181 @@ +const std = @import("std"); +const c = @import("utils.zig").c; +const sv = @import("utils.zig").sv; +const GpuAllocator = @import("GpuAllocator.zig"); +const GpuBuffer = @import("GpuBuffer.zig"); +const GpuDevice = @import("GpuDevice.zig"); +const GpuTextureView = @import("GpuTextureView.zig"); +const GpuTextureFormat = @import("lib.zig").GpuTextureFormat; + +pub const Binding = struct { + element_size: u32 = 0, +}; + 
+pub const GpuRenderDef = struct { + bindings: []const Binding = &.{}, + /// The surface texture format we are rendering to (e.g., BGRA8Unorm) + texture_format: GpuTextureFormat, + /// The names of the entry points inside your WGSL code + vertex_entry: []const u8 = "vs_main", + fragment_entry: []const u8 = "fs_main", + /// Primitive topology, default to triangle list + topology: GpuPrimitiveTopology = .TriangleList, +}; + +const GpuPrimitiveTopology = enum(c_uint) { + Undefined = 0x00000000, + PointList = 0x00000001, + LineList = 0x00000002, + LineStrip = 0x00000003, + TriangleList = 0x00000004, + TriangleStrip = 0x00000005, + Force32 = 0x7FFFFFFF, +}; + +gloc: GpuAllocator, +pip: c.WGPURenderPipeline, +def: GpuRenderDef, + +pub fn init(gloc: GpuAllocator, wgsl: []const u8, def: GpuRenderDef) !@This() { + var wgsl_src = c.WGPUShaderSourceWGSL{ + .chain = .{ .sType = c.WGPUSType_ShaderSourceWGSL }, + .code = sv(wgsl), + }; + const shader = c.wgpuDeviceCreateShaderModule(gloc.device.device, &.{ + .nextInChain = @ptrCast(&wgsl_src), + }) orelse return error.Shader; + defer c.wgpuShaderModuleRelease(shader); + + // 1. Setup the Color Target State (where the fragment shader outputs) + const blend = c.WGPUBlendState{ + .color = .{ .operation = c.WGPUBlendOperation_Add, .srcFactor = c.WGPUBlendFactor_SrcAlpha, .dstFactor = c.WGPUBlendFactor_OneMinusSrcAlpha }, + .alpha = .{ .operation = c.WGPUBlendOperation_Add, .srcFactor = c.WGPUBlendFactor_One, .dstFactor = c.WGPUBlendFactor_Zero }, + }; + + const color_target = c.WGPUColorTargetState{ + .format = @intFromEnum(def.texture_format), + .blend = &blend, + .writeMask = c.WGPUColorWriteMask_All, + }; + + // 2. Setup the Fragment State + const fragment_state = c.WGPUFragmentState{ + .module = shader, + .entryPoint = sv(def.fragment_entry), + .targetCount = 1, + .targets = &color_target, + }; + + // 3. 
Compile the Complete Render Pipeline
+    const pip = try gloc.allocRenderPipeline(.{
+        .vertex = .{
+            .module = shader,
+            .entryPoint = sv(def.vertex_entry),
+        },
+        .primitive = .{
+            .topology = @intFromEnum(def.topology),
+            .stripIndexFormat = c.WGPUIndexFormat_Undefined,
+            .frontFace = c.WGPUFrontFace_CCW,
+            .cullMode = c.WGPUCullMode_None,
+        },
+        .multisample = .{
+            .count = 1,
+            .mask = 0xFFFFFFFF,
+            .alphaToCoverageEnabled = 0,
+        },
+        .fragment = &fragment_state,
+    });
+
+    return .{
+        .gloc = gloc,
+        .pip = pip,
+        .def = def,
+    };
+}
+
+/// Return the render pipeline to the allocator it was created from.
+pub fn deinit(self: @This()) void {
+    self.gloc.freeRenderPipeline(self.pip);
+}
+
+/// Record and submit one render pass that clears `target_view` to dark grey
+/// and draws `vertex_count` vertices (a single instance) with this pipeline.
+///
+/// `args` must be a tuple of `GpuBuffer`s, one per entry in `def.bindings`.
+/// Element `i` is bound whole (offset 0, full size) at binding `i` of bind
+/// group 0. Pass `.{}` for a pipeline without bindings.
+///
+/// The commands are only submitted to the queue; nothing here waits for the
+/// GPU to finish.
+///
+/// Errors: `InvalidArgumentCount` when the tuple length differs from the
+/// pipeline's binding count; `BindGroupLayout`, `BindGroup`, `Encoder`,
+/// `RenderPass` or `CommandBuffer` when the matching WebGPU object could not
+/// be created.
+pub fn draw(
+    self: @This(),
+    gloc: GpuAllocator,
+    target_view: GpuTextureView,
+    vertex_count: u32,
+    args: anytype,
+) !void {
+    const type_info = @typeInfo(@TypeOf(args));
+    if (type_info != .@"struct" or !type_info.@"struct".is_tuple)
+        @compileError("Expected a tuple of GpuBuffers for args. E.g. .{ uniform_buf }");
+
+    const fields = type_info.@"struct".fields;
+    if (fields.len != self.def.bindings.len)
+        return error.InvalidArgumentCount;
+
+    // Only build a bind group when there is something to bind: a pipeline
+    // without bindings needs neither the layout lookup nor the extra object.
+    // The entry array is sized from the tuple, so there is no fixed cap.
+    const bind_group: c.WGPUBindGroup = if (fields.len > 0) blk: {
+        var entries: [fields.len]c.WGPUBindGroupEntry = undefined;
+        inline for (fields, 0..) |field, i| {
+            const buf = @field(args, field.name);
+            if (@TypeOf(buf) != GpuBuffer) {
+                @compileError("All arguments in the tuple must be of type GpuBuffer");
+            }
+            entries[i] = .{
+                .binding = @intCast(i),
+                .buffer = buf.raw,
+                .offset = 0,
+                .size = buf.size,
+            };
+        }
+
+        const layout = c.wgpuRenderPipelineGetBindGroupLayout(self.pip, 0) orelse
+            return error.BindGroupLayout;
+        defer c.wgpuBindGroupLayoutRelease(layout);
+
+        break :blk c.wgpuDeviceCreateBindGroup(gloc.device.device, &.{
+            .layout = layout,
+            .entries = &entries,
+            .entryCount = entries.len,
+        }) orelse return error.BindGroup;
+    } else null;
+    defer if (bind_group) |bg| c.wgpuBindGroupRelease(bg);
+
+    const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device.device, null) orelse return error.Encoder;
+    defer c.wgpuCommandEncoderRelease(enc);
+
+    // Single color target: cleared on load, kept on store.
+    const color_attachment = c.WGPURenderPassColorAttachment{
+        .view = target_view.raw,
+        .resolveTarget = null,
+        .loadOp = c.WGPULoadOp_Clear,
+        .storeOp = c.WGPUStoreOp_Store,
+        .clearValue = .{ .r = 0.1, .g = 0.1, .b = 0.1, .a = 1.0 },
+        .depthSlice = c.WGPU_DEPTH_SLICE_UNDEFINED,
+    };
+
+    const pass_desc = c.WGPURenderPassDescriptor{
+        .colorAttachmentCount = 1,
+        .colorAttachments = &color_attachment,
+        .depthStencilAttachment = null,
+    };
+
+    const pass = c.wgpuCommandEncoderBeginRenderPass(enc, &pass_desc) orelse return error.RenderPass;
+    c.wgpuRenderPassEncoderSetPipeline(pass, self.pip);
+    if (bind_group) |bg| {
+        c.wgpuRenderPassEncoderSetBindGroup(pass, 0, bg, 0, null);
+    }
+    c.wgpuRenderPassEncoderDraw(pass, vertex_count, 1, 0, 0);
+    c.wgpuRenderPassEncoderEnd(pass);
+    c.wgpuRenderPassEncoderRelease(pass);
+
+    // Keep the handle typed as the (optional) C handle so `&cmd` matches the
+    // pointer type `wgpuQueueSubmit` expects.
+    const cmd: c.WGPUCommandBuffer = c.wgpuCommandEncoderFinish(enc, null) orelse
+        return error.CommandBuffer;
+    defer c.wgpuCommandBufferRelease(cmd);
+
+    c.wgpuQueueSubmit(gloc.device.queue, 1, &cmd);
+}
diff --git a/src/GpuTexture.zig b/src/GpuTexture.zig
new file mode 100644
index 0000000..b819ff2
--- /dev/null
+++ b/src/GpuTexture.zig
@@ -0,0 +1,168 @@
+//! A 2D GPU texture together with the definition it was created from.
+//!
+//! WebGPU textures cannot be mapped into CPU memory. To get at the texels,
+//! copy them into a `GpuBuffer` with `buffCopy` and read that buffer back.
+
+const std = @import("std");
+const c = @import("utils.zig").c;
+const GpuAllocator = @import("GpuAllocator.zig");
+const GpuBuffer = @import("GpuBuffer.zig");
+const GpuTextureFormat = @import("lib.zig").GpuTextureFormat;
+const GpuTextureUsage = @import("lib.zig").GpuTextureUsage;
+
+/// Everything needed to create a texture: extent, allowed usages and format.
+pub const GpuTextureDef = struct {
+    size: c.WGPUExtent3D,
+    usage: std.EnumSet(GpuTextureUsage),
+    format: GpuTextureFormat,
+};
+
+raw: c.WGPUTexture,
+gloc: GpuAllocator,
+def: GpuTextureDef,
+
+/// Allocate a 2D texture (one mip level, one sample) described by `def`.
+/// The usage set is OR-ed into the bitmask WebGPU expects.
+pub fn init(gloc: GpuAllocator, def: GpuTextureDef) !@This() {
+    var use: u64 = 0;
+    var iter = def.usage.iterator();
+    while (iter.next()) |flag| use |= @intFromEnum(flag);
+
+    const desc = c.WGPUTextureDescriptor{
+        .usage = use,
+        .dimension = c.WGPUTextureDimension_2D,
+        .size = def.size,
+        .format = @intFromEnum(def.format),
+        .mipLevelCount = 1,
+        .sampleCount = 1,
+    };
+    const raw = try gloc.allocTexture(desc);
+
+    return .{ .gloc = gloc, .raw = raw, .def = def };
+}
+
+/// Return the texture to the allocator it was created from.
+pub fn deinit(self: @This()) void {
+    self.gloc.freeTexture(self.raw);
+}
+
+/// Size in bytes of one tightly packed layer: `bytesSizeRow() * height`.
+pub fn bytesSize(self: @This()) u32 {
+    return self.bytesSizeRow() * self.def.size.height;
+}
+
+/// Size in bytes of one tightly packed row of texels: `width * bytesPerPixel`.
+/// This is 0 for formats whose `bytesPerPixel()` is 0.
+pub fn bytesSizeRow(self: @This()) u32 {
+    return self.def.size.width * self.def.format.bytesPerPixel();
+}
+
+/// Return a GpuBuffer containing a copy of the texture.
+///
+/// The buffer is tightly packed (`bytesSizeRow()` bytes per row) and is
+/// created with `CopyDst` and `CopySrc` usage. The copy is only submitted to
+/// the queue; nothing here waits for it. The caller owns the returned buffer.
+///
+/// WebGPU requires `bytesPerRow` of a texture-to-buffer copy to be a multiple
+/// of 256. When the tight row pitch is not, the texture is first copied into
+/// a row-padded scratch buffer and every row is then compacted into the
+/// result with buffer-to-buffer copies.
+///
+/// Errors: `UnsupportedFormat` when the format has no known pixel size,
+/// `UnsupportedRowPitch` when an unaligned row is not a multiple of 4 bytes
+/// (buffer copies need 4-byte granularity), `Encoder` / `CommandBuffer` when
+/// WebGPU object creation fails, plus anything `GpuBuffer.init` returns.
+pub fn buffCopy(self: @This(), gloc: GpuAllocator) !GpuBuffer {
+    const copy_row_alignment = 256;
+
+    const tight_row = self.bytesSizeRow();
+    if (tight_row == 0) return error.UnsupportedFormat;
+    const rows = self.def.size.height;
+    const padded_row = std.mem.alignForward(u32, tight_row, copy_row_alignment);
+
+    const buf = try GpuBuffer.init(gloc, self.bytesSize(), .initMany(&.{ .CopyDst, .CopySrc }));
+    errdefer buf.deinit();
+
+    // Declared at function scope so the scratch buffer outlives the submit.
+    var scratch: ?GpuBuffer = null;
+    defer if (scratch) |s| s.deinit();
+
+    const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device.device, null) orelse return error.Encoder;
+    defer c.wgpuCommandEncoderRelease(enc);
+
+    const src_copy = c.WGPUTexelCopyTextureInfo{
+        .texture = self.raw,
+        .mipLevel = 0,
+        .origin = .{ .x = 0, .y = 0, .z = 0 },
+        .aspect = c.WGPUTextureAspect_All,
+    };
+
+    if (padded_row == tight_row) {
+        // Row pitch already satisfies the alignment rule: copy straight in.
+        const dst_copy = c.WGPUTexelCopyBufferInfo{
+            .buffer = buf.raw,
+            .layout = .{
+                .offset = 0,
+                .bytesPerRow = tight_row,
+                .rowsPerImage = rows,
+            },
+        };
+        c.wgpuCommandEncoderCopyTextureToBuffer(enc, &src_copy, &dst_copy, &self.def.size);
+    } else {
+        if (tight_row % 4 != 0) return error.UnsupportedRowPitch;
+
+        const padded = try GpuBuffer.init(gloc, padded_row * rows, .initMany(&.{ .CopyDst, .CopySrc }));
+        scratch = padded;
+
+        const dst_copy = c.WGPUTexelCopyBufferInfo{
+            .buffer = padded.raw,
+            .layout = .{
+                .offset = 0,
+                .bytesPerRow = padded_row,
+                .rowsPerImage = rows,
+            },
+        };
+        c.wgpuCommandEncoderCopyTextureToBuffer(enc, &src_copy, &dst_copy, &self.def.size);
+
+        // Drop the per-row padding while moving the rows into the result.
+        var y: u32 = 0;
+        while (y < rows) : (y += 1) {
+            c.wgpuCommandEncoderCopyBufferToBuffer(
+                enc,
+                padded.raw,
+                @as(u64, y) * padded_row,
+                buf.raw,
+                @as(u64, y) * tight_row,
+                tight_row,
+            );
+        }
+    }
+
+    // Keep the handle typed as the (optional) C handle so `&cmd` matches the
+    // pointer type `wgpuQueueSubmit` expects.
+    const cmd: c.WGPUCommandBuffer = c.wgpuCommandEncoderFinish(enc, null) orelse
+        return error.CommandBuffer;
+    defer c.wgpuCommandBufferRelease(cmd);
+    c.wgpuQueueSubmit(gloc.device.queue, 1, &cmd);
+
+    return buf;
+}
+
+/// CPU to GPU: overwrite mip level 0 of the texture with `data`.
+///
+/// `data` must hold exactly `bytesSize()` bytes of tightly packed rows, and
+/// the texture must have been created with `CopyDst` usage.
+///
+/// Errors: `UnsupportedFormat` when the format has no known pixel size,
+/// `SizeMismatch` when `data` is not exactly `bytesSize()` bytes long.
+pub fn load(
+    self: @This(),
+    T: type,
+    data: []const T,
+) !void {
+    const row = self.bytesSizeRow();
+    if (row == 0) return error.UnsupportedFormat;
+
+    const bytes = std.mem.sliceAsBytes(data);
+    if (bytes.len != self.bytesSize()) return error.SizeMismatch;
+
+    const dst = c.WGPUTexelCopyTextureInfo{
+        .texture = self.raw,
+        .mipLevel = 0,
+        .origin = .{ .x = 0, .y = 0, .z = 0 },
+        .aspect = c.WGPUTextureAspect_All,
+    };
+    const layout = c.WGPUTexelCopyBufferLayout{
+        .offset = 0,
+        .bytesPerRow = row,
+        .rowsPerImage = self.def.size.height,
+    };
+
+    c.wgpuQueueWriteTexture(self.gloc.device.queue, &dst, bytes.ptr, bytes.len, &layout, &self.def.size);
+}
+
+/// GPU to CPU: return the texels as a freshly allocated `[]T`.
+///
+/// Copies the texture into a temporary `GpuBuffer` with `buffCopy` and reads
+/// that buffer back, so the texture must have been created with `CopySrc`
+/// usage. The caller owns the returned slice and frees it with `alloc`.
+pub fn read(self: @This(), alloc: std.mem.Allocator, T: type) ![]T {
+    const staging = try self.buffCopy(self.gloc);
+    defer staging.deinit();
+
+    return staging.read(alloc, T);
+}
diff --git a/src/GpuTextureView.zig b/src/GpuTextureView.zig
new file mode 100644
index 0000000..9475b9b
--- /dev/null
+++ b/src/GpuTextureView.zig
@@ -0,0 +1,32 @@
+const std = @import("std");
+const c = @import("utils.zig").c;
+const GpuAllocator = @import("GpuAllocator.zig");
+const GpuTexture = @import("lib.zig").GpuTexture;
+const GpuTextureFormat = @import("lib.zig").GpuTextureFormat;
+const GpuTextureUsage = @import("lib.zig").GpuTextureUsage;
+
+/// Settings passed to `init` when creating a view of a texture.
+pub const GpuViewDef = struct {
+    usage: std.EnumSet(GpuTextureUsage) = .empty,
+    format:
GpuTextureFormat = .Undefined,
+};
+
+raw: c.WGPUTextureView,
+gloc: GpuAllocator,
+
+/// Create a view of `texture` covering one mip level and one array layer.
+/// The usage set in `def` is OR-ed into a bitmask and passed on together
+/// with the numeric value of `def.format`.
+pub fn init(gloc: GpuAllocator, texture: GpuTexture, def: GpuViewDef) !@This() {
+    var usage_bits: u64 = 0;
+    var flags = def.usage.iterator();
+    while (flags.next()) |flag| {
+        usage_bits |= @intFromEnum(flag);
+    }
+
+    return .{
+        .gloc = gloc,
+        .raw = try gloc.allocTextureView(texture.raw, .{
+            .format = @intFromEnum(def.format),
+            .usage = usage_bits,
+            .mipLevelCount = 1,
+            .arrayLayerCount = 1,
+        }),
+    };
+}
+
+/// Return the view to the allocator it was created from.
+pub fn deinit(self: @This()) void {
+    self.gloc.freeTextureView(self.raw);
+}
diff --git a/src/lib.zig b/src/lib.zig
index d85f2c8..1ba633e 100644
--- a/src/lib.zig
+++ b/src/lib.zig
@@ -1,5 +1,187 @@
 pub const GpuAllocator = @import("GpuAllocator.zig");
-pub const GpuArena = @import("GpuArena.zig");
+pub const GpuArenaAllocator = @import("GpuArenaAllocator.zig");
 pub const GpuBuffer = @import("GpuBuffer.zig");
 pub const GpuDevice = @import("GpuDevice.zig");
 pub const GpuCompute = @import("GpuCompute.zig");
+pub const GpuRender = @import("GpuRender.zig");
+pub const GpuTexture = @import("GpuTexture.zig");
+pub const GpuTextureView = @import("GpuTextureView.zig");
+
+/// Texel formats. The numeric values are handed to WebGPU descriptors
+/// unchanged via `@intFromEnum`, so they must stay exactly as listed
+/// (presumably mirroring `WGPUTextureFormat` in webgpu.h; verify against the
+/// header in use before editing).
+pub const GpuTextureFormat = enum(c_uint) {
+    Undefined = 0,
+    R8Unorm = 1,
+    R8Snorm = 2,
+    R8Uint = 3,
+    R8Sint = 4,
+    R16Unorm = 5,
+    R16Snorm = 6,
+    R16Uint = 7,
+    R16Sint = 8,
+    R16Float = 9,
+    RG8Unorm = 10,
+    RG8Snorm = 11,
+    RG8Uint = 12,
+    RG8Sint = 13,
+    R32Float = 14,
+    R32Uint = 15,
+    R32Sint = 16,
+    RG16Unorm = 17,
+    RG16Snorm = 18,
+    RG16Uint = 19,
+    RG16Sint = 20,
+    RG16Float = 21,
+    RGBA8Unorm = 22,
+    RGBA8UnormSrgb = 23,
+    RGBA8Snorm = 24,
+    RGBA8Uint = 25,
+    RGBA8Sint = 26,
+    BGRA8Unorm = 27,
+    BGRA8UnormSrgb = 28,
+    RGB10A2Uint = 29,
+    RGB10A2Unorm = 30,
+    RG11B10Ufloat = 31,
+    RGB9E5Ufloat = 32,
+    RG32Float = 33,
+    RG32Uint = 34,
+    RG32Sint = 35,
+    RGBA16Unorm = 36,
+    RGBA16Snorm = 37,
+    RGBA16Uint = 38,
+    RGBA16Sint = 39,
+    RGBA16Float = 40,
+    RGBA32Float = 41,
+    RGBA32Uint = 42,
+    RGBA32Sint = 43,
+    Stencil8 = 44,
+    Depth16Unorm = 45,
+    Depth24Plus = 46,
+    Depth24PlusStencil8 = 47,
+    Depth32Float = 48,
+    Depth32FloatStencil8 = 49,
+    BC1RGBAUnorm = 50,
+    BC1RGBAUnormSrgb = 51,
+    BC2RGBAUnorm = 52,
+    BC2RGBAUnormSrgb = 53,
+    BC3RGBAUnorm = 54,
+    BC3RGBAUnormSrgb = 55,
+    BC4RUnorm = 56,
+    BC4RSnorm = 57,
+    BC5RGUnorm = 58,
+    BC5RGSnorm = 59,
+    BC6HRGBUfloat = 60,
+    BC6HRGBFloat = 61,
+    BC7RGBAUnorm = 62,
+    BC7RGBAUnormSrgb = 63,
+    ETC2RGB8Unorm = 64,
+    ETC2RGB8UnormSrgb = 65,
+    ETC2RGB8A1Unorm = 66,
+    ETC2RGB8A1UnormSrgb = 67,
+    ETC2RGBA8Unorm = 68,
+    ETC2RGBA8UnormSrgb = 69,
+    EACR11Unorm = 70,
+    EACR11Snorm = 71,
+    EACRG11Unorm = 72,
+    EACRG11Snorm = 73,
+    ASTC4x4Unorm = 74,
+    ASTC4x4UnormSrgb = 75,
+    ASTC5x4Unorm = 76,
+    ASTC5x4UnormSrgb = 77,
+    ASTC5x5Unorm = 78,
+    ASTC5x5UnormSrgb = 79,
+    ASTC6x5Unorm = 80,
+    ASTC6x5UnormSrgb = 81,
+    ASTC6x6Unorm = 82,
+    ASTC6x6UnormSrgb = 83,
+    ASTC8x5Unorm = 84,
+    ASTC8x5UnormSrgb = 85,
+    ASTC8x6Unorm = 86,
+    ASTC8x6UnormSrgb = 87,
+    ASTC8x8Unorm = 88,
+    ASTC8x8UnormSrgb = 89,
+    ASTC10x5Unorm = 90,
+    ASTC10x5UnormSrgb = 91,
+    ASTC10x6Unorm = 92,
+    ASTC10x6UnormSrgb = 93,
+    ASTC10x8Unorm = 94,
+    ASTC10x8UnormSrgb = 95,
+    ASTC10x10Unorm = 96,
+    ASTC10x10UnormSrgb = 97,
+    ASTC12x10Unorm = 98,
+    ASTC12x10UnormSrgb = 99,
+    ASTC12x12Unorm = 100,
+    ASTC12x12UnormSrgb = 101,
+    Force32 = 2147483647,
+
+    /// Size in bytes of one texel of `format`.
+    ///
+    /// Returns 0 for every format not listed below: the block-compressed
+    /// BC / ETC2 / EAC / ASTC families, `Undefined` and `Force32`. Callers
+    /// that multiply by this value get a zero size for those formats.
+    pub fn bytesPerPixel(format: GpuTextureFormat) u32 {
+        return switch (format) {
+            // 1 byte per texel
+            .R8Unorm, .R8Snorm, .R8Uint, .R8Sint, .Stencil8 => 1,
+
+            // 2 bytes per texel
+            .R16Unorm, .R16Snorm, .R16Uint, .R16Sint, .R16Float => 2,
+            .RG8Unorm, .RG8Snorm, .RG8Uint, .RG8Sint => 2,
+            .Depth16Unorm => 2,
+
+            // 4 bytes per texel
+            .R32Float, .R32Uint, .R32Sint => 4,
+            .RG16Unorm, .RG16Snorm, .RG16Uint, .RG16Sint, .RG16Float => 4,
+            .RGBA8Unorm, .RGBA8UnormSrgb, .RGBA8Snorm, .RGBA8Uint, .RGBA8Sint => 4,
+            .BGRA8Unorm, .BGRA8UnormSrgb => 4,
+            .RGB10A2Uint, .RGB10A2Unorm, .RG11B10Ufloat, .RGB9E5Ufloat => 4,
+            .Depth24Plus, .Depth32Float => 4,
+
+            // 8 bytes per texel
+            .RG32Float, .RG32Uint, .RG32Sint => 8,
+            .RGBA16Unorm, .RGBA16Snorm, .RGBA16Uint, .RGBA16Sint, .RGBA16Float => 8,
+            // Combined depth + stencil formats are counted as padded to 8 bytes.
+            .Depth24PlusStencil8, .Depth32FloatStencil8 => 8,
+
+            // 16 bytes per texel
+            .RGBA32Float, .RGBA32Uint, .RGBA32Sint => 16,
+
+            // Block-compressed formats have no per-texel size here.
+            else => 0,
+        };
+    }
+};
+
+/// Texture usage flags, one bit each, meant to be combined in a
+/// `std.EnumSet` and OR-ed into the `u64` mask WebGPU expects.
+pub const GpuTextureUsage = enum(u64) {
+    None = 0,
+    CopySrc = 1 << 0,
+    CopyDst = 1 << 1,
+    TextureBinding = 1 << 2,
+    StorageBinding = 1 << 3,
+    RenderAttachment = 1 << 4,
+    TransientAttachment = 1 << 5,
+};