diff --git a/src/GpuAllocator.zig b/src/GpuAllocator.zig
index 48f4fe7..ea98abd 100644
--- a/src/GpuAllocator.zig
+++ b/src/GpuAllocator.zig
@@ -2,18 +2,33 @@ const GpuDevice = @import("GpuDevice.zig");
 const c = @import("utils.zig").c;
 
+/// Function table a concrete allocator fills in; `ctx` receives the allocator's own `ptr`.
 pub const VTable = struct {
-    alloc: *const fn (ctx: *anyopaque, bytes: u64, usage: c.WGPUBufferUsage) anyerror!c.WGPUBuffer,
-    free: *const fn (ctx: *anyopaque, buf_raw: c.WGPUBuffer, size: u64) void,
+    allocBuffer: *const fn (ctx: *anyopaque, desc: c.WGPUBufferDescriptor) anyerror!c.WGPUBuffer,
+    freeBuffer: *const fn (ctx: *anyopaque, buf_raw: c.WGPUBuffer) void,
+    allocTexture: *const fn (ctx: *anyopaque, desc: c.WGPUTextureDescriptor) anyerror!c.WGPUTexture,
+    freeTexture: *const fn (ctx: *anyopaque, texture_raw: c.WGPUTexture) void,
 };
 
 device: GpuDevice,
 ptr: *anyopaque,
 vtable: *const VTable,
 
-pub fn allocBuffer(self: @This(), bytes: u64, usage: c.WGPUBufferUsage) !c.WGPUBuffer {
-    return self.vtable.alloc(self.ptr, bytes, usage);
+/// Creates a buffer described by `desc` through the backing allocator.
+pub fn allocBuffer(self: @This(), desc: c.WGPUBufferDescriptor) !c.WGPUBuffer {
+    return self.vtable.allocBuffer(self.ptr, desc);
 }
 
-pub fn freeBuffer(self: @This(), buf_raw: c.WGPUBuffer, size: u64) void {
-    self.vtable.free(self.ptr, buf_raw, size);
+/// Hands `buf_raw` back to the backing allocator for release.
+pub fn freeBuffer(self: @This(), buf_raw: c.WGPUBuffer) void {
+    self.vtable.freeBuffer(self.ptr, buf_raw);
+}
+
+/// Creates a texture described by `desc` through the backing allocator.
+pub fn allocTexture(self: @This(), desc: c.WGPUTextureDescriptor) !c.WGPUTexture {
+    return self.vtable.allocTexture(self.ptr, desc);
+}
+
+/// Hands `texture_raw` back to the backing allocator for release.
+pub fn freeTexture(self: @This(), texture_raw: c.WGPUTexture) void {
+    self.vtable.freeTexture(self.ptr, texture_raw);
 }
diff --git a/src/GpuArena.zig b/src/GpuArena.zig
index 890d71c..10c7cf8 100644
--- a/src/GpuArena.zig
+++ b/src/GpuArena.zig
@@ -1,26 +1,34 @@
 const std = @import("std");
 const GpuDevice = @import("GpuDevice.zig");
 const GpuAllocator = @import("GpuAllocator.zig");
+const GpuTextureFormat = @import("lib.zig").GpuTextureFormat;
 const c = @import("utils.zig").c;
 
 device: GpuDevice,
-tracked_buffers: std.AutoHashMap(c.WGPUBuffer, void),
+tracked_buffers: std.AutoHashMap(c.WGPUBuffer, c.WGPUBufferDescriptor),
+tracked_textures: std.AutoHashMap(c.WGPUTexture, c.WGPUTextureDescriptor),
 allocated_vram_bytes: u64 = 0,
 
 pub fn init(cpu_allocator: std.mem.Allocator, device: GpuDevice) @This() {
     return .{
         .device = device,
         .tracked_buffers = .init(cpu_allocator),
+        .tracked_textures = .init(cpu_allocator),
     };
 }
 
 pub fn deinit(self: *@This()) void {
-    var it = self.tracked_buffers.keyIterator();
-    while (it.next()) |buf_ptr| {
+    var it_buffer = self.tracked_buffers.keyIterator();
+    while (it_buffer.next()) |buf_ptr| {
         c.wgpuBufferDestroy(buf_ptr.*);
         c.wgpuBufferRelease(buf_ptr.*);
     }
     self.tracked_buffers.deinit();
+
+    var it_texture = self.tracked_textures.keyIterator();
+    while (it_texture.next()) |tex_ptr|
+        c.wgpuTextureRelease(tex_ptr.*);
+    self.tracked_textures.deinit();
 }
 
 /// Returns the type-erased immutable interface wrapper
@@ -29,41 +37,84 @@ pub fn gpuAllocator(self: *@This()) GpuAllocator {
         .device = self.device,
         .ptr = self,
         .vtable = &.{
-            .alloc = alloc,
-            .free = free,
+            .allocBuffer = allocBuffer,
+            .freeBuffer = freeBuffer,
+            .allocTexture = allocTexture,
+            .freeTexture = freeTexture,
         },
     };
 }
 
-fn alloc(ctx: *anyopaque, bytes: u64, usage: c.WGPUBufferUsage) anyerror!c.WGPUBuffer {
+/// Creates a buffer from `desc` and records it in `tracked_buffers`.
+/// Fails when the size exceeds `maxBufferSize` or the configured VRAM budget.
+fn allocBuffer(ctx: *anyopaque, desc: c.WGPUBufferDescriptor) anyerror!c.WGPUBuffer {
     const self: *@This() = @ptrCast(@alignCast(ctx));
 
-    if (bytes > self.device.limits.maxBufferSize)
+    if (desc.size > self.device.limits.maxBufferSize)
         return error.SingleBufferExceedsLimit;
 
-    if (bytes + self.allocated_vram_bytes > self.device.config.vram_bytes_limit)
+    if (desc.size + self.allocated_vram_bytes > self.device.config.vram_bytes_limit)
         return error.ExceedsVramBudget;
 
-    const buf = c.wgpuDeviceCreateBuffer(self.device.device, &.{
-        .usage = usage,
-        .size = bytes,
-    }) orelse return error.BufferAlloc;
+    const buf = c.wgpuDeviceCreateBuffer(self.device.device, &desc) orelse return error.BufferAlloc;
     errdefer {
         c.wgpuBufferDestroy(buf);
         c.wgpuBufferRelease(buf);
     }
 
-    try self.tracked_buffers.put(buf, {});
-    self.allocated_vram_bytes += bytes;
+    try self.tracked_buffers.put(buf, desc);
+    self.allocated_vram_bytes += desc.size;
     return buf;
 }
 
-fn free(ctx: *anyopaque, buf_raw: c.WGPUBuffer, size: u64) void {
+/// Destroys a tracked buffer and returns its bytes to the budget; untracked handles are ignored.
+fn freeBuffer(ctx: *anyopaque, buf_raw: c.WGPUBuffer) void {
     const self: *@This() = @ptrCast(@alignCast(ctx));
 
-    if (self.tracked_buffers.remove(buf_raw)) {
+    if (self.tracked_buffers.fetchRemove(buf_raw)) |kv| {
         c.wgpuBufferDestroy(buf_raw);
         c.wgpuBufferRelease(buf_raw);
-        self.allocated_vram_bytes -= size;
+        self.allocated_vram_bytes -= kv.value.size;
+    }
+}
+
+/// Estimates the VRAM footprint of the texture described by `desc`.
+/// Mip levels are not counted, and formats for which `bytesPerPixel` returns 0
+/// (such as the block-compressed ones) are accounted as zero bytes.
+fn textureByteSize(desc: c.WGPUTextureDescriptor) u64 {
+    const format: GpuTextureFormat = @enumFromInt(desc.format);
+    // Widen to u64 before multiplying so large extents cannot overflow 32-bit arithmetic.
+    const texels = @as(u64, desc.size.width) * desc.size.height * desc.size.depthOrArrayLayers;
+    return texels * format.bytesPerPixel();
+}
+
+/// Creates a texture from `desc` and records it in `tracked_textures`.
+/// NOTE(review): the size is checked against the buffer limit `maxBufferSize`; confirm that is intended.
+fn allocTexture(ctx: *anyopaque, desc: c.WGPUTextureDescriptor) anyerror!c.WGPUTexture {
+    const self: *@This() = @ptrCast(@alignCast(ctx));
+
+    const bytes_size = textureByteSize(desc);
+    if (bytes_size > self.device.limits.maxBufferSize)
+        return error.SingleBufferExceedsLimit;
+
+    if (bytes_size + self.allocated_vram_bytes > self.device.config.vram_bytes_limit)
+        return error.ExceedsVramBudget;
+
+    const texture = c.wgpuDeviceCreateTexture(self.device.device, &desc) orelse return error.Texture;
+    // Release the handle again if tracking fails, mirroring the buffer path.
+    errdefer c.wgpuTextureRelease(texture);
+
+    try self.tracked_textures.put(texture, desc);
+    self.allocated_vram_bytes += bytes_size;
+    return texture;
+}
+
+/// Releases a tracked texture and returns its bytes to the budget; untracked handles are ignored.
+fn freeTexture(ctx: *anyopaque, texture_raw: c.WGPUTexture) void {
+    const self: *@This() = @ptrCast(@alignCast(ctx));
+
+    if (self.tracked_textures.fetchRemove(texture_raw)) |kv| {
+        c.wgpuTextureRelease(texture_raw);
+        self.allocated_vram_bytes -= textureByteSize(kv.value);
     }
 }
diff --git a/src/GpuBuffer.zig b/src/GpuBuffer.zig
index 155e233..099b83c 100644
--- a/src/GpuBuffer.zig
+++ 
b/src/GpuBuffer.zig @@ -30,7 +30,7 @@ pub fn init(gloc: GpuAllocator, size: u64, usage: std.EnumSet(BufferUsage)) !@Th // Automatically align the buffer size forward to a multiple of 4 bytes under the hood const aligned_size = std.mem.alignForward(u64, size, 4); - const raw_handle = try gloc.allocBuffer(aligned_size, use); + const raw_handle = try gloc.allocBuffer(.{ .size = aligned_size, .usage = use }); return .{ .raw = raw_handle, .size = aligned_size, @@ -41,7 +41,7 @@ pub fn init(gloc: GpuAllocator, size: u64, usage: std.EnumSet(BufferUsage)) !@Th /// Unregisters from the parent GpuAllocator and cleanly destroys GPU resources pub fn deinit(self: @This()) void { - self.gloc.freeBuffer(self.raw, self.size); + self.gloc.freeBuffer(self.raw); } /// Native getConstMappedRange wrapper diff --git a/src/GpuRender.zig b/src/GpuRender.zig index 63a65a6..1f704ba 100644 --- a/src/GpuRender.zig +++ b/src/GpuRender.zig @@ -58,7 +58,6 @@ pub fn init(device: GpuDevice, wgsl: []const u8, def: RenderDef) !@This() { .vertex = .{ .module = shader, .entryPoint = sv(def.vertex_entry), - .bufferCount = 0, // Assuming procedural drawing (like our circle!) 
},
         .primitive = .{
             .topology = def.topology,
diff --git a/src/GpuTexture.zig b/src/GpuTexture.zig
new file mode 100644
index 0000000..4a1500b
--- /dev/null
+++ b/src/GpuTexture.zig
@@ -0,0 +1,165 @@
+const std = @import("std");
+const c = @import("utils.zig").c;
+const GpuAllocator = @import("GpuAllocator.zig");
+const GpuBuffer = @import("GpuBuffer.zig");
+const GpuTextureFormat = @import("lib.zig").GpuTextureFormat;
+
+/// Usage flags accepted by `init`; the selected values are OR-ed into the descriptor's `usage`.
+pub const TextureUsage = enum(u64) {
+    None = 0x0000000000000000,
+    CopySrc = 0x0000000000000001,
+    CopyDst = 0x0000000000000002,
+    TextureBinding = 0x0000000000000004,
+    StorageBinding = 0x0000000000000008,
+    RenderAttachment = 0x0000000000000010,
+    TransientAttachment = 0x0000000000000020,
+};
+
+/// Progress of an asynchronous staging-buffer map, written by `onMapped`.
+const MapState = enum { pending, mapped, failed };
+
+raw: c.WGPUTexture,
+size: c.WGPUExtent3D,
+usage: c.WGPUTextureUsage,
+format: GpuTextureFormat,
+gloc: GpuAllocator,
+
+/// Allocates the underlying WebGPU handle and registers it to the parent GpuAllocator.
+/// The texture is always 2D with a single mip level and a single sample.
+pub fn init(gloc: GpuAllocator, format: GpuTextureFormat, size: c.WGPUExtent3D, usage: std.EnumSet(TextureUsage)) !@This() {
+    var use: u64 = 0;
+    var iter = usage.iterator();
+    while (iter.next()) |flag| use |= @intFromEnum(flag);
+
+    const desc = c.WGPUTextureDescriptor{
+        .usage = use,
+        .dimension = c.WGPUTextureDimension_2D,
+        .size = size,
+        .format = @intCast(@intFromEnum(format)),
+        .mipLevelCount = 1,
+        .sampleCount = 1,
+    };
+    const raw = try gloc.allocTexture(desc);
+
+    return .{ .gloc = gloc, .raw = raw, .size = size, .format = format, .usage = use };
+}
+
+/// Unregisters from the parent GpuAllocator and cleanly destroys GPU resources
+pub fn deinit(self: @This()) void {
+    self.gloc.freeTexture(self.raw);
+}
+
+/// Returns the byte length of one tightly packed row of texels.
+/// Fails for formats whose `bytesPerPixel` is 0 (e.g. block-compressed formats).
+fn rowBytes(self: @This()) !u32 {
+    const bytes_per_pixel = self.format.bytesPerPixel();
+    if (bytes_per_pixel == 0) return error.UnsupportedFormat;
+    return std.math.mul(u32, self.size.width, bytes_per_pixel);
+}
+
+/// Returns the number of texel rows across all depth slices / array layers.
+fn rowCount(self: @This()) u64 {
+    return @as(u64, self.size.height) * self.size.depthOrArrayLayers;
+}
+
+/// Describes the whole of mip level 0 as a copy source or destination.
+fn wholeTexture(self: @This()) c.WGPUTexelCopyTextureInfo {
+    return .{
+        .texture = self.raw,
+        .mipLevel = 0,
+        .origin = .{ .x = 0, .y = 0, .z = 0 },
+        .aspect = c.WGPUTextureAspect_All,
+    };
+}
+
+/// CPU to GPU. `data` must hold the complete, tightly packed contents of the texture.
+/// The texture needs `.CopyDst` usage for the write to be valid.
+pub fn load(
+    self: @This(),
+    T: type,
+    data: []const T,
+) !void {
+    const row_bytes = try self.rowBytes();
+    const bytes = std.mem.sliceAsBytes(data);
+    if (bytes.len != row_bytes * self.rowCount()) return error.SizeMismatch;
+
+    const dst = self.wholeTexture();
+    const layout = c.WGPUTexelCopyBufferLayout{
+        .offset = 0,
+        .bytesPerRow = row_bytes,
+        .rowsPerImage = self.size.height,
+    };
+    c.wgpuQueueWriteTexture(self.gloc.device.queue, &dst, bytes.ptr, bytes.len, &layout, &self.size);
+}
+
+/// GPU to CPU. Returns the texture contents as a tightly packed slice owned by the caller.
+/// The texture needs `.CopySrc` usage; the copy goes through a temporary staging buffer.
+pub fn read(self: @This(), alloc: std.mem.Allocator, T: type) ![]T {
+    const row_bytes = try self.rowBytes();
+    const rows = self.rowCount();
+    const total_bytes = row_bytes * rows;
+    if (total_bytes % @sizeOf(T) != 0) return error.SizeMismatch;
+
+    // Texture-to-buffer copies require bytesPerRow to be a multiple of 256.
+    const padded_row = std.mem.alignForward(u32, row_bytes, 256);
+    const staging_size = padded_row * rows;
+
+    const staging = try GpuBuffer.init(
+        self.gloc,
+        staging_size,
+        .initMany(&.{ .MapRead, .CopyDst }),
+    );
+    defer staging.deinit();
+
+    const out = try alloc.alloc(T, @intCast(total_bytes / @sizeOf(T)));
+    errdefer alloc.free(out);
+
+    const enc = c.wgpuDeviceCreateCommandEncoder(self.gloc.device.device, null) orelse return error.Encoder;
+    defer c.wgpuCommandEncoderRelease(enc);
+    const src = self.wholeTexture();
+    const dst = c.WGPUTexelCopyBufferInfo{
+        .layout = .{ .offset = 0, .bytesPerRow = padded_row, .rowsPerImage = self.size.height },
+        .buffer = staging.raw,
+    };
+    c.wgpuCommandEncoderCopyTextureToBuffer(enc, &src, &dst, &self.size);
+    const cmd = c.wgpuCommandEncoderFinish(enc, null);
+    defer c.wgpuCommandBufferRelease(cmd);
+    c.wgpuQueueSubmit(self.gloc.device.queue, 1, &cmd);
+
+    var state: MapState = .pending;
+    staging.mapAsync(
+        c.WGPUMapMode_Read,
+        0,
+        staging_size,
+        .{ .callback = onMapped, .userdata1 = &state },
+    );
+    // Stop polling on failure too, otherwise a rejected map would spin forever.
+    while (state == .pending) self.gloc.device.poll();
+    if (state == .failed) return error.MapFailed;
+    defer staging.unmap();
+
+    const mapped: [*]const u8 = @ptrCast(
+        staging.getConstMappedRange(0, staging_size) orelse return error.MapFailed,
+    );
+    const out_bytes = std.mem.sliceAsBytes(out);
+    const row_len: usize = row_bytes;
+    const stride: usize = padded_row;
+    // Drop the per-row padding while copying into the packed result.
+    var row: usize = 0;
+    while (row < rows) : (row += 1) {
+        @memcpy(out_bytes[row * row_len ..][0..row_len], mapped[row * stride ..][0..row_len]);
+    }
+
+    return out;
+}
+
+/// Map callback: records success or failure in the `MapState` passed as `userdata1`.
+fn onMapped(
+    status: c.WGPUMapAsyncStatus,
+    _: c.WGPUStringView,
+    userdata1: ?*anyopaque,
+    _: ?*anyopaque,
+) callconv(.c) void {
+    const state: *MapState = @ptrCast(@alignCast(userdata1.?));
+    state.* = if (status == c.WGPUMapAsyncStatus_Success) .mapped else .failed;
+}
diff --git a/src/circle.zig b/src/circle.zig
index 05325ca..fc537be 100644
--- a/src/circle.zig
+++ b/src/circle.zig
@@ -6,6 +6,7 @@ const GpuDevice = gpu.GpuDevice;
 const GpuArena = gpu.GpuArena;
 const GpuBuffer = gpu.GpuBuffer;
 const GpuRender = gpu.GpuRender;
+const GpuTexture = gpu.GpuTexture;
 
 pub fn main(init: std.process.Init) !void {
     const allocator = init.gpa;
@@ -20,7 +21,6 @@ pub fn main(init: std.process.Init) !void {
     const width: u32 = 512;
     const height: u32 = 512;
 
-    // We use standard RGBA8Unorm format for an offscreen image target
     const render_format = c.WGPUTextureFormat_RGBA8Unorm;
 
     // 2. Load our Render Pipeline (Procedural Triangle Strip)
@@ -36,22 +36,15 @@ pub fn main(init: std.process.Init) !void {
     defer circle_rp.deinit();
 
     // 3. 
Create the offscreen VRAM texture to render into - const texture_desc = c.WGPUTextureDescriptor{ - .nextInChain = null, - .label = sv("Offscreen Render Target"), - .usage = c.WGPUTextureUsage_RenderAttachment | c.WGPUTextureUsage_CopySrc, - .dimension = c.WGPUTextureDimension_2D, - .size = .{ .width = width, .height = height, .depthOrArrayLayers = 1 }, - .format = render_format, - .mipLevelCount = 1, - .sampleCount = 1, - .viewFormatCount = 0, - .viewFormats = null, - }; - const target_texture = c.wgpuDeviceCreateTexture(device.device, &texture_desc) orelse return error.Texture; - defer c.wgpuTextureRelease(target_texture); + const texture = try GpuTexture.init( + gloc, + .RGBA8Unorm, + .{ .width = width, .height = height, .depthOrArrayLayers = 1 }, + .initMany(&.{ .RenderAttachment, .CopySrc }), + ); + defer texture.deinit(); - const target_view = c.wgpuTextureCreateView(target_texture, null) orelse return error.View; + const target_view = c.wgpuTextureCreateView(texture.raw, null) orelse return error.View; defer c.wgpuTextureViewRelease(target_view); // 4. 
Create a staging buffer to pull pixels from VRAM to CPU
@@ -70,7 +63,7 @@ pub fn main(init: std.process.Init) !void {
     defer c.wgpuCommandEncoderRelease(enc);
 
     const src_copy = c.WGPUTexelCopyTextureInfo{
-        .texture = target_texture,
+        .texture = texture.raw,
         .mipLevel = 0,
         .origin = .{ .x = 0, .y = 0, .z = 0 },
         .aspect = c.WGPUTextureAspect_All,
diff --git a/src/lib.zig b/src/lib.zig
index 7a05601..e39900d 100644
--- a/src/lib.zig
+++ b/src/lib.zig
@@ -4,3 +4,173 @@ pub const GpuBuffer = @import("GpuBuffer.zig");
 pub const GpuDevice = @import("GpuDevice.zig");
 pub const GpuCompute = @import("GpuCompute.zig");
 pub const GpuRender = @import("GpuRender.zig");
+pub const GpuTexture = @import("GpuTexture.zig");
+/// Texture formats; GpuTexture casts the tag value directly into `WGPUTextureDescriptor.format`.
+pub const GpuTextureFormat = enum(c_int) {
+    Undefined = 0,
+    R8Unorm = 1,
+    R8Snorm = 2,
+    R8Uint = 3,
+    R8Sint = 4,
+    R16Unorm = 5,
+    R16Snorm = 6,
+    R16Uint = 7,
+    R16Sint = 8,
+    R16Float = 9,
+    RG8Unorm = 10,
+    RG8Snorm = 11,
+    RG8Uint = 12,
+    RG8Sint = 13,
+    R32Float = 14,
+    R32Uint = 15,
+    R32Sint = 16,
+    RG16Unorm = 17,
+    RG16Snorm = 18,
+    RG16Uint = 19,
+    RG16Sint = 20,
+    RG16Float = 21,
+    RGBA8Unorm = 22,
+    RGBA8UnormSrgb = 23,
+    RGBA8Snorm = 24,
+    RGBA8Uint = 25,
+    RGBA8Sint = 26,
+    BGRA8Unorm = 27,
+    BGRA8UnormSrgb = 28,
+    RGB10A2Uint = 29,
+    RGB10A2Unorm = 30,
+    RG11B10Ufloat = 31,
+    RGB9E5Ufloat = 32,
+    RG32Float = 33,
+    RG32Uint = 34,
+    RG32Sint = 35,
+    RGBA16Unorm = 36,
+    RGBA16Snorm = 37,
+    RGBA16Uint = 38,
+    RGBA16Sint = 39,
+    RGBA16Float = 40,
+    RGBA32Float = 41,
+    RGBA32Uint = 42,
+    RGBA32Sint = 43,
+    Stencil8 = 44,
+    Depth16Unorm = 45,
+    Depth24Plus = 46,
+    Depth24PlusStencil8 = 47,
+    Depth32Float = 48,
+    Depth32FloatStencil8 = 49,
+    BC1RGBAUnorm = 50,
+    BC1RGBAUnormSrgb = 51,
+    BC2RGBAUnorm = 52,
+    BC2RGBAUnormSrgb = 53,
+    BC3RGBAUnorm = 54,
+    BC3RGBAUnormSrgb = 55,
+    BC4RUnorm = 56,
+    BC4RSnorm = 57,
+    BC5RGUnorm = 58,
+    BC5RGSnorm = 59,
+    BC6HRGBUfloat = 60,
+    BC6HRGBFloat = 61,
+    BC7RGBAUnorm = 62,
+    BC7RGBAUnormSrgb = 63,
+    ETC2RGB8Unorm = 64,
+    ETC2RGB8UnormSrgb = 65,
+    ETC2RGB8A1Unorm = 66,
+    ETC2RGB8A1UnormSrgb = 67,
+    ETC2RGBA8Unorm = 68,
+    ETC2RGBA8UnormSrgb = 69,
+    EACR11Unorm = 70,
+    EACR11Snorm = 71,
+    EACRG11Unorm = 72,
+    EACRG11Snorm = 73,
+    ASTC4x4Unorm = 74,
+    ASTC4x4UnormSrgb = 75,
+    ASTC5x4Unorm = 76,
+    ASTC5x4UnormSrgb = 77,
+    ASTC5x5Unorm = 78,
+    ASTC5x5UnormSrgb = 79,
+    ASTC6x5Unorm = 80,
+    ASTC6x5UnormSrgb = 81,
+    ASTC6x6Unorm = 82,
+    ASTC6x6UnormSrgb = 83,
+    ASTC8x5Unorm = 84,
+    ASTC8x5UnormSrgb = 85,
+    ASTC8x6Unorm = 86,
+    ASTC8x6UnormSrgb = 87,
+    ASTC8x8Unorm = 88,
+    ASTC8x8UnormSrgb = 89,
+    ASTC10x5Unorm = 90,
+    ASTC10x5UnormSrgb = 91,
+    ASTC10x6Unorm = 92,
+    ASTC10x6UnormSrgb = 93,
+    ASTC10x8Unorm = 94,
+    ASTC10x8UnormSrgb = 95,
+    ASTC10x10Unorm = 96,
+    ASTC10x10UnormSrgb = 97,
+    ASTC12x10Unorm = 98,
+    ASTC12x10UnormSrgb = 99,
+    ASTC12x12Unorm = 100,
+    ASTC12x12UnormSrgb = 101,
+    Force32 = 2147483647,
+    /// Returns the size of one texel in bytes, or 0 for formats not listed in the switch below.
+    pub fn bytesPerPixel(format: GpuTextureFormat) u32 {
+        return switch (format) {
+            // 8-bit formats (1 byte per texel)
+            .R8Unorm, .R8Snorm, .R8Uint, .R8Sint, .Stencil8 => 1,
+
+            // 16-bit formats (2 bytes per texel)
+            .R16Unorm,
+            .R16Snorm,
+            .R16Uint,
+            .R16Sint,
+            .R16Float,
+            .RG8Unorm,
+            .RG8Snorm,
+            .RG8Uint,
+            .RG8Sint,
+            .Depth16Unorm,
+            => 2,
+
+            // 32-bit formats (4 bytes per texel)
+            .R32Float,
+            .R32Uint,
+            .R32Sint,
+            .RG16Unorm,
+            .RG16Snorm,
+            .RG16Uint,
+            .RG16Sint,
+            .RG16Float,
+            .RGBA8Unorm,
+            .RGBA8UnormSrgb,
+            .RGBA8Snorm,
+            .RGBA8Uint,
+            .RGBA8Sint,
+            .BGRA8Unorm,
+            .BGRA8UnormSrgb,
+            .RGB10A2Uint,
+            .RGB10A2Unorm,
+            .RG11B10Ufloat,
+            .RGB9E5Ufloat,
+            .Depth24Plus,
+            .Depth32Float,
+            => 4,
+
+            // 64-bit formats (8 bytes per texel)
+            .RG32Float,
+            .RG32Uint,
+            .RG32Sint,
+            .RGBA16Unorm,
+            .RGBA16Snorm,
+            .RGBA16Uint,
+            .RGBA16Sint,
+            .RGBA16Float,
+            .Depth24PlusStencil8, // NOTE(review): counted as 8 bytes (depth + stencil, padded); actual storage may differ per backend, confirm
+            .Depth32FloatStencil8, // 32-bit float depth + 8-bit stencil, counted as padded to 8 bytes
+            => 8,
+
+            // 128-bit formats (16 bytes per texel)
+            .RGBA32Float, .RGBA32Uint, .RGBA32Sint => 16,
+
+            // Everything else (block-compressed formats, Undefined, Force32) has no per-pixel size here: callers get 0
+            else => 0,
+        };
+    }
+};