Added optional label in definitions and added logs when alloc, free and device info
This commit is contained in:
parent
69f9cad2c1
commit
d503ce7dea
@ -6,6 +6,8 @@ const GpuAllocator = gpu.GpuAllocator;
|
|||||||
const GpuBuffer = gpu.GpuBuffer;
|
const GpuBuffer = gpu.GpuBuffer;
|
||||||
const GpuCompute = gpu.GpuCompute;
|
const GpuCompute = gpu.GpuCompute;
|
||||||
|
|
||||||
|
pub const std_options = std.Options{ .log_level = .info };
|
||||||
|
|
||||||
/// Minimal implementation of a f16 Vector
|
/// Minimal implementation of a f16 Vector
|
||||||
const Vec = struct {
|
const Vec = struct {
|
||||||
buf: GpuBuffer,
|
buf: GpuBuffer,
|
||||||
@ -14,11 +16,10 @@ const Vec = struct {
|
|||||||
// Changed: gloc is passed by value (const)
|
// Changed: gloc is passed by value (const)
|
||||||
pub fn initZero(gloc: GpuAllocator, len: usize) !Vec {
|
pub fn initZero(gloc: GpuAllocator, len: usize) !Vec {
|
||||||
return .{
|
return .{
|
||||||
.buf = try GpuBuffer.init(
|
.buf = try GpuBuffer.init(gloc, .{
|
||||||
gloc,
|
.size = len * @sizeOf(f16),
|
||||||
len * @sizeOf(f16),
|
.usage = .initMany(&.{ .Storage, .CopyDst, .CopySrc }),
|
||||||
.initMany(&.{ .Storage, .CopyDst, .CopySrc }),
|
}),
|
||||||
),
|
|
||||||
.len = len,
|
.len = len,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@ -18,7 +18,9 @@ pub fn main(init: std.process.Init) !void {
|
|||||||
defer device.deinit();
|
defer device.deinit();
|
||||||
|
|
||||||
// 2. Init VRAM Arena
|
// 2. Init VRAM Arena
|
||||||
const gloc = device.gpuAllocator();
|
var grena = GpuArenaAllocator.init(allocator, device.gpuAllocator());
|
||||||
|
defer grena.deinit();
|
||||||
|
const gloc = grena.gpuAllocator();
|
||||||
|
|
||||||
// 3. Load Render Pipeline
|
// 3. Load Render Pipeline
|
||||||
const circle_rp = try GpuRender.init(
|
const circle_rp = try GpuRender.init(
|
||||||
|
|||||||
@ -21,11 +21,14 @@ pub fn main(init: std.process.Init) !void {
|
|||||||
const add_cp = try GpuCompute.init(
|
const add_cp = try GpuCompute.init(
|
||||||
gloc,
|
gloc,
|
||||||
@embedFile("shaders/add.wgsl"),
|
@embedFile("shaders/add.wgsl"),
|
||||||
.{ .bindings = &.{
|
.{
|
||||||
.{ .element_size = @sizeOf(f16) },
|
.label = "add",
|
||||||
.{ .element_size = @sizeOf(f16) },
|
.bindings = &.{
|
||||||
.{ .element_size = @sizeOf(f16) },
|
.{ .element_size = @sizeOf(f16) },
|
||||||
} },
|
.{ .element_size = @sizeOf(f16) },
|
||||||
|
.{ .element_size = @sizeOf(f16) },
|
||||||
|
},
|
||||||
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
// 4. Setup CPU data
|
// 4. Setup CPU data
|
||||||
@ -42,9 +45,9 @@ pub fn main(init: std.process.Init) !void {
|
|||||||
|
|
||||||
// 5. Initialize raw GPU Buffers
|
// 5. Initialize raw GPU Buffers
|
||||||
const byte_size = len * @sizeOf(f16);
|
const byte_size = len * @sizeOf(f16);
|
||||||
const buf_a = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc }));
|
const buf_a = try GpuBuffer.init(gloc, .{ .label = "a", .size = byte_size, .usage = .initMany(&.{ .Storage, .CopyDst, .CopySrc }) });
|
||||||
const buf_b = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc }));
|
const buf_b = try GpuBuffer.init(gloc, .{ .label = "b", .size = byte_size, .usage = .initMany(&.{ .Storage, .CopyDst, .CopySrc }) });
|
||||||
const buf_out = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc }));
|
const buf_out = try GpuBuffer.init(gloc, .{ .label = "out", .size = byte_size, .usage = .initMany(&.{ .Storage, .CopyDst, .CopySrc }) });
|
||||||
|
|
||||||
// Note: Buffers are safely tied to the GpuArenaAllocator which will automatically
|
// Note: Buffers are safely tied to the GpuArenaAllocator which will automatically
|
||||||
// release them at the end. You can also manually call buf_x.deinit() if desired.
|
// release them at the end. You can also manually call buf_x.deinit() if desired.
|
||||||
|
|||||||
@ -3,6 +3,7 @@ const GpuDevice = @import("GpuDevice.zig");
|
|||||||
const GpuAllocator = @import("GpuAllocator.zig");
|
const GpuAllocator = @import("GpuAllocator.zig");
|
||||||
const GpuTextureFormat = @import("lib.zig").GpuTextureFormat;
|
const GpuTextureFormat = @import("lib.zig").GpuTextureFormat;
|
||||||
const c = @import("utils.zig").c;
|
const c = @import("utils.zig").c;
|
||||||
|
const viewStr = @import("utils.zig").viewStr;
|
||||||
|
|
||||||
child_allocator: GpuAllocator, // I use Zig naming child_allocator, but that should be a parent for me. Likely something idk
|
child_allocator: GpuAllocator, // I use Zig naming child_allocator, but that should be a parent for me. Likely something idk
|
||||||
tracked_buffers: std.AutoHashMap(c.WGPUBuffer, c.WGPUBufferDescriptor),
|
tracked_buffers: std.AutoHashMap(c.WGPUBuffer, c.WGPUBufferDescriptor),
|
||||||
@ -24,30 +25,40 @@ pub fn init(cpu_allocator: std.mem.Allocator, child_allocator: GpuAllocator) @Th
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn deinit(self: *@This()) void {
|
pub fn deinit(self: *@This()) void {
|
||||||
|
std.log.debug("Freeing GpuArenaAllocator (Used VRAM: {}/{} MB)", .{
|
||||||
|
self.allocated_vram_bytes / 1024 / 1024,
|
||||||
|
self.child_allocator.device.def.vram_bytes_limit / 1024 / 1024,
|
||||||
|
});
|
||||||
|
|
||||||
var it_buffer = self.tracked_buffers.keyIterator();
|
var it_buffer = self.tracked_buffers.keyIterator();
|
||||||
while (it_buffer.next()) |buf_ptr|
|
while (it_buffer.next()) |buf_ptr|
|
||||||
self.child_allocator.freeBuffer(buf_ptr.*);
|
freeBuffer(self, buf_ptr.*);
|
||||||
self.tracked_buffers.deinit();
|
self.tracked_buffers.deinit();
|
||||||
|
|
||||||
var it_tex = self.tracked_textures.keyIterator();
|
var it_tex = self.tracked_textures.keyIterator();
|
||||||
while (it_tex.next()) |buf_ptr|
|
while (it_tex.next()) |buf_ptr|
|
||||||
self.child_allocator.freeTexture(buf_ptr.*);
|
freeTexture(self, buf_ptr.*);
|
||||||
self.tracked_textures.deinit();
|
self.tracked_textures.deinit();
|
||||||
|
|
||||||
var it_view = self.tracked_views.keyIterator();
|
var it_view = self.tracked_views.keyIterator();
|
||||||
while (it_view.next()) |buf_ptr|
|
while (it_view.next()) |buf_ptr|
|
||||||
self.child_allocator.freeTextureView(buf_ptr.*);
|
freeTextureView(self, buf_ptr.*);
|
||||||
self.tracked_views.deinit();
|
self.tracked_views.deinit();
|
||||||
|
|
||||||
var it_render = self.tracked_renders.keyIterator();
|
var it_render = self.tracked_renders.keyIterator();
|
||||||
while (it_render.next()) |buf_ptr|
|
while (it_render.next()) |buf_ptr|
|
||||||
self.child_allocator.freeRenderPipeline(buf_ptr.*);
|
freeRenderPipeline(self, buf_ptr.*);
|
||||||
self.tracked_renders.deinit();
|
self.tracked_renders.deinit();
|
||||||
|
|
||||||
var it_compute = self.tracked_computes.keyIterator();
|
var it_compute = self.tracked_computes.keyIterator();
|
||||||
while (it_compute.next()) |buf_ptr|
|
while (it_compute.next()) |buf_ptr|
|
||||||
self.child_allocator.freeComputePipeline(buf_ptr.*);
|
freeComputePipeline(self, buf_ptr.*);
|
||||||
self.tracked_computes.deinit();
|
self.tracked_computes.deinit();
|
||||||
|
|
||||||
|
std.log.debug("Freed GpuArenaAllocator (Used VRAM: {}/{} MB)", .{
|
||||||
|
self.allocated_vram_bytes / 1024 / 1024,
|
||||||
|
self.child_allocator.device.def.vram_bytes_limit / 1024 / 1024,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the type-erased immutable interface wrapper
|
/// Returns the type-erased immutable interface wrapper
|
||||||
@ -79,6 +90,14 @@ fn allocBuffer(ctx: *anyopaque, desc: c.WGPUBufferDescriptor) anyerror!c.WGPUBuf
|
|||||||
const raw = try self.child_allocator.allocBuffer(desc);
|
const raw = try self.child_allocator.allocBuffer(desc);
|
||||||
self.tracked_buffers.putAssumeCapacity(raw, desc);
|
self.tracked_buffers.putAssumeCapacity(raw, desc);
|
||||||
self.allocated_vram_bytes += desc.size;
|
self.allocated_vram_bytes += desc.size;
|
||||||
|
|
||||||
|
std.log.debug("Allocated Buffer '{s}': {d} B (Total VRAM: {}/{} MB)", .{
|
||||||
|
viewStr(desc.label),
|
||||||
|
desc.size,
|
||||||
|
self.allocated_vram_bytes / 1024 / 1024,
|
||||||
|
self.child_allocator.device.def.vram_bytes_limit / 1024 / 1024,
|
||||||
|
});
|
||||||
|
|
||||||
return raw;
|
return raw;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -87,6 +106,12 @@ fn freeBuffer(ctx: *anyopaque, raw: c.WGPUBuffer) void {
|
|||||||
if (self.tracked_buffers.fetchRemove(raw)) |kv| {
|
if (self.tracked_buffers.fetchRemove(raw)) |kv| {
|
||||||
self.child_allocator.freeBuffer(raw);
|
self.child_allocator.freeBuffer(raw);
|
||||||
self.allocated_vram_bytes -= kv.value.size;
|
self.allocated_vram_bytes -= kv.value.size;
|
||||||
|
|
||||||
|
std.log.debug("Freed Buffer '{s}' (Total VRAM: {}/{} MB)", .{
|
||||||
|
viewStr(kv.value.label),
|
||||||
|
self.allocated_vram_bytes / 1024 / 1024,
|
||||||
|
self.child_allocator.device.def.vram_bytes_limit / 1024 / 1024,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -97,13 +122,21 @@ fn allocTexture(ctx: *anyopaque, desc: c.WGPUTextureDescriptor) anyerror!c.WGPUT
|
|||||||
const format: GpuTextureFormat = @enumFromInt(desc.format);
|
const format: GpuTextureFormat = @enumFromInt(desc.format);
|
||||||
const bytes_size = desc.size.width * desc.size.height * format.bytesPerPixel();
|
const bytes_size = desc.size.width * desc.size.height * format.bytesPerPixel();
|
||||||
|
|
||||||
if (bytes_size + self.allocated_vram_bytes > self.child_allocator.device.config.vram_bytes_limit)
|
if (bytes_size + self.allocated_vram_bytes > self.child_allocator.device.def.vram_bytes_limit)
|
||||||
return error.ExceedsVramBudget;
|
return error.ExceedsVramBudget;
|
||||||
|
|
||||||
const raw = try self.child_allocator.allocTexture(desc);
|
const raw = try self.child_allocator.allocTexture(desc);
|
||||||
|
|
||||||
self.tracked_textures.putAssumeCapacity(raw, desc);
|
self.tracked_textures.putAssumeCapacity(raw, desc);
|
||||||
self.allocated_vram_bytes += bytes_size;
|
self.allocated_vram_bytes += bytes_size;
|
||||||
|
|
||||||
|
std.log.debug("Allocated Texture '{s}': {d} B (Total VRAM: {}/{} MB)", .{
|
||||||
|
viewStr(desc.label),
|
||||||
|
bytes_size,
|
||||||
|
self.allocated_vram_bytes / 1024 / 1024,
|
||||||
|
self.child_allocator.device.def.vram_bytes_limit / 1024 / 1024,
|
||||||
|
});
|
||||||
|
|
||||||
return raw;
|
return raw;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -117,6 +150,12 @@ fn freeTexture(ctx: *anyopaque, raw: c.WGPUTexture) void {
|
|||||||
const format: GpuTextureFormat = @enumFromInt(desc.format);
|
const format: GpuTextureFormat = @enumFromInt(desc.format);
|
||||||
const bytes_size = desc.size.width * desc.size.height * format.bytesPerPixel();
|
const bytes_size = desc.size.width * desc.size.height * format.bytesPerPixel();
|
||||||
self.allocated_vram_bytes -= bytes_size;
|
self.allocated_vram_bytes -= bytes_size;
|
||||||
|
|
||||||
|
std.log.debug("Freed Texture '{s}' (Total VRAM: {}/{} MB)", .{
|
||||||
|
viewStr(desc.label),
|
||||||
|
self.allocated_vram_bytes / 1024 / 1024,
|
||||||
|
self.child_allocator.device.def.vram_bytes_limit / 1024 / 1024,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -125,13 +164,17 @@ fn allocTextureView(ctx: *anyopaque, texture: c.WGPUTexture, desc: c.WGPUTexture
|
|||||||
try self.tracked_views.ensureTotalCapacity(self.tracked_views.count() + 1);
|
try self.tracked_views.ensureTotalCapacity(self.tracked_views.count() + 1);
|
||||||
const raw = try self.child_allocator.allocTextureView(texture, desc);
|
const raw = try self.child_allocator.allocTextureView(texture, desc);
|
||||||
self.tracked_views.putAssumeCapacity(raw, desc);
|
self.tracked_views.putAssumeCapacity(raw, desc);
|
||||||
|
std.log.debug("Allocated Texture View '{s}'", .{viewStr(desc.label)});
|
||||||
return raw;
|
return raw;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn freeTextureView(ctx: *anyopaque, raw: c.WGPUTextureView) void {
|
fn freeTextureView(ctx: *anyopaque, raw: c.WGPUTextureView) void {
|
||||||
const self: *@This() = @ptrCast(@alignCast(ctx));
|
const self: *@This() = @ptrCast(@alignCast(ctx));
|
||||||
if (self.tracked_views.remove(raw))
|
if (self.tracked_views.fetchRemove(raw)) |kv| {
|
||||||
self.child_allocator.freeTextureView(raw);
|
self.child_allocator.freeTextureView(raw);
|
||||||
|
const desc = kv.value;
|
||||||
|
std.log.debug("Freed Texture View '{s}'", .{viewStr(desc.label)});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn allocRenderPipeline(ctx: *anyopaque, desc: c.WGPURenderPipelineDescriptor) anyerror!c.WGPURenderPipeline {
|
fn allocRenderPipeline(ctx: *anyopaque, desc: c.WGPURenderPipelineDescriptor) anyerror!c.WGPURenderPipeline {
|
||||||
@ -139,13 +182,17 @@ fn allocRenderPipeline(ctx: *anyopaque, desc: c.WGPURenderPipelineDescriptor) an
|
|||||||
try self.tracked_renders.ensureTotalCapacity(self.tracked_renders.count() + 1);
|
try self.tracked_renders.ensureTotalCapacity(self.tracked_renders.count() + 1);
|
||||||
const raw = try self.child_allocator.allocRenderPipeline(desc);
|
const raw = try self.child_allocator.allocRenderPipeline(desc);
|
||||||
self.tracked_renders.putAssumeCapacity(raw, desc);
|
self.tracked_renders.putAssumeCapacity(raw, desc);
|
||||||
|
std.log.debug("Allocated Render Pipeline '{s}'", .{viewStr(desc.label)});
|
||||||
return raw;
|
return raw;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn freeRenderPipeline(ctx: *anyopaque, raw: c.WGPURenderPipeline) void {
|
fn freeRenderPipeline(ctx: *anyopaque, raw: c.WGPURenderPipeline) void {
|
||||||
const self: *@This() = @ptrCast(@alignCast(ctx));
|
const self: *@This() = @ptrCast(@alignCast(ctx));
|
||||||
if (self.tracked_renders.remove(raw))
|
if (self.tracked_renders.fetchRemove(raw)) |kv| {
|
||||||
self.child_allocator.freeRenderPipeline(raw);
|
self.child_allocator.freeRenderPipeline(raw);
|
||||||
|
const desc = kv.value;
|
||||||
|
std.log.debug("Freed Render Pipeline '{s}'", .{viewStr(desc.label)});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn allocComputePipeline(ctx: *anyopaque, desc: c.WGPUComputePipelineDescriptor) anyerror!c.WGPUComputePipeline {
|
fn allocComputePipeline(ctx: *anyopaque, desc: c.WGPUComputePipelineDescriptor) anyerror!c.WGPUComputePipeline {
|
||||||
@ -153,11 +200,15 @@ fn allocComputePipeline(ctx: *anyopaque, desc: c.WGPUComputePipelineDescriptor)
|
|||||||
try self.tracked_computes.ensureTotalCapacity(self.tracked_computes.count() + 1);
|
try self.tracked_computes.ensureTotalCapacity(self.tracked_computes.count() + 1);
|
||||||
const raw = try self.child_allocator.allocComputePipeline(desc);
|
const raw = try self.child_allocator.allocComputePipeline(desc);
|
||||||
self.tracked_computes.putAssumeCapacity(raw, desc);
|
self.tracked_computes.putAssumeCapacity(raw, desc);
|
||||||
|
std.log.debug("Allocated Compute Pipeline '{s}'", .{viewStr(desc.label)});
|
||||||
return raw;
|
return raw;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn freeComputePipeline(ctx: *anyopaque, raw: c.WGPUComputePipeline) void {
|
fn freeComputePipeline(ctx: *anyopaque, raw: c.WGPUComputePipeline) void {
|
||||||
const self: *@This() = @ptrCast(@alignCast(ctx));
|
const self: *@This() = @ptrCast(@alignCast(ctx));
|
||||||
if (self.tracked_computes.remove(raw))
|
if (self.tracked_computes.fetchRemove(raw)) |kv| {
|
||||||
self.child_allocator.freeComputePipeline(raw);
|
self.child_allocator.freeComputePipeline(raw);
|
||||||
|
const desc = kv.value;
|
||||||
|
std.log.debug("Freed Compute Pipeline '{s}'", .{viewStr(desc.label)});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const c = @import("utils.zig").c;
|
const c = @import("utils.zig").c;
|
||||||
const GpuAllocator = @import("GpuAllocator.zig");
|
const GpuAllocator = @import("GpuAllocator.zig");
|
||||||
|
const svOpt = @import("utils.zig").svOpt;
|
||||||
|
|
||||||
raw: c.WGPUBuffer,
|
raw: c.WGPUBuffer,
|
||||||
size: u64,
|
size: u64,
|
||||||
@ -21,15 +22,25 @@ const BufferUsage = enum(u64) {
|
|||||||
QueryResolve = 0x0000000000000200,
|
QueryResolve = 0x0000000000000200,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub fn init(gloc: GpuAllocator, size: u64, usage: std.EnumSet(BufferUsage)) !@This() {
|
const GpuBufferDef = struct {
|
||||||
|
label: ?[]const u8 = null,
|
||||||
|
size: u64,
|
||||||
|
usage: std.EnumSet(BufferUsage),
|
||||||
|
};
|
||||||
|
|
||||||
|
pub fn init(gloc: GpuAllocator, def: GpuBufferDef) !@This() {
|
||||||
var use: u64 = 0;
|
var use: u64 = 0;
|
||||||
var iter = usage.iterator();
|
var iter = def.usage.iterator();
|
||||||
while (iter.next()) |flag| use |= @intFromEnum(flag);
|
while (iter.next()) |flag| use |= @intFromEnum(flag);
|
||||||
|
|
||||||
// Automatically align the buffer size forward to a multiple of 4 bytes under the hood
|
// Automatically align the buffer size forward to a multiple of 4 bytes under the hood
|
||||||
const aligned_size = std.mem.alignForward(u64, size, 4);
|
const aligned_size = std.mem.alignForward(u64, def.size, 4);
|
||||||
|
|
||||||
const raw_handle = try gloc.allocBuffer(.{ .size = aligned_size, .usage = use });
|
const raw_handle = try gloc.allocBuffer(.{
|
||||||
|
.size = aligned_size,
|
||||||
|
.usage = use,
|
||||||
|
.label = svOpt(def.label),
|
||||||
|
});
|
||||||
return .{
|
return .{
|
||||||
.raw = raw_handle,
|
.raw = raw_handle,
|
||||||
.size = aligned_size,
|
.size = aligned_size,
|
||||||
@ -91,11 +102,11 @@ pub fn load(
|
|||||||
pub fn read(self: @This(), alloc: std.mem.Allocator, T: type) ![]T {
|
pub fn read(self: @This(), alloc: std.mem.Allocator, T: type) ![]T {
|
||||||
const out = try alloc.alloc(T, @divExact(self.size, @sizeOf(T)));
|
const out = try alloc.alloc(T, @divExact(self.size, @sizeOf(T)));
|
||||||
|
|
||||||
const staging = try init(
|
const staging = try init(self.gloc, .{
|
||||||
self.gloc,
|
.size = self.size,
|
||||||
self.size,
|
.usage = .initMany(&.{ .MapRead, .CopyDst }),
|
||||||
.initMany(&.{ .MapRead, .CopyDst }),
|
.label = "staging_read_buffer",
|
||||||
);
|
});
|
||||||
defer staging.deinit();
|
defer staging.deinit();
|
||||||
|
|
||||||
const enc = c.wgpuDeviceCreateCommandEncoder(self.gloc.device.device, null) orelse return error.Encoder;
|
const enc = c.wgpuDeviceCreateCommandEncoder(self.gloc.device.device, null) orelse return error.Encoder;
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
const c = @import("utils.zig").c;
|
const c = @import("utils.zig").c;
|
||||||
const sv = @import("utils.zig").sv;
|
const sv = @import("utils.zig").sv;
|
||||||
|
const svOpt = @import("utils.zig").svOpt;
|
||||||
const GpuAllocator = @import("GpuAllocator.zig");
|
const GpuAllocator = @import("GpuAllocator.zig");
|
||||||
const GpuBuffer = @import("GpuBuffer.zig");
|
const GpuBuffer = @import("GpuBuffer.zig");
|
||||||
const GpuDevice = @import("GpuDevice.zig");
|
const GpuDevice = @import("GpuDevice.zig");
|
||||||
@ -11,6 +12,7 @@ pub const Binding = struct {
|
|||||||
};
|
};
|
||||||
|
|
||||||
pub const ComputeDef = struct {
|
pub const ComputeDef = struct {
|
||||||
|
label: ?[]const u8 = null,
|
||||||
bindings: []const Binding,
|
bindings: []const Binding,
|
||||||
workgroup_size: u32 = 256,
|
workgroup_size: u32 = 256,
|
||||||
max_workgroups: u32 = 65535,
|
max_workgroups: u32 = 65535,
|
||||||
@ -33,7 +35,10 @@ pub fn init(gloc: GpuAllocator, wgsl: []const u8, def: ComputeDef) !@This() {
|
|||||||
}) orelse return error.Shader;
|
}) orelse return error.Shader;
|
||||||
defer c.wgpuShaderModuleRelease(shader);
|
defer c.wgpuShaderModuleRelease(shader);
|
||||||
|
|
||||||
const pip = try gloc.allocComputePipeline(.{ .compute = .{ .module = shader, .entryPoint = sv("main") } });
|
const pip = try gloc.allocComputePipeline(.{
|
||||||
|
.label = svOpt(def.label),
|
||||||
|
.compute = .{ .module = shader, .entryPoint = sv("main") },
|
||||||
|
});
|
||||||
|
|
||||||
return .{
|
return .{
|
||||||
.gloc = gloc,
|
.gloc = gloc,
|
||||||
@ -108,11 +113,11 @@ pub fn run(
|
|||||||
defer if (info_buf) |b| b.deinit();
|
defer if (info_buf) |b| b.deinit();
|
||||||
|
|
||||||
if (self.def.append_info_buffer) {
|
if (self.def.append_info_buffer) {
|
||||||
info_buf = try GpuBuffer.init(
|
info_buf = try GpuBuffer.init(gloc, .{
|
||||||
gloc,
|
.size = @sizeOf(u32),
|
||||||
@sizeOf(u32),
|
.usage = .initMany(&.{ .Uniform, .CopyDst }),
|
||||||
.initMany(&.{ .Uniform, .CopyDst }),
|
.label = "compute_info_buffer",
|
||||||
);
|
});
|
||||||
c.wgpuQueueWriteBuffer(gloc.device.queue, info_buf.?.raw, 0, &elements_count, @sizeOf(u32));
|
c.wgpuQueueWriteBuffer(gloc.device.queue, info_buf.?.raw, 0, &elements_count, @sizeOf(u32));
|
||||||
|
|
||||||
entries_buf[entry_count] = .{
|
entries_buf[entry_count] = .{
|
||||||
|
|||||||
@ -1,20 +1,17 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const c = @import("utils.zig").c;
|
const c = @import("utils.zig").c;
|
||||||
const sv = @import("utils.zig").sv;
|
const sv = @import("utils.zig").sv;
|
||||||
|
const svOpt = @import("utils.zig").svOpt;
|
||||||
const GpuAllocator = @import("GpuAllocator.zig");
|
const GpuAllocator = @import("GpuAllocator.zig");
|
||||||
const GpuTextureFormat = @import("lib.zig").GpuTextureFormat;
|
const GpuTextureFormat = @import("lib.zig").GpuTextureFormat;
|
||||||
|
|
||||||
// TODO: Make Allocator more zig like
|
|
||||||
// - GpuDevice can return a GpuAllocator that just allocate and nothing else
|
|
||||||
// - From this GpuAllocator, can create a GpuArena like std.heap.ArenaAllocator.init(allocator)
|
|
||||||
// - Rename GpuArenaAllocator too
|
|
||||||
|
|
||||||
const Ctx = struct {
|
const Ctx = struct {
|
||||||
adapter: c.WGPUAdapter = null,
|
adapter: c.WGPUAdapter = null,
|
||||||
device: c.WGPUDevice = null,
|
device: c.WGPUDevice = null,
|
||||||
};
|
};
|
||||||
|
|
||||||
const GpuDeviceConfig = struct {
|
const GpuDeviceDef = struct {
|
||||||
|
label: ?[]const u8 = null,
|
||||||
/// VRAM limit. Default 2 GB
|
/// VRAM limit. Default 2 GB
|
||||||
vram_bytes_limit: u64 = 2 * 1024 * 1024 * 1024,
|
vram_bytes_limit: u64 = 2 * 1024 * 1024 * 1024,
|
||||||
power_preference: enum(c_uint) {
|
power_preference: enum(c_uint) {
|
||||||
@ -31,9 +28,9 @@ device: c.WGPUDevice,
|
|||||||
queue: c.WGPUQueue,
|
queue: c.WGPUQueue,
|
||||||
limits: c.WGPULimits,
|
limits: c.WGPULimits,
|
||||||
|
|
||||||
config: GpuDeviceConfig,
|
def: GpuDeviceDef,
|
||||||
|
|
||||||
pub fn init(config: GpuDeviceConfig) !@This() {
|
pub fn init(def: GpuDeviceDef) !@This() {
|
||||||
const instance = c.wgpuCreateInstance(
|
const instance = c.wgpuCreateInstance(
|
||||||
&std.mem.zeroes(c.WGPUInstanceDescriptor),
|
&std.mem.zeroes(c.WGPUInstanceDescriptor),
|
||||||
) orelse return error.NoInstance;
|
) orelse return error.NoInstance;
|
||||||
@ -42,13 +39,25 @@ pub fn init(config: GpuDeviceConfig) !@This() {
|
|||||||
var ctx = Ctx{};
|
var ctx = Ctx{};
|
||||||
_ = c.wgpuInstanceRequestAdapter(
|
_ = c.wgpuInstanceRequestAdapter(
|
||||||
instance,
|
instance,
|
||||||
&.{ .powerPreference = @intFromEnum(config.power_preference) },
|
&.{ .powerPreference = @intFromEnum(def.power_preference) },
|
||||||
.{ .callback = onAdapter, .userdata1 = &ctx },
|
.{ .callback = onAdapter, .userdata1 = &ctx },
|
||||||
);
|
);
|
||||||
c.wgpuInstanceProcessEvents(instance);
|
c.wgpuInstanceProcessEvents(instance);
|
||||||
const adapter = ctx.adapter orelse return error.NoAdapter;
|
const adapter = ctx.adapter orelse return error.NoAdapter;
|
||||||
errdefer c.wgpuAdapterRelease(adapter);
|
errdefer c.wgpuAdapterRelease(adapter);
|
||||||
|
|
||||||
|
var adapter_info = std.mem.zeroes(c.WGPUAdapterInfo);
|
||||||
|
_ = c.wgpuAdapterGetInfo(adapter, &adapter_info);
|
||||||
|
|
||||||
|
std.log.info("=== WebGPU Device Initialized ===", .{});
|
||||||
|
if (adapter_info.device.length > 0 and adapter_info.device.data != null) {
|
||||||
|
std.log.info(" Device Name : {s}", .{adapter_info.device.data[0..adapter_info.device.length]});
|
||||||
|
}
|
||||||
|
if (adapter_info.architecture.length > 0 and adapter_info.architecture.data != null) {
|
||||||
|
std.log.info(" Architecture : {s}", .{adapter_info.architecture.data[0..adapter_info.architecture.length]});
|
||||||
|
}
|
||||||
|
std.log.info(" Backend Type : {d}", .{adapter_info.backendType});
|
||||||
|
|
||||||
var supported_features = std.mem.zeroes(c.WGPUSupportedFeatures);
|
var supported_features = std.mem.zeroes(c.WGPUSupportedFeatures);
|
||||||
c.wgpuAdapterGetFeatures(adapter, &supported_features);
|
c.wgpuAdapterGetFeatures(adapter, &supported_features);
|
||||||
|
|
||||||
@ -56,6 +65,11 @@ pub fn init(config: GpuDeviceConfig) !@This() {
|
|||||||
supported_limits.nextInChain = null;
|
supported_limits.nextInChain = null;
|
||||||
if (c.wgpuAdapterGetLimits(adapter, &supported_limits) != 1) return error.FailedToGetAdapterLimits;
|
if (c.wgpuAdapterGetLimits(adapter, &supported_limits) != 1) return error.FailedToGetAdapterLimits;
|
||||||
|
|
||||||
|
std.log.info(" Max Buf Size : {d} MB", .{supported_limits.maxBufferSize / 1024 / 1024});
|
||||||
|
std.log.info(" Max Storage : {d} MB", .{supported_limits.maxStorageBufferBindingSize / 1024 / 1024});
|
||||||
|
std.log.info(" Max Workgroup: X: {d}, Y: {d}, Z: {d}", .{ supported_limits.maxComputeWorkgroupSizeX, supported_limits.maxComputeWorkgroupSizeY, supported_limits.maxComputeWorkgroupSizeZ });
|
||||||
|
std.log.info(" VRAM Budget : {d} MB", .{def.vram_bytes_limit / 1024 / 1024});
|
||||||
|
|
||||||
var has_f16 = false;
|
var has_f16 = false;
|
||||||
for (0..supported_features.featureCount) |i| {
|
for (0..supported_features.featureCount) |i| {
|
||||||
if (supported_features.features[i] == c.WGPUFeatureName_ShaderF16) {
|
if (supported_features.features[i] == c.WGPUFeatureName_ShaderF16) {
|
||||||
@ -63,6 +77,8 @@ pub fn init(config: GpuDeviceConfig) !@This() {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
std.log.info(" Shader F16 : {}", .{has_f16});
|
||||||
|
std.log.info("=================================", .{});
|
||||||
|
|
||||||
var feature_buf = [_]c.WGPUFeatureName{c.WGPUFeatureName_ShaderF16};
|
var feature_buf = [_]c.WGPUFeatureName{c.WGPUFeatureName_ShaderF16};
|
||||||
const required_features: []const c.WGPUFeatureName =
|
const required_features: []const c.WGPUFeatureName =
|
||||||
@ -70,7 +86,7 @@ pub fn init(config: GpuDeviceConfig) !@This() {
|
|||||||
|
|
||||||
const device_descriptor = c.WGPUDeviceDescriptor{
|
const device_descriptor = c.WGPUDeviceDescriptor{
|
||||||
.nextInChain = null,
|
.nextInChain = null,
|
||||||
.label = sv("TensorCompilerDevice"),
|
.label = svOpt(def.label),
|
||||||
.requiredFeatureCount = required_features.len,
|
.requiredFeatureCount = required_features.len,
|
||||||
.requiredFeatures = if (required_features.len > 0) required_features.ptr else null,
|
.requiredFeatures = if (required_features.len > 0) required_features.ptr else null,
|
||||||
.requiredLimits = &supported_limits,
|
.requiredLimits = &supported_limits,
|
||||||
@ -89,7 +105,7 @@ pub fn init(config: GpuDeviceConfig) !@This() {
|
|||||||
.device = device,
|
.device = device,
|
||||||
.queue = c.wgpuDeviceGetQueue(device),
|
.queue = c.wgpuDeviceGetQueue(device),
|
||||||
.limits = supported_limits,
|
.limits = supported_limits,
|
||||||
.config = config,
|
.def = def,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const c = @import("utils.zig").c;
|
const c = @import("utils.zig").c;
|
||||||
const sv = @import("utils.zig").sv;
|
const sv = @import("utils.zig").sv;
|
||||||
|
const svOpt = @import("utils.zig").svOpt;
|
||||||
const GpuAllocator = @import("GpuAllocator.zig");
|
const GpuAllocator = @import("GpuAllocator.zig");
|
||||||
const GpuBuffer = @import("GpuBuffer.zig");
|
const GpuBuffer = @import("GpuBuffer.zig");
|
||||||
const GpuDevice = @import("GpuDevice.zig");
|
const GpuDevice = @import("GpuDevice.zig");
|
||||||
@ -12,6 +13,7 @@ pub const Binding = struct {
|
|||||||
};
|
};
|
||||||
|
|
||||||
pub const GpuRenderDef = struct {
|
pub const GpuRenderDef = struct {
|
||||||
|
label: ?[]const u8 = null,
|
||||||
bindings: []const Binding = &.{},
|
bindings: []const Binding = &.{},
|
||||||
/// The surface texture format we are rendering to (e.g., BGRA8Unorm)
|
/// The surface texture format we are rendering to (e.g., BGRA8Unorm)
|
||||||
texture_format: GpuTextureFormat,
|
texture_format: GpuTextureFormat,
|
||||||
@ -68,6 +70,7 @@ pub fn init(gloc: GpuAllocator, wgsl: []const u8, def: GpuRenderDef) !@This() {
|
|||||||
|
|
||||||
// 3. Compile the Complete Render Pipeline
|
// 3. Compile the Complete Render Pipeline
|
||||||
const pip = try gloc.allocRenderPipeline(.{
|
const pip = try gloc.allocRenderPipeline(.{
|
||||||
|
.label = svOpt(def.label),
|
||||||
.vertex = .{
|
.vertex = .{
|
||||||
.module = shader,
|
.module = shader,
|
||||||
.entryPoint = sv(def.vertex_entry),
|
.entryPoint = sv(def.vertex_entry),
|
||||||
|
|||||||
@ -1,11 +1,13 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const c = @import("utils.zig").c;
|
const c = @import("utils.zig").c;
|
||||||
|
const svOpt = @import("utils.zig").svOpt;
|
||||||
const GpuAllocator = @import("GpuAllocator.zig");
|
const GpuAllocator = @import("GpuAllocator.zig");
|
||||||
const GpuBuffer = @import("GpuBuffer.zig");
|
const GpuBuffer = @import("GpuBuffer.zig");
|
||||||
const GpuTextureFormat = @import("lib.zig").GpuTextureFormat;
|
const GpuTextureFormat = @import("lib.zig").GpuTextureFormat;
|
||||||
const GpuTextureUsage = @import("lib.zig").GpuTextureUsage;
|
const GpuTextureUsage = @import("lib.zig").GpuTextureUsage;
|
||||||
|
|
||||||
pub const GpuTextureDef = struct {
|
pub const GpuTextureDef = struct {
|
||||||
|
label: ?[]const u8 = null,
|
||||||
size: c.WGPUExtent3D,
|
size: c.WGPUExtent3D,
|
||||||
usage: std.EnumSet(GpuTextureUsage),
|
usage: std.EnumSet(GpuTextureUsage),
|
||||||
format: GpuTextureFormat,
|
format: GpuTextureFormat,
|
||||||
@ -21,6 +23,7 @@ pub fn init(gloc: GpuAllocator, def: GpuTextureDef) !@This() {
|
|||||||
while (iter.next()) |flag| use |= @intFromEnum(flag);
|
while (iter.next()) |flag| use |= @intFromEnum(flag);
|
||||||
|
|
||||||
const desc = c.WGPUTextureDescriptor{
|
const desc = c.WGPUTextureDescriptor{
|
||||||
|
.label = svOpt(def.label),
|
||||||
.usage = use,
|
.usage = use,
|
||||||
.dimension = c.WGPUTextureDimension_2D,
|
.dimension = c.WGPUTextureDimension_2D,
|
||||||
.size = def.size,
|
.size = def.size,
|
||||||
@ -51,7 +54,11 @@ pub fn bytesSizeRow(self: @This()) u32 {
|
|||||||
|
|
||||||
/// Return a GpuBuffer containing a copy of the texture.
|
/// Return a GpuBuffer containing a copy of the texture.
|
||||||
pub fn buffCopy(self: @This(), gloc: GpuAllocator) !GpuBuffer {
|
pub fn buffCopy(self: @This(), gloc: GpuAllocator) !GpuBuffer {
|
||||||
const buf = try GpuBuffer.init(gloc, self.bytesSize(), .initMany(&.{ .CopyDst, .CopySrc }));
|
const buf = try GpuBuffer.init(gloc, .{
|
||||||
|
.size = self.bytesSize(),
|
||||||
|
.usage = .initMany(&.{ .CopyDst, .CopySrc }),
|
||||||
|
.label = "texture_copy_buffer",
|
||||||
|
});
|
||||||
|
|
||||||
const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device.device, null) orelse return error.Encoder;
|
const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device.device, null) orelse return error.Encoder;
|
||||||
defer c.wgpuCommandEncoderRelease(enc);
|
defer c.wgpuCommandEncoderRelease(enc);
|
||||||
@ -102,38 +109,52 @@ pub fn load(
|
|||||||
) !void {
|
) !void {
|
||||||
const bytes = data.len * @sizeOf(T);
|
const bytes = data.len * @sizeOf(T);
|
||||||
|
|
||||||
if (bytes == self.size) {
|
c.wgpuQueueWriteTexture(
|
||||||
// Aligned path: direct download
|
self.gloc.device.queue,
|
||||||
c.wgpuQueueWriteBuffer(self.gloc.device.queue, self.raw, 0, data.ptr, self.size);
|
&.{
|
||||||
} else {
|
.texture = self.raw,
|
||||||
// Unaligned path: Split the write into an aligned chunk and a padded remainder
|
.mipLevel = 0,
|
||||||
// to support arbitrary lengths without any allocations or large stack arrays.
|
.origin = .{ .x = 0, .y = 0, .z = 0 },
|
||||||
const aligned_part = (bytes / 4) * 4;
|
.aspect = c.WGPUTextureAspect_All,
|
||||||
if (aligned_part > 0) {
|
},
|
||||||
c.wgpuQueueWriteBuffer(self.gloc.device.queue, self.raw, 0, data.ptr, aligned_part);
|
data.ptr,
|
||||||
}
|
bytes,
|
||||||
|
&.{
|
||||||
var remainder_buf: [4]u8 = .{ 0, 0, 0, 0 };
|
.offset = 0,
|
||||||
const data_bytes = std.mem.sliceAsBytes(data);
|
.bytesPerRow = self.bytesSizeRow(),
|
||||||
@memcpy(remainder_buf[0 .. bytes - aligned_part], data_bytes[aligned_part..bytes]);
|
.rowsPerImage = self.def.size.height,
|
||||||
|
},
|
||||||
c.wgpuQueueWriteBuffer(self.gloc.device.queue, self.raw, aligned_part, &remainder_buf, 4);
|
&self.def.size,
|
||||||
}
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// GPU to CPU
|
// GPU to CPU
|
||||||
pub fn read(self: @This(), alloc: std.mem.Allocator, T: type) ![]T {
|
pub fn read(self: @This(), alloc: std.mem.Allocator, T: type) ![]T {
|
||||||
const out = try alloc.alloc(T, @divExact(self.size, @sizeOf(T)));
|
const out = try alloc.alloc(T, @divExact(self.size, @sizeOf(T)));
|
||||||
|
|
||||||
const staging = try init(
|
const staging = try init(self.gloc, .{
|
||||||
self.gloc,
|
.size = self.size,
|
||||||
self.size,
|
.usage = .initMany(&.{ .MapRead, .CopyDst }),
|
||||||
.initMany(&.{ .MapRead, .CopyDst }),
|
.label = "texture_read_staging",
|
||||||
);
|
});
|
||||||
defer staging.deinit();
|
defer staging.deinit();
|
||||||
|
|
||||||
const enc = c.wgpuDeviceCreateCommandEncoder(self.gloc.device.device, null) orelse return error.Encoder;
|
const enc = c.wgpuDeviceCreateCommandEncoder(self.gloc.device.device, null) orelse return error.Encoder;
|
||||||
c.wgpuCommandEncoderCopyBufferToBuffer(enc, self.raw, 0, staging.raw, 0, self.size);
|
const src_copy = c.WGPUTexelCopyTextureInfo{
|
||||||
|
.texture = self.raw,
|
||||||
|
.mipLevel = 0,
|
||||||
|
.origin = .{ .x = 0, .y = 0, .z = 0 },
|
||||||
|
.aspect = c.WGPUTextureAspect_All,
|
||||||
|
};
|
||||||
|
const dst_copy = c.WGPUTexelCopyBufferInfo{
|
||||||
|
.buffer = staging.raw,
|
||||||
|
.layout = .{
|
||||||
|
.offset = 0,
|
||||||
|
.bytesPerRow = self.bytesSizeRow(),
|
||||||
|
.rowsPerImage = self.def.size.height,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
c.wgpuCommandEncoderCopyTextureToBuffer(enc, &src_copy, &dst_copy, &self.def.size);
|
||||||
const cmd = c.wgpuCommandEncoderFinish(enc, null);
|
const cmd = c.wgpuCommandEncoderFinish(enc, null);
|
||||||
defer c.wgpuCommandEncoderRelease(enc);
|
defer c.wgpuCommandEncoderRelease(enc);
|
||||||
defer c.wgpuCommandBufferRelease(cmd);
|
defer c.wgpuCommandBufferRelease(cmd);
|
||||||
|
|||||||
@ -1,11 +1,13 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const c = @import("utils.zig").c;
|
const c = @import("utils.zig").c;
|
||||||
|
const svOpt = @import("utils.zig").svOpt;
|
||||||
const GpuAllocator = @import("GpuAllocator.zig");
|
const GpuAllocator = @import("GpuAllocator.zig");
|
||||||
const GpuTexture = @import("lib.zig").GpuTexture;
|
const GpuTexture = @import("lib.zig").GpuTexture;
|
||||||
const GpuTextureFormat = @import("lib.zig").GpuTextureFormat;
|
const GpuTextureFormat = @import("lib.zig").GpuTextureFormat;
|
||||||
const GpuTextureUsage = @import("lib.zig").GpuTextureUsage;
|
const GpuTextureUsage = @import("lib.zig").GpuTextureUsage;
|
||||||
|
|
||||||
pub const GpuViewDef = struct {
|
pub const GpuViewDef = struct {
|
||||||
|
label: ?[]const u8 = null,
|
||||||
usage: std.EnumSet(GpuTextureUsage) = .empty,
|
usage: std.EnumSet(GpuTextureUsage) = .empty,
|
||||||
format: GpuTextureFormat = .Undefined,
|
format: GpuTextureFormat = .Undefined,
|
||||||
};
|
};
|
||||||
@ -19,6 +21,7 @@ pub fn init(gloc: GpuAllocator, texture: GpuTexture, def: GpuViewDef) !@This() {
|
|||||||
while (iter.next()) |flag| use |= @intFromEnum(flag);
|
while (iter.next()) |flag| use |= @intFromEnum(flag);
|
||||||
|
|
||||||
const raw = try gloc.allocTextureView(texture.raw, .{
|
const raw = try gloc.allocTextureView(texture.raw, .{
|
||||||
|
.label = svOpt(def.label),
|
||||||
.format = @intFromEnum(def.format),
|
.format = @intFromEnum(def.format),
|
||||||
.usage = use,
|
.usage = use,
|
||||||
.mipLevelCount = 1,
|
.mipLevelCount = 1,
|
||||||
|
|||||||
@ -3,3 +3,17 @@ pub const c = @cImport(@cInclude("wgpu.h"));
|
|||||||
pub fn sv(s: []const u8) c.WGPUStringView {
|
pub fn sv(s: []const u8) c.WGPUStringView {
|
||||||
return .{ .data = s.ptr, .length = s.len };
|
return .{ .data = s.ptr, .length = s.len };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Allows safely passing an optional Zig string to a WebGPU string view.
|
||||||
|
pub fn svOpt(s: ?[]const u8) c.WGPUStringView {
|
||||||
|
if (s) |str| return sv(str);
|
||||||
|
return .{ .data = null, .length = 0 };
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Helper to print a WGPUStringView in your logs.
|
||||||
|
pub fn viewStr(view: c.WGPUStringView) []const u8 {
|
||||||
|
if (view.data != null and view.length > 0) {
|
||||||
|
return view.data[0..view.length];
|
||||||
|
}
|
||||||
|
return "unnamed";
|
||||||
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user