Syntax improvements + GpuDeviceConfig
This commit is contained in:
parent
6a2cbe2734
commit
0fcb9ee351
@ -1,7 +1,7 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const GpuDevice = @import("GpuDevice.zig");
|
const GpuDevice = @import("GpuDevice.zig");
|
||||||
const GpuBuffer = @import("GpuBuffer.zig");
|
const GpuBuffer = @import("GpuBuffer.zig");
|
||||||
const c = @import("c.zig").c;
|
const c = @import("utils.zig").c;
|
||||||
|
|
||||||
device: GpuDevice,
|
device: GpuDevice,
|
||||||
tracked_buffers: std.AutoHashMap(c.WGPUBuffer, void),
|
tracked_buffers: std.AutoHashMap(c.WGPUBuffer, void),
|
||||||
@ -39,6 +39,10 @@ pub fn registerBuffer(
|
|||||||
.usage = usage,
|
.usage = usage,
|
||||||
.size = bytes,
|
.size = bytes,
|
||||||
}) orelse return error.BufferAlloc;
|
}) orelse return error.BufferAlloc;
|
||||||
|
errdefer {
|
||||||
|
c.wgpuBufferDestroy(buf);
|
||||||
|
c.wgpuBufferRelease(buf);
|
||||||
|
}
|
||||||
|
|
||||||
try self.tracked_buffers.put(buf, {});
|
try self.tracked_buffers.put(buf, {});
|
||||||
self.allocated_vram_bytes += bytes;
|
self.allocated_vram_bytes += bytes;
|
||||||
@ -50,6 +54,5 @@ pub fn unregisterAndDestroyBuffer(self: *@This(), buf: GpuBuffer) void {
|
|||||||
c.wgpuBufferDestroy(buf.raw);
|
c.wgpuBufferDestroy(buf.raw);
|
||||||
c.wgpuBufferRelease(buf.raw);
|
c.wgpuBufferRelease(buf.raw);
|
||||||
self.allocated_vram_bytes -= buf.size;
|
self.allocated_vram_bytes -= buf.size;
|
||||||
self.device.poll();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const c = @import("c.zig").c;
|
const c = @import("utils.zig").c;
|
||||||
const GpuAllocator = @import("GpuAllocator.zig");
|
const GpuAllocator = @import("GpuAllocator.zig");
|
||||||
|
|
||||||
raw: c.WGPUBuffer,
|
raw: c.WGPUBuffer,
|
||||||
|
|||||||
@ -1,22 +1,26 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const c = @import("c.zig").c;
|
const c = @import("utils.zig").c;
|
||||||
|
const sv = @import("utils.zig").sv;
|
||||||
|
|
||||||
const Ctx = struct {
|
const Ctx = struct {
|
||||||
adapter: c.WGPUAdapter = null,
|
adapter: c.WGPUAdapter = null,
|
||||||
device: c.WGPUDevice = null,
|
device: c.WGPUDevice = null,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const GpuDeviceConfig = struct {
|
||||||
|
/// VRAM limit. Default 2 GB
|
||||||
|
vram_bytes_limit: u64 = 2 * 1024 * 1024 * 1024,
|
||||||
|
};
|
||||||
|
|
||||||
instance: c.WGPUInstance,
|
instance: c.WGPUInstance,
|
||||||
adapter: c.WGPUAdapter,
|
adapter: c.WGPUAdapter,
|
||||||
device: c.WGPUDevice,
|
device: c.WGPUDevice,
|
||||||
queue: c.WGPUQueue,
|
queue: c.WGPUQueue,
|
||||||
limits: c.WGPULimits,
|
limits: c.WGPULimits,
|
||||||
|
|
||||||
config: struct {
|
config: GpuDeviceConfig,
|
||||||
vram_bytes_limit: u64 = 10 * 1024 * 1024 * 1024, // 10 GB
|
|
||||||
} = .{},
|
|
||||||
|
|
||||||
pub fn init() !@This() {
|
pub fn init(config: GpuDeviceConfig) !@This() {
|
||||||
const instance = c.wgpuCreateInstance(
|
const instance = c.wgpuCreateInstance(
|
||||||
&std.mem.zeroes(c.WGPUInstanceDescriptor),
|
&std.mem.zeroes(c.WGPUInstanceDescriptor),
|
||||||
) orelse return error.NoInstance;
|
) orelse return error.NoInstance;
|
||||||
@ -61,6 +65,7 @@ pub fn init() !@This() {
|
|||||||
.device = device,
|
.device = device,
|
||||||
.queue = c.wgpuDeviceGetQueue(device),
|
.queue = c.wgpuDeviceGetQueue(device),
|
||||||
.limits = supported_limits,
|
.limits = supported_limits,
|
||||||
|
.config = config,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -104,7 +109,3 @@ fn onDevice(
|
|||||||
const ctx: *Ctx = @ptrCast(@alignCast(userdata1.?));
|
const ctx: *Ctx = @ptrCast(@alignCast(userdata1.?));
|
||||||
ctx.device = device;
|
ctx.device = device;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn sv(s: []const u8) c.WGPUStringView {
|
|
||||||
return .{ .data = s.ptr, .length = s.len };
|
|
||||||
}
|
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const GpuDevice = @import("GpuDevice.zig");
|
const GpuDevice = @import("GpuDevice.zig");
|
||||||
const c = @import("c.zig").c;
|
const c = @import("utils.zig").c;
|
||||||
|
const sv = @import("utils.zig").sv;
|
||||||
|
|
||||||
raw: c.WGPUComputePipeline,
|
raw: c.WGPUComputePipeline,
|
||||||
|
|
||||||
@ -22,7 +23,3 @@ pub fn init(device: GpuDevice, wgsl: []const u8) !@This() {
|
|||||||
pub fn deinit(self: @This()) void {
|
pub fn deinit(self: @This()) void {
|
||||||
c.wgpuComputePipelineRelease(self.raw);
|
c.wgpuComputePipelineRelease(self.raw);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn sv(s: []const u8) c.WGPUStringView {
|
|
||||||
return .{ .data = s.ptr, .length = s.len };
|
|
||||||
}
|
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
/// Dummy
|
/// Dummy
|
||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const c = @import("c.zig").c;
|
const c = @import("utils.zig").c;
|
||||||
const GpuAllocator = @import("GpuAllocator.zig");
|
const GpuAllocator = @import("GpuAllocator.zig");
|
||||||
const GpuBuffer = @import("GpuBuffer.zig");
|
const GpuBuffer = @import("GpuBuffer.zig");
|
||||||
const GpuDevice = @import("GpuDevice.zig");
|
const GpuDevice = @import("GpuDevice.zig");
|
||||||
|
|||||||
@ -4,10 +4,10 @@ const GpuAllocator = @import("GpuAllocator.zig");
|
|||||||
const GpuPipeline = @import("GpuPipeline.zig");
|
const GpuPipeline = @import("GpuPipeline.zig");
|
||||||
const Vec = @import("Vec.zig");
|
const Vec = @import("Vec.zig");
|
||||||
|
|
||||||
const c = @import("c.zig").c;
|
const c = @import("utils.zig").c;
|
||||||
|
|
||||||
pub fn main(init: std.process.Init) !void {
|
pub fn main(init: std.process.Init) !void {
|
||||||
const device = try GpuDevice.init();
|
const device = try GpuDevice.init(.{ .vram_bytes_limit = 4 * 1024 * 1024 * 1024 });
|
||||||
defer device.deinit();
|
defer device.deinit();
|
||||||
|
|
||||||
var gloc = try GpuAllocator.init(init.gpa, device);
|
var gloc = try GpuAllocator.init(init.gpa, device);
|
||||||
@ -41,11 +41,11 @@ pub fn main(init: std.process.Init) !void {
|
|||||||
4 * 4 * 4 * 1024,
|
4 * 4 * 4 * 1024,
|
||||||
4 * 4 * 4 * 4 * 1024,
|
4 * 4 * 4 * 4 * 1024,
|
||||||
1024 * 1024,
|
1024 * 1024,
|
||||||
4 * 1024 * 1024,
|
// 4 * 1024 * 1024,
|
||||||
4 * 4 * 1024 * 1024,
|
// 4 * 4 * 1024 * 1024,
|
||||||
4 * 4 * 4 * 1024 * 1024,
|
// 4 * 4 * 4 * 1024 * 1024,
|
||||||
4 * 4 * 4 * 4 * 1024 * 1024,
|
// 4 * 4 * 4 * 4 * 1024 * 1024,
|
||||||
4 * 4 * 4 * 4 * 4 * 1024 * 1024,
|
// 4 * 4 * 4 * 4 * 4 * 1024 * 1024,
|
||||||
};
|
};
|
||||||
|
|
||||||
const iterations = 10;
|
const iterations = 10;
|
||||||
@ -56,9 +56,9 @@ pub fn main(init: std.process.Init) !void {
|
|||||||
|
|
||||||
for (sizes) |size| {
|
for (sizes) |size| {
|
||||||
// --- Phase 1: Host Init/Alloc (Outside the iteration loop for pure host prep) ---
|
// --- Phase 1: Host Init/Alloc (Outside the iteration loop for pure host prep) ---
|
||||||
var data_a = try allocator.alloc(f32, size);
|
const data_a = try allocator.alloc(f32, size);
|
||||||
defer allocator.free(data_a);
|
defer allocator.free(data_a);
|
||||||
var data_b = try allocator.alloc(f32, size);
|
const data_b = try allocator.alloc(f32, size);
|
||||||
defer allocator.free(data_b);
|
defer allocator.free(data_b);
|
||||||
|
|
||||||
for (0..size) |i| {
|
for (0..size) |i| {
|
||||||
@ -72,7 +72,7 @@ pub fn main(init: std.process.Init) !void {
|
|||||||
var min_compute_ns: u64 = std.math.maxInt(u64);
|
var min_compute_ns: u64 = std.math.maxInt(u64);
|
||||||
|
|
||||||
// Track peak VRAM usage observed during the iterations
|
// Track peak VRAM usage observed during the iterations
|
||||||
var peak_vram_bytes: usize = 0;
|
var peak_vram_bytes: u64 = 0;
|
||||||
|
|
||||||
for (0..iterations) |_| {
|
for (0..iterations) |_| {
|
||||||
// --- 1. GPU ALLOCATION PHASE ---
|
// --- 1. GPU ALLOCATION PHASE ---
|
||||||
@ -95,9 +95,8 @@ pub fn main(init: std.process.Init) !void {
|
|||||||
|
|
||||||
// All 3 buffers (a, b, sum) are currently resident in VRAM here.
|
// All 3 buffers (a, b, sum) are currently resident in VRAM here.
|
||||||
// Querying now catches the true peak allocation step.
|
// Querying now catches the true peak allocation step.
|
||||||
if (gloc.allocated_vram_bytes > peak_vram_bytes) {
|
if (gloc.allocated_vram_bytes > peak_vram_bytes)
|
||||||
peak_vram_bytes = gloc.allocated_vram_bytes;
|
peak_vram_bytes = gloc.allocated_vram_bytes;
|
||||||
}
|
|
||||||
|
|
||||||
_ = c.wgpuDevicePoll(device.device, 1, null);
|
_ = c.wgpuDevicePoll(device.device, 1, null);
|
||||||
|
|
||||||
|
|||||||
5
src/utils.zig
Normal file
5
src/utils.zig
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
pub const c = @cImport(@cInclude("wgpu.h"));
|
||||||
|
|
||||||
|
pub fn sv(s: []const u8) c.WGPUStringView {
|
||||||
|
return .{ .data = s.ptr, .length = s.len };
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user