Split GpuAllocator into GpuAllocator and GpuDevice

2026-05-17 20:55:49 +02:00 · 2026-05-17 20:55:49 +02:00 · cef6155f41
commit cef6155f41
parent d57968d6df
4 changed files with 131 additions and 123 deletions
--- a/src/GpuAllocator.zig
+++ b/src/GpuAllocator.zig
@ -1,69 +1,24 @@
 const std = @import("std");
 const sh = @import("shaders.zig");
+const GpuDevice = @import("GpuDevice.zig");
 const c = @import("c.zig").c;

 const GpuAllocator = @This();

+device: GpuDevice,
 cpu_allocator: std.mem.Allocator,
-instance: c.WGPUInstance,
-adapter: c.WGPUAdapter,
-device: c.WGPUDevice,
-queue: c.WGPUQueue,
-
 tracked_buffers: std.AutoHashMap(c.WGPUBuffer, void),
-
 pipelines: struct {
    add: c.WGPUComputePipeline,
 },

-pub fn init(cpu_allocator: std.mem.Allocator) !GpuAllocator {
-    const instance = c.wgpuCreateInstance(
-        &std.mem.zeroes(c.WGPUInstanceDescriptor),
-    ) orelse return error.NoInstance;
-    errdefer c.wgpuInstanceRelease(instance);
-
-    var ctx = Ctx{};
-    _ = c.wgpuInstanceRequestAdapter(
-        instance,
-        &.{ .powerPreference = c.WGPUPowerPreference_HighPerformance },
-        .{ .callback = onAdapter, .userdata1 = &ctx },
-    );
-    c.wgpuInstanceProcessEvents(instance);
-    const adapter = ctx.adapter orelse return error.NoAdapter;
-    errdefer c.wgpuAdapterRelease(adapter);
-
-    // --- QUERY HARDWARE LIMITS ---
-    var supported_limits = std.mem.zeroes(c.WGPULimits);
-    supported_limits.nextInChain = null;
-
-    // Fetch what your physical graphic card can actually handle
-    if (c.wgpuAdapterGetLimits(adapter, &supported_limits) != 1) return error.FailedToGetAdapterLimits;
-
-    const device_descriptor = c.WGPUDeviceDescriptor{
-        .nextInChain = null,
-        .label = sv("TensorCompilerDevice"),
-        .requiredFeatureCount = 0,
-        .requiredFeatures = null,
-        .requiredLimits = &supported_limits,
-    };
-
-    _ = c.wgpuAdapterRequestDevice(
-        adapter,
-        &device_descriptor,
-        .{ .callback = onDevice, .userdata1 = &ctx },
-    );
-    c.wgpuInstanceProcessEvents(instance);
-    const device = ctx.device orelse return error.NoDevice;
-
+pub fn init(cpu_allocator: std.mem.Allocator, device: GpuDevice) !GpuAllocator {
    return .{
-        .cpu_allocator = cpu_allocator,
-        .instance = instance,
-        .adapter = adapter,
        .device = device,
-        .queue = c.wgpuDeviceGetQueue(device),
+        .cpu_allocator = cpu_allocator,
        .tracked_buffers = .init(cpu_allocator),
        .pipelines = .{
-            .add = try buildPipeline(device, sh.SHADER_ADD),
+            .add = try buildPipeline(device.device, sh.SHADER_ADD),
        },
    };
 }
@ -79,11 +34,6 @@ pub fn deinit(self: *GpuAllocator) void {
        c.wgpuBufferRelease(buf);
    }
    self.tracked_buffers.deinit();
-
-    c.wgpuQueueRelease(self.queue);
-    c.wgpuDeviceRelease(self.device);
-    c.wgpuAdapterRelease(self.adapter);
-    c.wgpuInstanceRelease(self.instance);
 }

 pub fn registerBuffer(
@ -91,7 +41,7 @@ pub fn registerBuffer(
    bytes: u64,
    usage: c.WGPUBufferUsage,
 ) !c.WGPUBuffer {
-    const buf = c.wgpuDeviceCreateBuffer(self.device, &.{
+    const buf = c.wgpuDeviceCreateBuffer(self.device.device, &.{
        .usage = usage,
        .size = bytes,
    }) orelse return error.BufferAlloc;
@ -107,59 +57,6 @@ pub fn unregisterAndDestroyBuffer(self: *GpuAllocator, buf: c.WGPUBuffer) void {
    }
 }

-// ── Internal ─────────────────────────────────────────────────────────────
-
-pub fn makeBuffer(
-    self: *GpuAllocator,
-    bytes: u64,
-    usage: c.WGPUBufferUsage,
-) !c.WGPUBuffer {
-    return c.wgpuDeviceCreateBuffer(self.device, &.{
-        .usage = usage,
-        .size = bytes,
-    }) orelse error.BufferAlloc;
-}
-
-/// Poll until GPU work completes. Use after submit if you need CPU sync.
-pub fn poll(self: *GpuAllocator) void {
-    _ = c.wgpuDevicePoll(self.device, 1, null);
-}
-
-const Ctx = struct {
-    adapter: c.WGPUAdapter = null,
-    device: c.WGPUDevice = null,
-};
-
-fn onAdapter(
-    status: c.WGPURequestAdapterStatus,
-    adapter: c.WGPUAdapter,
-    _: c.WGPUStringView,
-    userdata1: ?*anyopaque,
-    _: ?*anyopaque,
-) callconv(.c) void {
-    if (status != c.WGPURequestAdapterStatus_Success) {
-        std.log.err("Adapter request failed (status={d})", .{status});
-        return;
-    }
-    const ctx: *Ctx = @ptrCast(@alignCast(userdata1.?));
-    ctx.adapter = adapter;
-}
-
-fn onDevice(
-    status: c.WGPURequestDeviceStatus,
-    device: c.WGPUDevice,
-    _: c.WGPUStringView,
-    userdata1: ?*anyopaque,
-    _: ?*anyopaque,
-) callconv(.c) void {
-    if (status != c.WGPURequestDeviceStatus_Success) {
-        std.log.err("Device request failed (status={d})", .{status});
-        return;
-    }
-    const ctx: *Ctx = @ptrCast(@alignCast(userdata1.?));
-    ctx.device = device;
-}
-
 fn buildPipeline(device: c.WGPUDevice, wgsl: []const u8) !c.WGPUComputePipeline {
    var wgsl_src = c.WGPUShaderSourceWGSL{
        .chain = .{ .sType = c.WGPUSType_ShaderSourceWGSL },
--- a/src/GpuDevice.zig
+++ b/src/GpuDevice.zig
@ -0,0 +1,107 @@
+const std = @import("std");
+const sh = @import("shaders.zig");
+const c = @import("c.zig").c;
+
+const Ctx = struct {
+    adapter: c.WGPUAdapter = null,
+    device: c.WGPUDevice = null,
+};
+
+const GpuAllocator = @This();
+
+instance: c.WGPUInstance,
+adapter: c.WGPUAdapter,
+device: c.WGPUDevice,
+queue: c.WGPUQueue,
+
+pub fn init() !GpuAllocator {
+    const instance = c.wgpuCreateInstance(
+        &std.mem.zeroes(c.WGPUInstanceDescriptor),
+    ) orelse return error.NoInstance;
+    errdefer c.wgpuInstanceRelease(instance);
+
+    var ctx = Ctx{};
+    _ = c.wgpuInstanceRequestAdapter(
+        instance,
+        &.{ .powerPreference = c.WGPUPowerPreference_HighPerformance },
+        .{ .callback = onAdapter, .userdata1 = &ctx },
+    );
+    c.wgpuInstanceProcessEvents(instance);
+    const adapter = ctx.adapter orelse return error.NoAdapter;
+    errdefer c.wgpuAdapterRelease(adapter);
+
+    // --- QUERY HARDWARE LIMITS ---
+    var supported_limits = std.mem.zeroes(c.WGPULimits);
+    supported_limits.nextInChain = null;
+
+    // Fetch what your physical graphic card can actually handle
+    if (c.wgpuAdapterGetLimits(adapter, &supported_limits) != 1) return error.FailedToGetAdapterLimits;
+
+    const device_descriptor = c.WGPUDeviceDescriptor{
+        .nextInChain = null,
+        .label = sv("TensorCompilerDevice"),
+        .requiredFeatureCount = 0,
+        .requiredFeatures = null,
+        .requiredLimits = &supported_limits,
+    };
+
+    _ = c.wgpuAdapterRequestDevice(
+        adapter,
+        &device_descriptor,
+        .{ .callback = onDevice, .userdata1 = &ctx },
+    );
+    c.wgpuInstanceProcessEvents(instance);
+    const device = ctx.device orelse return error.NoDevice;
+
+    return .{
+        .instance = instance,
+        .adapter = adapter,
+        .device = device,
+        .queue = c.wgpuDeviceGetQueue(device),
+    };
+}
+
+pub fn deinit(self: GpuAllocator) void {
+    c.wgpuQueueRelease(self.queue);
+    c.wgpuDeviceRelease(self.device);
+    c.wgpuAdapterRelease(self.adapter);
+    c.wgpuInstanceRelease(self.instance);
+}
+
+pub fn poll(self: *GpuAllocator) void {
+    _ = c.wgpuDevicePoll(self.device, 1, null);
+}
+
+fn onAdapter(
+    status: c.WGPURequestAdapterStatus,
+    adapter: c.WGPUAdapter,
+    _: c.WGPUStringView,
+    userdata1: ?*anyopaque,
+    _: ?*anyopaque,
+) callconv(.c) void {
+    if (status != c.WGPURequestAdapterStatus_Success) {
+        std.log.err("Adapter request failed (status={d})", .{status});
+        return;
+    }
+    const ctx: *Ctx = @ptrCast(@alignCast(userdata1.?));
+    ctx.adapter = adapter;
+}
+
+fn onDevice(
+    status: c.WGPURequestDeviceStatus,
+    device: c.WGPUDevice,
+    _: c.WGPUStringView,
+    userdata1: ?*anyopaque,
+    _: ?*anyopaque,
+) callconv(.c) void {
+    if (status != c.WGPURequestDeviceStatus_Success) {
+        std.log.err("Device request failed (status={d})", .{status});
+        return;
+    }
+    const ctx: *Ctx = @ptrCast(@alignCast(userdata1.?));
+    ctx.device = device;
+}
+
+fn sv(s: []const u8) c.WGPUStringView {
+    return .{ .data = s.ptr, .length = s.len };
+}
--- a/src/Mat.zig
+++ b/src/Mat.zig
@ -26,7 +26,7 @@ pub fn load(
        c.WGPUBufferUsage_Storage | c.WGPUBufferUsage_CopyDst | c.WGPUBufferUsage_CopySrc,
    );

-    c.wgpuQueueWriteBuffer(gloc.queue, buf.raw, 0, data.ptr, bytes);
+    c.wgpuQueueWriteBuffer(gloc.device.queue, buf.raw, 0, data.ptr, bytes);
    return .{ .buf = buf, .rows = rows, .cols = cols };
 }

@ -74,12 +74,12 @@ pub fn read(self: Mat, gloc: *GpuAllocator, alloc: std.mem.Allocator) ![]f32 {
    );
    defer staging.deinit();

-    const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device, null) orelse return error.Encoder;
+    const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device.device, null) orelse return error.Encoder;
    c.wgpuCommandEncoderCopyBufferToBuffer(enc, self.buf.raw, 0, staging.raw, 0, bytes);
    const cmd = c.wgpuCommandEncoderFinish(enc, null);
    defer c.wgpuCommandEncoderRelease(enc);
    defer c.wgpuCommandBufferRelease(cmd);
-    c.wgpuQueueSubmit(gloc.queue, 1, &cmd);
+    c.wgpuQueueSubmit(gloc.device.queue, 1, &cmd);

    var mapped = false;
    staging.mapAsync(
@ -88,7 +88,7 @@ pub fn read(self: Mat, gloc: *GpuAllocator, alloc: std.mem.Allocator) ![]f32 {
        bytes,
        .{ .callback = onMapped, .userdata1 = &mapped },
    );
-    while (!mapped) gloc.poll();
+    while (!mapped) gloc.device.poll();

    const ptr: [*]const f32 = @ptrCast(@alignCast(
        staging.getConstMappedRange(0, bytes),
@ -137,7 +137,7 @@ fn dispatch2in1out(
        defer info_buf.deinit();

        // Write the number of elements *in this chunk* to the uniform buffer
-        c.wgpuQueueWriteBuffer(gloc.queue, info_buf.raw, 0, &current_chunk_elements, @sizeOf(u32));
+        c.wgpuQueueWriteBuffer(gloc.device.queue, info_buf.raw, 0, &current_chunk_elements, @sizeOf(u32));

        // Bind only the sub-slice for this chunk using `.offset` and `.size`
        const entries = [_]c.WGPUBindGroupEntry{
@ -164,14 +164,14 @@ fn submitPass(
    const bgl = c.wgpuComputePipelineGetBindGroupLayout(pipeline, 0);
    defer c.wgpuBindGroupLayoutRelease(bgl);

-    const bg = c.wgpuDeviceCreateBindGroup(gloc.device, &.{
+    const bg = c.wgpuDeviceCreateBindGroup(gloc.device.device, &.{
        .layout = bgl,
        .entries = entries.ptr,
        .entryCount = entries.len,
    }) orelse return error.BindGroup;
    defer c.wgpuBindGroupRelease(bg);

-    const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device, null) orelse
+    const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device.device, null) orelse
        return error.Encoder;
    const pass = c.wgpuCommandEncoderBeginComputePass(enc, null);
    c.wgpuComputePassEncoderSetPipeline(pass, pipeline);
@ -190,7 +190,7 @@ fn submitPass(
    const cmd = c.wgpuCommandEncoderFinish(enc, null);
    defer c.wgpuCommandEncoderRelease(enc);
    defer c.wgpuCommandBufferRelease(cmd);
-    c.wgpuQueueSubmit(gloc.queue, 1, &cmd);
+    c.wgpuQueueSubmit(gloc.device.queue, 1, &cmd);
 }

 fn ceilDiv(n: usize, d: usize) usize {
--- a/src/main.zig
+++ b/src/main.zig
@ -1,9 +1,13 @@
 const std = @import("std");
+const GpuDevice = @import("GpuDevice.zig");
 const GpuAllocator = @import("GpuAllocator.zig");
 const Mat = @import("Mat.zig");

 pub fn main(init: std.process.Init) !void {
-    var gloc = try GpuAllocator.init(init.gpa);
+    const device = try GpuDevice.init();
+    defer device.deinit();
+
+    var gloc = try GpuAllocator.init(init.gpa, device);
    defer gloc.deinit();

    // Define the sizes you want to benchmark
@ -15,11 +19,11 @@ pub fn main(init: std.process.Init) !void {
        65536,
        262144,
        1024 * 1024,
-        4 * 1024 * 1024,
-        4 * 4 * 1024 * 1024,
-        4 * 4 * 4 * 1024 * 1024,
-        4 * 4 * 4 * 4 * 1024 * 1024,
-        4 * 4 * 4 * 4 * 2 * 1024 * 1024,
+        // 4 * 1024 * 1024,
+        // 4 * 4 * 1024 * 1024,
+        // 4 * 4 * 4 * 1024 * 1024,
+        // 4 * 4 * 4 * 4 * 1024 * 1024,
+        // 4 * 4 * 4 * 4 * 2 * 1024 * 1024,
    };

    // Print table header