Basic GpuAlloc + Mat
This commit is contained in:
parent
8050bab969
commit
8ff91d35ec
399
src/gpu.zig
Normal file
399
src/gpu.zig
Normal file
@ -0,0 +1,399 @@
|
||||
/// gpu.zig — WebGPU compute module
|
||||
///
|
||||
/// Usage:
|
||||
/// var gloc = try GpuAllocator.init();
|
||||
/// defer gloc.deinit();
|
||||
///
|
||||
/// const a = try Mat.load(&gloc, &data_a, 4, 4);
|
||||
/// defer a.deinit();
|
||||
/// const b = try Mat.load(&gloc, &data_b, 4, 4);
|
||||
/// defer b.deinit();
|
||||
///
|
||||
/// const c = try a.add(&gloc, b);
|
||||
/// defer c.deinit();
|
||||
///
|
||||
/// var out: [16]f32 = undefined;
|
||||
/// try c.read(&gloc, &out);
|
||||
const std = @import("std");
|
||||
const c = @cImport(@cInclude("wgpu.h"));
|
||||
|
||||
// ── Shaders ───────────────────────────────────────────────────────────────────
|
||||
|
||||
// WGSL: element-wise out[i] = a[i] + b[i] over flat f32 arrays.
// One invocation per element; the arrayLength guard covers the tail when the
// element count is not a multiple of the 64-wide workgroup.
const SHADER_ADD =
    \\@group(0) @binding(0) var<storage, read> a : array<f32>;
    \\@group(0) @binding(1) var<storage, read> b : array<f32>;
    \\@group(0) @binding(2) var<storage, read_write> out : array<f32>;
    \\
    \\@compute @workgroup_size(64)
    \\fn main(@builtin(global_invocation_id) gid : vec3<u32>) {
    \\    let i = gid.x;
    \\    if (i < arrayLength(&out)) {
    \\        out[i] = a[i] + b[i];
    \\    }
    \\}
;

// WGSL: element-wise out[i] = a[i] * u.scalar.
// The scalar arrives via a uniform buffer at binding 2 (see Mat.scale).
const SHADER_SCALE =
    \\struct Uniforms { scalar : f32 }
    \\@group(0) @binding(0) var<storage, read> a : array<f32>;
    \\@group(0) @binding(1) var<storage, read_write> out : array<f32>;
    \\@group(0) @binding(2) var<uniform> u : Uniforms;
    \\
    \\@compute @workgroup_size(64)
    \\fn main(@builtin(global_invocation_id) gid : vec3<u32>) {
    \\    let i = gid.x;
    \\    if (i < arrayLength(&out)) {
    \\        out[i] = a[i] * u.scalar;
    \\    }
    \\}
;
|
||||
|
||||
// ── Helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Wrap a Zig slice as a non-owning WGPU string view.
/// The slice must outlive every use of the returned view.
fn sv(s: []const u8) c.WGPUStringView {
    return c.WGPUStringView{
        .data = s.ptr,
        .length = s.len,
    };
}
|
||||
|
||||
/// Ceiling division: smallest k with k*d >= n. `d` must be nonzero.
/// Written as quotient-plus-remainder-carry so it cannot overflow, unlike
/// the usual (n + d - 1) / d, which traps for n near maxInt(u32).
fn ceilDiv(n: u32, d: u32) u32 {
    return n / d + @intFromBool(n % d != 0);
}
|
||||
|
||||
// ── Callbacks ─────────────────────────────────────────────────────────────────
|
||||
|
||||
// Shared result slot for the async adapter/device request callbacks.
// Fields stay null until the matching callback fires successfully, so a
// null after processing events means the request failed.
const Ctx = struct {
    adapter: c.WGPUAdapter = null,
    device: c.WGPUDevice = null,
};
|
||||
|
||||
/// Adapter-request callback: stores the adapter into the Ctx passed via
/// `userdata1`. On failure, logs and leaves ctx.adapter null.
fn onAdapter(
    status: c.WGPURequestAdapterStatus,
    adapter: c.WGPUAdapter,
    _: c.WGPUStringView,
    userdata1: ?*anyopaque,
    _: ?*anyopaque,
) callconv(.c) void {
    if (status == c.WGPURequestAdapterStatus_Success) {
        const context: *Ctx = @ptrCast(@alignCast(userdata1.?));
        context.adapter = adapter;
    } else {
        std.log.err("Adapter request failed (status={d})", .{status});
    }
}
|
||||
|
||||
/// Device-request callback: stores the device into the Ctx passed via
/// `userdata1`. On failure, logs and leaves ctx.device null so init errors.
/// (fix) Removed a stray leftover debug statement that printed the raw
/// device handle to stderr on every successful request.
fn onDevice(
    status: c.WGPURequestDeviceStatus,
    device: c.WGPUDevice,
    _: c.WGPUStringView,
    userdata1: ?*anyopaque,
    _: ?*anyopaque,
) callconv(.c) void {
    if (status != c.WGPURequestDeviceStatus_Success) {
        std.log.err("Device request failed (status={d})", .{status});
        return;
    }
    const ctx: *Ctx = @ptrCast(@alignCast(userdata1.?));
    ctx.device = device;
}
|
||||
|
||||
/// Buffer-map callback: writes the success flag through `userdata1`.
/// The polling loop in Mat.read spins on this flag.
fn onMapped(
    status: c.WGPUMapAsyncStatus,
    _: c.WGPUStringView,
    userdata1: ?*anyopaque,
    _: ?*anyopaque,
) callconv(.c) void {
    const done_flag: *bool = @ptrCast(@alignCast(userdata1.?));
    done_flag.* = status == c.WGPUMapAsyncStatus_Success;
}
|
||||
|
||||
// ── Pipeline factory ──────────────────────────────────────────────────────────
|
||||
|
||||
/// Compile `wgsl` into a compute pipeline with entry point "main".
/// The bind group layout is auto-inferred from the shader; the temporary
/// shader module is released before returning.
fn buildPipeline(device: c.WGPUDevice, wgsl: []const u8) !c.WGPUComputePipeline {
    var source = c.WGPUShaderSourceWGSL{
        .chain = .{ .sType = c.WGPUSType_ShaderSourceWGSL },
        .code = sv(wgsl),
    };
    const module = c.wgpuDeviceCreateShaderModule(device, &.{
        .nextInChain = @ptrCast(&source),
    }) orelse return error.Shader;
    defer c.wgpuShaderModuleRelease(module);

    const pipeline = c.wgpuDeviceCreateComputePipeline(device, &.{
        .compute = .{ .module = module, .entryPoint = sv("main") },
    }) orelse return error.Pipeline;
    return pipeline;
}
|
||||
|
||||
// ── GpuAllocator ──────────────────────────────────────────────────────────────
|
||||
|
||||
/// Owns the WebGPU context. One per application.
/// All Mat instances must not outlive their GpuAllocator.
/// Not thread-safe: callers serialize access themselves.
pub const GpuAllocator = struct {
    instance: c.WGPUInstance,
    adapter: c.WGPUAdapter,
    device: c.WGPUDevice,
    queue: c.WGPUQueue,

    // Lazily created, cached for lifetime of allocator
    _pip_add: c.WGPUComputePipeline = null,
    _pip_scale: c.WGPUComputePipeline = null,

    /// Create instance → adapter → device → queue.
    /// NOTE(review): assumes wgpuInstanceProcessEvents delivers the request
    /// callbacks synchronously after a single call (wgpu-native behavior) —
    /// confirm before targeting another WebGPU implementation.
    pub fn init() !GpuAllocator {
        const instance = c.wgpuCreateInstance(
            &std.mem.zeroes(c.WGPUInstanceDescriptor),
        ) orelse return error.NoInstance;
        errdefer c.wgpuInstanceRelease(instance);

        var ctx = Ctx{};
        _ = c.wgpuInstanceRequestAdapter(
            instance,
            &.{ .powerPreference = c.WGPUPowerPreference_HighPerformance },
            .{ .callback = onAdapter, .userdata1 = &ctx },
        );
        // Drive the async callback; ctx.adapter stays null on failure.
        c.wgpuInstanceProcessEvents(instance);
        const adapter = ctx.adapter orelse return error.NoAdapter;
        errdefer c.wgpuAdapterRelease(adapter);

        _ = c.wgpuAdapterRequestDevice(
            adapter,
            null,
            .{ .callback = onDevice, .userdata1 = &ctx },
        );
        c.wgpuInstanceProcessEvents(instance);
        const device = ctx.device orelse return error.NoDevice;

        return .{
            .instance = instance,
            .adapter = adapter,
            .device = device,
            .queue = c.wgpuDeviceGetQueue(device),
        };
    }

    /// Release cached pipelines and the WebGPU objects in reverse
    /// acquisition order. All Mats must be deinited before this.
    pub fn deinit(self: *GpuAllocator) void {
        if (self._pip_add) |p| c.wgpuComputePipelineRelease(p);
        if (self._pip_scale) |p| c.wgpuComputePipelineRelease(p);
        c.wgpuQueueRelease(self.queue);
        c.wgpuDeviceRelease(self.device);
        c.wgpuAdapterRelease(self.adapter);
        c.wgpuInstanceRelease(self.instance);
    }

    // ── Internal ─────────────────────────────────────────────────────────────

    /// Create a raw device buffer of `bytes` with the given usage flags.
    /// Caller owns the returned buffer (release with wgpuBufferRelease).
    pub fn makeBuffer(
        self: *GpuAllocator,
        bytes: u64,
        usage: c.WGPUBufferUsage,
    ) !c.WGPUBuffer {
        return c.wgpuDeviceCreateBuffer(self.device, &.{
            .usage = usage,
            .size = bytes,
        }) orelse error.BufferAlloc;
    }

    /// Lazily build and cache the element-wise add pipeline.
    fn pipAdd(self: *GpuAllocator) !c.WGPUComputePipeline {
        if (self._pip_add == null)
            self._pip_add = try buildPipeline(self.device, SHADER_ADD);
        return self._pip_add.?;
    }

    /// Lazily build and cache the scalar-multiply pipeline.
    fn pipScale(self: *GpuAllocator) !c.WGPUComputePipeline {
        if (self._pip_scale == null)
            self._pip_scale = try buildPipeline(self.device, SHADER_SCALE);
        return self._pip_scale.?;
    }

    /// Poll until GPU work completes. Use after submit if you need CPU sync.
    pub fn poll(self: *GpuAllocator) void {
        _ = c.wgpuDevicePoll(self.device, 1, null);
    }
};
|
||||
|
||||
/// GPU-resident f32 matrix (row-major).
/// Buffer usage: Storage | CopyDst | CopySrc (usable as input and output).
/// Each Mat owns its buffer — call deinit — and must not outlive the
/// GpuAllocator that created it.
pub const Mat = struct {
    buf: c.WGPUBuffer,
    rows: u32,
    cols: u32,

    // ── Lifecycle ─────────────────────────────────────────────────────────────

    /// Allocate GPU buffer and upload `data`. `data.len` must equal rows*cols.
    pub fn load(
        gloc: *GpuAllocator,
        data: []const f32,
        rows: u32,
        cols: u32,
    ) !Mat {
        std.debug.assert(data.len == @as(usize, rows) * cols);
        const bytes = data.len * @sizeOf(f32);
        const buf = try gloc.makeBuffer(
            bytes,
            c.WGPUBufferUsage_Storage |
                c.WGPUBufferUsage_CopyDst |
                c.WGPUBufferUsage_CopySrc,
        );
        c.wgpuQueueWriteBuffer(gloc.queue, buf, 0, data.ptr, bytes);
        return .{ .buf = buf, .rows = rows, .cols = cols };
    }

    /// Allocate zeroed GPU buffer (no upload).
    pub fn zeros(gloc: *GpuAllocator, rows: u32, cols: u32) !Mat {
        // Widen before multiplying so large shapes don't overflow u32.
        const bytes: u64 = @as(u64, rows) * cols * @sizeOf(f32);
        const buf = try gloc.makeBuffer(
            bytes,
            c.WGPUBufferUsage_Storage |
                c.WGPUBufferUsage_CopyDst |
                c.WGPUBufferUsage_CopySrc,
        );
        return .{ .buf = buf, .rows = rows, .cols = cols };
    }

    /// Release the GPU buffer. The Mat is invalid afterwards.
    pub fn deinit(self: Mat) void {
        c.wgpuBufferRelease(self.buf);
    }

    /// Number of elements (rows * cols).
    pub fn len(self: Mat) u32 {
        return self.rows * self.cols;
    }

    /// Size of the backing buffer in bytes.
    pub fn byteSize(self: Mat) u64 {
        return @as(u64, self.len()) * @sizeOf(f32);
    }

    /// Element-wise add. Shapes must match. Returns new Mat (caller owns).
    pub fn add(self: Mat, gloc: *GpuAllocator, other: Mat) !Mat {
        std.debug.assert(self.rows == other.rows and self.cols == other.cols);

        const result = try Mat.zeros(gloc, self.rows, self.cols);
        errdefer result.deinit();

        const pipeline = try gloc.pipAdd();
        try dispatch2in1out(gloc, pipeline, self.buf, other.buf, result.buf, self.byteSize(), self.len());

        return result;
    }

    /// Element-wise multiply by scalar. Returns new Mat (caller owns).
    pub fn scale(self: Mat, gloc: *GpuAllocator, scalar: f32) !Mat {
        const result = try Mat.zeros(gloc, self.rows, self.cols);
        errdefer result.deinit();

        const bytes = self.byteSize();
        const n = self.len();

        // Upload scalar as uniform buffer. Releasing after submit is safe:
        // the queue keeps the buffer alive until the work completes.
        const uni_buf = try gloc.makeBuffer(
            @sizeOf(f32),
            c.WGPUBufferUsage_Uniform | c.WGPUBufferUsage_CopyDst,
        );
        defer c.wgpuBufferRelease(uni_buf);
        c.wgpuQueueWriteBuffer(gloc.queue, uni_buf, 0, &scalar, @sizeOf(f32));

        // (fix) Removed a redundant GetBindGroupLayout/Release pair here:
        // submitPass fetches the layout itself, so this one was never used.
        const pipeline = try gloc.pipScale();
        const entries = [_]c.WGPUBindGroupEntry{
            .{ .binding = 0, .buffer = self.buf, .offset = 0, .size = bytes },
            .{ .binding = 1, .buffer = result.buf, .offset = 0, .size = bytes },
            .{ .binding = 2, .buffer = uni_buf, .offset = 0, .size = @sizeOf(f32) },
        };
        try submitPass(gloc, pipeline, &entries, n);

        return result;
    }

    /// Read GPU buffer back to CPU. `out.len` must be >= rows*cols.
    /// Blocks (polls the device) until the readback completes.
    pub fn read(self: Mat, gloc: *GpuAllocator, out: []f32) !void {
        std.debug.assert(out.len >= self.len());
        const bytes = self.byteSize();

        // Storage buffers can't be mapped directly; stage through a
        // MapRead buffer.
        const staging = try gloc.makeBuffer(
            bytes,
            c.WGPUBufferUsage_MapRead | c.WGPUBufferUsage_CopyDst,
        );
        defer c.wgpuBufferRelease(staging);

        // Copy result → staging
        const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device, null) orelse
            return error.Encoder;
        c.wgpuCommandEncoderCopyBufferToBuffer(enc, self.buf, 0, staging, 0, bytes);
        const cmd = c.wgpuCommandEncoderFinish(enc, null);
        defer c.wgpuCommandEncoderRelease(enc);
        defer c.wgpuCommandBufferRelease(cmd);
        c.wgpuQueueSubmit(gloc.queue, 1, &cmd);

        // Map and copy to slice.
        // NOTE(review): if the map FAILS, onMapped leaves `mapped` false and
        // this loop never exits — consider a tri-state "done" flag.
        var mapped = false;
        _ = c.wgpuBufferMapAsync(
            staging,
            c.WGPUMapMode_Read,
            0,
            bytes,
            .{ .callback = onMapped, .userdata1 = &mapped },
        );
        while (!mapped) gloc.poll();

        // (fix) Check the mapped range for null instead of casting blindly.
        const raw = c.wgpuBufferGetConstMappedRange(staging, 0, bytes) orelse
            return error.MapFailed;
        const ptr: [*]const f32 = @ptrCast(@alignCast(raw));
        @memcpy(out[0..self.len()], ptr[0..self.len()]);
        c.wgpuBufferUnmap(staging);
    }
};
|
||||
|
||||
// ── Dispatch helpers ──────────────────────────────────────────────────────────
|
||||
|
||||
/// Encode + submit a 2-input, 1-output compute pass (used by add).
/// Bindings: 0 = a, 1 = b, 2 = out; each buffer is bound in full (`bytes`).
/// `n` is the element count used to size the dispatch.
/// (fix) Removed a redundant GetBindGroupLayout/Release pair: submitPass
/// fetches the layout itself, so the one acquired here was never used.
fn dispatch2in1out(
    gloc: *GpuAllocator,
    pipeline: c.WGPUComputePipeline,
    buf_a: c.WGPUBuffer,
    buf_b: c.WGPUBuffer,
    buf_out: c.WGPUBuffer,
    bytes: u64,
    n: u32,
) !void {
    const entries = [_]c.WGPUBindGroupEntry{
        .{ .binding = 0, .buffer = buf_a, .offset = 0, .size = bytes },
        .{ .binding = 1, .buffer = buf_b, .offset = 0, .size = bytes },
        .{ .binding = 2, .buffer = buf_out, .offset = 0, .size = bytes },
    };
    try submitPass(gloc, pipeline, &entries, n);
}
|
||||
|
||||
/// Create bind group, encode pass, submit. workgroup_size=64.
/// Fire-and-forget: does not wait for completion — use GpuAllocator.poll
/// if CPU-side synchronization is needed.
fn submitPass(
    gloc: *GpuAllocator,
    pipeline: c.WGPUComputePipeline,
    entries: []const c.WGPUBindGroupEntry,
    n: u32,
) !void {
    // Layout is auto-inferred from the shader; getting it returns a
    // reference that must be released.
    const bgl = c.wgpuComputePipelineGetBindGroupLayout(pipeline, 0);
    defer c.wgpuBindGroupLayoutRelease(bgl);

    const bg = c.wgpuDeviceCreateBindGroup(gloc.device, &.{
        .layout = bgl,
        .entries = entries.ptr,
        .entryCount = entries.len,
    }) orelse return error.BindGroup;
    defer c.wgpuBindGroupRelease(bg);

    const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device, null) orelse
        return error.Encoder;
    const pass = c.wgpuCommandEncoderBeginComputePass(enc, null);
    c.wgpuComputePassEncoderSetPipeline(pass, pipeline);
    c.wgpuComputePassEncoderSetBindGroup(pass, 0, bg, 0, null);
    // One invocation per element; round up so the tail workgroup is
    // dispatched (the shaders guard with arrayLength).
    c.wgpuComputePassEncoderDispatchWorkgroups(pass, ceilDiv(n, 64), 1, 1);
    c.wgpuComputePassEncoderEnd(pass);
    c.wgpuComputePassEncoderRelease(pass);

    const cmd = c.wgpuCommandEncoderFinish(enc, null);
    defer c.wgpuCommandEncoderRelease(enc);
    defer c.wgpuCommandBufferRelease(cmd);
    c.wgpuQueueSubmit(gloc.queue, 1, &cmd);
}
|
||||
258
src/main.zig
258
src/main.zig
@ -1,239 +1,49 @@
|
||||
// Minimal WebGPU compute in Zig: element-wise matrix addition
|
||||
// Uses wgpu-native C bindings.
|
||||
// Build: see ../build.zig
|
||||
//
|
||||
// Data flow:
|
||||
// CPU (mat_a, mat_b) → GPU storage buffers → compute shader → GPU buf_c
|
||||
// → staging buffer (mapped) → CPU read → print
|
||||
|
||||
const std = @import("std");
|
||||
const c = @cImport(@cInclude("wgpu.h"));
|
||||
const gpu = @import("gpu.zig");
|
||||
|
||||
// ── Config ────────────────────────────────────────────────────────────────────
|
||||
const ROWS: u32 = 4;
|
||||
const COLS: u32 = 4;
|
||||
const N = ROWS * COLS; // 16 elements
|
||||
const BUF_BYTES = N * @sizeOf(f32);
|
||||
|
||||
// ── WGSL Compute Shader ───────────────────────────────────────────────────────
|
||||
// workgroup_size(4,4) matches one full 4×4 matrix → dispatch(1,1,1)
|
||||
const SHADER =
|
||||
\\@group(0) @binding(0) var<storage, read> mat_a : array<f32>;
|
||||
\\@group(0) @binding(1) var<storage, read> mat_b : array<f32>;
|
||||
\\@group(0) @binding(2) var<storage, read_write> mat_c : array<f32>;
|
||||
\\
|
||||
\\@compute @workgroup_size(4, 4)
|
||||
\\fn main(@builtin(global_invocation_id) gid : vec3<u32>) {
|
||||
\\ let idx = gid.y * 4u + gid.x;
|
||||
\\ if (idx < arrayLength(&mat_c)) {
|
||||
\\ mat_c[idx] = mat_a[idx] + mat_b[idx];
|
||||
\\ }
|
||||
\\}
|
||||
;
|
||||
|
||||
// ── Callback state ────────────────────────────────────────────────────────────
|
||||
const Ctx = struct {
|
||||
adapter: c.WGPUAdapter = null,
|
||||
device: c.WGPUDevice = null,
|
||||
};
|
||||
|
||||
fn onAdapter(
|
||||
status: c.WGPURequestAdapterStatus,
|
||||
adapter: c.WGPUAdapter,
|
||||
_: c.WGPUStringView,
|
||||
userdata1: ?*anyopaque,
|
||||
_: ?*anyopaque,
|
||||
) callconv(.c) void {
|
||||
if (status != c.WGPURequestAdapterStatus_Success) {
|
||||
std.log.err("Adapter request failed (status={d})", .{status});
|
||||
return;
|
||||
}
|
||||
const ctx: *Ctx = @ptrCast(@alignCast(userdata1.?));
|
||||
ctx.adapter = adapter;
|
||||
}
|
||||
|
||||
fn onDevice(
|
||||
status: c.WGPURequestDeviceStatus,
|
||||
device: c.WGPUDevice,
|
||||
_: c.WGPUStringView,
|
||||
userdata1: ?*anyopaque,
|
||||
_: ?*anyopaque,
|
||||
) callconv(.c) void {
|
||||
if (status != c.WGPURequestDeviceStatus_Success) {
|
||||
std.log.err("Device request failed (status={d})", .{status});
|
||||
return;
|
||||
}
|
||||
const ctx: *Ctx = @ptrCast(@alignCast(userdata1.?));
|
||||
ctx.device = device;
|
||||
std.debug.print("{?}", .{device});
|
||||
}
|
||||
|
||||
fn onMapped(
|
||||
status: c.WGPUMapAsyncStatus,
|
||||
_: c.WGPUStringView,
|
||||
userdata1: ?*anyopaque,
|
||||
_: ?*anyopaque,
|
||||
) callconv(.c) void {
|
||||
const flag: *bool = @ptrCast(@alignCast(userdata1.?));
|
||||
flag.* = (status == c.WGPUMapAsyncStatus_Success);
|
||||
}
|
||||
|
||||
fn sv(s: []const u8) c.WGPUStringView {
|
||||
return .{ .data = s.ptr, .length = s.len };
|
||||
}
|
||||
|
||||
// ── Main ──────────────────────────────────────────────────────────────────────
|
||||
/// Demo entry point: builds two 4×4 matrices, runs add then scale on the
/// GPU via the gpu.zig module, reads both results back and prints them.
/// (fix) This span contained diff residue — deleted lines from the previous
/// revision (manual instance/adapter/device/buffer setup) interleaved with
/// the new Mat-based code; only the post-commit body is kept.
pub fn main() !void {
    var gloc = try gpu.GpuAllocator.init();
    defer gloc.deinit();

    // Input data: a[i] = i, b[i] = 15 - i → add should give all 15s
    var data_a: [16]f32 = undefined;
    var data_b: [16]f32 = undefined;
    for (0..16) |i| {
        data_a[i] = @floatFromInt(i);
        data_b[i] = @floatFromInt(15 - i);
    }

    const a = try gpu.Mat.load(&gloc, &data_a, 4, 4);
    defer a.deinit();
    const b = try gpu.Mat.load(&gloc, &data_b, 4, 4);
    defer b.deinit();

    // a + b
    const sum = try a.add(&gloc, b);
    defer sum.deinit();

    // sum * 2
    const scaled = try sum.scale(&gloc, 2.0);
    defer scaled.deinit();

    // Read back
    var out_sum: [16]f32 = undefined;
    var out_scaled: [16]f32 = undefined;
    try sum.read(&gloc, &out_sum);
    try scaled.read(&gloc, &out_scaled);

    // Print
    std.debug.print("\na + b (expect all 15):\n", .{});
    printMat(&out_sum, 4, 4);

    std.debug.print("\n(a + b) * 2 (expect all 30):\n", .{});
    printMat(&out_scaled, 4, 4);
}
|
||||
|
||||
// 6. Shader module ─────────────────────────────────────────────────────────
|
||||
// ✅ New API (0.20+)
|
||||
var wgsl_src = c.WGPUShaderSourceWGSL{
|
||||
.chain = .{ .sType = c.WGPUSType_ShaderSourceWGSL },
|
||||
.code = sv(SHADER),
|
||||
};
|
||||
const shader = c.wgpuDeviceCreateShaderModule(device, &.{
|
||||
.nextInChain = @ptrCast(&wgsl_src),
|
||||
}) orelse return error.Shader;
|
||||
|
||||
// 7. Compute pipeline (layout auto-inferred from shader) ───────────────────
|
||||
// ✅
|
||||
const pipeline = c.wgpuDeviceCreateComputePipeline(device, &.{
|
||||
.compute = .{
|
||||
.module = shader,
|
||||
.entryPoint = sv("main"),
|
||||
},
|
||||
}) orelse return error.Pipeline;
|
||||
defer c.wgpuComputePipelineRelease(pipeline);
|
||||
|
||||
// 8. Bind group ────────────────────────────────────────────────────────────
|
||||
const bgl = c.wgpuComputePipelineGetBindGroupLayout(pipeline, 0);
|
||||
defer c.wgpuBindGroupLayoutRelease(bgl);
|
||||
|
||||
const entries = [_]c.WGPUBindGroupEntry{
|
||||
.{ .binding = 0, .buffer = buf_a, .offset = 0, .size = BUF_BYTES },
|
||||
.{ .binding = 1, .buffer = buf_b, .offset = 0, .size = BUF_BYTES },
|
||||
.{ .binding = 2, .buffer = buf_c, .offset = 0, .size = BUF_BYTES },
|
||||
};
|
||||
const bind_group = c.wgpuDeviceCreateBindGroup(device, &.{
|
||||
.layout = bgl,
|
||||
.entries = &entries,
|
||||
.entryCount = entries.len,
|
||||
}) orelse return error.BindGroup;
|
||||
defer c.wgpuBindGroupRelease(bind_group);
|
||||
|
||||
// 9. Encode compute pass + buffer copy ────────────────────────────────────
|
||||
const encoder = c.wgpuDeviceCreateCommandEncoder(device, null) orelse
|
||||
return error.Encoder;
|
||||
|
||||
const pass = c.wgpuCommandEncoderBeginComputePass(encoder, null);
|
||||
c.wgpuComputePassEncoderSetPipeline(pass, pipeline);
|
||||
c.wgpuComputePassEncoderSetBindGroup(pass, 0, bind_group, 0, null);
|
||||
// dispatch(1,1,1): one workgroup of size (4,4) covers the whole 4×4 matrix
|
||||
c.wgpuComputePassEncoderDispatchWorkgroups(pass, 1, 1, 1);
|
||||
c.wgpuComputePassEncoderEnd(pass);
|
||||
c.wgpuComputePassEncoderRelease(pass);
|
||||
|
||||
// Copy result buffer → CPU-readable staging buffer
|
||||
c.wgpuCommandEncoderCopyBufferToBuffer(encoder, buf_c, 0, buf_staging, 0, BUF_BYTES);
|
||||
|
||||
const cmdbuf = c.wgpuCommandEncoderFinish(encoder, null);
|
||||
defer c.wgpuCommandEncoderRelease(encoder);
|
||||
defer c.wgpuCommandBufferRelease(cmdbuf);
|
||||
|
||||
// 10. Submit ───────────────────────────────────────────────────────────────
|
||||
c.wgpuQueueSubmit(queue, 1, &cmdbuf);
|
||||
|
||||
// 11. Map staging buffer back to CPU ──────────────────────────────────────
|
||||
var mapped = false;
|
||||
_ = c.wgpuBufferMapAsync(
|
||||
buf_staging,
|
||||
c.WGPUMapMode_Read,
|
||||
0,
|
||||
BUF_BYTES,
|
||||
.{ .callback = onMapped, .userdata1 = &mapped },
|
||||
);
|
||||
// Poll the device until the async map completes
|
||||
while (!mapped) _ = c.wgpuDevicePoll(device, 1, null);
|
||||
|
||||
const ptr: [*]const f32 = @ptrCast(@alignCast(
|
||||
c.wgpuBufferGetConstMappedRange(buf_staging, 0, BUF_BYTES),
|
||||
));
|
||||
const result = ptr[0..N];
|
||||
|
||||
// 12. Print ────────────────────────────────────────────────────────────────
|
||||
std.debug.print("\nmat_a + mat_b ({d}×{d}):\n", .{ ROWS, COLS });
|
||||
for (0..ROWS) |r| {
|
||||
for (0..COLS) |col|
|
||||
std.debug.print("{d:6.0}", .{result[r * COLS + col]});
|
||||
/// Print a rows×cols matrix stored row-major in `data` to stderr,
/// one row per line, each value right-aligned in 6 columns.
/// (fix) Stripped interleaved deleted-revision lines (a stray comment and
/// `c.wgpuBufferUnmap(buf_staging);`) that the diff view mixed into this
/// function's body.
fn printMat(data: []const f32, rows: u32, cols: u32) void {
    for (0..rows) |r| {
        for (0..cols) |col|
            std.debug.print("{d:6.0}", .{data[r * cols + col]});
        std.debug.print("\n", .{});
    }
}
|
||||
|
||||
239
src/reference.zig
Normal file
239
src/reference.zig
Normal file
@ -0,0 +1,239 @@
|
||||
// Minimal WebGPU compute in Zig: element-wise matrix addition
|
||||
// Uses wgpu-native C bindings.
|
||||
// Build: see ../build.zig
|
||||
//
|
||||
// Data flow:
|
||||
// CPU (mat_a, mat_b) → GPU storage buffers → compute shader → GPU buf_c
|
||||
// → staging buffer (mapped) → CPU read → print
|
||||
|
||||
const std = @import("std");
|
||||
const c = @cImport(@cInclude("wgpu.h"));
|
||||
|
||||
// ── Config ────────────────────────────────────────────────────────────────────
|
||||
const ROWS: u32 = 4;
|
||||
const COLS: u32 = 4;
|
||||
const N = ROWS * COLS; // 16 elements
|
||||
const BUF_BYTES = N * @sizeOf(f32);
|
||||
|
||||
// ── WGSL Compute Shader ───────────────────────────────────────────────────────
|
||||
// workgroup_size(4,4) matches one full 4×4 matrix → dispatch(1,1,1)
|
||||
// WGSL: element-wise mat_c[idx] = mat_a[idx] + mat_b[idx].
// workgroup_size(4,4) matches one full 4×4 matrix → dispatch(1,1,1);
// idx flattens the 2D invocation id row-major with a hard-coded width of 4.
const SHADER =
    \\@group(0) @binding(0) var<storage, read> mat_a : array<f32>;
    \\@group(0) @binding(1) var<storage, read> mat_b : array<f32>;
    \\@group(0) @binding(2) var<storage, read_write> mat_c : array<f32>;
    \\
    \\@compute @workgroup_size(4, 4)
    \\fn main(@builtin(global_invocation_id) gid : vec3<u32>) {
    \\    let idx = gid.y * 4u + gid.x;
    \\    if (idx < arrayLength(&mat_c)) {
    \\        mat_c[idx] = mat_a[idx] + mat_b[idx];
    \\    }
    \\}
;
|
||||
|
||||
// ── Callback state ────────────────────────────────────────────────────────────
|
||||
// Shared result slot for the async adapter/device request callbacks.
// Fields stay null until the matching callback fires successfully.
const Ctx = struct {
    adapter: c.WGPUAdapter = null,
    device: c.WGPUDevice = null,
};
|
||||
|
||||
/// Adapter-request callback: stores the adapter into the Ctx passed via
/// `userdata1`. On failure, logs and leaves ctx.adapter null.
fn onAdapter(
    status: c.WGPURequestAdapterStatus,
    adapter: c.WGPUAdapter,
    _: c.WGPUStringView,
    userdata1: ?*anyopaque,
    _: ?*anyopaque,
) callconv(.c) void {
    if (status == c.WGPURequestAdapterStatus_Success) {
        const context: *Ctx = @ptrCast(@alignCast(userdata1.?));
        context.adapter = adapter;
    } else {
        std.log.err("Adapter request failed (status={d})", .{status});
    }
}
|
||||
|
||||
/// Device-request callback: stores the device into the Ctx passed via
/// `userdata1`. On failure, logs and leaves ctx.device null.
/// (fix) Removed a stray leftover debug statement that printed the raw
/// device handle to stderr on every successful request.
fn onDevice(
    status: c.WGPURequestDeviceStatus,
    device: c.WGPUDevice,
    _: c.WGPUStringView,
    userdata1: ?*anyopaque,
    _: ?*anyopaque,
) callconv(.c) void {
    if (status != c.WGPURequestDeviceStatus_Success) {
        std.log.err("Device request failed (status={d})", .{status});
        return;
    }
    const ctx: *Ctx = @ptrCast(@alignCast(userdata1.?));
    ctx.device = device;
}
|
||||
|
||||
/// Buffer-map callback: writes the success flag through `userdata1`.
fn onMapped(
    status: c.WGPUMapAsyncStatus,
    _: c.WGPUStringView,
    userdata1: ?*anyopaque,
    _: ?*anyopaque,
) callconv(.c) void {
    const done_flag: *bool = @ptrCast(@alignCast(userdata1.?));
    done_flag.* = status == c.WGPUMapAsyncStatus_Success;
}
|
||||
|
||||
/// Wrap a Zig slice as a non-owning WGPU string view.
fn sv(s: []const u8) c.WGPUStringView {
    return c.WGPUStringView{
        .data = s.ptr,
        .length = s.len,
    };
}
|
||||
|
||||
// ── Main ──────────────────────────────────────────────────────────────────────
|
||||
pub fn main() !void {
|
||||
|
||||
// 1. Instance ──────────────────────────────────────────────────────────────
|
||||
const instance = c.wgpuCreateInstance(&std.mem.zeroes(c.WGPUInstanceDescriptor)) orelse
|
||||
return error.NoInstance;
|
||||
defer c.wgpuInstanceRelease(instance);
|
||||
|
||||
// 2. Adapter (async → poll) ────────────────────────────────────────────────
|
||||
var ctx = Ctx{};
|
||||
_ = c.wgpuInstanceRequestAdapter(
|
||||
instance,
|
||||
&.{ .powerPreference = c.WGPUPowerPreference_HighPerformance },
|
||||
.{ .callback = onAdapter, .userdata1 = &ctx },
|
||||
);
|
||||
c.wgpuInstanceProcessEvents(instance); // drive callbacks
|
||||
const adapter = ctx.adapter orelse return error.NoAdapter;
|
||||
defer c.wgpuAdapterRelease(adapter);
|
||||
|
||||
// 3. Device ────────────────────────────────────────────────────────────────
|
||||
_ = c.wgpuAdapterRequestDevice(adapter, null, .{ .callback = onDevice, .userdata1 = &ctx });
|
||||
c.wgpuInstanceProcessEvents(instance);
|
||||
const device = ctx.device orelse return error.NoDevice;
|
||||
defer c.wgpuDeviceRelease(device);
|
||||
|
||||
const queue = c.wgpuDeviceGetQueue(device);
|
||||
defer c.wgpuQueueRelease(queue);
|
||||
|
||||
// 4. Input data ────────────────────────────────────────────────────────────
|
||||
// mat_a[i] = i (0 … 15)
|
||||
// mat_b[i] = 15 − i → every element of mat_c should equal 15
|
||||
var mat_a: [N]f32 = undefined;
|
||||
var mat_b: [N]f32 = undefined;
|
||||
for (0..N) |i| {
|
||||
mat_a[i] = @floatFromInt(i);
|
||||
mat_b[i] = @floatFromInt(N - 1 - i);
|
||||
}
|
||||
|
||||
// 5. GPU Buffers ───────────────────────────────────────────────────────────
|
||||
const buf_a = c.wgpuDeviceCreateBuffer(device, &.{
|
||||
.usage = c.WGPUBufferUsage_Storage | c.WGPUBufferUsage_CopyDst,
|
||||
.size = BUF_BYTES,
|
||||
}) orelse return error.BufferA;
|
||||
|
||||
const buf_b = c.wgpuDeviceCreateBuffer(device, &.{
|
||||
.usage = c.WGPUBufferUsage_Storage | c.WGPUBufferUsage_CopyDst,
|
||||
.size = BUF_BYTES,
|
||||
}) orelse return error.BufferB;
|
||||
|
||||
// buf_c: GPU-only result; staging: CPU-readable copy
|
||||
const buf_c = c.wgpuDeviceCreateBuffer(device, &.{
|
||||
.usage = c.WGPUBufferUsage_Storage | c.WGPUBufferUsage_CopySrc,
|
||||
.size = BUF_BYTES,
|
||||
}) orelse return error.BufferC;
|
||||
|
||||
const buf_staging = c.wgpuDeviceCreateBuffer(device, &.{
|
||||
.usage = c.WGPUBufferUsage_MapRead | c.WGPUBufferUsage_CopyDst,
|
||||
.size = BUF_BYTES,
|
||||
}) orelse return error.BufferStaging;
|
||||
|
||||
defer c.wgpuBufferRelease(buf_a);
|
||||
defer c.wgpuBufferRelease(buf_b);
|
||||
defer c.wgpuBufferRelease(buf_c);
|
||||
defer c.wgpuBufferRelease(buf_staging);
|
||||
|
||||
// Upload inputs
|
||||
c.wgpuQueueWriteBuffer(queue, buf_a, 0, &mat_a, BUF_BYTES);
|
||||
c.wgpuQueueWriteBuffer(queue, buf_b, 0, &mat_b, BUF_BYTES);
|
||||
|
||||
// 6. Shader module ─────────────────────────────────────────────────────────
|
||||
// ✅ New API (0.20+)
|
||||
var wgsl_src = c.WGPUShaderSourceWGSL{
|
||||
.chain = .{ .sType = c.WGPUSType_ShaderSourceWGSL },
|
||||
.code = sv(SHADER),
|
||||
};
|
||||
const shader = c.wgpuDeviceCreateShaderModule(device, &.{
|
||||
.nextInChain = @ptrCast(&wgsl_src),
|
||||
}) orelse return error.Shader;
|
||||
|
||||
// 7. Compute pipeline (layout auto-inferred from shader) ───────────────────
|
||||
// ✅
|
||||
const pipeline = c.wgpuDeviceCreateComputePipeline(device, &.{
|
||||
.compute = .{
|
||||
.module = shader,
|
||||
.entryPoint = sv("main"),
|
||||
},
|
||||
}) orelse return error.Pipeline;
|
||||
defer c.wgpuComputePipelineRelease(pipeline);
|
||||
|
||||
// 8. Bind group ────────────────────────────────────────────────────────────
|
||||
const bgl = c.wgpuComputePipelineGetBindGroupLayout(pipeline, 0);
|
||||
defer c.wgpuBindGroupLayoutRelease(bgl);
|
||||
|
||||
const entries = [_]c.WGPUBindGroupEntry{
|
||||
.{ .binding = 0, .buffer = buf_a, .offset = 0, .size = BUF_BYTES },
|
||||
.{ .binding = 1, .buffer = buf_b, .offset = 0, .size = BUF_BYTES },
|
||||
.{ .binding = 2, .buffer = buf_c, .offset = 0, .size = BUF_BYTES },
|
||||
};
|
||||
const bind_group = c.wgpuDeviceCreateBindGroup(device, &.{
|
||||
.layout = bgl,
|
||||
.entries = &entries,
|
||||
.entryCount = entries.len,
|
||||
}) orelse return error.BindGroup;
|
||||
defer c.wgpuBindGroupRelease(bind_group);
|
||||
|
||||
// 9. Encode compute pass + buffer copy ────────────────────────────────────
|
||||
const encoder = c.wgpuDeviceCreateCommandEncoder(device, null) orelse
|
||||
return error.Encoder;
|
||||
|
||||
const pass = c.wgpuCommandEncoderBeginComputePass(encoder, null);
|
||||
c.wgpuComputePassEncoderSetPipeline(pass, pipeline);
|
||||
c.wgpuComputePassEncoderSetBindGroup(pass, 0, bind_group, 0, null);
|
||||
// dispatch(1,1,1): one workgroup of size (4,4) covers the whole 4×4 matrix
|
||||
c.wgpuComputePassEncoderDispatchWorkgroups(pass, 1, 1, 1);
|
||||
c.wgpuComputePassEncoderEnd(pass);
|
||||
c.wgpuComputePassEncoderRelease(pass);
|
||||
|
||||
// Copy result buffer → CPU-readable staging buffer
|
||||
c.wgpuCommandEncoderCopyBufferToBuffer(encoder, buf_c, 0, buf_staging, 0, BUF_BYTES);
|
||||
|
||||
const cmdbuf = c.wgpuCommandEncoderFinish(encoder, null);
|
||||
defer c.wgpuCommandEncoderRelease(encoder);
|
||||
defer c.wgpuCommandBufferRelease(cmdbuf);
|
||||
|
||||
// 10. Submit ───────────────────────────────────────────────────────────────
|
||||
c.wgpuQueueSubmit(queue, 1, &cmdbuf);
|
||||
|
||||
// 11. Map staging buffer back to CPU ──────────────────────────────────────
|
||||
var mapped = false;
|
||||
_ = c.wgpuBufferMapAsync(
|
||||
buf_staging,
|
||||
c.WGPUMapMode_Read,
|
||||
0,
|
||||
BUF_BYTES,
|
||||
.{ .callback = onMapped, .userdata1 = &mapped },
|
||||
);
|
||||
// Poll the device until the async map completes
|
||||
while (!mapped) _ = c.wgpuDevicePoll(device, 1, null);
|
||||
|
||||
const ptr: [*]const f32 = @ptrCast(@alignCast(
|
||||
c.wgpuBufferGetConstMappedRange(buf_staging, 0, BUF_BYTES),
|
||||
));
|
||||
const result = ptr[0..N];
|
||||
|
||||
// 12. Print ────────────────────────────────────────────────────────────────
|
||||
std.debug.print("\nmat_a + mat_b ({d}×{d}):\n", .{ ROWS, COLS });
|
||||
for (0..ROWS) |r| {
|
||||
for (0..COLS) |col|
|
||||
std.debug.print("{d:6.0}", .{result[r * COLS + col]});
|
||||
std.debug.print("\n", .{});
|
||||
}
|
||||
// Expected output: every cell = 15.0
|
||||
|
||||
c.wgpuBufferUnmap(buf_staging);
|
||||
}
|
||||
137
src/wgpu.zig
137
src/wgpu.zig
@ -1,137 +0,0 @@
|
||||
const std = @import("std");
|
||||
const c = @cImport(@cInclude("wgpu.h"));
|
||||
|
||||
/// Replace enum_WGPURequestAdapterStatus
/// Zig-side mirror of the C `WGPURequestAdapterStatus` enum.
/// NOTE(review): no explicit tag values are given, so Zig numbers these from 0;
/// the C enum's numbering may differ — confirm before converting to/from the
/// C status values with @intFromEnum/@enumFromInt.
pub const RequestAdapterStatus = enum {
    Success,
    CallbackCancelled,
    Unavailable,
    Error,
    Force32,
};
|
||||
|
||||
/// Zig-side mirror of the C `WGPUBufferUsage` bit-flag constants (power-of-two
/// tags over a `u64`, matching the flag values used by wgpu).
/// NOTE(review): this file combines usages via the raw C constants with `|`;
/// if this enum is meant to be OR-ed as well, an integer flag set or a
/// `packed struct` of bools would be needed — confirm intended use.
pub const BufferUsage = enum(u64) {
    None = 0,
    MapRead = 1, // CPU can read after GPU finishes
    MapWrite = 2, // CPU can write while mapped
    CopySrc = 4, // can copy from GPU to staging.
    CopyDst = 8, // CPU can write to it
    Index = 16,
    Vertex = 32,
    Uniform = 64,
    Storage = 128,
    Indirect = 256,
    QueryResolve = 512,
};
|
||||
|
||||
/// Scratch state shared with the async request callbacks: `onAdapter` and
/// `onDevice` write their results here through the `userdata1` pointer.
const Ctx = struct {
    adapter: c.WGPUAdapter = null, // set by onAdapter on success; null until then
    device: c.WGPUDevice = null, // set by onDevice on success; null until then
};
|
||||
|
||||
/// `wgpuInstanceRequestAdapter` completion callback: on success, stores the
/// adapter into the `Ctx` passed through `userdata1`; on failure, logs the
/// status and leaves `ctx.adapter` as null.
fn onAdapter(
    status: c.WGPURequestAdapterStatus,
    adapter: c.WGPUAdapter,
    _: c.WGPUStringView,
    userdata1: ?*anyopaque,
    _: ?*anyopaque,
) callconv(.c) void {
    if (status == c.WGPURequestAdapterStatus_Success) {
        const result: *Ctx = @ptrCast(@alignCast(userdata1.?));
        result.adapter = adapter;
    } else {
        std.log.err("Adapter request failed (status={d})", .{status});
    }
}
|
||||
|
||||
/// `wgpuAdapterRequestDevice` completion callback: on success, stores the
/// device into the `Ctx` passed through `userdata1`; on failure, logs the
/// status and leaves `ctx.device` as null.
fn onDevice(
    status: c.WGPURequestDeviceStatus,
    device: c.WGPUDevice,
    _: c.WGPUStringView,
    userdata1: ?*anyopaque,
    _: ?*anyopaque,
) callconv(.c) void {
    if (status != c.WGPURequestDeviceStatus_Success) {
        std.log.err("Device request failed (status={d})", .{status});
        return;
    }
    const ctx: *Ctx = @ptrCast(@alignCast(userdata1.?));
    ctx.device = device;
    // FIX: removed leftover `std.debug.print("{?}", .{device})` — debug
    // residue that dumped a raw handle to stderr on every successful request.
}
|
||||
|
||||
/// `wgpuBufferMapAsync` completion callback: sets the caller's `bool`
/// (passed via `userdata1`) to whether the mapping succeeded.
/// NOTE(review): call sites that poll `while (!flag)` treat the flag as
/// "completed", but a *failed* map leaves it false forever, so they would
/// spin indefinitely. A tri-state (pending/ok/failed) would distinguish the
/// two — confirm against the polling loop before relying on this.
fn onMapped(
    status: c.WGPUMapAsyncStatus,
    _: c.WGPUStringView,
    userdata1: ?*anyopaque,
    _: ?*anyopaque,
) callconv(.c) void {
    const flag: *bool = @ptrCast(@alignCast(userdata1.?));
    flag.* = (status == c.WGPUMapAsyncStatus_Success);
}
|
||||
|
||||
/// Wrap a Zig byte slice as a non-owning WGPU string view.
/// The view borrows `s`; the slice must outlive every use of the view.
fn sv(s: []const u8) c.WGPUStringView {
    const view = c.WGPUStringView{
        .data = s.ptr,
        .length = s.len,
    };
    return view;
}
|
||||
|
||||
// This file is the struct: `AllocatorGPU` owns the core WGPU handles and a
// registry of buffers created through it.
const AllocatorGPU = @This();

allocator: std.mem.Allocator,
instance: *c.struct_WGPUInstanceImpl,
adapter: *c.struct_WGPUAdapterImpl,
// FIX: `device` and `queue` were declared as the (opaque) impl structs *by
// value*, which cannot compile and does not match what `init` stores. Use the
// C handle typedefs (nullable pointers), which the unwrapped pointers from
// `init` coerce into.
device: c.WGPUDevice,
queue: c.WGPUQueue,
ctx: Ctx,

// Hash *set* of live buffer handles created through this allocator
// (the `void` value type makes the map a set).
buffers: std.AutoHashMap(*c.struct_WGPUBufferImpl, void),
|
||||
|
||||
/// Create the GPU context: instance → adapter → device → queue.
/// Everything acquired here is released by `deinit`; on an error return,
/// anything acquired so far is released first (errdefer chain).
pub fn init(allocator: std.mem.Allocator) !AllocatorGPU {
    var self: AllocatorGPU = undefined;
    self.allocator = allocator;
    self.ctx = .{};
    // FIX: `AutoHashMap.init` cannot fail, so the original `try .init(...)`
    // did not compile (there is no error union to unwrap).
    self.buffers = .init(allocator);
    errdefer self.buffers.deinit();

    // 1. Instance ──────────────────────────────────────────────────────────────
    self.instance = c.wgpuCreateInstance(&std.mem.zeroes(c.WGPUInstanceDescriptor)) orelse
        return error.NoInstance;
    errdefer c.wgpuInstanceRelease(self.instance);

    // 2. Adapter (async → drive callbacks, then read the result out of ctx) ───
    _ = c.wgpuInstanceRequestAdapter(
        self.instance,
        &.{ .powerPreference = c.WGPUPowerPreference_HighPerformance },
        .{ .callback = onAdapter, .userdata1 = &self.ctx },
    );
    c.wgpuInstanceProcessEvents(self.instance); // drive callbacks
    self.adapter = self.ctx.adapter orelse return error.NoAdapter;
    errdefer c.wgpuAdapterRelease(self.adapter);

    // 3. Device ────────────────────────────────────────────────────────────────
    _ = c.wgpuAdapterRequestDevice(self.adapter, null, .{ .callback = onDevice, .userdata1 = &self.ctx });
    c.wgpuInstanceProcessEvents(self.instance);
    self.device = self.ctx.device orelse return error.NoDevice;
    errdefer c.wgpuDeviceRelease(self.device);

    // Queue belongs to the device; fetched once here, released in `deinit`.
    self.queue = c.wgpuDeviceGetQueue(self.device);

    return self;
}
|
||||
|
||||
/// Release every tracked buffer, the buffer registry, and the core handles
/// in reverse order of acquisition (queue → device → adapter → instance).
/// Takes `*AllocatorGPU` so the hash map can be freed; `var gloc = ...;
/// defer gloc.deinit();` callers are unaffected (auto-ref).
pub fn deinit(self: *AllocatorGPU) void {
    // Release buffers created via addBuff that are still alive.
    var it = self.buffers.keyIterator();
    while (it.next()) |buf| c.wgpuBufferRelease(buf.*);
    // FIX: the registry itself was never freed (heap leak).
    self.buffers.deinit();

    // FIX: the original released the instance *first* (with a stray mix of a
    // direct call and `defer`s); release children before their parents.
    c.wgpuQueueRelease(self.queue);
    c.wgpuDeviceRelease(self.device);
    c.wgpuAdapterRelease(self.adapter);
    c.wgpuInstanceRelease(self.instance);
}
|
||||
|
||||
/// Create a storage buffer sized for `len` elements of `T` on the device and
/// record it in `buffers` so `deinit` releases it.
/// `opt` is a reserved options bag (currently empty) for future per-buffer
/// settings (usage flags, labels, …) without breaking callers.
pub fn addBuff(
    self: *AllocatorGPU,
    comptime T: type,
    comptime len: comptime_int,
    comptime opt: struct {},
) !void {
    _ = opt;
    const buf = c.wgpuDeviceCreateBuffer(self.device, &.{
        .usage = c.WGPUBufferUsage_Storage | c.WGPUBufferUsage_CopyDst,
        // FIX: WGPU buffer sizes are in *bytes*; @bitSizeOf(T) gave bits
        // (8× too large). @sizeOf(T) is the byte size.
        .size = len * @sizeOf(T),
    }) orelse return error.Buffer;
    // Don't leak the GPU buffer if recording it fails below.
    errdefer c.wgpuBufferRelease(buf);
    // FIX: `put` returns an error union (OOM) that was silently discarded,
    // which does not compile; propagate it. Mutating the map also requires a
    // pointer receiver.
    try self.buffers.put(buf, {});
}
|
||||
Loading…
x
Reference in New Issue
Block a user