Basic working matrix addition

This commit is contained in:
adrien 2026-04-28 10:22:22 +02:00
commit 532e8c1897
10 changed files with 13911 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
.zig-cache
zig-out

56
build.zig Normal file
View File

@ -0,0 +1,56 @@
// build.zig
// zig build run
//
// Expects wgpu-native pre-built in libs/wgpu-native/:
// include/wgpu.h
// lib/libwgpu_native.a (or .so / .dylib / .dll)
//
// Download release: https://github.com/gfx-rs/wgpu-native/releases
// Pick the archive matching your OS/arch, e.g.:
//   wgpu-linux-x86_64-release.zip -> libwgpu_native.a + wgpu.h
// wgpu-macos-aarch64-release.zip
// wgpu-windows-x86_64-msvc-release.zip
const std = @import("std");
/// Build graph: one executable linking libc and the vendored wgpu-native
/// static library, plus a `run` step (`zig build run`).
pub fn build(b: *std.Build) void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    const exe = b.addExecutable(.{
        .name = "gpu_matrix_add",
        .root_module = b.createModule(.{
            .root_source_file = b.path("src/main.zig"),
            .target = target,
            .optimize = optimize,
            .link_libc = true,
        }),
    });

    // Pre-built wgpu-native: headers + static archive vendored under libs/.
    const mod = exe.root_module;
    mod.addIncludePath(b.path("libs/wgpu-native/include"));
    mod.addLibraryPath(b.path("libs/wgpu-native/lib"));
    mod.addObjectFile(b.path("libs/wgpu-native/lib/libwgpu_native.a"));

    // System frameworks/libraries each wgpu-native backend needs at link time.
    switch (target.result.os.tag) {
        .macos => {
            mod.linkFramework("Metal", .{});
            mod.linkFramework("QuartzCore", .{});
            mod.linkFramework("Foundation", .{});
            mod.linkFramework("CoreGraphics", .{});
        },
        .windows => {
            mod.linkSystemLibrary("d3d12", .{});
            mod.linkSystemLibrary("dxgi", .{});
            mod.linkSystemLibrary("user32", .{});
        },
        else => {
            mod.linkSystemLibrary("vulkan", .{});
            mod.linkSystemLibrary("gcc_s", .{});
        },
    }

    b.installArtifact(exe);

    // `zig build run`: install first, then execute the artifact.
    const run_step = b.addRunArtifact(exe);
    run_step.step.dependOn(b.getInstallStep());
    b.step("run", "Build and run").dependOn(&run_step.step);
}

81
build.zig.zon Normal file
View File

@ -0,0 +1,81 @@
.{
    // This is the default name used by packages depending on this one. For
    // example, when a user runs `zig fetch --save <url>`, this field is used
    // as the key in the `dependencies` table. Although the user can choose a
    // different name, most users will stick with this provided value.
    //
    // It is redundant to include "zig" in this name because it is already
    // within the Zig package namespace.
    .name = .zig_wgpu,
    // This is a [Semantic Version](https://semver.org/).
    // In a future version of Zig it will be used for package deduplication.
    .version = "0.0.0",
    // Together with name, this represents a globally unique package
    // identifier. This field is generated by the Zig toolchain when the
    // package is first created, and then *never changes*. This allows
    // unambiguous detection of one package being an updated version of
    // another.
    //
    // When forking a Zig project, this id should be regenerated (delete the
    // field and run `zig build`) if the upstream project is still maintained.
    // Otherwise, the fork is *hostile*, attempting to take control over the
    // original project's identity. Thus it is recommended to leave the comment
    // on the following line intact, so that it shows up in code reviews that
    // modify the field.
    .fingerprint = 0x5d0e853acbc0c2c6, // Changing this has security and trust implications.
    // Tracks the earliest Zig version that the package considers to be a
    // supported use case.
    .minimum_zig_version = "0.16.0",
    // This field is optional.
    // Each dependency must either provide a `url` and `hash`, or a `path`.
    // `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
    // Once all dependencies are fetched, `zig build` no longer requires
    // internet connectivity.
    //
    // NOTE: wgpu-native is NOT managed here — it is a pre-built binary
    // vendored under libs/wgpu-native/ and wired up directly in build.zig.
    .dependencies = .{
        // See `zig fetch --save <url>` for a command-line interface for adding dependencies.
        //.example = .{
        //    // When updating this field to a new URL, be sure to delete the corresponding
        //    // `hash`, otherwise you are communicating that you expect to find the old hash at
        //    // the new URL. If the contents of a URL change this will result in a hash mismatch
        //    // which will prevent zig from using it.
        //    .url = "https://example.com/foo.tar.gz",
        //
        //    // This is computed from the file contents of the directory of files that is
        //    // obtained after fetching `url` and applying the inclusion rules given by
        //    // `paths`.
        //    //
        //    // This field is the source of truth; packages do not come from a `url`; they
        //    // come from a `hash`. `url` is just one of many possible mirrors for how to
        //    // obtain a package matching this `hash`.
        //    //
        //    // Uses the [multihash](https://multiformats.io/multihash/) format.
        //    .hash = "...",
        //
        //    // When this is provided, the package is found in a directory relative to the
        //    // build root. In this case the package's hash is irrelevant and therefore not
        //    // computed. This field and `url` are mutually exclusive.
        //    .path = "foo",
        //
        //    // When this is set to `true`, a package is declared to be lazily
        //    // fetched. This makes the dependency only get fetched if it is
        //    // actually used.
        //    .lazy = false,
        //},
    },
    // Specifies the set of files and directories that are included in this package.
    // Only files and directories listed here are included in the `hash` that
    // is computed for this package. Only files listed here will remain on disk
    // when using the zig package manager. As a rule of thumb, one should list
    // files required for compilation plus any license(s).
    // Paths are relative to the build root. Use the empty string (`""`) to refer to
    // the build root itself.
    // A directory listed here means that all files within, recursively, are included.
    .paths = .{
        "build.zig",
        "build.zig.zon",
        "src",
        // For example...
        //"LICENSE",
        //"README.md",
    },
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1 @@
v29.0.0.0

238
src/main.zig Normal file
View File

@ -0,0 +1,238 @@
// Minimal WebGPU compute in Zig: element-wise matrix addition
// Uses wgpu-native C bindings.
// Build: see ../build.zig
//
// Data flow:
//   CPU (mat_a, mat_b) -> GPU storage buffers -> compute shader -> GPU buf_c
//   -> staging buffer (mapped) -> CPU read -> print
const std = @import("std");
// C bindings for wgpu-native; wgpu.h is vendored in libs/wgpu-native/include.
const c = @cImport(@cInclude("wgpu.h"));

// Config: matrix dimensions. One (4,4) workgroup covers the whole matrix.
const ROWS: u32 = 4;
const COLS: u32 = 4;
const N = ROWS * COLS; // 16 elements
const BUF_BYTES = N * @sizeOf(f32); // size of every GPU buffer, in bytes

// WGSL Compute Shader: mat_c[idx] = mat_a[idx] + mat_b[idx].
// workgroup_size(4,4) matches one full 4×4 matrix -> dispatch(1,1,1).
// Bindings 0..2 must match the bind group entries built in main().
const SHADER =
    \\@group(0) @binding(0) var<storage, read> mat_a : array<f32>;
    \\@group(0) @binding(1) var<storage, read> mat_b : array<f32>;
    \\@group(0) @binding(2) var<storage, read_write> mat_c : array<f32>;
    \\
    \\@compute @workgroup_size(4, 4)
    \\fn main(@builtin(global_invocation_id) gid : vec3<u32>) {
    \\    let idx = gid.y * 4u + gid.x;
    \\    if (idx < arrayLength(&mat_c)) {
    \\        mat_c[idx] = mat_a[idx] + mat_b[idx];
    \\    }
    \\}
;
// Callback state: shared mutable state handed (via userdata1) to the async
// request callbacks. A field stays null if its request failed or has not
// completed yet, which main() turns into NoAdapter/NoDevice errors.
const Ctx = struct {
    adapter: c.WGPUAdapter = null,
    device: c.WGPUDevice = null,
};
/// wgpuInstanceRequestAdapter callback: on success, stores the adapter on the
/// Ctx pointed to by userdata1; on failure, logs and leaves Ctx.adapter null.
fn onAdapter(
    status: c.WGPURequestAdapterStatus,
    adapter: c.WGPUAdapter,
    _: c.WGPUStringView,
    userdata1: ?*anyopaque,
    _: ?*anyopaque,
) callconv(.c) void {
    if (status == c.WGPURequestAdapterStatus_Success) {
        const state: *Ctx = @ptrCast(@alignCast(userdata1.?));
        state.adapter = adapter;
    } else {
        std.log.err("Adapter request failed (status={d})", .{status});
    }
}
/// wgpuAdapterRequestDevice callback: on success, stores the device on the
/// Ctx pointed to by userdata1; on failure, logs and leaves Ctx.device null.
fn onDevice(
    status: c.WGPURequestDeviceStatus,
    device: c.WGPUDevice,
    _: c.WGPUStringView,
    userdata1: ?*anyopaque,
    _: ?*anyopaque,
) callconv(.c) void {
    if (status == c.WGPURequestDeviceStatus_Success) {
        const state: *Ctx = @ptrCast(@alignCast(userdata1.?));
        state.device = device;
    } else {
        std.log.err("Device request failed (status={d})", .{status});
    }
}
/// wgpuBufferMapAsync callback. userdata1 points at the caller's `mapped`
/// bool; it is set to true exactly when the map succeeded (stays false on
/// failure, so the caller cannot distinguish "failed" from "not yet fired").
fn onMapped(
    status: c.WGPUMapAsyncStatus,
    _: c.WGPUStringView,
    userdata1: ?*anyopaque,
    _: ?*anyopaque,
) callconv(.c) void {
    const done: *bool = @ptrCast(@alignCast(userdata1.?));
    done.* = status == c.WGPUMapAsyncStatus_Success;
}
/// Wrap a Zig slice as a non-owning wgpu string view (pointer + length).
/// The slice must outlive every use of the returned view.
fn sv(s: []const u8) c.WGPUStringView {
    return c.WGPUStringView{
        .data = s.ptr,
        .length = s.len,
    };
}
// Main: create a device, run the addition shader once, read the result back.
pub fn main() !void {
    // 1. Instance: the wgpu entry point; every other object derives from it.
    const instance = c.wgpuCreateInstance(&std.mem.zeroes(c.WGPUInstanceDescriptor)) orelse
        return error.NoInstance;
    defer c.wgpuInstanceRelease(instance);

    // 2. Adapter (async poll).
    // NOTE(review): a single ProcessEvents call assumes the backend fires the
    // request callback promptly — wgpu-native does today; confirm if the
    // backend ever changes.
    var ctx = Ctx{};
    _ = c.wgpuInstanceRequestAdapter(
        instance,
        &.{ .powerPreference = c.WGPUPowerPreference_HighPerformance },
        .{ .callback = onAdapter, .userdata1 = &ctx },
    );
    c.wgpuInstanceProcessEvents(instance); // drive callbacks
    const adapter = ctx.adapter orelse return error.NoAdapter;
    defer c.wgpuAdapterRelease(adapter);

    // 3. Device + queue.
    _ = c.wgpuAdapterRequestDevice(adapter, null, .{ .callback = onDevice, .userdata1 = &ctx });
    c.wgpuInstanceProcessEvents(instance);
    const device = ctx.device orelse return error.NoDevice;
    defer c.wgpuDeviceRelease(device);
    const queue = c.wgpuDeviceGetQueue(device);
    defer c.wgpuQueueRelease(queue);

    // 4. Input data:
    //    mat_a[i] = i        (0 .. 15)
    //    mat_b[i] = 15 - i   => every element of mat_c should equal 15
    var mat_a: [N]f32 = undefined;
    var mat_b: [N]f32 = undefined;
    for (0..N) |i| {
        mat_a[i] = @floatFromInt(i);
        mat_b[i] = @floatFromInt(N - 1 - i);
    }

    // 5. GPU buffers.
    // FIX: each release is deferred immediately after its own creation, so a
    // failure creating a later buffer no longer leaks the earlier ones
    // (previously all four defers sat after the last `orelse return`).
    const buf_a = c.wgpuDeviceCreateBuffer(device, &.{
        .usage = c.WGPUBufferUsage_Storage | c.WGPUBufferUsage_CopyDst,
        .size = BUF_BYTES,
    }) orelse return error.BufferA;
    defer c.wgpuBufferRelease(buf_a);
    const buf_b = c.wgpuDeviceCreateBuffer(device, &.{
        .usage = c.WGPUBufferUsage_Storage | c.WGPUBufferUsage_CopyDst,
        .size = BUF_BYTES,
    }) orelse return error.BufferB;
    defer c.wgpuBufferRelease(buf_b);
    // buf_c: GPU-only result; staging: CPU-readable copy
    const buf_c = c.wgpuDeviceCreateBuffer(device, &.{
        .usage = c.WGPUBufferUsage_Storage | c.WGPUBufferUsage_CopySrc,
        .size = BUF_BYTES,
    }) orelse return error.BufferC;
    defer c.wgpuBufferRelease(buf_c);
    const buf_staging = c.wgpuDeviceCreateBuffer(device, &.{
        .usage = c.WGPUBufferUsage_MapRead | c.WGPUBufferUsage_CopyDst,
        .size = BUF_BYTES,
    }) orelse return error.BufferStaging;
    defer c.wgpuBufferRelease(buf_staging);

    // Upload inputs
    c.wgpuQueueWriteBuffer(queue, buf_a, 0, &mat_a, BUF_BYTES);
    c.wgpuQueueWriteBuffer(queue, buf_b, 0, &mat_b, BUF_BYTES);

    // 6. Shader module — WGSL source passed via the chained-struct API (0.20+).
    var wgsl_src = c.WGPUShaderSourceWGSL{
        .chain = .{ .sType = c.WGPUSType_ShaderSourceWGSL },
        .code = sv(SHADER),
    };
    const shader = c.wgpuDeviceCreateShaderModule(device, &.{
        .nextInChain = @ptrCast(&wgsl_src),
    }) orelse return error.Shader;
    // FIX: the shader module was never released, unlike every other wgpu object.
    defer c.wgpuShaderModuleRelease(shader);

    // 7. Compute pipeline (bind group layout auto-inferred from the shader).
    const pipeline = c.wgpuDeviceCreateComputePipeline(device, &.{
        .compute = .{
            .module = shader,
            .entryPoint = sv("main"),
        },
    }) orelse return error.Pipeline;
    defer c.wgpuComputePipelineRelease(pipeline);

    // 8. Bind group: bindings 0..2 match @binding(0..2) in the WGSL source.
    const bgl = c.wgpuComputePipelineGetBindGroupLayout(pipeline, 0);
    defer c.wgpuBindGroupLayoutRelease(bgl);
    const entries = [_]c.WGPUBindGroupEntry{
        .{ .binding = 0, .buffer = buf_a, .offset = 0, .size = BUF_BYTES },
        .{ .binding = 1, .buffer = buf_b, .offset = 0, .size = BUF_BYTES },
        .{ .binding = 2, .buffer = buf_c, .offset = 0, .size = BUF_BYTES },
    };
    const bind_group = c.wgpuDeviceCreateBindGroup(device, &.{
        .layout = bgl,
        .entries = &entries,
        .entryCount = entries.len,
    }) orelse return error.BindGroup;
    defer c.wgpuBindGroupRelease(bind_group);

    // 9. Encode compute pass + buffer copy
    const encoder = c.wgpuDeviceCreateCommandEncoder(device, null) orelse
        return error.Encoder;
    defer c.wgpuCommandEncoderRelease(encoder);
    const pass = c.wgpuCommandEncoderBeginComputePass(encoder, null);
    c.wgpuComputePassEncoderSetPipeline(pass, pipeline);
    c.wgpuComputePassEncoderSetBindGroup(pass, 0, bind_group, 0, null);
    // dispatch(1,1,1): one workgroup of size (4,4) covers the whole 4×4 matrix
    c.wgpuComputePassEncoderDispatchWorkgroups(pass, 1, 1, 1);
    c.wgpuComputePassEncoderEnd(pass);
    c.wgpuComputePassEncoderRelease(pass);
    // Copy result buffer -> CPU-readable staging buffer
    c.wgpuCommandEncoderCopyBufferToBuffer(encoder, buf_c, 0, buf_staging, 0, BUF_BYTES);
    const cmdbuf = c.wgpuCommandEncoderFinish(encoder, null);
    defer c.wgpuCommandBufferRelease(cmdbuf);

    // 10. Submit
    c.wgpuQueueSubmit(queue, 1, &cmdbuf);

    // 11. Map staging buffer back to CPU.
    // FIX: bound the poll loop. onMapped leaves `mapped` false when the map
    // fails, which previously spun this loop forever; now we give up after a
    // generous number of blocking polls.
    var mapped = false;
    _ = c.wgpuBufferMapAsync(
        buf_staging,
        c.WGPUMapMode_Read,
        0,
        BUF_BYTES,
        .{ .callback = onMapped, .userdata1 = &mapped },
    );
    var polls: u32 = 0;
    while (!mapped) : (polls += 1) {
        if (polls >= 1024) return error.MapFailed;
        _ = c.wgpuDevicePoll(device, 1, null); // wgpu-native extension: blocking poll
    }
    // FIX: the mapped range may be null; don't @ptrCast a null pointer.
    const raw = c.wgpuBufferGetConstMappedRange(buf_staging, 0, BUF_BYTES) orelse
        return error.MapFailed;
    const ptr: [*]const f32 = @ptrCast(@alignCast(raw));
    const result = ptr[0..N];

    // 12. Print. Expected output: every cell = 15.0.
    std.debug.print("\nmat_a + mat_b ({d}×{d}):\n", .{ ROWS, COLS });
    for (0..ROWS) |r| {
        for (0..COLS) |col|
            std.debug.print("{d:6.0}", .{result[r * COLS + col]});
        std.debug.print("\n", .{});
    }
    c.wgpuBufferUnmap(buf_staging);
}