Basic working matrix addition
This commit is contained in:
commit
532e8c1897
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
.zig-cache
|
||||
zig-out
|
||||
56
build.zig
Normal file
56
build.zig
Normal file
@ -0,0 +1,56 @@
|
||||
// build.zig
//
// Usage: zig build run
//
// Expects a pre-built wgpu-native release unpacked into libs/wgpu-native/:
//   include/wgpu.h
//   lib/libwgpu_native.a (or .so / .dylib / .dll)
//
// Download a release from https://github.com/gfx-rs/wgpu-native/releases
// and pick the archive matching your OS/arch, e.g.:
//   wgpu-linux-x86_64-release.zip → libwgpu_native.a + wgpu.h
//   wgpu-macos-aarch64-release.zip
//   wgpu-windows-x86_64-msvc-release.zip

const std = @import("std");

pub fn build(b: *std.Build) void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    // Single executable; libc is required because wgpu-native is a C library.
    const exe = b.addExecutable(.{
        .name = "gpu_matrix_add",
        .root_module = b.createModule(.{
            .root_source_file = b.path("src/main.zig"),
            .link_libc = true,
            .target = target,
            .optimize = optimize,
        }),
    });

    // wgpu-native headers + pre-built static library.
    const mod = exe.root_module;
    mod.addIncludePath(b.path("libs/wgpu-native/include"));
    mod.addLibraryPath(b.path("libs/wgpu-native/lib"));
    mod.addObjectFile(b.path("libs/wgpu-native/lib/libwgpu_native.a"));

    // Platform-specific system frameworks/libraries needed by wgpu-native.
    switch (target.result.os.tag) {
        .macos => {
            mod.linkFramework("Metal", .{});
            mod.linkFramework("QuartzCore", .{});
            mod.linkFramework("Foundation", .{});
            mod.linkFramework("CoreGraphics", .{});
        },
        .windows => {
            mod.linkSystemLibrary("d3d12", .{});
            mod.linkSystemLibrary("dxgi", .{});
            mod.linkSystemLibrary("user32", .{});
        },
        else => {
            // Linux and other Unix-likes: Vulkan backend.
            mod.linkSystemLibrary("vulkan", .{});
            mod.linkSystemLibrary("gcc_s", .{});
        },
    }

    b.installArtifact(exe);

    // `zig build run` — install first, then execute the artifact.
    const run = b.addRunArtifact(exe);
    run.step.dependOn(b.getInstallStep());
    b.step("run", "Build and run").dependOn(&run.step);
}
|
||||
81
build.zig.zon
Normal file
81
build.zig.zon
Normal file
@ -0,0 +1,81 @@
|
||||
.{
    // Package name: the default key other packages use in their
    // `dependencies` table when running `zig fetch --save <url>`.
    // "zig" is omitted because the package already lives in the Zig namespace.
    .name = .zig_wgpu,
    // Semantic version (https://semver.org/); a future Zig will use it for
    // package deduplication.
    .version = "0.0.0",
    // Globally unique package identifier, generated by the toolchain when the
    // package was first created, and then *never changed*. It allows
    // unambiguous detection of one package being an updated version of
    // another. When forking a still-maintained upstream, regenerate it
    // (delete the field and run `zig build`); keeping the upstream id in a
    // fork is a hostile takeover of the original project's identity. Leave
    // the trailing comment intact so edits show up in code review.
    .fingerprint = 0x5d0e853acbc0c2c6, // Changing this has security and trust implications.
    // Earliest Zig version this package supports.
    .minimum_zig_version = "0.16.0",
    // Dependencies: each entry must provide either `url` + `hash`, or `path`.
    // `zig build --fetch` downloads everything recursively; once fetched,
    // `zig build` needs no network access.
    .dependencies = .{
        // Template for a fetched dependency (see `zig fetch --save <url>`):
        //.example = .{
        //    // When updating `url`, delete the old `hash` so it is
        //    // recomputed — a stale hash against new contents is a mismatch
        //    // error. The `hash` (multihash format, computed over the files
        //    // selected by `paths`) is the source of truth; `url` is merely
        //    // one mirror for obtaining it.
        //    .url = "https://example.com/foo.tar.gz",
        //    .hash = "...",
        //    // Alternatively, a directory relative to the build root.
        //    // Mutually exclusive with `url`; no hash is computed.
        //    .path = "foo",
        //    // When true, the dependency is fetched only if actually used.
        //    .lazy = false,
        //},
    },
    // Files and directories included in this package — and in the `hash`
    // computed for it. Only the paths listed here survive on disk under the
    // package manager; list what compilation needs plus any license(s).
    // Paths are relative to the build root ("" means the root itself); a
    // directory includes everything beneath it recursively.
    .paths = .{
        "build.zig",
        "build.zig.zon",
        "src",
        // For example...
        //"LICENSE",
        //"README.md",
    },
}
|
||||
6747
libs/wgpu-native/include/webgpu.h
Normal file
6747
libs/wgpu-native/include/webgpu.h
Normal file
File diff suppressed because it is too large
Load Diff
1332
libs/wgpu-native/include/wgpu.h
Normal file
1332
libs/wgpu-native/include/wgpu.h
Normal file
File diff suppressed because it is too large
Load Diff
BIN
libs/wgpu-native/lib/libwgpu_native.a
Normal file
BIN
libs/wgpu-native/lib/libwgpu_native.a
Normal file
Binary file not shown.
BIN
libs/wgpu-native/lib/libwgpu_native.so
Executable file
BIN
libs/wgpu-native/lib/libwgpu_native.so
Executable file
Binary file not shown.
5454
libs/wgpu-native/wgpu-native-meta/webgpu.yml
Normal file
5454
libs/wgpu-native/wgpu-native-meta/webgpu.yml
Normal file
File diff suppressed because it is too large
Load Diff
1
libs/wgpu-native/wgpu-native-meta/wgpu-native-git-tag
Normal file
1
libs/wgpu-native/wgpu-native-meta/wgpu-native-git-tag
Normal file
@ -0,0 +1 @@
|
||||
v29.0.0.0
|
||||
238
src/main.zig
Normal file
238
src/main.zig
Normal file
@ -0,0 +1,238 @@
|
||||
// Minimal WebGPU compute in Zig: element-wise matrix addition.
// Uses the wgpu-native C bindings. Build: see ../build.zig
//
// Data flow:
//   CPU (mat_a, mat_b) → GPU storage buffers → compute shader → GPU buf_c
//   → staging buffer (mapped) → CPU read → print

const std = @import("std");
// C bindings for wgpu-native; the header is found via build.zig's include path.
const c = @cImport(@cInclude("wgpu.h"));

// ── Config ────────────────────────────────────────────────────────────────────
// Matrix dimensions. The WGSL below hard-codes the matching values
// (workgroup_size(4, 4) and the `4u` row stride), so changing ROWS/COLS
// requires updating the shader too.
const ROWS: u32 = 4;
const COLS: u32 = 4;
const N = ROWS * COLS; // 16 elements
const BUF_BYTES = N * @sizeOf(f32); // size of one matrix, in bytes

// ── WGSL Compute Shader ───────────────────────────────────────────────────────
// One invocation per element; workgroup_size(4,4) matches one full 4×4
// matrix, so a single dispatch(1,1,1) covers everything. The arrayLength
// guard makes out-of-range invocations (if any) a no-op.
const SHADER =
    \\@group(0) @binding(0) var<storage, read> mat_a : array<f32>;
    \\@group(0) @binding(1) var<storage, read> mat_b : array<f32>;
    \\@group(0) @binding(2) var<storage, read_write> mat_c : array<f32>;
    \\
    \\@compute @workgroup_size(4, 4)
    \\fn main(@builtin(global_invocation_id) gid : vec3<u32>) {
    \\ let idx = gid.y * 4u + gid.x;
    \\ if (idx < arrayLength(&mat_c)) {
    \\ mat_c[idx] = mat_a[idx] + mat_b[idx];
    \\ }
    \\}
;
|
||||
|
||||
// ── Callback state ────────────────────────────────────────────────────────────
/// Shared state filled in by the async request callbacks below.
/// A field left `null` means the corresponding request has not succeeded.
const Ctx = struct {
    adapter: c.WGPUAdapter = null, // set by onAdapter on success
    device: c.WGPUDevice = null, // set by onDevice on success
};
|
||||
|
||||
/// wgpuInstanceRequestAdapter callback: on success, stores the adapter into
/// the Ctx passed via userdata1; on failure, logs and leaves it null.
fn onAdapter(
    status: c.WGPURequestAdapterStatus,
    adapter: c.WGPUAdapter,
    _: c.WGPUStringView,
    userdata1: ?*anyopaque,
    _: ?*anyopaque,
) callconv(.c) void {
    if (status == c.WGPURequestAdapterStatus_Success) {
        const state: *Ctx = @ptrCast(@alignCast(userdata1.?));
        state.adapter = adapter;
    } else {
        std.log.err("Adapter request failed (status={d})", .{status});
    }
}
|
||||
|
||||
/// wgpuAdapterRequestDevice callback: on success, stores the device into
/// the Ctx passed via userdata1; on failure, logs and leaves it null.
fn onDevice(
    status: c.WGPURequestDeviceStatus,
    device: c.WGPUDevice,
    _: c.WGPUStringView,
    userdata1: ?*anyopaque,
    _: ?*anyopaque,
) callconv(.c) void {
    if (status == c.WGPURequestDeviceStatus_Success) {
        const state: *Ctx = @ptrCast(@alignCast(userdata1.?));
        state.device = device;
    } else {
        std.log.err("Device request failed (status={d})", .{status});
    }
}
|
||||
|
||||
/// wgpuBufferMapAsync callback: writes the success/failure of the map into
/// the bool passed via userdata1 (true only on WGPUMapAsyncStatus_Success).
fn onMapped(
    status: c.WGPUMapAsyncStatus,
    _: c.WGPUStringView,
    userdata1: ?*anyopaque,
    _: ?*anyopaque,
) callconv(.c) void {
    const done: *bool = @ptrCast(@alignCast(userdata1.?));
    done.* = status == c.WGPUMapAsyncStatus_Success;
}
|
||||
|
||||
/// Wraps a Zig slice as a WGPUStringView (pointer + explicit length).
fn sv(s: []const u8) c.WGPUStringView {
    return c.WGPUStringView{ .data = s.ptr, .length = s.len };
}
|
||||
|
||||
// ── Main ──────────────────────────────────────────────────────────────────────
/// Runs one 4×4 element-wise matrix addition on the GPU and prints the result.
/// Returns an error (error.No*, error.Buffer*, …) if any wgpu object cannot
/// be created; every created wgpu object is released via `defer` on all exit
/// paths.
pub fn main() !void {
    // 1. Instance ──────────────────────────────────────────────────────────────
    const instance = c.wgpuCreateInstance(&std.mem.zeroes(c.WGPUInstanceDescriptor)) orelse
        return error.NoInstance;
    defer c.wgpuInstanceRelease(instance);

    // 2. Adapter (async → poll) ────────────────────────────────────────────────
    var ctx = Ctx{};
    _ = c.wgpuInstanceRequestAdapter(
        instance,
        &.{ .powerPreference = c.WGPUPowerPreference_HighPerformance },
        .{ .callback = onAdapter, .userdata1 = &ctx },
    );
    c.wgpuInstanceProcessEvents(instance); // drive callbacks
    const adapter = ctx.adapter orelse return error.NoAdapter;
    defer c.wgpuAdapterRelease(adapter);

    // 3. Device ────────────────────────────────────────────────────────────────
    _ = c.wgpuAdapterRequestDevice(adapter, null, .{ .callback = onDevice, .userdata1 = &ctx });
    c.wgpuInstanceProcessEvents(instance);
    const device = ctx.device orelse return error.NoDevice;
    defer c.wgpuDeviceRelease(device);

    const queue = c.wgpuDeviceGetQueue(device);
    defer c.wgpuQueueRelease(queue);

    // 4. Input data ────────────────────────────────────────────────────────────
    // mat_a[i] = i (0 … 15); mat_b[i] = 15 − i
    // → every element of mat_c should equal 15.
    var mat_a: [N]f32 = undefined;
    var mat_b: [N]f32 = undefined;
    for (0..N) |i| {
        mat_a[i] = @floatFromInt(i);
        mat_b[i] = @floatFromInt(N - 1 - i);
    }

    // 5. GPU Buffers ───────────────────────────────────────────────────────────
    // FIX: each release is registered immediately after the corresponding
    // creation; previously all four defers came after all four creations, so
    // an early `orelse return` (e.g. buf_b failing) leaked the buffers
    // created before it.
    const buf_a = c.wgpuDeviceCreateBuffer(device, &.{
        .usage = c.WGPUBufferUsage_Storage | c.WGPUBufferUsage_CopyDst,
        .size = BUF_BYTES,
    }) orelse return error.BufferA;
    defer c.wgpuBufferRelease(buf_a);

    const buf_b = c.wgpuDeviceCreateBuffer(device, &.{
        .usage = c.WGPUBufferUsage_Storage | c.WGPUBufferUsage_CopyDst,
        .size = BUF_BYTES,
    }) orelse return error.BufferB;
    defer c.wgpuBufferRelease(buf_b);

    // buf_c: GPU-only result; staging: CPU-readable copy.
    const buf_c = c.wgpuDeviceCreateBuffer(device, &.{
        .usage = c.WGPUBufferUsage_Storage | c.WGPUBufferUsage_CopySrc,
        .size = BUF_BYTES,
    }) orelse return error.BufferC;
    defer c.wgpuBufferRelease(buf_c);

    const buf_staging = c.wgpuDeviceCreateBuffer(device, &.{
        .usage = c.WGPUBufferUsage_MapRead | c.WGPUBufferUsage_CopyDst,
        .size = BUF_BYTES,
    }) orelse return error.BufferStaging;
    defer c.wgpuBufferRelease(buf_staging);

    // Upload inputs
    c.wgpuQueueWriteBuffer(queue, buf_a, 0, &mat_a, BUF_BYTES);
    c.wgpuQueueWriteBuffer(queue, buf_b, 0, &mat_b, BUF_BYTES);

    // 6. Shader module ─────────────────────────────────────────────────────────
    var wgsl_src = c.WGPUShaderSourceWGSL{
        .chain = .{ .sType = c.WGPUSType_ShaderSourceWGSL },
        .code = sv(SHADER),
    };
    const shader = c.wgpuDeviceCreateShaderModule(device, &.{
        .nextInChain = @ptrCast(&wgsl_src),
    }) orelse return error.Shader;
    // FIX: the shader module was never released (reference leak).
    defer c.wgpuShaderModuleRelease(shader);

    // 7. Compute pipeline (layout auto-inferred from shader) ───────────────────
    const pipeline = c.wgpuDeviceCreateComputePipeline(device, &.{
        .compute = .{
            .module = shader,
            .entryPoint = sv("main"),
        },
    }) orelse return error.Pipeline;
    defer c.wgpuComputePipelineRelease(pipeline);

    // 8. Bind group ────────────────────────────────────────────────────────────
    const bgl = c.wgpuComputePipelineGetBindGroupLayout(pipeline, 0);
    defer c.wgpuBindGroupLayoutRelease(bgl);

    const entries = [_]c.WGPUBindGroupEntry{
        .{ .binding = 0, .buffer = buf_a, .offset = 0, .size = BUF_BYTES },
        .{ .binding = 1, .buffer = buf_b, .offset = 0, .size = BUF_BYTES },
        .{ .binding = 2, .buffer = buf_c, .offset = 0, .size = BUF_BYTES },
    };
    const bind_group = c.wgpuDeviceCreateBindGroup(device, &.{
        .layout = bgl,
        .entries = &entries,
        .entryCount = entries.len,
    }) orelse return error.BindGroup;
    defer c.wgpuBindGroupRelease(bind_group);

    // 9. Encode compute pass + buffer copy ────────────────────────────────────
    const encoder = c.wgpuDeviceCreateCommandEncoder(device, null) orelse
        return error.Encoder;
    defer c.wgpuCommandEncoderRelease(encoder);

    const pass = c.wgpuCommandEncoderBeginComputePass(encoder, null);
    c.wgpuComputePassEncoderSetPipeline(pass, pipeline);
    c.wgpuComputePassEncoderSetBindGroup(pass, 0, bind_group, 0, null);
    // dispatch(1,1,1): one workgroup of size (4,4) covers the whole 4×4 matrix
    c.wgpuComputePassEncoderDispatchWorkgroups(pass, 1, 1, 1);
    c.wgpuComputePassEncoderEnd(pass);
    c.wgpuComputePassEncoderRelease(pass);

    // Copy result buffer → CPU-readable staging buffer
    c.wgpuCommandEncoderCopyBufferToBuffer(encoder, buf_c, 0, buf_staging, 0, BUF_BYTES);

    const cmdbuf = c.wgpuCommandEncoderFinish(encoder, null);
    defer c.wgpuCommandBufferRelease(cmdbuf);

    // 10. Submit ───────────────────────────────────────────────────────────────
    c.wgpuQueueSubmit(queue, 1, &cmdbuf);

    // 11. Map staging buffer back to CPU ──────────────────────────────────────
    var mapped = false;
    _ = c.wgpuBufferMapAsync(
        buf_staging,
        c.WGPUMapMode_Read,
        0,
        BUF_BYTES,
        .{ .callback = onMapped, .userdata1 = &mapped },
    );
    // Poll the device until the async map completes.
    // NOTE(review): onMapped sets `mapped` only on success, so a *failed* map
    // would spin here forever — fixing that needs a tri-state flag in the
    // callback; confirm and address separately.
    while (!mapped) _ = c.wgpuDevicePoll(device, 1, null);

    // FIX: check the mapped range for null instead of @ptrCast-ing it
    // unconditionally — wgpuBufferGetConstMappedRange may return null, and the
    // original would then dereference a null pointer (UB).
    const raw = c.wgpuBufferGetConstMappedRange(buf_staging, 0, BUF_BYTES) orelse
        return error.MapRange;
    const ptr: [*]const f32 = @ptrCast(@alignCast(raw));
    const result = ptr[0..N];

    // 12. Print ────────────────────────────────────────────────────────────────
    std.debug.print("\nmat_a + mat_b ({d}×{d}):\n", .{ ROWS, COLS });
    for (0..ROWS) |r| {
        for (0..COLS) |col|
            std.debug.print("{d:6.0}", .{result[r * COLS + col]});
        std.debug.print("\n", .{});
    }
    // Expected output: every cell = 15.0

    c.wgpuBufferUnmap(buf_staging);
}
|
||||
Loading…
x
Reference in New Issue
Block a user