Better example (no Vec)

This commit is contained in:
adrien 2026-05-18 16:38:30 +02:00
parent 9dbd668de9
commit c62bead6e9

View File

@ -1,92 +1,58 @@
/// This is a fully self-contained example.
/// It sets up a simple f16 vector and performs an add operation on it.
const std = @import("std"); const std = @import("std");
const GpuDevice = @import("GpuDevice.zig"); const GpuDevice = @import("GpuDevice.zig");
const GpuArena = @import("GpuArena.zig"); const GpuArena = @import("GpuArena.zig");
const GpuAllocator = @import("GpuAllocator.zig");
const GpuBuffer = @import("GpuBuffer.zig"); const GpuBuffer = @import("GpuBuffer.zig");
const GpuProcess = @import("GpuProcess.zig"); const GpuProcess = @import("GpuProcess.zig");
/// Example entry point: fills two 16-element f16 slices on the CPU
/// (`data_a[i] = i`, `data_b[i] = 15 - i`), loads them into GPU buffers, runs the
/// pipeline built from `shaders/add.wgsl` on them, reads the result back into
/// CPU memory and prints it.
/// NOTE(review): most lines below carry two statements — this looks like the
/// old (`Vec`-based) and new (raw `GpuBuffer`) revisions of the example merged
/// side by side; confirm which revision should be kept.
pub fn main(init: std.process.Init) !void { pub fn main(init: std.process.Init) !void {
const allocator = init.gpa; const allocator = init.gpa;
// Open the GPU device // 1. Open the GPU device
const device = try GpuDevice.init(.{}); const device = try GpuDevice.init(.{});
defer device.deinit(); defer device.deinit();
// Create a GPU Arena to hold GPU memory // 2. Create a GPU Arena to manage VRAM
var grena = GpuArena.init(allocator, device); var grena = GpuArena.init(allocator, device);
defer grena.deinit(); defer grena.deinit();
const gloc = grena.gpuAllocator(); const gloc = grena.gpuAllocator();
// Create a GPU process that loads the pipeline/shader // 3. Load the WGSL compute pipeline
const add = try GpuProcess.init(device, @embedFile("shaders/add.wgsl")); const add_process = try GpuProcess.init(device, @embedFile("shaders/add.wgsl"));
defer add.deinit(); defer add_process.deinit();
// Allocate CPU memory // 4. Set up CPU data
const data_a = try allocator.alloc(f16, 16); const len: usize = 16;
const data_a = try allocator.alloc(f16, len);
defer allocator.free(data_a); defer allocator.free(data_a);
const data_b = try allocator.alloc(f16, 16); const data_b = try allocator.alloc(f16, len);
defer allocator.free(data_b); defer allocator.free(data_b);
// data_a counts up from 0 and data_b counts down from 15, so data_a[i] + data_b[i] == 15 for every i.
for (0..16) |i| { for (0..len) |i| {
data_a[i] = @floatFromInt(i); data_a[i] = @floatFromInt(i);
data_b[i] = @floatFromInt(16 - 1 - i); data_b[i] = @floatFromInt(len - 1 - i);
} }
// Allocate GPU memory (Vec.deinit isn't necessary because grena releases it on deinit) // 5. Initialize raw GPU Buffers
const a = try Vec.initZero(gloc, 16); // We pass the EnumSet inline using `.initMany` since the Enum itself isn't exported
const b = try Vec.initZero(gloc, 16); const byte_size = len * @sizeOf(f16);
const buf_a = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc }));
const buf_b = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc }));
const buf_out = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc }));
// Load CPU -> GPU // Note: The buffers are safely tied to the GpuArena which will automatically
try a.load(data_a); // release them at the end. You can also manually call buf_x.deinit() if desired.
try b.load(data_b);
// Run GPU Pipeline // 6. Transfer data from CPU slices to GPU Buffers
const sum = try a.run(gloc, b, add); try buf_a.load(f16, data_a);
try buf_b.load(f16, data_b);
// Read GPU -> CPU // 7. Dispatch the Compute Process
const out = try sum.read(allocator); // We pass the data type (f16) to allow GpuProcess to calculate chunks correctly
try add_process.run(gloc, f16, buf_a, buf_b, buf_out);
// 8. Map and copy the resulting buffer back to the CPU
const out = try buf_out.read(allocator, f16);
defer allocator.free(out); defer allocator.free(out);
std.debug.print("{any}\n", .{out}); std.debug.print("Result: {any}\n", .{out});
} }
/// Minimal f16 vector: one `GpuBuffer` plus the element count it was sized for.
const Vec = struct {
    const Self = @This();

    /// GPU buffer holding the elements.
    buf: GpuBuffer,
    /// Number of f16 elements the buffer was sized for.
    len: usize,

    /// Creates a vector whose buffer is sized for `len` f16 values and is usable
    /// as `Storage`, `CopyDst` and `CopySrc`. The buffer is obtained through `gloc`.
    /// NOTE(review): the name implies zero-filled contents; whether that holds
    /// depends on `GpuBuffer.init` — confirm.
    pub fn initZero(gloc: GpuAllocator, len: usize) !Self {
        const byte_size = len * @sizeOf(f16);
        const storage = try GpuBuffer.init(
            gloc,
            byte_size,
            .initMany(&.{ .Storage, .CopyDst, .CopySrc }),
        );
        return Self{ .buf = storage, .len = len };
    }

    /// Releases the underlying buffer by forwarding to `GpuBuffer.deinit`.
    pub fn deinit(self: Self) void {
        self.buf.deinit();
    }

    /// Loads `data` into the buffer as f16 values; any error from
    /// `GpuBuffer.load` is propagated.
    pub fn load(self: Self, data: []const f16) !void {
        return self.buf.load(f16, data);
    }

    /// Reads the buffer back as an f16 slice, passing `alloc` to `GpuBuffer.read`.
    /// NOTE(review): presumably the caller owns the returned slice and frees it
    /// with `alloc` — confirm against `GpuBuffer.read`.
    pub fn read(self: Self, alloc: std.mem.Allocator) ![]f16 {
        const host_copy = try self.buf.read(alloc, f16);
        return host_copy;
    }

    /// Runs `process` with `self` and `other` as inputs and a freshly created
    /// vector of the same length as output, then returns that output vector.
    /// Asserts that both operands have the same `len`.
    pub fn run(self: Self, gloc: GpuAllocator, other: Self, process: GpuProcess) !Self {
        std.debug.assert(other.len == self.len);

        const output = try initZero(gloc, self.len);
        // Only reached when the dispatch below fails: drop the output buffer
        // before the error propagates.
        errdefer output.deinit();

        try process.run(gloc, f16, self.buf, other.buf, output.buf);
        return output;
    }
};