Working self contained rendering (simple circle)
This commit is contained in:
parent
545c4b98e9
commit
af210e2fb2
35
build.zig
35
build.zig
@ -56,39 +56,4 @@ pub fn build(b: *std.Build) !void {
|
|||||||
const run_cmd = b.addRunArtifact(exe);
|
const run_cmd = b.addRunArtifact(exe);
|
||||||
run_step.dependOn(&run_cmd.step);
|
run_step.dependOn(&run_cmd.step);
|
||||||
}
|
}
|
||||||
|
|
||||||
const exe = b.addExecutable(.{
|
|
||||||
.name = "circle",
|
|
||||||
.root_module = b.createModule(.{
|
|
||||||
.root_source_file = b.path("src/circle.zig"),
|
|
||||||
.target = target,
|
|
||||||
.optimize = optimize,
|
|
||||||
.imports = &.{},
|
|
||||||
}),
|
|
||||||
});
|
|
||||||
|
|
||||||
exe.root_module.addIncludePath(b.path("libs/wgpu-native/include"));
|
|
||||||
exe.root_module.addLibraryPath(b.path("libs/wgpu-native/lib"));
|
|
||||||
exe.root_module.addObjectFile(b.path("libs/wgpu-native/lib/libwgpu_native.a"));
|
|
||||||
|
|
||||||
// Platform-specific system frameworks needed by wgpu-native
|
|
||||||
if (t.os.tag == .macos) {
|
|
||||||
exe.root_module.linkFramework("Metal", .{});
|
|
||||||
exe.root_module.linkFramework("QuartzCore", .{});
|
|
||||||
exe.root_module.linkFramework("Foundation", .{});
|
|
||||||
exe.root_module.linkFramework("CoreGraphics", .{});
|
|
||||||
} else if (t.os.tag == .windows) {
|
|
||||||
exe.root_module.linkSystemLibrary("d3d12", .{});
|
|
||||||
exe.root_module.linkSystemLibrary("dxgi", .{});
|
|
||||||
exe.root_module.linkSystemLibrary("user32", .{});
|
|
||||||
} else {
|
|
||||||
exe.root_module.linkSystemLibrary("vulkan", .{});
|
|
||||||
exe.root_module.linkSystemLibrary("gcc_s", .{});
|
|
||||||
}
|
|
||||||
|
|
||||||
b.installArtifact(exe);
|
|
||||||
|
|
||||||
const run_step = b.step("circle", "Run circle");
|
|
||||||
const run_cmd = b.addRunArtifact(exe);
|
|
||||||
run_step.dependOn(&run_cmd.step);
|
|
||||||
}
|
}
|
||||||
|
|||||||
76
examples/circle.zig
Normal file
76
examples/circle.zig
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
const std = @import("std");
|
||||||
|
const gpu = @import("gpu");
|
||||||
|
const GpuDevice = gpu.GpuDevice;
|
||||||
|
const GpuArena = gpu.GpuArena;
|
||||||
|
const GpuBuffer = gpu.GpuBuffer;
|
||||||
|
const GpuRender = gpu.GpuRender;
|
||||||
|
const GpuTexture = gpu.GpuTexture;
|
||||||
|
const GpuTextureView = gpu.GpuTextureView;
|
||||||
|
|
||||||
|
const width: u32 = 512;
|
||||||
|
const height: u32 = 512;
|
||||||
|
|
||||||
|
pub fn main(init: std.process.Init) !void {
|
||||||
|
const allocator = init.gpa;
|
||||||
|
|
||||||
|
// 1. Open GPU Device
|
||||||
|
const device = try GpuDevice.init(.{});
|
||||||
|
defer device.deinit();
|
||||||
|
|
||||||
|
// 2. Init VRAM Arena
|
||||||
|
var grena = GpuArena.init(allocator, device);
|
||||||
|
defer grena.deinit();
|
||||||
|
const gloc = grena.gpuAllocator();
|
||||||
|
|
||||||
|
// 3. Load Render Pipeline
|
||||||
|
const circle_rp = try GpuRender.init(
|
||||||
|
device, // Change to gloc + track them
|
||||||
|
@embedFile("shaders/circle.wgsl"),
|
||||||
|
.{ .bindings = &.{}, .texture_format = .RGBA8Unorm, .topology = .TriangleStrip },
|
||||||
|
);
|
||||||
|
defer circle_rp.deinit();
|
||||||
|
|
||||||
|
// 4. Create VRAM texture to render into
|
||||||
|
const texture = try GpuTexture.init(gloc, .{
|
||||||
|
.format = .RGBA8Unorm,
|
||||||
|
.size = .{ .width = width, .height = height, .depthOrArrayLayers = 1 },
|
||||||
|
.usage = .initMany(&.{ .RenderAttachment, .CopySrc }),
|
||||||
|
});
|
||||||
|
defer texture.deinit();
|
||||||
|
|
||||||
|
// 5. Create a view from texture
|
||||||
|
const view = try GpuTextureView.init(gloc, texture, .{});
|
||||||
|
defer view.deinit();
|
||||||
|
|
||||||
|
// 6. Run the rendering pipeline
|
||||||
|
try circle_rp.draw(gloc, view, 4, .{});
|
||||||
|
|
||||||
|
// 7. Load Texture into GpuBuffer
|
||||||
|
const cpu_staging_cpu = try texture.buffCopy(gloc);
|
||||||
|
defer cpu_staging_cpu.deinit();
|
||||||
|
|
||||||
|
// 8. Read GpuBuffer to CPU
|
||||||
|
const pixels = try cpu_staging_cpu.read(allocator, u8);
|
||||||
|
defer allocator.free(pixels);
|
||||||
|
|
||||||
|
// 9. Write a simple ppm image
|
||||||
|
try savePpm(init.io, "circle.ppm", width, height, pixels);
|
||||||
|
std.debug.print("Successfully rendered circle to circle.ppm!\n", .{});
|
||||||
|
}
|
||||||
|
|
||||||
|
fn savePpm(io: std.Io, filename: []const u8, w: u32, h: u32, rgba_pixels: []const u8) !void {
|
||||||
|
const file = try std.Io.Dir.cwd().createFile(io, filename, .{});
|
||||||
|
defer file.close(io);
|
||||||
|
|
||||||
|
var buf: [255]u8 = undefined;
|
||||||
|
var writer = file.writer(io, &buf);
|
||||||
|
|
||||||
|
// PPM Header: P6 format means raw RGB bytes
|
||||||
|
try writer.interface.print("P6\n{d} {d}\n255\n", .{ w, h });
|
||||||
|
|
||||||
|
// Strip Alpha channel when writing out to standard RGB PPM format
|
||||||
|
var i: usize = 0;
|
||||||
|
while (i < rgba_pixels.len) : (i += 4) {
|
||||||
|
try writer.interface.writeAll(rgba_pixels[i .. i + 3]);
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -1,77 +0,0 @@
|
|||||||
// I am using this mnist reduced dataset https://www.kaggle.com/datasets/mohamedgamal07/reduced-mnist
|
|
||||||
|
|
||||||
const std = @import("std");
|
|
||||||
const gpu = @import("gpu");
|
|
||||||
const GpuDevice = gpu.GpuDevice;
|
|
||||||
const GpuArena = gpu.GpuArena;
|
|
||||||
const GpuBuffer = gpu.GpuBuffer;
|
|
||||||
const GpuProcess = gpu.GpuProcess;
|
|
||||||
|
|
||||||
const BATCHSIZE = 10;
|
|
||||||
const EPOCH = 10;
|
|
||||||
|
|
||||||
pub fn main(init: std.process.Init) !void {
|
|
||||||
const allocator = init.gpa;
|
|
||||||
const io = init.io;
|
|
||||||
|
|
||||||
// 1. Open GPU Device
|
|
||||||
const device = try GpuDevice.init(.{});
|
|
||||||
defer device.deinit();
|
|
||||||
|
|
||||||
// 2. Create a GPU Arena to manage VRAM
|
|
||||||
var grena = GpuArena.init(allocator, device);
|
|
||||||
defer grena.deinit();
|
|
||||||
const gloc = grena.gpuAllocator();
|
|
||||||
|
|
||||||
// 3. Load the WGSL compute pipeline
|
|
||||||
const add_process = try GpuProcess.init(device, @embedFile("shaders/add.wgsl"));
|
|
||||||
defer add_process.deinit();
|
|
||||||
|
|
||||||
var train_dir = try std.Io.Dir.cwd().openDir(io, "mnist/train", .{});
|
|
||||||
|
|
||||||
var images: [BATCHSIZE * 28 * 28]f16 = undefined;
|
|
||||||
for (EPOCH) |epoch| {
|
|
||||||
// Load random images from train dir
|
|
||||||
train_dir.openDir(io, "0", .{});
|
|
||||||
for (BATCHSIZE) |i| {
|
|
||||||
const file = try train_dir.openFile(io, "0.jpg", .{});
|
|
||||||
images[28 * 28 * i .. 28 * 28 * (i + 1)] = file.read
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 4. Setup CPU data
|
|
||||||
const len: usize = 16;
|
|
||||||
const data_a = try allocator.alloc(f16, len);
|
|
||||||
defer allocator.free(data_a);
|
|
||||||
const data_b = try allocator.alloc(f16, len);
|
|
||||||
defer allocator.free(data_b);
|
|
||||||
|
|
||||||
for (0..len) |i| {
|
|
||||||
data_a[i] = @floatFromInt(i);
|
|
||||||
data_b[i] = @floatFromInt(len - 1 - i);
|
|
||||||
}
|
|
||||||
|
|
||||||
// 5. Initialize raw GPU Buffers
|
|
||||||
// We pass the EnumSet inline using `.initMany` since the Enum itself isn't exported
|
|
||||||
const byte_size = len * @sizeOf(f16);
|
|
||||||
const buf_a = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc }));
|
|
||||||
const buf_b = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc }));
|
|
||||||
const buf_out = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc }));
|
|
||||||
|
|
||||||
// Note: The buffers are safely tied to the GpuArena which will automatically
|
|
||||||
// release them at the end. You can also manually call buf_x.deinit() if desired.
|
|
||||||
|
|
||||||
// 6. Transfer data from CPU slices to GPU Buffers
|
|
||||||
try buf_a.load(f16, data_a);
|
|
||||||
try buf_b.load(f16, data_b);
|
|
||||||
|
|
||||||
// 7. Dispatch the Compute Process
|
|
||||||
// We pass the data type (f16) to allow GpuProcess to calculate chunks correctly
|
|
||||||
try add_process.run(gloc, f16, buf_a, buf_b, buf_out);
|
|
||||||
|
|
||||||
// 8. Map and copy the resulting buffer back to the CPU
|
|
||||||
const out = try buf_out.read(allocator, f16);
|
|
||||||
defer allocator.free(out);
|
|
||||||
|
|
||||||
std.debug.print("Result: {any}\n", .{out});
|
|
||||||
}
|
|
||||||
@ -1,6 +1,7 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const c = @import("utils.zig").c;
|
const c = @import("utils.zig").c;
|
||||||
const GpuAllocator = @import("GpuAllocator.zig");
|
const GpuAllocator = @import("GpuAllocator.zig");
|
||||||
|
const GpuBuffer = @import("GpuBuffer.zig");
|
||||||
const GpuTextureFormat = @import("lib.zig").GpuTextureFormat;
|
const GpuTextureFormat = @import("lib.zig").GpuTextureFormat;
|
||||||
const GpuTextureUsage = @import("lib.zig").GpuTextureUsage;
|
const GpuTextureUsage = @import("lib.zig").GpuTextureUsage;
|
||||||
|
|
||||||
@ -40,6 +41,45 @@ pub fn getConstMappedRange(self: @This(), offset: u64, size: u64) ?*const anyopa
|
|||||||
return c.wgpuBufferGetConstMappedRange(self.raw, offset, size);
|
return c.wgpuBufferGetConstMappedRange(self.raw, offset, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn bytesSize(self: @This()) u32 {
|
||||||
|
return self.bytesSizeRow() * self.def.size.height;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn bytesSizeRow(self: @This()) u32 {
|
||||||
|
return self.def.size.width * self.def.format.bytesPerPixel();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return a GpuBuffer containing a copy of the texture.
|
||||||
|
pub fn buffCopy(self: @This(), gloc: GpuAllocator) !GpuBuffer {
|
||||||
|
const buf = try GpuBuffer.init(gloc, self.bytesSize(), .initMany(&.{ .CopyDst, .CopySrc }));
|
||||||
|
|
||||||
|
const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device.device, null) orelse return error.Encoder;
|
||||||
|
defer c.wgpuCommandEncoderRelease(enc);
|
||||||
|
|
||||||
|
const src_copy = c.WGPUTexelCopyTextureInfo{
|
||||||
|
.texture = self.raw,
|
||||||
|
.mipLevel = 0,
|
||||||
|
.origin = .{ .x = 0, .y = 0, .z = 0 },
|
||||||
|
.aspect = c.WGPUTextureAspect_All,
|
||||||
|
};
|
||||||
|
const dst_copy = c.WGPUTexelCopyBufferInfo{
|
||||||
|
.buffer = buf.raw,
|
||||||
|
.layout = .{
|
||||||
|
.offset = 0,
|
||||||
|
.bytesPerRow = self.bytesSizeRow(),
|
||||||
|
.rowsPerImage = self.def.size.height,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
c.wgpuCommandEncoderCopyTextureToBuffer(enc, &src_copy, &dst_copy, &self.def.size);
|
||||||
|
|
||||||
|
const cmd = c.wgpuCommandEncoderFinish(enc, null);
|
||||||
|
defer c.wgpuCommandBufferRelease(cmd);
|
||||||
|
c.wgpuQueueSubmit(gloc.device.queue, 1, &cmd);
|
||||||
|
|
||||||
|
return buf;
|
||||||
|
}
|
||||||
|
|
||||||
pub fn mapAsync(
|
pub fn mapAsync(
|
||||||
self: @This(),
|
self: @This(),
|
||||||
mode: c.WGPUMapMode,
|
mode: c.WGPUMapMode,
|
||||||
|
|||||||
112
src/circle.zig
112
src/circle.zig
@ -1,112 +0,0 @@
|
|||||||
const std = @import("std");
|
|
||||||
const gpu = @import("lib.zig");
|
|
||||||
const c = @import("utils.zig").c;
|
|
||||||
const sv = @import("utils.zig").sv;
|
|
||||||
const GpuDevice = gpu.GpuDevice;
|
|
||||||
const GpuArena = gpu.GpuArena;
|
|
||||||
const GpuBuffer = gpu.GpuBuffer;
|
|
||||||
const GpuRender = gpu.GpuRender;
|
|
||||||
const GpuTexture = gpu.GpuTexture;
|
|
||||||
const GpuTextureView = gpu.GpuTextureView;
|
|
||||||
|
|
||||||
pub fn main(init: std.process.Init) !void {
|
|
||||||
const allocator = init.gpa;
|
|
||||||
|
|
||||||
// 1. Open the raw headless GPU Device you shared
|
|
||||||
const device = try GpuDevice.init(.{});
|
|
||||||
defer device.deinit();
|
|
||||||
|
|
||||||
var grena = GpuArena.init(allocator, device);
|
|
||||||
defer grena.deinit();
|
|
||||||
const gloc = grena.gpuAllocator();
|
|
||||||
|
|
||||||
const width: u32 = 512;
|
|
||||||
const height: u32 = 512;
|
|
||||||
|
|
||||||
// 2. Load our Render Pipeline (Procedural Triangle Strip)
|
|
||||||
const circle_rp = try GpuRender.init(
|
|
||||||
device,
|
|
||||||
@embedFile("shaders/circle.wgsl"),
|
|
||||||
.{
|
|
||||||
.bindings = &.{},
|
|
||||||
.texture_format = .RGBA8Unorm,
|
|
||||||
.topology = .TriangleStrip,
|
|
||||||
},
|
|
||||||
);
|
|
||||||
defer circle_rp.deinit();
|
|
||||||
|
|
||||||
// 3. Create the offscreen VRAM texture to render into
|
|
||||||
const texture = try GpuTexture.init(gloc, .{
|
|
||||||
.format = .RGBA8Unorm,
|
|
||||||
.size = .{ .width = width, .height = height, .depthOrArrayLayers = 1 },
|
|
||||||
.usage = .initMany(&.{ .RenderAttachment, .CopySrc }),
|
|
||||||
});
|
|
||||||
defer texture.deinit();
|
|
||||||
|
|
||||||
const view = try GpuTextureView.init(gloc, texture, .{});
|
|
||||||
defer view.deinit();
|
|
||||||
|
|
||||||
// 4. Create a staging buffer to pull pixels from VRAM to CPU
|
|
||||||
// 4 bytes per pixel (RGBA8)
|
|
||||||
const row_bytes = width * 4;
|
|
||||||
const buffer_bytes = row_bytes * height;
|
|
||||||
|
|
||||||
// Create a regular GpuBuffer set up to receive texture copy transfers
|
|
||||||
const cpu_staging_buf = try GpuBuffer.init(gloc, buffer_bytes, .initMany(&.{ .CopyDst, .CopySrc }));
|
|
||||||
|
|
||||||
// 5. Draw the Circle Frame into the texture view!
|
|
||||||
try circle_rp.draw(gloc, view, 4, .{});
|
|
||||||
|
|
||||||
// 6. Copy the texture data into our CPU staging buffer
|
|
||||||
const enc = c.wgpuDeviceCreateCommandEncoder(device.device, null) orelse return error.Encoder;
|
|
||||||
defer c.wgpuCommandEncoderRelease(enc);
|
|
||||||
|
|
||||||
const src_copy = c.WGPUTexelCopyTextureInfo{
|
|
||||||
.texture = texture.raw,
|
|
||||||
.mipLevel = 0,
|
|
||||||
.origin = .{ .x = 0, .y = 0, .z = 0 },
|
|
||||||
.aspect = c.WGPUTextureAspect_All,
|
|
||||||
};
|
|
||||||
const dst_copy = c.WGPUTexelCopyBufferInfo{
|
|
||||||
.buffer = cpu_staging_buf.raw,
|
|
||||||
.layout = .{
|
|
||||||
.offset = 0,
|
|
||||||
.bytesPerRow = row_bytes,
|
|
||||||
.rowsPerImage = height,
|
|
||||||
},
|
|
||||||
};
|
|
||||||
const copy_size = c.WGPUExtent3D{ .width = width, .height = height, .depthOrArrayLayers = 1 };
|
|
||||||
|
|
||||||
c.wgpuCommandEncoderCopyTextureToBuffer(enc, &src_copy, &dst_copy, ©_size);
|
|
||||||
|
|
||||||
const cmd = c.wgpuCommandEncoderFinish(enc, null);
|
|
||||||
defer c.wgpuCommandBufferRelease(cmd);
|
|
||||||
c.wgpuQueueSubmit(device.queue, 1, &cmd);
|
|
||||||
|
|
||||||
// 7. Map and read the raw image bytes back to CPU
|
|
||||||
// (This uses whatever slice-reading helpers your `GpuBuffer` wrapper provides)
|
|
||||||
const pixels = try cpu_staging_buf.read(allocator, u8);
|
|
||||||
defer allocator.free(pixels);
|
|
||||||
|
|
||||||
// Now you have the raw binary image data! Let's output a simple Netpbm PPM image file
|
|
||||||
// so you can actually open and look at your rendered circle.
|
|
||||||
try savePpm(init.io, "circle.ppm", width, height, pixels);
|
|
||||||
std.debug.print("Successfully rendered circle to circle.ppm!\n", .{});
|
|
||||||
}
|
|
||||||
|
|
||||||
fn savePpm(io: std.Io, filename: []const u8, w: u32, h: u32, rgba_pixels: []const u8) !void {
|
|
||||||
const file = try std.Io.Dir.cwd().createFile(io, filename, .{});
|
|
||||||
defer file.close(io);
|
|
||||||
|
|
||||||
var buf: [255]u8 = undefined;
|
|
||||||
var writer = file.writer(io, &buf);
|
|
||||||
|
|
||||||
// PPM Header: P6 format means raw RGB bytes
|
|
||||||
try writer.interface.print("P6\n{d} {d}\n255\n", .{ w, h });
|
|
||||||
|
|
||||||
// Strip Alpha channel when writing out to standard RGB PPM format
|
|
||||||
var i: usize = 0;
|
|
||||||
while (i < rgba_pixels.len) : (i += 4) {
|
|
||||||
try writer.interface.writeAll(rgba_pixels[i .. i + 3]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@ -1,24 +0,0 @@
|
|||||||
enable f16;
|
|
||||||
|
|
||||||
@group(0) @binding(0) var<storage, read> A: array<f16>;
|
|
||||||
@group(0) @binding(1) var<storage, read> B: array<f16>;
|
|
||||||
@group(0) @binding(2) var<storage, read_write> C: array<f16>;
|
|
||||||
@group(0) @binding(3) var<uniform> size: u32;
|
|
||||||
|
|
||||||
@compute @workgroup_size(256)
|
|
||||||
fn main(
|
|
||||||
@builtin(global_invocation_id) global_id : vec3<u32>,
|
|
||||||
@builtin(num_workgroups) num_workgroups: vec3<u32>
|
|
||||||
) {
|
|
||||||
// 1. Calculate the total number of threads across the entire grid
|
|
||||||
let total_threads = num_workgroups.x * 256u;
|
|
||||||
|
|
||||||
// 2. Start at this thread's unique global ID
|
|
||||||
var index = global_id.x;
|
|
||||||
|
|
||||||
// 3. Stride through the tensor elements
|
|
||||||
while (index < size) {
|
|
||||||
C[index] = A[index] + B[index];
|
|
||||||
index += total_threads; // Jump forward by the total thread count
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Loading…
x
Reference in New Issue
Block a user