Compare commits

...

3 Commits

Author SHA1 Message Date
adrien
af210e2fb2 Working self contained rendering (simple circle) 2026-05-20 11:46:06 +02:00
adrien
545c4b98e9 Create a GpuTextureView 2026-05-20 10:56:35 +02:00
adrien
45c0f3180e Created a GpuTextureDef 2026-05-20 09:55:34 +02:00
16 changed files with 224 additions and 288 deletions

View File

@ -56,39 +56,4 @@ pub fn build(b: *std.Build) !void {
const run_cmd = b.addRunArtifact(exe);
run_step.dependOn(&run_cmd.step);
}
const exe = b.addExecutable(.{
.name = "circle",
.root_module = b.createModule(.{
.root_source_file = b.path("src/circle.zig"),
.target = target,
.optimize = optimize,
.imports = &.{},
}),
});
exe.root_module.addIncludePath(b.path("libs/wgpu-native/include"));
exe.root_module.addLibraryPath(b.path("libs/wgpu-native/lib"));
exe.root_module.addObjectFile(b.path("libs/wgpu-native/lib/libwgpu_native.a"));
// Platform-specific system frameworks needed by wgpu-native
if (t.os.tag == .macos) {
exe.root_module.linkFramework("Metal", .{});
exe.root_module.linkFramework("QuartzCore", .{});
exe.root_module.linkFramework("Foundation", .{});
exe.root_module.linkFramework("CoreGraphics", .{});
} else if (t.os.tag == .windows) {
exe.root_module.linkSystemLibrary("d3d12", .{});
exe.root_module.linkSystemLibrary("dxgi", .{});
exe.root_module.linkSystemLibrary("user32", .{});
} else {
exe.root_module.linkSystemLibrary("vulkan", .{});
exe.root_module.linkSystemLibrary("gcc_s", .{});
}
b.installArtifact(exe);
const run_step = b.step("circle", "Run circle");
const run_cmd = b.addRunArtifact(exe);
run_step.dependOn(&run_cmd.step);
}

76
examples/circle.zig Normal file
View File

@ -0,0 +1,76 @@
const std = @import("std");
const gpu = @import("gpu");
const GpuDevice = gpu.GpuDevice;
const GpuArena = gpu.GpuArena;
const GpuBuffer = gpu.GpuBuffer;
const GpuRender = gpu.GpuRender;
const GpuTexture = gpu.GpuTexture;
const GpuTextureView = gpu.GpuTextureView;
const width: u32 = 512;
const height: u32 = 512;
/// Renders the `circle.wgsl` shader into an offscreen RGBA8 texture of
/// `width` x `height` pixels, copies the pixels back to the CPU and writes
/// them to `circle.ppm` in the current working directory.
pub fn main(init: std.process.Init) !void {
    const gpa = init.gpa;

    // GPU device that every resource below is created from.
    const gpu_device = try GpuDevice.init(.{});
    defer gpu_device.deinit();

    // Arena tracking the GPU resources handed out through `vram`.
    var arena = GpuArena.init(gpa, gpu_device);
    defer arena.deinit();
    const vram = arena.gpuAllocator();

    // Render pipeline built from the embedded WGSL source.
    const pipeline = try GpuRender.init(
        gpu_device, // TODO: take the GpuAllocator here instead so pipelines are tracked too
        @embedFile("shaders/circle.wgsl"),
        .{
            .bindings = &.{},
            .texture_format = .RGBA8Unorm,
            .topology = .TriangleStrip,
        },
    );
    defer pipeline.deinit();

    // Offscreen render target; CopySrc allows copying it out afterwards.
    const target = try GpuTexture.init(vram, .{
        .format = .RGBA8Unorm,
        .size = .{ .width = width, .height = height, .depthOrArrayLayers = 1 },
        .usage = .initMany(&.{ .RenderAttachment, .CopySrc }),
    });
    defer target.deinit();

    // View of the target with default settings.
    const target_view = try GpuTextureView.init(vram, target, .{});
    defer target_view.deinit();

    // Draw 4 vertices (the pipeline topology is a triangle strip).
    try pipeline.draw(vram, target_view, 4, .{});

    // Copy the texture into a GpuBuffer, then read that buffer into CPU memory.
    const staging = try target.buffCopy(vram);
    defer staging.deinit();

    const pixels = try staging.read(gpa, u8);
    defer gpa.free(pixels);

    // Store the result as a simple PPM image.
    try savePpm(init.io, "circle.ppm", width, height, pixels);
    std.debug.print("Successfully rendered circle to circle.ppm!\n", .{});
}
/// Writes `rgba_pixels` to `filename` (relative to the current working
/// directory) as a binary PPM ("P6") image.
///
/// `rgba_pixels` is read in steps of 4 bytes; the first three bytes of each
/// step are written and the fourth is dropped. `w` and `h` are only used for
/// the header and are not checked against the number of pixels.
///
/// Returns any error from creating the file or from writing to it.
fn savePpm(io: std.Io, filename: []const u8, w: u32, h: u32, rgba_pixels: []const u8) !void {
    const file = try std.Io.Dir.cwd().createFile(io, filename, .{});
    defer file.close(io);

    var buf: [255]u8 = undefined;
    var writer = file.writer(io, &buf);
    // Use the interface through a pointer; it must stay inside `writer`.
    const out = &writer.interface;

    // PPM header: "P6" selects the raw (binary) RGB encoding.
    try out.print("P6\n{d} {d}\n255\n", .{ w, h });

    // Strip the alpha channel. Bounding on `i + 3` keeps the slice in range
    // even when the input length is not a multiple of 4.
    var i: usize = 0;
    while (i + 3 <= rgba_pixels.len) : (i += 4) {
        try out.writeAll(rgba_pixels[i .. i + 3]);
    }

    // Output is buffered in `buf`; without an explicit flush the last bytes
    // would never reach the file.
    try out.flush();
}

View File

@ -1,77 +0,0 @@
// I am using this mnist reduced dataset https://www.kaggle.com/datasets/mohamedgamal07/reduced-mnist
const std = @import("std");
const gpu = @import("gpu");
const GpuDevice = gpu.GpuDevice;
const GpuArena = gpu.GpuArena;
const GpuBuffer = gpu.GpuBuffer;
const GpuProcess = gpu.GpuProcess;
const BATCHSIZE = 10;
const EPOCH = 10;
/// Entry point of the (unfinished) MNIST example: opens a GPU device, arena and
/// the `add.wgsl` compute pipeline, starts a batch-loading loop, then runs the
/// pipeline on two 16-element f16 inputs and prints the output buffer.
pub fn main(init: std.process.Init) !void {
const allocator = init.gpa;
const io = init.io;
// 1. Open GPU Device
const device = try GpuDevice.init(.{});
defer device.deinit();
// 2. Create a GPU Arena to manage VRAM
var grena = GpuArena.init(allocator, device);
defer grena.deinit();
const gloc = grena.gpuAllocator();
// 3. Load the WGSL compute pipeline
const add_process = try GpuProcess.init(device, @embedFile("shaders/add.wgsl"));
defer add_process.deinit();
// NOTE(review): `train_dir` is opened here but no matching close is visible in this function.
var train_dir = try std.Io.Dir.cwd().openDir(io, "mnist/train", .{});
// Room for BATCHSIZE images of 28 * 28 f16 values each.
var images: [BATCHSIZE * 28 * 28]f16 = undefined;
// NOTE(review): both loops below iterate over an integer constant directly and
// `epoch` is never used — confirm whether `0..EPOCH` / `0..BATCHSIZE` was intended.
for (EPOCH) |epoch| {
// Load random images from train dir
// NOTE(review): the result of this `openDir` call is neither checked nor stored.
train_dir.openDir(io, "0", .{});
for (BATCHSIZE) |i| {
// NOTE(review): every iteration opens the same "0.jpg" and the file is not closed here.
const file = try train_dir.openFile(io, "0.jpg", .{});
// NOTE(review): unfinished statement — `file.read` is not called and the line has no terminating `;`.
images[28 * 28 * i .. 28 * 28 * (i + 1)] = file.read
}
}
// 4. Setup CPU data
const len: usize = 16;
const data_a = try allocator.alloc(f16, len);
defer allocator.free(data_a);
const data_b = try allocator.alloc(f16, len);
defer allocator.free(data_b);
// data_a counts up from 0, data_b counts down from len - 1.
for (0..len) |i| {
data_a[i] = @floatFromInt(i);
data_b[i] = @floatFromInt(len - 1 - i);
}
// 5. Initialize raw GPU Buffers
// We pass the EnumSet inline using `.initMany` since the Enum itself isn't exported
const byte_size = len * @sizeOf(f16);
const buf_a = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc }));
const buf_b = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc }));
const buf_out = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc }));
// Note: The buffers are safely tied to the GpuArena which will automatically
// release them at the end. You can also manually call buf_x.deinit() if desired.
// 6. Transfer data from CPU slices to GPU Buffers
try buf_a.load(f16, data_a);
try buf_b.load(f16, data_b);
// 7. Dispatch the Compute Process
// We pass the data type (f16) to allow GpuProcess to calculate chunks correctly
try add_process.run(gloc, f16, buf_a, buf_b, buf_out);
// 8. Map and copy the resulting buffer back to the CPU
const out = try buf_out.read(allocator, f16);
defer allocator.free(out);
std.debug.print("Result: {any}\n", .{out});
}

View File

@ -6,6 +6,8 @@ pub const VTable = struct {
freeBuffer: *const fn (ctx: *anyopaque, buf_raw: c.WGPUBuffer) void,
allocTexture: *const fn (ctx: *anyopaque, desc: c.WGPUTextureDescriptor) anyerror!c.WGPUTexture,
freeTexture: *const fn (ctx: *anyopaque, buf_raw: c.WGPUTexture) void,
allocTextureView: *const fn (ctx: *anyopaque, texture: c.WGPUTexture, desc: c.WGPUTextureViewDescriptor) anyerror!c.WGPUTextureView,
freeTextureView: *const fn (ctx: *anyopaque, buf_raw: c.WGPUTextureView) void,
};
device: GpuDevice,
@ -27,3 +29,11 @@ pub fn allocTexture(self: @This(), desc: c.WGPUTextureDescriptor) !c.WGPUTexture
/// Hands `buf_raw` back to the implementation behind this interface by
/// calling its `freeTexture` vtable entry.
pub fn freeTexture(self: @This(), buf_raw: c.WGPUTexture) void {
    const release = self.vtable.freeTexture;
    release(self.ptr, buf_raw);
}
/// Creates a view of `texture` described by `desc` through the
/// implementation's `allocTextureView` vtable entry and returns its handle.
/// Any error from the implementation is propagated unchanged.
pub fn allocTextureView(self: @This(), texture: c.WGPUTexture, desc: c.WGPUTextureViewDescriptor) !c.WGPUTextureView {
    const view = try self.vtable.allocTextureView(self.ptr, texture, desc);
    return view;
}
/// Hands the view `buf_raw` back to the implementation behind this interface
/// by calling its `freeTextureView` vtable entry.
pub fn freeTextureView(self: @This(), buf_raw: c.WGPUTextureView) void {
    const release = self.vtable.freeTextureView;
    release(self.ptr, buf_raw);
}

View File

@ -7,6 +7,7 @@ const c = @import("utils.zig").c;
device: GpuDevice,
tracked_buffers: std.AutoHashMap(c.WGPUBuffer, c.WGPUBufferDescriptor),
tracked_textures: std.AutoHashMap(c.WGPUTexture, c.WGPUTextureDescriptor),
tracked_views: std.AutoHashMap(c.WGPUTextureView, c.WGPUTextureViewDescriptor),
allocated_vram_bytes: u64 = 0,
pub fn init(cpu_allocator: std.mem.Allocator, device: GpuDevice) @This() {
@ -14,6 +15,7 @@ pub fn init(cpu_allocator: std.mem.Allocator, device: GpuDevice) @This() {
.device = device,
.tracked_buffers = .init(cpu_allocator),
.tracked_textures = .init(cpu_allocator),
.tracked_views = .init(cpu_allocator),
};
}
@ -29,6 +31,11 @@ pub fn deinit(self: *@This()) void {
while (it_texture.next()) |tex_ptr|
c.wgpuTextureRelease(tex_ptr.*);
self.tracked_textures.deinit();
var it_view = self.tracked_views.keyIterator();
while (it_view.next()) |view_ptr|
c.wgpuTextureViewRelease(view_ptr.*);
self.tracked_views.deinit();
}
/// Returns the type-erased immutable interface wrapper
@ -41,6 +48,8 @@ pub fn gpuAllocator(self: *@This()) GpuAllocator {
.freeBuffer = freeBuffer,
.allocTexture = allocTexture,
.freeTexture = freeTexture,
.allocTextureView = allocTextureView,
.freeTextureView = freeTextureView,
},
};
}
@ -65,12 +74,12 @@ fn allocBuffer(ctx: *anyopaque, desc: c.WGPUBufferDescriptor) anyerror!c.WGPUBuf
return buf;
}
fn freeBuffer(ctx: *anyopaque, buf_raw: c.WGPUBuffer) void {
fn freeBuffer(ctx: *anyopaque, raw: c.WGPUBuffer) void {
const self: *@This() = @ptrCast(@alignCast(ctx));
if (self.tracked_buffers.fetchRemove(buf_raw)) |kv| {
c.wgpuBufferDestroy(buf_raw);
c.wgpuBufferRelease(buf_raw);
if (self.tracked_buffers.fetchRemove(raw)) |kv| {
c.wgpuBufferDestroy(raw);
c.wgpuBufferRelease(raw);
self.allocated_vram_bytes -= kv.value.size;
}
}
@ -93,11 +102,11 @@ fn allocTexture(ctx: *anyopaque, desc: c.WGPUTextureDescriptor) anyerror!c.WGPUT
return texture;
}
fn freeTexture(ctx: *anyopaque, texture_raw: c.WGPUTexture) void {
fn freeTexture(ctx: *anyopaque, raw: c.WGPUTexture) void {
const self: *@This() = @ptrCast(@alignCast(ctx));
if (self.tracked_textures.fetchRemove(texture_raw)) |kv| {
c.wgpuTextureRelease(texture_raw);
if (self.tracked_textures.fetchRemove(raw)) |kv| {
c.wgpuTextureRelease(raw);
const desc = kv.value;
const format: GpuTextureFormat = @enumFromInt(desc.format);
@ -105,3 +114,16 @@ fn freeTexture(ctx: *anyopaque, texture_raw: c.WGPUTexture) void {
self.allocated_vram_bytes -= bytes_size;
}
}
/// Creates a view of `texture` described by `desc` and records it in
/// `tracked_views` so that `freeTextureView` or `deinit` can release it later.
///
/// Returns `error.View` when WebGPU returns a null view, or the allocation
/// error raised while growing the tracking map.
fn allocTextureView(ctx: *anyopaque, texture: c.WGPUTexture, desc: c.WGPUTextureViewDescriptor) anyerror!c.WGPUTextureView {
    const self: *@This() = @ptrCast(@alignCast(ctx));
    const view = c.wgpuTextureCreateView(texture, &desc) orelse return error.View;
    // If tracking fails the caller never receives the handle, so release it
    // here instead of leaking it.
    errdefer c.wgpuTextureViewRelease(view);
    try self.tracked_views.put(view, desc);
    return view;
}
/// Releases `raw` if this arena is tracking it; handles that are not tracked
/// are left untouched.
fn freeTextureView(ctx: *anyopaque, raw: c.WGPUTextureView) void {
    const self: *@This() = @ptrCast(@alignCast(ctx));
    const was_tracked = self.tracked_views.remove(raw);
    if (!was_tracked) return;
    c.wgpuTextureViewRelease(raw);
}

View File

@ -21,7 +21,6 @@ const BufferUsage = enum(u64) {
QueryResolve = 0x0000000000000200,
};
/// Allocates the underlying WebGPU handle and registers it to the parent GpuAllocator
pub fn init(gloc: GpuAllocator, size: u64, usage: std.EnumSet(BufferUsage)) !@This() {
var use: u64 = 0;
var iter = usage.iterator();
@ -39,17 +38,14 @@ pub fn init(gloc: GpuAllocator, size: u64, usage: std.EnumSet(BufferUsage)) !@Th
};
}
/// Gives the underlying buffer handle back to the GpuAllocator stored in `gloc`.
pub fn deinit(self: @This()) void {
    const owner = self.gloc;
    owner.freeBuffer(self.raw);
}
/// Thin wrapper over `wgpuBufferGetConstMappedRange` for this buffer.
/// `offset` and `size` are forwarded unchanged and the C call's result
/// (possibly null) is returned as-is.
pub fn getConstMappedRange(self: @This(), offset: u64, size: u64) ?*const anyopaque {
    const mapped = c.wgpuBufferGetConstMappedRange(self.raw, offset, size);
    return mapped;
}
/// Native mapAsync wrapper
pub fn mapAsync(
self: @This(),
mode: c.WGPUMapMode,
@ -60,12 +56,11 @@ pub fn mapAsync(
_ = c.wgpuBufferMapAsync(self.raw, mode, offset, size, callback_info);
}
/// Thin wrapper over `wgpuBufferUnmap` for this buffer.
pub fn unmap(self: @This()) void {
    const handle = self.raw;
    c.wgpuBufferUnmap(handle);
}
/// CPU to GPU.
/// CPU to GPU
pub fn load(
self: @This(),
T: type,
@ -92,6 +87,7 @@ pub fn load(
}
}
/// GPU to CPU
pub fn read(self: @This(), alloc: std.mem.Allocator, T: type) ![]T {
const out = try alloc.alloc(T, @divExact(self.size, @sizeOf(T)));

View File

@ -2,6 +2,11 @@ const std = @import("std");
const c = @import("utils.zig").c;
const sv = @import("utils.zig").sv;
// TODO: Make the allocator API more Zig-like
// - GpuDevice can return a GpuAllocator that only allocates and does nothing else
// - From this GpuAllocator, a GpuArena can be created, like std.heap.ArenaAllocator.init(allocator)
// - Rename GpuArenaAllocator too
const Ctx = struct {
adapter: c.WGPUAdapter = null,
device: c.WGPUDevice = null,

View File

@ -4,13 +4,14 @@ const sv = @import("utils.zig").sv;
const GpuAllocator = @import("GpuAllocator.zig");
const GpuBuffer = @import("GpuBuffer.zig");
const GpuDevice = @import("GpuDevice.zig");
const GpuTextureView = @import("GpuTextureView.zig");
const GpuTextureFormat = @import("lib.zig").GpuTextureFormat;
/// Element type of the `bindings` slice in the render definition.
pub const Binding = struct {
/// NOTE(review): presumably the size in bytes of one element bound at this
/// slot — confirm against `draw`. Defaults to 0.
element_size: u32 = 0,
};
pub const RenderDef = struct {
pub const GpuRenderDef = struct {
bindings: []const Binding = &.{},
/// The surface texture format we are rendering to (e.g., BGRA8Unorm)
texture_format: GpuTextureFormat,
@ -32,9 +33,9 @@ const GpuPrimitiveTopology = enum(c_uint) {
};
pip: c.WGPURenderPipeline,
def: RenderDef,
def: GpuRenderDef,
pub fn init(device: GpuDevice, wgsl: []const u8, def: RenderDef) !@This() {
pub fn init(device: GpuDevice, wgsl: []const u8, def: GpuRenderDef) !@This() {
var wgsl_src = c.WGPUShaderSourceWGSL{
.chain = .{ .sType = c.WGPUSType_ShaderSourceWGSL },
.code = sv(wgsl),
@ -99,7 +100,7 @@ pub fn deinit(self: @This()) void {
pub fn draw(
self: @This(),
gloc: GpuAllocator,
target_view: c.WGPUTextureView,
target_view: GpuTextureView,
vertex_count: u32,
args: anytype,
) !void {
@ -144,7 +145,7 @@ pub fn draw(
defer c.wgpuCommandEncoderRelease(enc);
const color_attachment = c.WGPURenderPassColorAttachment{
.view = target_view,
.view = target_view.raw,
.resolveTarget = null,
.loadOp = c.WGPULoadOp_Clear,
.storeOp = c.WGPUStoreOp_Store,

View File

@ -1,54 +1,85 @@
const std = @import("std");
const c = @import("utils.zig").c;
const GpuAllocator = @import("GpuAllocator.zig");
const GpuBuffer = @import("GpuBuffer.zig");
const GpuTextureFormat = @import("lib.zig").GpuTextureFormat;
const GpuTextureUsage = @import("lib.zig").GpuTextureUsage;
const TextureUsage = enum(u64) {
None = 0x0000000000000000,
CopySrc = 0x0000000000000001,
CopyDst = 0x0000000000000002,
TextureBinding = 0x0000000000000004,
StorageBinding = 0x0000000000000008,
RenderAttachment = 0x0000000000000010,
TransientAttachment = 0x0000000000000020,
pub const GpuTextureDef = struct {
size: c.WGPUExtent3D,
usage: std.EnumSet(GpuTextureUsage),
format: GpuTextureFormat,
};
raw: c.WGPUTexture,
size: c.WGPUExtent3D,
usage: c.WGPUTextureUsage,
format: GpuTextureFormat,
gloc: GpuAllocator,
def: GpuTextureDef,
/// Allocates the underlying WebGPU handle and registers it to the parent GpuAllocator
pub fn init(gloc: GpuAllocator, format: GpuTextureFormat, size: c.WGPUExtent3D, usage: std.EnumSet(TextureUsage)) !@This() {
pub fn init(gloc: GpuAllocator, def: GpuTextureDef) !@This() {
var use: u64 = 0;
var iter = usage.iterator();
var iter = def.usage.iterator();
while (iter.next()) |flag| use |= @intFromEnum(flag);
const desc = c.WGPUTextureDescriptor{
.usage = use,
.dimension = c.WGPUTextureDimension_2D,
.size = size,
.format = @intFromEnum(format),
.size = def.size,
.format = @intFromEnum(def.format),
.mipLevelCount = 1,
.sampleCount = 1,
};
const raw = try gloc.allocTexture(desc);
return .{ .gloc = gloc, .raw = raw, .size = size, .format = format, .usage = use };
return .{ .gloc = gloc, .raw = raw, .def = def };
}
/// Gives the underlying texture handle back to the GpuAllocator stored in `gloc`.
pub fn deinit(self: @This()) void {
    const owner = self.gloc;
    owner.freeTexture(self.raw);
}
/// Forwards `offset` and `size` to `wgpuBufferGetConstMappedRange` and returns
/// its result unchanged.
/// NOTE(review): `raw` is declared as `c.WGPUTexture` in this file, yet it is
/// passed to a buffer API here — confirm this wrapper is still meant to exist.
pub fn getConstMappedRange(self: @This(), offset: u64, size: u64) ?*const anyopaque {
    const mapped = c.wgpuBufferGetConstMappedRange(self.raw, offset, size);
    return mapped;
}
/// Byte size of the texture computed as one row (`bytesSizeRow()`) times the
/// texture height. `depthOrArrayLayers` is not part of the product, and the
/// multiplication is carried out in `u32`.
pub fn bytesSize(self: @This()) u32 {
    const row_bytes = self.bytesSizeRow();
    const rows = self.def.size.height;
    return row_bytes * rows;
}
/// Byte size of a single row: the texture width times the format's
/// `bytesPerPixel()`.
pub fn bytesSizeRow(self: @This()) u32 {
    const texel_bytes = self.def.format.bytesPerPixel();
    return self.def.size.width * texel_bytes;
}
/// Returns a new GpuBuffer containing a copy of mip level 0 of this texture.
///
/// The buffer is allocated from `gloc` with `bytesSize()` bytes and the
/// `CopyDst` and `CopySrc` usages; rows are laid out `bytesSizeRow()` bytes
/// apart. The copy is recorded and submitted to `gloc.device.queue` before
/// returning; this function does not itself wait for the GPU to finish.
/// NOTE(review): `bytesSize()` does not include `depthOrArrayLayers`, while
/// the copy extent is the full `def.size` — confirm behavior for layered textures.
///
/// Errors:
/// - `error.UnalignedBytesPerRow` when the copy spans more than one row or
///   layer and the row size is not a multiple of 256 bytes.
/// - `error.Encoder` / `error.CommandBuffer` when WebGPU returns a null
///   command encoder / command buffer.
/// - Any error returned by `GpuBuffer.init`.
pub fn buffCopy(self: @This(), gloc: GpuAllocator) !GpuBuffer {
    const row_bytes = self.bytesSizeRow();
    const extent = self.def.size;

    // WebGPU requires bytesPerRow to be a multiple of 256 whenever a
    // texture-to-buffer copy covers more than one row or layer; fail with a
    // regular error instead of handing an invalid layout to the driver.
    if ((extent.height > 1 or extent.depthOrArrayLayers > 1) and row_bytes % 256 != 0)
        return error.UnalignedBytesPerRow;

    const buf = try GpuBuffer.init(gloc, self.bytesSize(), .initMany(&.{ .CopyDst, .CopySrc }));
    // Do not keep the buffer allocated if recording the copy fails below.
    errdefer buf.deinit();

    const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device.device, null) orelse return error.Encoder;
    defer c.wgpuCommandEncoderRelease(enc);

    const src_copy = c.WGPUTexelCopyTextureInfo{
        .texture = self.raw,
        .mipLevel = 0,
        .origin = .{ .x = 0, .y = 0, .z = 0 },
        .aspect = c.WGPUTextureAspect_All,
    };
    const dst_copy = c.WGPUTexelCopyBufferInfo{
        .buffer = buf.raw,
        .layout = .{
            .offset = 0,
            .bytesPerRow = row_bytes,
            .rowsPerImage = extent.height,
        },
    };
    c.wgpuCommandEncoderCopyTextureToBuffer(enc, &src_copy, &dst_copy, &extent);

    // Finishing can yield a null command buffer; never release or submit one.
    const cmd: c.WGPUCommandBuffer = c.wgpuCommandEncoderFinish(enc, null) orelse return error.CommandBuffer;
    defer c.wgpuCommandBufferRelease(cmd);
    c.wgpuQueueSubmit(gloc.device.queue, 1, &cmd);

    return buf;
}
pub fn mapAsync(
self: @This(),
mode: c.WGPUMapMode,
@ -59,12 +90,11 @@ pub fn mapAsync(
_ = c.wgpuBufferMapAsync(self.raw, mode, offset, size, callback_info);
}
/// Calls `wgpuBufferUnmap` on `raw`.
/// NOTE(review): `raw` is declared as `c.WGPUTexture` in this file, yet it is
/// passed to a buffer API here — confirm this wrapper is still meant to exist.
pub fn unmap(self: @This()) void {
    const handle = self.raw;
    c.wgpuBufferUnmap(handle);
}
/// CPU to GPU.
/// CPU to GPU
pub fn load(
self: @This(),
T: type,
@ -91,6 +121,7 @@ pub fn load(
}
}
// GPU to CPU
pub fn read(self: @This(), alloc: std.mem.Allocator, T: type) ![]T {
const out = try alloc.alloc(T, @divExact(self.size, @sizeOf(T)));

32
src/GpuTextureView.zig Normal file
View File

@ -0,0 +1,32 @@
const std = @import("std");
const c = @import("utils.zig").c;
const GpuAllocator = @import("GpuAllocator.zig");
const GpuTexture = @import("lib.zig").GpuTexture;
const GpuTextureFormat = @import("lib.zig").GpuTextureFormat;
const GpuTextureUsage = @import("lib.zig").GpuTextureUsage;
/// Settings accepted by `init` when creating a texture view; every field has
/// a default, so `.{}` is a valid value.
pub const GpuViewDef = struct {
/// Usage flags OR-ed into the view descriptor's `usage`; empty by default.
usage: std.EnumSet(GpuTextureUsage) = .empty,
/// Format written to the view descriptor's `format`; `.Undefined` by default.
format: GpuTextureFormat = .Undefined,
};
raw: c.WGPUTextureView,
gloc: GpuAllocator,
/// Creates a view of `texture` through `gloc`, using the usage flags and
/// format from `def` and a mip level count and array layer count of 1.
/// Propagates any error from `gloc.allocTextureView`.
pub fn init(gloc: GpuAllocator, texture: GpuTexture, def: GpuViewDef) !@This() {
    // Fold every requested usage flag into a single bitmask.
    var usage_bits: u64 = 0;
    var flags = def.usage.iterator();
    while (flags.next()) |flag| {
        usage_bits |= @intFromEnum(flag);
    }

    const desc: c.WGPUTextureViewDescriptor = .{
        .format = @intFromEnum(def.format),
        .usage = usage_bits,
        .mipLevelCount = 1,
        .arrayLayerCount = 1,
    };
    const handle = try gloc.allocTextureView(texture.raw, desc);
    return .{ .raw = handle, .gloc = gloc };
}
/// Gives the underlying view handle back to the GpuAllocator stored in `gloc`.
pub fn deinit(self: @This()) void {
    const owner = self.gloc;
    owner.freeTextureView(self.raw);
}

View File

@ -1,112 +0,0 @@
const std = @import("std");
const gpu = @import("lib.zig");
const c = @import("utils.zig").c;
const sv = @import("utils.zig").sv;
const GpuDevice = gpu.GpuDevice;
const GpuArena = gpu.GpuArena;
const GpuBuffer = gpu.GpuBuffer;
const GpuRender = gpu.GpuRender;
const GpuTexture = gpu.GpuTexture;
/// Renders the `circle.wgsl` shader into a 512x512 RGBA8 offscreen texture,
/// copies the texture into a GpuBuffer with a hand-recorded command encoder,
/// reads the pixels back and writes them to `circle.ppm`.
pub fn main(init: std.process.Init) !void {
const allocator = init.gpa;
// 1. Open the GPU device with default options
const device = try GpuDevice.init(.{});
defer device.deinit();
// Arena tracking the GPU resources allocated through `gloc`.
var grena = GpuArena.init(allocator, device);
defer grena.deinit();
const gloc = grena.gpuAllocator();
const width: u32 = 512;
const height: u32 = 512;
// 2. Load the render pipeline (triangle-strip topology, RGBA8Unorm target)
const circle_rp = try GpuRender.init(
device,
@embedFile("shaders/circle.wgsl"),
.{
.bindings = &.{},
.texture_format = .RGBA8Unorm,
.topology = .TriangleStrip,
},
);
defer circle_rp.deinit();
// 3. Create the offscreen VRAM texture to render into
const texture = try GpuTexture.init(
gloc,
.RGBA8Unorm,
.{ .width = width, .height = height, .depthOrArrayLayers = 1 },
.initMany(&.{ .RenderAttachment, .CopySrc }),
);
defer texture.deinit();
// The view is created directly through the C API (null descriptor) and released manually.
const target_view = c.wgpuTextureCreateView(texture.raw, null) orelse return error.View;
defer c.wgpuTextureViewRelease(target_view);
// 4. Create a staging buffer to pull pixels from VRAM to CPU
// 4 bytes per pixel (RGBA8)
const row_bytes = width * 4;
const buffer_bytes = row_bytes * height;
// Create a regular GpuBuffer set up to receive texture copy transfers
// (no explicit deinit here; it is allocated through `gloc`)
const cpu_staging_buf = try GpuBuffer.init(gloc, buffer_bytes, .initMany(&.{ .CopyDst, .CopySrc }));
// 5. Draw 4 vertices into the texture view
try circle_rp.draw(gloc, target_view, 4, .{});
// 6. Copy the texture data into our CPU staging buffer
const enc = c.wgpuDeviceCreateCommandEncoder(device.device, null) orelse return error.Encoder;
defer c.wgpuCommandEncoderRelease(enc);
// Source: mip level 0 of the texture, starting at the origin.
const src_copy = c.WGPUTexelCopyTextureInfo{
.texture = texture.raw,
.mipLevel = 0,
.origin = .{ .x = 0, .y = 0, .z = 0 },
.aspect = c.WGPUTextureAspect_All,
};
// Destination: the staging buffer, rows `row_bytes` apart.
const dst_copy = c.WGPUTexelCopyBufferInfo{
.buffer = cpu_staging_buf.raw,
.layout = .{
.offset = 0,
.bytesPerRow = row_bytes,
.rowsPerImage = height,
},
};
const copy_size = c.WGPUExtent3D{ .width = width, .height = height, .depthOrArrayLayers = 1 };
c.wgpuCommandEncoderCopyTextureToBuffer(enc, &src_copy, &dst_copy, &copy_size);
// NOTE(review): the result of `wgpuCommandEncoderFinish` is used without a null check.
const cmd = c.wgpuCommandEncoderFinish(enc, null);
defer c.wgpuCommandBufferRelease(cmd);
c.wgpuQueueSubmit(device.queue, 1, &cmd);
// 7. Read the raw image bytes back to the CPU through `GpuBuffer.read`
const pixels = try cpu_staging_buf.read(allocator, u8);
defer allocator.free(pixels);
// Write the raw pixels out as a Netpbm PPM image file so the rendered
// circle can be opened and inspected.
try savePpm(init.io, "circle.ppm", width, height, pixels);
std.debug.print("Successfully rendered circle to circle.ppm!\n", .{});
}
/// Writes `rgba_pixels` to `filename` (relative to the current working
/// directory) as a binary PPM ("P6") image.
///
/// `rgba_pixels` is read in steps of 4 bytes; the first three bytes of each
/// step are written and the fourth is dropped. `w` and `h` are only used for
/// the header and are not checked against the number of pixels.
///
/// Returns any error from creating the file or from writing to it.
fn savePpm(io: std.Io, filename: []const u8, w: u32, h: u32, rgba_pixels: []const u8) !void {
    const file = try std.Io.Dir.cwd().createFile(io, filename, .{});
    defer file.close(io);

    var buf: [255]u8 = undefined;
    var writer = file.writer(io, &buf);
    // Use the interface through a pointer; it must stay inside `writer`.
    const out = &writer.interface;

    // PPM header: "P6" selects the raw (binary) RGB encoding.
    try out.print("P6\n{d} {d}\n255\n", .{ w, h });

    // Strip the alpha channel. Bounding on `i + 3` keeps the slice in range
    // even when the input length is not a multiple of 4.
    var i: usize = 0;
    while (i + 3 <= rgba_pixels.len) : (i += 4) {
        try out.writeAll(rgba_pixels[i .. i + 3]);
    }

    // Output is buffered in `buf`; without an explicit flush the last bytes
    // would never reach the file.
    try out.flush();
}

View File

@ -5,6 +5,7 @@ pub const GpuDevice = @import("GpuDevice.zig");
pub const GpuCompute = @import("GpuCompute.zig");
pub const GpuRender = @import("GpuRender.zig");
pub const GpuTexture = @import("GpuTexture.zig");
pub const GpuTextureView = @import("GpuTextureView.zig");
pub const GpuTextureFormat = enum(c_uint) {
Undefined = 0,
@ -174,3 +175,13 @@ pub const GpuTextureFormat = enum(c_uint) {
};
}
};
/// Texture usage flags. Every tag other than `None` has exactly one bit set,
/// so the integer values of several tags can be OR-ed into one mask.
pub const GpuTextureUsage = enum(u64) {
    None = 0,
    CopySrc = 1 << 0,
    CopyDst = 1 << 1,
    TextureBinding = 1 << 2,
    StorageBinding = 1 << 3,
    RenderAttachment = 1 << 4,
    TransientAttachment = 1 << 5,
};

View File

@ -1,24 +0,0 @@
enable f16;
@group(0) @binding(0) var<storage, read> A: array<f16>;
@group(0) @binding(1) var<storage, read> B: array<f16>;
@group(0) @binding(2) var<storage, read_write> C: array<f16>;
@group(0) @binding(3) var<uniform> size: u32;
// Element-wise add: C[i] = A[i] + B[i] for every i below `size`.
// Each invocation starts at its global x index and advances by the total
// number of invocations along x (workgroup count times the workgroup size of
// 256), so any dispatch size covers all `size` elements.
@compute @workgroup_size(256)
fn main(
    @builtin(global_invocation_id) global_id : vec3<u32>,
    @builtin(num_workgroups) num_workgroups: vec3<u32>
) {
    // 256u must match the @workgroup_size attribute above.
    let stride = num_workgroups.x * 256u;
    for (var i = global_id.x; i < size; i += stride) {
        C[i] = A[i] + B[i];
    }
}