Merge branch 'render'

2026-05-20 13:47:43 +02:00 · 2026-05-20 13:47:43 +02:00 · 17977cc718
commit 17977cc718
parent 83ef8bcd12 5104d61ef6
15 changed files with 983 additions and 176 deletions
--- a/examples/bench_cp.zig
+++ b/examples/bench_cp.zig
@ -1,7 +1,7 @@
 const std = @import("std");
 const gpu = @import("gpu");
 const GpuDevice = gpu.GpuDevice;
-const GpuArena = gpu.GpuArena;
+const GpuArenaAllocator = gpu.GpuArenaAllocator;
 const GpuAllocator = gpu.GpuAllocator;
 const GpuBuffer = gpu.GpuBuffer;
 const GpuCompute = gpu.GpuCompute;
@ -60,12 +60,11 @@ pub fn main(init: std.process.Init) !void {
    const device = try GpuDevice.init(.{ .vram_bytes_limit = 4 * 1024 * 1024 * 1024 });
    defer device.deinit();

-    var grena = GpuArena.init(init.gpa, device);
+    var grena = GpuArenaAllocator.init(init.gpa, device.gpuAllocator());
    defer grena.deinit();
-
    const gloc = grena.gpuAllocator();

-    const add_pip = try GpuCompute.init(device, @embedFile("shaders/add.wgsl"), .{ .bindings = &.{
+    const add_pip = try GpuCompute.init(gloc, @embedFile("shaders/add.wgsl"), .{ .bindings = &.{
        .{ .element_size = @sizeOf(f16) },
        .{ .element_size = @sizeOf(f16) },
        .{ .element_size = @sizeOf(f16) },
--- a/examples/circle.zig
+++ b/examples/circle.zig
@ -0,0 +1,74 @@
+const std = @import("std");
+const gpu = @import("gpu");
+const GpuDevice = gpu.GpuDevice;
+const GpuArenaAllocator = gpu.GpuArenaAllocator;
+const GpuBuffer = gpu.GpuBuffer;
+const GpuRender = gpu.GpuRender;
+const GpuTexture = gpu.GpuTexture;
+const GpuTextureView = gpu.GpuTextureView;
+
+const width: u32 = 512;
+const height: u32 = 512;
+
+/// Renders a circle off-screen and saves it as `circle.ppm`.
+///
+/// Draws 4 vertices as a triangle strip into a 512x512 RGBA8Unorm texture, copies the
+/// texture into a GpuBuffer, reads that buffer back into CPU memory and writes the
+/// pixels out with `savePpm`.
+pub fn main(init: std.process.Init) !void {
+    const allocator = init.gpa;
+
+    // 1. Open GPU Device
+    const device = try GpuDevice.init(.{});
+    defer device.deinit();
+
+    // 2. Get the device's GPU allocator directly. No arena is used in this example,
+    //    so every GPU resource created below is paired with its own `defer x.deinit()`.
+    const gloc = device.gpuAllocator();
+
+    // 3. Load Render Pipeline
+    const circle_rp = try GpuRender.init(
+        gloc,
+        @embedFile("shaders/circle.wgsl"),
+        .{ .bindings = &.{}, .texture_format = .RGBA8Unorm, .topology = .TriangleStrip },
+    );
+    defer circle_rp.deinit();
+
+    // 4. Create VRAM texture to render into
+    //    (RenderAttachment so it can be drawn to, CopySrc so it can be copied out afterwards)
+    const texture = try GpuTexture.init(gloc, .{
+        .format = .RGBA8Unorm,
+        .size = .{ .width = width, .height = height, .depthOrArrayLayers = 1 },
+        .usage = .initMany(&.{ .RenderAttachment, .CopySrc }),
+    });
+    defer texture.deinit();
+
+    // 5. Create a view from texture
+    const view = try GpuTextureView.init(gloc, texture, .{});
+    defer view.deinit();
+
+    // 6. Run the rendering pipeline: 4 vertices, no buffer bindings
+    try circle_rp.draw(gloc, view, 4, .{});
+
+    // 7. Copy the texture into a GpuBuffer
+    const cpu_staging_cpu = try texture.buffCopy(gloc);
+    defer cpu_staging_cpu.deinit();
+
+    // 8. Read GpuBuffer to CPU
+    // `pixels` is CPU memory allocated with `allocator`, so it must be freed manually.
+    const pixels = try cpu_staging_cpu.read(allocator, u8);
+    defer allocator.free(pixels);
+
+    // 9. Write a simple ppm image
+    try savePpm(init.io, "circle.ppm", width, height, pixels);
+}
+
+/// Writes `rgba_pixels` to `filename` (relative to the current directory) as a binary
+/// PPM (P6) image of `w` x `h` pixels.
+///
+/// `rgba_pixels` is read as consecutive 4-byte RGBA pixels; only the first three bytes
+/// of each pixel are written, the alpha byte is dropped.
+/// Returns any error raised while creating or writing the file.
+fn savePpm(io: std.Io, filename: []const u8, w: u32, h: u32, rgba_pixels: []const u8) !void {
+    const file = try std.Io.Dir.cwd().createFile(io, filename, .{});
+    defer file.close(io);
+
+    var buf: [255]u8 = undefined;
+    var writer = file.writer(io, &buf);
+    const out = &writer.interface;
+
+    // PPM Header: P6 format means raw RGB bytes
+    try out.print("P6\n{d} {d}\n255\n", .{ w, h });
+
+    // Strip Alpha channel when writing out to standard RGB PPM format.
+    // The loop condition guarantees `i + 3` never exceeds the slice length, so a
+    // truncated trailing pixel cannot cause an out-of-bounds slice.
+    var i: usize = 0;
+    while (i + 3 <= rgba_pixels.len) : (i += 4) {
+        try out.writeAll(rgba_pixels[i .. i + 3]);
+    }
+
+    // The writer is buffered: without this flush the last bytes still sitting in
+    // `buf` would never reach the file.
+    try out.flush();
+}
--- a/examples/compute.zig
+++ b/examples/compute.zig
@ -1,7 +1,7 @@
 const std = @import("std");
 const gpu = @import("gpu");
 const GpuDevice = gpu.GpuDevice;
-const GpuArena = gpu.GpuArena;
+const GpuArenaAllocator = gpu.GpuArenaAllocator;
 const GpuBuffer = gpu.GpuBuffer;
 const GpuCompute = gpu.GpuCompute;

@ -13,13 +13,13 @@ pub fn main(init: std.process.Init) !void {
    defer device.deinit();

    // 2. Create a GPU Arena to manage VRAM
-    var grena = GpuArena.init(allocator, device);
+    var grena = GpuArenaAllocator.init(allocator, device.gpuAllocator());
    defer grena.deinit();
    const gloc = grena.gpuAllocator();

    // 3. Load the WGSL compute pipeline
    const add_cp = try GpuCompute.init(
-        device,
+        gloc,
        @embedFile("shaders/add.wgsl"),
        .{ .bindings = &.{
            .{ .element_size = @sizeOf(f16) },
@ -27,7 +27,6 @@ pub fn main(init: std.process.Init) !void {
            .{ .element_size = @sizeOf(f16) },
        } },
    );
-    defer add_cp.deinit();

    // 4. Setup CPU data
    const len: usize = 16;
@ -47,8 +46,9 @@ pub fn main(init: std.process.Init) !void {
    const buf_b = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc }));
    const buf_out = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc }));

-    // Note: The buffers are safely tied to the GpuArena which will automatically
+    // Note: Buffers are safely tied to the GpuArenaAllocator which will automatically
    // release them at the end. You can also manually call buf_x.deinit() if desired.
+    // This also releases pipelines, textures, etc.: everything that was initialized through this GpuAllocator.

    // 6. Transfer data from CPU slices to GPU Buffers
    try buf_a.load(f16, data_a);
--- a/examples/digit.zig
+++ b/examples/digit.zig
@ -1,77 +0,0 @@
-// I am using this mnist reduced dataset https://www.kaggle.com/datasets/mohamedgamal07/reduced-mnist
-
-const std = @import("std");
-const gpu = @import("gpu");
-const GpuDevice = gpu.GpuDevice;
-const GpuArena = gpu.GpuArena;
-const GpuBuffer = gpu.GpuBuffer;
-const GpuProcess = gpu.GpuProcess;
-
-const BATCHSIZE = 10;
-const EPOCH = 10;
-
-pub fn main(init: std.process.Init) !void {
-    const allocator = init.gpa;
-    const io = init.io;
-
-    // 1. Open GPU Device
-    const device = try GpuDevice.init(.{});
-    defer device.deinit();
-
-    // 2. Create a GPU Arena to manage VRAM
-    var grena = GpuArena.init(allocator, device);
-    defer grena.deinit();
-    const gloc = grena.gpuAllocator();
-
-    // 3. Load the WGSL compute pipeline
-    const add_process = try GpuProcess.init(device, @embedFile("shaders/add.wgsl"));
-    defer add_process.deinit();
-
-    var train_dir = try std.Io.Dir.cwd().openDir(io, "mnist/train", .{});
-
-    var images: [BATCHSIZE * 28 * 28]f16 = undefined;
-    for (EPOCH) |epoch| {
-        // Load random images from train dir
-        train_dir.openDir(io, "0", .{});
-        for (BATCHSIZE) |i| {
-            const file = try train_dir.openFile(io, "0.jpg", .{});
-            images[28 * 28 * i .. 28 * 28 * (i + 1)] = file.read
-        }
-    }
-
-    // 4. Setup CPU data
-    const len: usize = 16;
-    const data_a = try allocator.alloc(f16, len);
-    defer allocator.free(data_a);
-    const data_b = try allocator.alloc(f16, len);
-    defer allocator.free(data_b);
-
-    for (0..len) |i| {
-        data_a[i] = @floatFromInt(i);
-        data_b[i] = @floatFromInt(len - 1 - i);
-    }
-
-    // 5. Initialize raw GPU Buffers
-    // We pass the EnumSet inline using `.initMany` since the Enum itself isn't exported
-    const byte_size = len * @sizeOf(f16);
-    const buf_a = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc }));
-    const buf_b = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc }));
-    const buf_out = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc }));
-
-    // Note: The buffers are safely tied to the GpuArena which will automatically
-    // release them at the end. You can also manually call buf_x.deinit() if desired.
-
-    // 6. Transfer data from CPU slices to GPU Buffers
-    try buf_a.load(f16, data_a);
-    try buf_b.load(f16, data_b);
-
-    // 7. Dispatch the Compute Process
-    // We pass the data type (f16) to allow GpuProcess to calculate chunks correctly
-    try add_process.run(gloc, f16, buf_a, buf_b, buf_out);
-
-    // 8. Map and copy the resulting buffer back to the CPU
-    const out = try buf_out.read(allocator, f16);
-    defer allocator.free(out);
-
-    std.debug.print("Result: {any}\n", .{out});
-}
--- a/examples/shaders/circle.wgsl
+++ b/examples/shaders/circle.wgsl
@ -0,0 +1,39 @@
+struct VertexOutput {
+    @builtin(position) position: vec4f,
+    @location(0) uv: vec2f,
+};
+
+// Vertex stage: emits one corner of a fullscreen quad, selected by `vertex_index` (0..3).
+// The clip-space corner position is also forwarded as `uv` for the fragment stage.
+@vertex
+fn vs_main(@builtin(vertex_index) vertex_index: u32) -> VertexOutput {
+    var output: VertexOutput;
+    // Hardcoded fullscreen quad layout using 4 vertices (Triangle Strip)
+    // Indexes: 0: Top-Left, 1: Bottom-Left, 2: Top-Right, 3: Bottom-Right
+    var pos = array<vec2f, 4>(
+        vec2f(-1.0,  1.0),
+        vec2f(-1.0, -1.0),
+        vec2f( 1.0,  1.0),
+        vec2f( 1.0, -1.0)
+    );
+    
+    output.position = vec4f(pos[vertex_index], 0.0, 1.0);
+    output.uv = pos[vertex_index]; // Ranges cleanly from -1.0 to 1.0
+    return output;
+}
+
+// Fragment stage: shades a disc of radius 0.5 (in the -1..1 `uv` space) centred on the origin.
+// Fragments entirely outside the soft edge are discarded; elsewhere the output alpha
+// carries the smoothstep edge coverage.
+@fragment
+fn fs_main(input: VertexOutput) -> @location(0) vec4f {
+    // Distance from the center (0,0)
+    let distance = length(input.uv);
+    let radius = 0.5;
+    
+    // Smooth out pixel edges (anti-aliasing)
+    let edge_softness = 0.005;
+    let alpha = 1.0 - smoothstep(radius - edge_softness, radius + edge_softness, distance);
+    
+    // Nothing to draw outside the circle: skip the fragment entirely
+    if (alpha <= 0.0) {
+        discard; 
+    }
+    
+    // Light red colour; alpha fades from 1 to 0 across the soft edge
+    return vec4f(1.0, 0.3, 0.3, alpha);
+}
--- a/src/GpuAllocator.zig
+++ b/src/GpuAllocator.zig
@ -2,18 +2,58 @@ const GpuDevice = @import("GpuDevice.zig");
 const c = @import("utils.zig").c;

 pub const VTable = struct {
-    alloc: *const fn (ctx: *anyopaque, bytes: u64, usage: c.WGPUBufferUsage) anyerror!c.WGPUBuffer,
-    free: *const fn (ctx: *anyopaque, buf_raw: c.WGPUBuffer, size: u64) void,
+    allocBuffer: *const fn (ctx: *anyopaque, desc: c.WGPUBufferDescriptor) anyerror!c.WGPUBuffer,
+    freeBuffer: *const fn (ctx: *anyopaque, buf_raw: c.WGPUBuffer) void,
+    allocTexture: *const fn (ctx: *anyopaque, desc: c.WGPUTextureDescriptor) anyerror!c.WGPUTexture,
+    freeTexture: *const fn (ctx: *anyopaque, buf_raw: c.WGPUTexture) void,
+    allocTextureView: *const fn (ctx: *anyopaque, texture: c.WGPUTexture, desc: c.WGPUTextureViewDescriptor) anyerror!c.WGPUTextureView,
+    freeTextureView: *const fn (ctx: *anyopaque, buf_raw: c.WGPUTextureView) void,
+    allocRenderPipeline: *const fn (ctx: *anyopaque, desc: c.WGPURenderPipelineDescriptor) anyerror!c.WGPURenderPipeline,
+    freeRenderPipeline: *const fn (ctx: *anyopaque, buf_raw: c.WGPURenderPipeline) void,
+    allocComputePipeline: *const fn (ctx: *anyopaque, desc: c.WGPUComputePipelineDescriptor) anyerror!c.WGPUComputePipeline,
+    freeComputePipeline: *const fn (ctx: *anyopaque, buf_raw: c.WGPUComputePipeline) void,
 };

 device: GpuDevice,
 ptr: *anyopaque,
 vtable: *const VTable,

-pub fn allocBuffer(self: @This(), bytes: u64, usage: c.WGPUBufferUsage) !c.WGPUBuffer {
-    return self.vtable.alloc(self.ptr, bytes, usage);
+pub fn allocBuffer(self: @This(), desc: c.WGPUBufferDescriptor) !c.WGPUBuffer {
+    return self.vtable.allocBuffer(self.ptr, desc);
 }

-pub fn freeBuffer(self: @This(), buf_raw: c.WGPUBuffer, size: u64) void {
-    self.vtable.free(self.ptr, buf_raw, size);
+pub fn freeBuffer(self: @This(), raw: c.WGPUBuffer) void {
+    self.vtable.freeBuffer(self.ptr, raw);
+}
+
+/// Creates a texture described by `desc` by forwarding to the implementation's
+/// `vtable.allocTexture`. Errors are whatever the implementation returns.
+pub fn allocTexture(self: @This(), desc: c.WGPUTextureDescriptor) !c.WGPUTexture {
+    return self.vtable.allocTexture(self.ptr, desc);
+}
+
+/// Hands the texture `raw` back to the implementation via `vtable.freeTexture`.
+pub fn freeTexture(self: @This(), raw: c.WGPUTexture) void {
+    self.vtable.freeTexture(self.ptr, raw);
+}
+
+/// Creates a view of `texture` described by `desc` by forwarding to the implementation's
+/// `vtable.allocTextureView`. Errors are whatever the implementation returns.
+pub fn allocTextureView(self: @This(), texture: c.WGPUTexture, desc: c.WGPUTextureViewDescriptor) !c.WGPUTextureView {
+    return self.vtable.allocTextureView(self.ptr, texture, desc);
+}
+
+/// Hands the texture view `raw` back to the implementation via `vtable.freeTextureView`.
+pub fn freeTextureView(self: @This(), raw: c.WGPUTextureView) void {
+    self.vtable.freeTextureView(self.ptr, raw);
+}
+
+/// Creates a render pipeline described by `desc` by forwarding to the implementation's
+/// `vtable.allocRenderPipeline`. Errors are whatever the implementation returns.
+pub fn allocRenderPipeline(self: @This(), desc: c.WGPURenderPipelineDescriptor) !c.WGPURenderPipeline {
+    return self.vtable.allocRenderPipeline(self.ptr, desc);
+}
+
+/// Hands the render pipeline `raw` back to the implementation via `vtable.freeRenderPipeline`.
+pub fn freeRenderPipeline(self: @This(), raw: c.WGPURenderPipeline) void {
+    self.vtable.freeRenderPipeline(self.ptr, raw);
+}
+
+/// Creates a compute pipeline described by `desc` by forwarding to the implementation's
+/// `vtable.allocComputePipeline`. Errors are whatever the implementation returns.
+pub fn allocComputePipeline(self: @This(), desc: c.WGPUComputePipelineDescriptor) !c.WGPUComputePipeline {
+    return self.vtable.allocComputePipeline(self.ptr, desc);
+}
+
+/// Hands the compute pipeline `raw` back to the implementation via `vtable.freeComputePipeline`.
+pub fn freeComputePipeline(self: @This(), raw: c.WGPUComputePipeline) void {
+    self.vtable.freeComputePipeline(self.ptr, raw);
 }
--- a/src/GpuArena.zig
+++ b/src/GpuArena.zig
@ -1,69 +0,0 @@
-const std = @import("std");
-const GpuDevice = @import("GpuDevice.zig");
-const GpuAllocator = @import("GpuAllocator.zig");
-const c = @import("utils.zig").c;
-
-device: GpuDevice,
-tracked_buffers: std.AutoHashMap(c.WGPUBuffer, void),
-allocated_vram_bytes: u64 = 0,
-
-pub fn init(cpu_allocator: std.mem.Allocator, device: GpuDevice) @This() {
-    return .{
-        .device = device,
-        .tracked_buffers = .init(cpu_allocator),
-    };
-}
-
-pub fn deinit(self: *@This()) void {
-    var it = self.tracked_buffers.keyIterator();
-    while (it.next()) |buf_ptr| {
-        c.wgpuBufferDestroy(buf_ptr.*);
-        c.wgpuBufferRelease(buf_ptr.*);
-    }
-    self.tracked_buffers.deinit();
-}
-
-/// Returns the type-erased immutable interface wrapper
-pub fn gpuAllocator(self: *@This()) GpuAllocator {
-    return .{
-        .device = self.device,
-        .ptr = self,
-        .vtable = &.{
-            .alloc = alloc,
-            .free = free,
-        },
-    };
-}
-
-fn alloc(ctx: *anyopaque, bytes: u64, usage: c.WGPUBufferUsage) anyerror!c.WGPUBuffer {
-    const self: *@This() = @ptrCast(@alignCast(ctx));
-
-    if (bytes > self.device.limits.maxBufferSize)
-        return error.SingleBufferExceedsLimit;
-
-    if (bytes + self.allocated_vram_bytes > self.device.config.vram_bytes_limit)
-        return error.ExceedsVramBudget;
-
-    const buf = c.wgpuDeviceCreateBuffer(self.device.device, &.{
-        .usage = usage,
-        .size = bytes,
-    }) orelse return error.BufferAlloc;
-    errdefer {
-        c.wgpuBufferDestroy(buf);
-        c.wgpuBufferRelease(buf);
-    }
-
-    try self.tracked_buffers.put(buf, {});
-    self.allocated_vram_bytes += bytes;
-    return buf;
-}
-
-fn free(ctx: *anyopaque, buf_raw: c.WGPUBuffer, size: u64) void {
-    const self: *@This() = @ptrCast(@alignCast(ctx));
-
-    if (self.tracked_buffers.remove(buf_raw)) {
-        c.wgpuBufferDestroy(buf_raw);
-        c.wgpuBufferRelease(buf_raw);
-        self.allocated_vram_bytes -= size;
-    }
-}
--- a/src/GpuArenaAllocator.zig
+++ b/src/GpuArenaAllocator.zig
@ -0,0 +1,163 @@
+const std = @import("std");
+const GpuDevice = @import("GpuDevice.zig");
+const GpuAllocator = @import("GpuAllocator.zig");
+const GpuTextureFormat = @import("lib.zig").GpuTextureFormat;
+const c = @import("utils.zig").c;
+
+child_allocator: GpuAllocator, // Named after Zig's `child_allocator` convention: the backing allocator that every allocation and free of this arena is forwarded to.
+tracked_buffers: std.AutoHashMap(c.WGPUBuffer, c.WGPUBufferDescriptor),
+tracked_textures: std.AutoHashMap(c.WGPUTexture, c.WGPUTextureDescriptor),
+tracked_views: std.AutoHashMap(c.WGPUTextureView, c.WGPUTextureViewDescriptor),
+tracked_renders: std.AutoHashMap(c.WGPURenderPipeline, c.WGPURenderPipelineDescriptor),
+tracked_computes: std.AutoHashMap(c.WGPUComputePipeline, c.WGPUComputePipelineDescriptor),
+allocated_vram_bytes: u64 = 0,
+
+/// Builds an empty arena on top of `child_allocator`.
+///
+/// `cpu_allocator` backs the hash maps that remember which GPU handles were created
+/// through this arena; no GPU resource is created here.
+pub fn init(cpu_allocator: std.mem.Allocator, child_allocator: GpuAllocator) @This() {
+    const Self = @This();
+    return Self{
+        .child_allocator = child_allocator,
+        .tracked_buffers = .init(cpu_allocator),
+        .tracked_textures = .init(cpu_allocator),
+        .tracked_views = .init(cpu_allocator),
+        .tracked_renders = .init(cpu_allocator),
+        .tracked_computes = .init(cpu_allocator),
+        .allocated_vram_bytes = 0,
+    };
+}
+
+/// Frees every GPU resource still tracked by the arena through the child allocator,
+/// then releases the CPU-side tracking maps.
+///
+/// Resources are released per kind, in this order: buffers, textures, texture views,
+/// render pipelines, compute pipelines.
+pub fn deinit(self: *@This()) void {
+    const child = self.child_allocator;
+
+    var buffers = self.tracked_buffers.keyIterator();
+    while (buffers.next()) |handle| {
+        child.freeBuffer(handle.*);
+    }
+    self.tracked_buffers.deinit();
+
+    var textures = self.tracked_textures.keyIterator();
+    while (textures.next()) |handle| {
+        child.freeTexture(handle.*);
+    }
+    self.tracked_textures.deinit();
+
+    var views = self.tracked_views.keyIterator();
+    while (views.next()) |handle| {
+        child.freeTextureView(handle.*);
+    }
+    self.tracked_views.deinit();
+
+    var renders = self.tracked_renders.keyIterator();
+    while (renders.next()) |handle| {
+        child.freeRenderPipeline(handle.*);
+    }
+    self.tracked_renders.deinit();
+
+    var computes = self.tracked_computes.keyIterator();
+    while (computes.next()) |handle| {
+        child.freeComputePipeline(handle.*);
+    }
+    self.tracked_computes.deinit();
+}
+
+/// Returns the type-erased `GpuAllocator` interface backed by this arena.
+///
+/// The returned value stores `self` as its context pointer, so the arena must stay
+/// alive, at the same address, for as long as the interface is used. Its `device`
+/// field is taken from the child allocator.
+pub fn gpuAllocator(self: *@This()) GpuAllocator {
+    return .{
+        .device = self.child_allocator.device,
+        .ptr = self,
+        .vtable = &.{
+            .allocBuffer = allocBuffer,
+            .freeBuffer = freeBuffer,
+            .allocTexture = allocTexture,
+            .freeTexture = freeTexture,
+            .allocTextureView = allocTextureView,
+            .freeTextureView = freeTextureView,
+            .allocRenderPipeline = allocRenderPipeline,
+            .freeRenderPipeline = freeRenderPipeline,
+            .allocComputePipeline = allocComputePipeline,
+            .freeComputePipeline = freeComputePipeline,
+        },
+    };
+}
+
+// NOTE: Every alloc* function reserves map capacity with ensureTotalCapacity *before* creating the
+// GPU resource. The following putAssumeCapacity therefore cannot fail, and no errdefer is needed to
+// release a resource that was just allocated in VRAM.
+
+/// Allocates a buffer through the child allocator and tracks it in the arena.
+///
+/// Returns `error.ExceedsVramBudget` when `desc.size` added to the bytes already
+/// accounted for would exceed the device's `vram_bytes_limit` (the same budget rule
+/// `allocTexture` applies), an allocation error if the tracking map cannot grow, or
+/// whatever error the child allocator returns.
+fn allocBuffer(ctx: *anyopaque, desc: c.WGPUBufferDescriptor) anyerror!c.WGPUBuffer {
+    const self: *@This() = @ptrCast(@alignCast(ctx));
+    // Reserve the map slot first so tracking cannot fail once the buffer exists.
+    try self.tracked_buffers.ensureTotalCapacity(self.tracked_buffers.count() + 1);
+
+    // Saturating add: an absurdly large request must fail the budget check, not wrap.
+    if (desc.size +| self.allocated_vram_bytes > self.child_allocator.device.config.vram_bytes_limit)
+        return error.ExceedsVramBudget;
+
+    const raw = try self.child_allocator.allocBuffer(desc);
+    self.tracked_buffers.putAssumeCapacity(raw, desc);
+    self.allocated_vram_bytes += desc.size;
+    return raw;
+}
+
+/// Releases `raw` through the child allocator and subtracts its recorded size from the
+/// arena's byte counter. Handles that are not tracked by this arena are ignored.
+fn freeBuffer(ctx: *anyopaque, raw: c.WGPUBuffer) void {
+    const arena: *@This() = @ptrCast(@alignCast(ctx));
+    const entry = arena.tracked_buffers.fetchRemove(raw) orelse return;
+    arena.child_allocator.freeBuffer(raw);
+    arena.allocated_vram_bytes -= entry.value.size;
+}
+
+/// Byte size the arena accounts for a texture: width * height * bytes-per-pixel of
+/// its format. Computed in u64 (saturating) so large extents cannot overflow the
+/// 32-bit fields of the descriptor.
+fn textureByteSize(desc: c.WGPUTextureDescriptor) u64 {
+    const format: GpuTextureFormat = @enumFromInt(desc.format);
+    return (@as(u64, desc.size.width) * desc.size.height) *| format.bytesPerPixel();
+}
+
+/// Allocates a texture through the child allocator and tracks it in the arena.
+///
+/// Returns `error.ExceedsVramBudget` when the texture's accounted size added to the
+/// bytes already accounted for would exceed the device's `vram_bytes_limit`, an
+/// allocation error if the tracking map cannot grow, or whatever error the child
+/// allocator returns.
+fn allocTexture(ctx: *anyopaque, desc: c.WGPUTextureDescriptor) anyerror!c.WGPUTexture {
+    const self: *@This() = @ptrCast(@alignCast(ctx));
+    // Reserve the map slot first so tracking cannot fail once the texture exists.
+    try self.tracked_textures.ensureTotalCapacity(self.tracked_textures.count() + 1);
+
+    const bytes_size = textureByteSize(desc);
+
+    if (bytes_size +| self.allocated_vram_bytes > self.child_allocator.device.config.vram_bytes_limit)
+        return error.ExceedsVramBudget;
+
+    const raw = try self.child_allocator.allocTexture(desc);
+
+    self.tracked_textures.putAssumeCapacity(raw, desc);
+    self.allocated_vram_bytes += bytes_size;
+    return raw;
+}
+
+/// Releases `raw` through the child allocator and subtracts the size that was accounted
+/// for it at allocation time. Handles that are not tracked by this arena are ignored.
+fn freeTexture(ctx: *anyopaque, raw: c.WGPUTexture) void {
+    const self: *@This() = @ptrCast(@alignCast(ctx));
+
+    if (self.tracked_textures.fetchRemove(raw)) |kv| {
+        self.child_allocator.freeTexture(raw);
+        // Same formula as allocTexture, applied to the descriptor stored at allocation.
+        self.allocated_vram_bytes -= textureByteSize(kv.value);
+    }
+}
+
+/// Creates a view of `texture` through the child allocator and tracks it in the arena.
+/// Views are not counted in `allocated_vram_bytes`.
+fn allocTextureView(ctx: *anyopaque, texture: c.WGPUTexture, desc: c.WGPUTextureViewDescriptor) anyerror!c.WGPUTextureView {
+    const arena: *@This() = @ptrCast(@alignCast(ctx));
+    const wanted = arena.tracked_views.count() + 1;
+    try arena.tracked_views.ensureTotalCapacity(wanted);
+
+    const view = try arena.child_allocator.allocTextureView(texture, desc);
+    arena.tracked_views.putAssumeCapacity(view, desc);
+    return view;
+}
+
+/// Releases `raw` through the child allocator if, and only if, this arena tracks it.
+fn freeTextureView(ctx: *anyopaque, raw: c.WGPUTextureView) void {
+    const arena: *@This() = @ptrCast(@alignCast(ctx));
+    const was_tracked = arena.tracked_views.remove(raw);
+    if (!was_tracked) return;
+    arena.child_allocator.freeTextureView(raw);
+}
+
+/// Creates a render pipeline through the child allocator and tracks it in the arena.
+///
+/// The descriptor is stored by value next to the handle. It contains pointer fields
+/// (e.g. `fragment`) that refer to caller-owned memory, so the stored copy should not
+/// be dereferenced after the caller has returned.
+fn allocRenderPipeline(ctx: *anyopaque, desc: c.WGPURenderPipelineDescriptor) anyerror!c.WGPURenderPipeline {
+    const arena: *@This() = @ptrCast(@alignCast(ctx));
+    const wanted = arena.tracked_renders.count() + 1;
+    try arena.tracked_renders.ensureTotalCapacity(wanted);
+
+    const pipeline = try arena.child_allocator.allocRenderPipeline(desc);
+    arena.tracked_renders.putAssumeCapacity(pipeline, desc);
+    return pipeline;
+}
+
+/// Releases `raw` through the child allocator if, and only if, this arena tracks it.
+fn freeRenderPipeline(ctx: *anyopaque, raw: c.WGPURenderPipeline) void {
+    const arena: *@This() = @ptrCast(@alignCast(ctx));
+    const was_tracked = arena.tracked_renders.remove(raw);
+    if (!was_tracked) return;
+    arena.child_allocator.freeRenderPipeline(raw);
+}
+
+/// Creates a compute pipeline through the child allocator and tracks it in the arena.
+/// Pipelines are not counted in `allocated_vram_bytes`.
+fn allocComputePipeline(ctx: *anyopaque, desc: c.WGPUComputePipelineDescriptor) anyerror!c.WGPUComputePipeline {
+    const arena: *@This() = @ptrCast(@alignCast(ctx));
+    const wanted = arena.tracked_computes.count() + 1;
+    try arena.tracked_computes.ensureTotalCapacity(wanted);
+
+    const pipeline = try arena.child_allocator.allocComputePipeline(desc);
+    arena.tracked_computes.putAssumeCapacity(pipeline, desc);
+    return pipeline;
+}
+
+/// Releases `raw` through the child allocator if, and only if, this arena tracks it.
+fn freeComputePipeline(ctx: *anyopaque, raw: c.WGPUComputePipeline) void {
+    const arena: *@This() = @ptrCast(@alignCast(ctx));
+    const was_tracked = arena.tracked_computes.remove(raw);
+    if (!was_tracked) return;
+    arena.child_allocator.freeComputePipeline(raw);
+}
--- a/src/GpuBuffer.zig
+++ b/src/GpuBuffer.zig
@ -21,7 +21,6 @@ const BufferUsage = enum(u64) {
    QueryResolve = 0x0000000000000200,
 };

-/// Allocates the underlying WebGPU handle and registers it to the parent GpuAllocator
 pub fn init(gloc: GpuAllocator, size: u64, usage: std.EnumSet(BufferUsage)) !@This() {
    var use: u64 = 0;
    var iter = usage.iterator();
@ -30,7 +29,7 @@ pub fn init(gloc: GpuAllocator, size: u64, usage: std.EnumSet(BufferUsage)) !@Th
    // Automatically align the buffer size forward to a multiple of 4 bytes under the hood
    const aligned_size = std.mem.alignForward(u64, size, 4);

-    const raw_handle = try gloc.allocBuffer(aligned_size, use);
+    const raw_handle = try gloc.allocBuffer(.{ .size = aligned_size, .usage = use });
    return .{
        .raw = raw_handle,
        .size = aligned_size,
@ -39,17 +38,14 @@ pub fn init(gloc: GpuAllocator, size: u64, usage: std.EnumSet(BufferUsage)) !@Th
    };
 }

-/// Unregisters from the parent GpuAllocator and cleanly destroys GPU resources
 pub fn deinit(self: @This()) void {
-    self.gloc.freeBuffer(self.raw, self.size);
+    self.gloc.freeBuffer(self.raw);
 }

-/// Native getConstMappedRange wrapper
 pub fn getConstMappedRange(self: @This(), offset: u64, size: u64) ?*const anyopaque {
    return c.wgpuBufferGetConstMappedRange(self.raw, offset, size);
 }

-/// Native mapAsync wrapper
 pub fn mapAsync(
    self: @This(),
    mode: c.WGPUMapMode,
@ -60,12 +56,11 @@ pub fn mapAsync(
    _ = c.wgpuBufferMapAsync(self.raw, mode, offset, size, callback_info);
 }

-/// Native unmap wrapper
 pub fn unmap(self: @This()) void {
    c.wgpuBufferUnmap(self.raw);
 }

-/// CPU to GPU.
+/// CPU to GPU
 pub fn load(
    self: @This(),
    T: type,
@ -92,6 +87,7 @@ pub fn load(
    }
 }

+/// GPU to CPU
 pub fn read(self: @This(), alloc: std.mem.Allocator, T: type) ![]T {
    const out = try alloc.alloc(T, @divExact(self.size, @sizeOf(T)));

--- a/src/GpuCompute.zig
+++ b/src/GpuCompute.zig
@ -20,30 +20,30 @@ pub const ComputeDef = struct {
 };

 pip: c.WGPUComputePipeline,
+gloc: GpuAllocator,
 def: ComputeDef,

-pub fn init(device: GpuDevice, wgsl: []const u8, def: ComputeDef) !@This() {
+pub fn init(gloc: GpuAllocator, wgsl: []const u8, def: ComputeDef) !@This() {
    var wgsl_src = c.WGPUShaderSourceWGSL{
        .chain = .{ .sType = c.WGPUSType_ShaderSourceWGSL },
        .code = sv(wgsl),
    };
-    const shader = c.wgpuDeviceCreateShaderModule(device.device, &.{
+    const shader = c.wgpuDeviceCreateShaderModule(gloc.device.device, &.{
        .nextInChain = @ptrCast(&wgsl_src),
    }) orelse return error.Shader;
    defer c.wgpuShaderModuleRelease(shader);

-    const pip = c.wgpuDeviceCreateComputePipeline(device.device, &.{
-        .compute = .{ .module = shader, .entryPoint = sv("main") },
-    }) orelse return error.Pipeline;
+    const pip = try gloc.allocComputePipeline(.{ .compute = .{ .module = shader, .entryPoint = sv("main") } });

    return .{
+        .gloc = gloc,
        .pip = pip,
        .def = def,
    };
 }

 pub fn deinit(self: @This()) void {
-    c.wgpuComputePipelineRelease(self.pip);
+    self.gloc.freeComputePipeline(self.pip);
 }

 /// Execute the compute pass with arbitrary buffer bindings via a tuple.
--- a/src/GpuDevice.zig
+++ b/src/GpuDevice.zig
@ -1,6 +1,13 @@
 const std = @import("std");
 const c = @import("utils.zig").c;
 const sv = @import("utils.zig").sv;
+const GpuAllocator = @import("GpuAllocator.zig");
+const GpuTextureFormat = @import("lib.zig").GpuTextureFormat;
+
+// TODO: Make Allocator more zig like
+//  - GpuDevice can return a GpuAllocator that just allocate and nothing else
+//  - From this GpuAllocator, can create a GpuArena like std.heap.ArenaAllocator.init(allocator)
+//  - Rename GpuArenaAllocator too

 const Ctx = struct {
    adapter: c.WGPUAdapter = null,
@ -127,3 +134,75 @@ fn onDevice(
    const ctx: *Ctx = @ptrCast(@alignCast(userdata1.?));
    ctx.device = device;
 }
+
+// Allocation stuff
+
+/// Returns the type-erased `GpuAllocator` interface backed directly by this device.
+///
+/// This allocator only creates and releases WebGPU objects; it keeps no record of them.
+/// The returned value holds a copy of the device in `.device` and the address of `self`
+/// in `.ptr`, so `self` must outlive every use of the returned allocator.
+pub fn gpuAllocator(self: *const @This()) GpuAllocator {
+    return .{
+        .device = self.*,
+        // The vtable context is `*anyopaque`, hence the const cast; the alloc functions
+        // in this file only read through it.
+        .ptr = @ptrCast(@constCast(self)),
+        .vtable = &.{
+            .allocBuffer = allocBuffer,
+            .freeBuffer = freeBuffer,
+            .allocTexture = allocTexture,
+            .freeTexture = freeTexture,
+            .allocTextureView = allocTextureView,
+            .freeTextureView = freeTextureView,
+            .allocRenderPipeline = allocRenderPipeline,
+            .freeRenderPipeline = freeRenderPipeline,
+            .allocComputePipeline = allocComputePipeline,
+            .freeComputePipeline = freeComputePipeline,
+        },
+    };
+}
+
+/// Creates a WebGPU buffer on this device.
+///
+/// Returns `error.SingleBufferExceedsLimit` when `desc.size` is above the device's
+/// `maxBufferSize` limit and `error.BufferAlloc` when WebGPU returns no buffer.
+fn allocBuffer(ctx: *anyopaque, desc: c.WGPUBufferDescriptor) anyerror!c.WGPUBuffer {
+    const dev: *@This() = @ptrCast(@alignCast(ctx));
+    if (desc.size > dev.limits.maxBufferSize) {
+        return error.SingleBufferExceedsLimit;
+    }
+    const buffer = c.wgpuDeviceCreateBuffer(dev.device, &desc) orelse return error.BufferAlloc;
+    return buffer;
+}
+
+/// Destroys the buffer and then releases the handle. The context pointer is unused.
+fn freeBuffer(_: *anyopaque, raw: c.WGPUBuffer) void {
+    c.wgpuBufferDestroy(raw);
+    c.wgpuBufferRelease(raw);
+}
+
+/// Creates a WebGPU texture on this device.
+///
+/// The texture's size, taken as width * height * bytes-per-pixel of its format, is
+/// checked against the device's `maxBufferSize` limit; exceeding it returns
+/// `error.SingleBufferExceedsLimit`. Returns `error.Texture` when WebGPU returns no
+/// texture.
+fn allocTexture(ctx: *anyopaque, desc: c.WGPUTextureDescriptor) anyerror!c.WGPUTexture {
+    const self: *@This() = @ptrCast(@alignCast(ctx));
+    const format: GpuTextureFormat = @enumFromInt(desc.format);
+    // Widen to u64 before multiplying: the extent fields are 32-bit, and their product
+    // could otherwise overflow before it is compared against the limit.
+    const bytes_size = (@as(u64, desc.size.width) * desc.size.height) *| format.bytesPerPixel();
+    if (bytes_size > self.limits.maxBufferSize)
+        return error.SingleBufferExceedsLimit;
+    return c.wgpuDeviceCreateTexture(self.device, &desc) orelse return error.Texture;
+}
+
+/// Releases the texture handle. The context pointer is unused.
+fn freeTexture(_: *anyopaque, raw: c.WGPUTexture) void {
+    c.wgpuTextureRelease(raw);
+}
+
+/// Creates a view of `texture` described by `desc`.
+/// Returns `error.View` when WebGPU returns no view. The context pointer is unused.
+fn allocTextureView(_: *anyopaque, texture: c.WGPUTexture, desc: c.WGPUTextureViewDescriptor) anyerror!c.WGPUTextureView {
+    return c.wgpuTextureCreateView(texture, &desc) orelse return error.View;
+}
+
+/// Releases the texture view handle. The context pointer is unused.
+fn freeTextureView(_: *anyopaque, raw: c.WGPUTextureView) void {
+    c.wgpuTextureViewRelease(raw);
+}
+
+/// Creates a render pipeline on this device.
+/// Returns `error.Pipeline` when WebGPU returns no pipeline.
+fn allocRenderPipeline(ctx: *anyopaque, desc: c.WGPURenderPipelineDescriptor) anyerror!c.WGPURenderPipeline {
+    const dev: *@This() = @ptrCast(@alignCast(ctx));
+    const pipeline = c.wgpuDeviceCreateRenderPipeline(dev.device, &desc) orelse return error.Pipeline;
+    return pipeline;
+}
+
+/// Releases the render pipeline handle. The context pointer is unused.
+fn freeRenderPipeline(_: *anyopaque, raw: c.WGPURenderPipeline) void {
+    c.wgpuRenderPipelineRelease(raw);
+}
+
+/// Creates a compute pipeline on this device.
+/// Returns `error.Pipeline` when WebGPU returns no pipeline.
+fn allocComputePipeline(ctx: *anyopaque, desc: c.WGPUComputePipelineDescriptor) anyerror!c.WGPUComputePipeline {
+    const dev: *@This() = @ptrCast(@alignCast(ctx));
+    const pipeline = c.wgpuDeviceCreateComputePipeline(dev.device, &desc) orelse return error.Pipeline;
+    return pipeline;
+}
+
+/// Releases the compute pipeline handle. The context pointer is unused.
+fn freeComputePipeline(_: *anyopaque, raw: c.WGPUComputePipeline) void {
+    c.wgpuComputePipelineRelease(raw);
+}
--- a/src/GpuRender.zig
+++ b/src/GpuRender.zig
@ -0,0 +1,181 @@
+const std = @import("std");
+const c = @import("utils.zig").c;
+const sv = @import("utils.zig").sv;
+const GpuAllocator = @import("GpuAllocator.zig");
+const GpuBuffer = @import("GpuBuffer.zig");
+const GpuDevice = @import("GpuDevice.zig");
+const GpuTextureView = @import("GpuTextureView.zig");
+const GpuTextureFormat = @import("lib.zig").GpuTextureFormat;
+
+/// Describes one buffer binding expected by a render pipeline.
+/// In this file only the number of bindings is checked by `draw`.
+pub const Binding = struct {
+    // NOTE(review): presumably the byte size of one element of the bound buffer; it is
+    // not read by the code in this file — confirm intended use.
+    element_size: u32 = 0,
+};
+
+/// Static description of a render pipeline, passed to `init` and kept on the instance.
+pub const GpuRenderDef = struct {
+    /// One entry per GpuBuffer that `draw` expects in its `args` tuple
+    bindings: []const Binding = &.{},
+    /// The format of the texture we are rendering to (e.g., BGRA8Unorm)
+    texture_format: GpuTextureFormat,
+    /// The names of the entry points inside your WGSL code
+    vertex_entry: []const u8 = "vs_main",
+    fragment_entry: []const u8 = "fs_main",
+    /// Primitive topology, defaults to triangle list
+    topology: GpuPrimitiveTopology = .TriangleList,
+};
+
+/// How consecutive vertices are assembled into primitives.
+/// The tag value is passed unchanged (via `@intFromEnum`) as the pipeline's
+/// `primitive.topology`.
+// NOTE(review): values presumably mirror WGPUPrimitiveTopology in webgpu.h — confirm
+// against the header version in use.
+const GpuPrimitiveTopology = enum(c_uint) {
+    Undefined = 0x00000000,
+    PointList = 0x00000001,
+    LineList = 0x00000002,
+    LineStrip = 0x00000003,
+    TriangleList = 0x00000004,
+    TriangleStrip = 0x00000005,
+    Force32 = 0x7FFFFFFF,
+};
+
+gloc: GpuAllocator,
+pip: c.WGPURenderPipeline,
+def: GpuRenderDef,
+
+/// Compiles `wgsl` and builds a render pipeline from it through `gloc`.
+///
+/// The pipeline has a single colour target of format `def.texture_format` with
+/// source-alpha blending, uses `def.vertex_entry` / `def.fragment_entry` as entry points
+/// and `def.topology` as primitive topology, with no culling and no multisampling.
+/// Returns `error.Shader` when the shader module cannot be created, or the error
+/// returned by `gloc.allocRenderPipeline`.
+pub fn init(gloc: GpuAllocator, wgsl: []const u8, def: GpuRenderDef) !@This() {
+    // The shader module is only needed while the pipeline is being created.
+    var wgsl_src = c.WGPUShaderSourceWGSL{
+        .chain = .{ .sType = c.WGPUSType_ShaderSourceWGSL },
+        .code = sv(wgsl),
+    };
+    const shader = c.wgpuDeviceCreateShaderModule(gloc.device.device, &.{
+        .nextInChain = @ptrCast(&wgsl_src),
+    }) orelse return error.Shader;
+    defer c.wgpuShaderModuleRelease(shader);
+
+    // Colour: src * srcAlpha + dst * (1 - srcAlpha). Alpha: taken from the source as is.
+    const blend: c.WGPUBlendState = .{
+        .color = .{
+            .operation = c.WGPUBlendOperation_Add,
+            .srcFactor = c.WGPUBlendFactor_SrcAlpha,
+            .dstFactor = c.WGPUBlendFactor_OneMinusSrcAlpha,
+        },
+        .alpha = .{
+            .operation = c.WGPUBlendOperation_Add,
+            .srcFactor = c.WGPUBlendFactor_One,
+            .dstFactor = c.WGPUBlendFactor_Zero,
+        },
+    };
+
+    // Single colour target the fragment shader writes to.
+    const color_target: c.WGPUColorTargetState = .{
+        .format = @intFromEnum(def.texture_format),
+        .blend = &blend,
+        .writeMask = c.WGPUColorWriteMask_All,
+    };
+
+    const fragment_state: c.WGPUFragmentState = .{
+        .module = shader,
+        .entryPoint = sv(def.fragment_entry),
+        .targetCount = 1,
+        .targets = &color_target,
+    };
+
+    const pipeline_desc: c.WGPURenderPipelineDescriptor = .{
+        .vertex = .{
+            .module = shader,
+            .entryPoint = sv(def.vertex_entry),
+        },
+        .primitive = .{
+            .topology = @intFromEnum(def.topology),
+            .stripIndexFormat = c.WGPUIndexFormat_Undefined,
+            .frontFace = c.WGPUFrontFace_CCW,
+            .cullMode = c.WGPUCullMode_None,
+        },
+        .multisample = .{
+            .count = 1,
+            .mask = 0xFFFFFFFF,
+            .alphaToCoverageEnabled = 0,
+        },
+        .fragment = &fragment_state,
+    };
+    const pip = try gloc.allocRenderPipeline(pipeline_desc);
+
+    return .{ .gloc = gloc, .pip = pip, .def = def };
+}
+
+pub fn deinit(self: @This()) void {
+    self.gloc.freeRenderPipeline(self.pip);
+}
+
+/// Records and submits one render pass that draws `vertex_count` vertices (a single
+/// instance) into `target_view`.
+///
+/// `args` must be a tuple of `GpuBuffer`s, one per entry of `def.bindings`; the i-th
+/// buffer is bound whole (offset 0, `buf.size` bytes) at binding `i` of bind group 0.
+/// When the tuple is empty no bind group layout is queried and no bind group is
+/// created, so pipelines without any binding can be drawn.
+/// The pass clears the target to (0.1, 0.1, 0.1, 1.0) before drawing and stores the result.
+///
+/// Returns `error.InvalidArgumentCount` when the tuple length differs from
+/// `def.bindings.len`, and `error.BindGroup`, `error.Encoder` or `error.RenderPass`
+/// when the corresponding WebGPU object cannot be created.
+pub fn draw(
+    self: @This(),
+    gloc: GpuAllocator,
+    target_view: GpuTextureView,
+    vertex_count: u32,
+    args: anytype,
+) !void {
+    const type_info = @typeInfo(@TypeOf(args));
+    if (type_info != .@"struct" or !type_info.@"struct".is_tuple)
+        @compileError("Expected a tuple of GpuBuffers for args. E.g. .{ uniform_buf }");
+
+    const fields = type_info.@"struct".fields;
+    if (fields.len != self.def.bindings.len)
+        return error.InvalidArgumentCount;
+
+    var entries_buf: [32]c.WGPUBindGroupEntry = undefined;
+
+    inline for (fields, 0..) |field, i| {
+        const buf = @field(args, field.name);
+        if (@TypeOf(buf) != GpuBuffer) {
+            @compileError("All arguments in the tuple must be of type GpuBuffer");
+        }
+        entries_buf[i] = .{
+            .binding = @intCast(i),
+            .buffer = buf.raw,
+            .offset = 0,
+            .size = buf.size,
+        };
+    }
+
+    const entries = entries_buf[0..fields.len];
+
+    // Create the bind group only when there is something to bind: a pipeline whose
+    // shader declares no resources has no bind group layout 0 to query.
+    var bind_group: c.WGPUBindGroup = null;
+    defer if (bind_group) |bg| c.wgpuBindGroupRelease(bg);
+    if (fields.len > 0) {
+        const bgl = c.wgpuRenderPipelineGetBindGroupLayout(self.pip, 0);
+        defer c.wgpuBindGroupLayoutRelease(bgl);
+
+        bind_group = c.wgpuDeviceCreateBindGroup(gloc.device.device, &.{
+            .layout = bgl,
+            .entries = entries.ptr,
+            .entryCount = @intCast(entries.len),
+        }) orelse return error.BindGroup;
+    }
+
+    // Encode Render Command
+    const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device.device, null) orelse return error.Encoder;
+    defer c.wgpuCommandEncoderRelease(enc);
+
+    const color_attachment = c.WGPURenderPassColorAttachment{
+        .view = target_view.raw,
+        .resolveTarget = null,
+        .loadOp = c.WGPULoadOp_Clear,
+        .storeOp = c.WGPUStoreOp_Store,
+        .clearValue = .{ .r = 0.1, .g = 0.1, .b = 0.1, .a = 1.0 },
+        .depthSlice = c.WGPU_DEPTH_SLICE_UNDEFINED,
+    };
+
+    const pass_desc = c.WGPURenderPassDescriptor{
+        .colorAttachmentCount = 1,
+        .colorAttachments = &color_attachment,
+        .depthStencilAttachment = null,
+    };
+
+    const pass = c.wgpuCommandEncoderBeginRenderPass(enc, &pass_desc) orelse return error.RenderPass;
+    c.wgpuRenderPassEncoderSetPipeline(pass, self.pip);
+
+    if (bind_group) |bg| {
+        c.wgpuRenderPassEncoderSetBindGroup(pass, 0, bg, 0, null);
+    }
+
+    // Draw! (Instead of Compute Dispatch)
+    c.wgpuRenderPassEncoderDraw(pass, vertex_count, 1, 0, 0);
+
+    c.wgpuRenderPassEncoderEnd(pass);
+    c.wgpuRenderPassEncoderRelease(pass);
+
+    const cmd = c.wgpuCommandEncoderFinish(enc, null);
+    defer c.wgpuCommandBufferRelease(cmd);
+
+    c.wgpuQueueSubmit(gloc.device.queue, 1, &cmd);
+}
--- a/src/GpuTexture.zig
+++ b/src/GpuTexture.zig
@ -0,0 +1,168 @@
+const std = @import("std");
+const c = @import("utils.zig").c;
+const GpuAllocator = @import("GpuAllocator.zig");
+const GpuBuffer = @import("GpuBuffer.zig");
+const GpuTextureFormat = @import("lib.zig").GpuTextureFormat;
+const GpuTextureUsage = @import("lib.zig").GpuTextureUsage;
+
+/// Parameters consumed by `init` to describe the texture to create.
+pub const GpuTextureDef = struct {
+    /// Extent of the texture; copied unchanged into the descriptor's `size`.
+    size: c.WGPUExtent3D,
+    /// Set of usage flags; `init` ORs their integer values into the descriptor's `usage`.
+    usage: std.EnumSet(GpuTextureUsage),
+    /// Texel format; `init` passes its integer value as the descriptor's `format`.
+    format: GpuTextureFormat,
+};
+
+/// Texture handle returned by `gloc.allocTexture` in `init`.
+raw: c.WGPUTexture,
+/// Allocator the texture came from; `deinit` passes `raw` back to it.
+gloc: GpuAllocator,
+/// Definition the texture was created with (size, usage, format).
+def: GpuTextureDef,
+
+/// Create a 2D texture with one mip level and one sample, described by `def`
+/// and allocated through `gloc`. Errors from `gloc.allocTexture` are propagated.
+pub fn init(gloc: GpuAllocator, def: GpuTextureDef) !@This() {
+    // Fold the usage set into the single bitmask the descriptor expects.
+    var usage_bits: u64 = 0;
+    var flags = def.usage.iterator();
+    while (flags.next()) |flag| {
+        usage_bits |= @intFromEnum(flag);
+    }
+
+    const descriptor: c.WGPUTextureDescriptor = .{
+        .usage = usage_bits,
+        .dimension = c.WGPUTextureDimension_2D,
+        .size = def.size,
+        .format = @intFromEnum(def.format),
+        .mipLevelCount = 1,
+        .sampleCount = 1,
+    };
+
+    return .{
+        .gloc = gloc,
+        .raw = try gloc.allocTexture(descriptor),
+        .def = def,
+    };
+}
+
+/// Hand the texture handle `raw` back to the allocator via `gloc.freeTexture`.
+pub fn deinit(self: @This()) void {
+    self.gloc.freeTexture(self.raw);
+}
+
+/// Forwards `self.raw`, `offset` and `size` to `wgpuBufferGetConstMappedRange` and returns its result.
+/// NOTE(review): `raw` is declared as `c.WGPUTexture`, yet this calls a buffer entry point —
+/// looks carried over from GpuBuffer; confirm whether this method should exist on a texture.
+pub fn getConstMappedRange(self: @This(), offset: u64, size: u64) ?*const anyopaque {
+    return c.wgpuBufferGetConstMappedRange(self.raw, offset, size);
+}
+
+/// Byte count of the tightly packed image: `bytesSizeRow()` multiplied by the texture height.
+/// Only the width and height of `def.size` take part in the result.
+pub fn bytesSize(self: @This()) u32 {
+    const row_bytes = self.bytesSizeRow();
+    const row_count = self.def.size.height;
+    return row_bytes * row_count;
+}
+
+/// Byte count of one tightly packed row: texture width times the format's `bytesPerPixel()`.
+pub fn bytesSizeRow(self: @This()) u32 {
+    const texel_bytes = self.def.format.bytesPerPixel();
+    const texels_per_row = self.def.size.width;
+    return texels_per_row * texel_bytes;
+}
+
+/// Return a GpuBuffer containing a copy of the texture.
+///
+/// The buffer is created through `gloc` with `CopyDst` and `CopySrc` usage, holds
+/// `bytesSize()` bytes and is filled with rows of `bytesSizeRow()` bytes. The copy
+/// command is only submitted to the queue here; this function does not wait for it.
+/// The caller is responsible for releasing the returned buffer.
+///
+/// Errors:
+/// - `error.ZeroBytesPerRow` when `bytesSizeRow()` is 0 (zero width, or a format
+///   for which `bytesPerPixel()` reports 0).
+/// - `error.UnalignedBytesPerRow` when `bytesSizeRow()` is not a multiple of 256.
+/// - `error.Encoder` when the command encoder cannot be created.
+/// - any error from `GpuBuffer.init`.
+pub fn buffCopy(self: @This(), gloc: GpuAllocator) !GpuBuffer {
+    // WebGPU requires `bytesPerRow` of a texture-to-buffer copy to be a multiple of 256;
+    // reject an invalid pitch here instead of handing it to the C API.
+    const row_alignment: u32 = 256;
+    const row_bytes = self.bytesSizeRow();
+    if (row_bytes == 0) return error.ZeroBytesPerRow;
+    if (row_bytes % row_alignment != 0) return error.UnalignedBytesPerRow;
+
+    const buf = try GpuBuffer.init(gloc, self.bytesSize(), .initMany(&.{ .CopyDst, .CopySrc }));
+    // Do not leak the buffer if a later step fails.
+    errdefer buf.deinit();
+
+    const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device.device, null) orelse return error.Encoder;
+    defer c.wgpuCommandEncoderRelease(enc);
+
+    const src_copy = c.WGPUTexelCopyTextureInfo{
+        .texture = self.raw,
+        .mipLevel = 0,
+        .origin = .{ .x = 0, .y = 0, .z = 0 },
+        .aspect = c.WGPUTextureAspect_All,
+    };
+    const dst_copy = c.WGPUTexelCopyBufferInfo{
+        .buffer = buf.raw,
+        .layout = .{
+            .offset = 0,
+            .bytesPerRow = row_bytes,
+            .rowsPerImage = self.def.size.height,
+        },
+    };
+
+    c.wgpuCommandEncoderCopyTextureToBuffer(enc, &src_copy, &dst_copy, &self.def.size);
+
+    const cmd = c.wgpuCommandEncoderFinish(enc, null);
+    defer c.wgpuCommandBufferRelease(cmd);
+    c.wgpuQueueSubmit(gloc.device.queue, 1, &cmd);
+
+    return buf;
+}
+
+/// Forwards `self.raw` and all arguments to `wgpuBufferMapAsync`; the value it returns is discarded.
+/// NOTE(review): `raw` is declared as `c.WGPUTexture`, yet this calls a buffer entry point —
+/// looks carried over from GpuBuffer; confirm whether this method should exist on a texture.
+pub fn mapAsync(
+    self: @This(),
+    mode: c.WGPUMapMode,
+    offset: u64,
+    size: u64,
+    callback_info: c.WGPUBufferMapCallbackInfo,
+) void {
+    _ = c.wgpuBufferMapAsync(self.raw, mode, offset, size, callback_info);
+}
+
+/// Forwards `self.raw` to `wgpuBufferUnmap`.
+/// NOTE(review): `raw` is declared as `c.WGPUTexture`, yet this calls a buffer entry point —
+/// looks carried over from GpuBuffer; confirm whether this method should exist on a texture.
+pub fn unmap(self: @This()) void {
+    c.wgpuBufferUnmap(self.raw);
+}
+
+/// CPU to GPU: upload `data` as the full contents of mip level 0 of this texture.
+///
+/// `data` is interpreted as tightly packed rows of `bytesSizeRow()` bytes and must be
+/// exactly `bytesSize()` bytes long, otherwise `error.SizeMismatch` is returned.
+/// The write is queued with `wgpuQueueWriteTexture`; this function does not wait for it.
+///
+/// The previous body was written against a buffer (`self.size`, `wgpuQueueWriteBuffer`),
+/// neither of which applies to this struct, whose handle is a `c.WGPUTexture`.
+/// NOTE(review): presumably the texture must have been created with `CopyDst` usage — confirm.
+pub fn load(
+    self: @This(),
+    T: type,
+    data: []const T,
+) !void {
+    const bytes = std.mem.sliceAsBytes(data);
+    if (bytes.len != self.bytesSize()) return error.SizeMismatch;
+
+    const dst_copy = c.WGPUTexelCopyTextureInfo{
+        .texture = self.raw,
+        .mipLevel = 0,
+        .origin = .{ .x = 0, .y = 0, .z = 0 },
+        .aspect = c.WGPUTextureAspect_All,
+    };
+    // Same row layout that `buffCopy` uses when reading the texture back.
+    const src_layout = c.WGPUTexelCopyBufferLayout{
+        .offset = 0,
+        .bytesPerRow = self.bytesSizeRow(),
+        .rowsPerImage = self.def.size.height,
+    };
+
+    c.wgpuQueueWriteTexture(
+        self.gloc.device.queue,
+        &dst_copy,
+        bytes.ptr,
+        bytes.len,
+        &src_layout,
+        &self.def.size,
+    );
+}
+
+/// GPU to CPU: return the texture contents as a newly allocated `[]T`.
+///
+/// The texture is first copied into a temporary buffer with `buffCopy` (allocated
+/// through `self.gloc` and released before returning); that buffer's own `read`
+/// then performs the download into memory obtained from `alloc`.
+/// The caller owns the returned slice.
+///
+/// The previous body was written against a buffer (`self.size`, buffer-style `init`
+/// arguments, buffer-to-buffer copy), none of which applies to this struct.
+/// NOTE(review): relies on GpuBuffer exposing `read(alloc, T)` and `deinit()` — confirm.
+pub fn read(self: @This(), alloc: std.mem.Allocator, T: type) ![]T {
+    const staging = try self.buffCopy(self.gloc);
+    defer staging.deinit();
+
+    return staging.read(alloc, T);
+}
+
+/// Map-completion callback using the C calling convention.
+/// Writes into the `bool` that `userdata1` points at whether `status` equals
+/// `c.WGPUMapAsyncStatus_Success`. `userdata1` must not be null.
+fn onMapped(
+    status: c.WGPUMapAsyncStatus,
+    _: c.WGPUStringView,
+    userdata1: ?*anyopaque,
+    _: ?*anyopaque,
+) callconv(.c) void {
+    const done: *bool = @ptrCast(@alignCast(userdata1.?));
+    const succeeded = status == c.WGPUMapAsyncStatus_Success;
+    done.* = succeeded;
+}
--- a/src/GpuTextureView.zig
+++ b/src/GpuTextureView.zig
@ -0,0 +1,32 @@
+const std = @import("std");
+const c = @import("utils.zig").c;
+const GpuAllocator = @import("GpuAllocator.zig");
+const GpuTexture = @import("lib.zig").GpuTexture;
+const GpuTextureFormat = @import("lib.zig").GpuTextureFormat;
+const GpuTextureUsage = @import("lib.zig").GpuTextureUsage;
+
+/// Optional overrides consumed by `init` when creating a texture view.
+pub const GpuViewDef = struct {
+    /// Usage flags for the view; `init` ORs their integer values together. Empty by default.
+    usage: std.EnumSet(GpuTextureUsage) = .empty,
+    /// Format for the view; `init` passes its integer value on. Defaults to `.Undefined`.
+    format: GpuTextureFormat = .Undefined,
+};
+
+/// View handle returned by `gloc.allocTextureView` in `init`.
+raw: c.WGPUTextureView,
+/// Allocator the view came from; `deinit` passes `raw` back to it.
+gloc: GpuAllocator,
+
+/// Create a view of `texture` covering one mip level and one array layer,
+/// with the format and usage taken from `def`. The view is allocated through
+/// `gloc`; errors from `gloc.allocTextureView` are propagated.
+pub fn init(gloc: GpuAllocator, texture: GpuTexture, def: GpuViewDef) !@This() {
+    // Fold the usage set into a single bitmask.
+    var usage_bits: u64 = 0;
+    var flags = def.usage.iterator();
+    while (flags.next()) |flag| {
+        usage_bits |= @intFromEnum(flag);
+    }
+
+    return .{
+        .gloc = gloc,
+        .raw = try gloc.allocTextureView(texture.raw, .{
+            .format = @intFromEnum(def.format),
+            .usage = usage_bits,
+            .mipLevelCount = 1,
+            .arrayLayerCount = 1,
+        }),
+    };
+}
+
+/// Hand the view handle `raw` back to the allocator via `gloc.freeTextureView`.
+pub fn deinit(self: @This()) void {
+    self.gloc.freeTextureView(self.raw);
+}
--- a/src/lib.zig
+++ b/src/lib.zig
@ -1,5 +1,187 @@
 pub const GpuAllocator = @import("GpuAllocator.zig");
-pub const GpuArena = @import("GpuArena.zig");
+pub const GpuArenaAllocator = @import("GpuArenaAllocator.zig");
 pub const GpuBuffer = @import("GpuBuffer.zig");
 pub const GpuDevice = @import("GpuDevice.zig");
 pub const GpuCompute = @import("GpuCompute.zig");
+pub const GpuRender = @import("GpuRender.zig");
+pub const GpuTexture = @import("GpuTexture.zig");
+pub const GpuTextureView = @import("GpuTextureView.zig");
+
+/// Texture formats with explicit `c_uint` tags. The tag is what gets written into the
+/// C descriptors (via `@intFromEnum`) when textures and texture views are created.
+/// NOTE(review): presumably the values mirror `WGPUTextureFormat` in the bundled
+/// webgpu header — confirm against the header version in use.
+pub const GpuTextureFormat = enum(c_uint) {
+    Undefined = 0,
+    R8Unorm = 1,
+    R8Snorm = 2,
+    R8Uint = 3,
+    R8Sint = 4,
+    R16Unorm = 5,
+    R16Snorm = 6,
+    R16Uint = 7,
+    R16Sint = 8,
+    R16Float = 9,
+    RG8Unorm = 10,
+    RG8Snorm = 11,
+    RG8Uint = 12,
+    RG8Sint = 13,
+    R32Float = 14,
+    R32Uint = 15,
+    R32Sint = 16,
+    RG16Unorm = 17,
+    RG16Snorm = 18,
+    RG16Uint = 19,
+    RG16Sint = 20,
+    RG16Float = 21,
+    RGBA8Unorm = 22,
+    RGBA8UnormSrgb = 23,
+    RGBA8Snorm = 24,
+    RGBA8Uint = 25,
+    RGBA8Sint = 26,
+    BGRA8Unorm = 27,
+    BGRA8UnormSrgb = 28,
+    RGB10A2Uint = 29,
+    RGB10A2Unorm = 30,
+    RG11B10Ufloat = 31,
+    RGB9E5Ufloat = 32,
+    RG32Float = 33,
+    RG32Uint = 34,
+    RG32Sint = 35,
+    RGBA16Unorm = 36,
+    RGBA16Snorm = 37,
+    RGBA16Uint = 38,
+    RGBA16Sint = 39,
+    RGBA16Float = 40,
+    RGBA32Float = 41,
+    RGBA32Uint = 42,
+    RGBA32Sint = 43,
+    Stencil8 = 44,
+    Depth16Unorm = 45,
+    Depth24Plus = 46,
+    Depth24PlusStencil8 = 47,
+    Depth32Float = 48,
+    Depth32FloatStencil8 = 49,
+    BC1RGBAUnorm = 50,
+    BC1RGBAUnormSrgb = 51,
+    BC2RGBAUnorm = 52,
+    BC2RGBAUnormSrgb = 53,
+    BC3RGBAUnorm = 54,
+    BC3RGBAUnormSrgb = 55,
+    BC4RUnorm = 56,
+    BC4RSnorm = 57,
+    BC5RGUnorm = 58,
+    BC5RGSnorm = 59,
+    BC6HRGBUfloat = 60,
+    BC6HRGBFloat = 61,
+    BC7RGBAUnorm = 62,
+    BC7RGBAUnormSrgb = 63,
+    ETC2RGB8Unorm = 64,
+    ETC2RGB8UnormSrgb = 65,
+    ETC2RGB8A1Unorm = 66,
+    ETC2RGB8A1UnormSrgb = 67,
+    ETC2RGBA8Unorm = 68,
+    ETC2RGBA8UnormSrgb = 69,
+    EACR11Unorm = 70,
+    EACR11Snorm = 71,
+    EACRG11Unorm = 72,
+    EACRG11Snorm = 73,
+    ASTC4x4Unorm = 74,
+    ASTC4x4UnormSrgb = 75,
+    ASTC5x4Unorm = 76,
+    ASTC5x4UnormSrgb = 77,
+    ASTC5x5Unorm = 78,
+    ASTC5x5UnormSrgb = 79,
+    ASTC6x5Unorm = 80,
+    ASTC6x5UnormSrgb = 81,
+    ASTC6x6Unorm = 82,
+    ASTC6x6UnormSrgb = 83,
+    ASTC8x5Unorm = 84,
+    ASTC8x5UnormSrgb = 85,
+    ASTC8x6Unorm = 86,
+    ASTC8x6UnormSrgb = 87,
+    ASTC8x8Unorm = 88,
+    ASTC8x8UnormSrgb = 89,
+    ASTC10x5Unorm = 90,
+    ASTC10x5UnormSrgb = 91,
+    ASTC10x6Unorm = 92,
+    ASTC10x6UnormSrgb = 93,
+    ASTC10x8Unorm = 94,
+    ASTC10x8UnormSrgb = 95,
+    ASTC10x10Unorm = 96,
+    ASTC10x10UnormSrgb = 97,
+    ASTC12x10Unorm = 98,
+    ASTC12x10UnormSrgb = 99,
+    ASTC12x12Unorm = 100,
+    ASTC12x12UnormSrgb = 101,
+    Force32 = 2147483647,
+
+    /// Size in bytes of one texel of `format`, or 0 for every format not listed in
+    /// the switch below. Callers multiplying by this value get 0 for those formats.
+    pub fn bytesPerPixel(format: GpuTextureFormat) u32 {
+        return switch (format) {
+            // 8-bit formats (1 byte)
+            .R8Unorm, .R8Snorm, .R8Uint, .R8Sint, .Stencil8 => 1,
+
+            // 16-bit formats (2 bytes)
+            .R16Unorm,
+            .R16Snorm,
+            .R16Uint,
+            .R16Sint,
+            .R16Float,
+            .RG8Unorm,
+            .RG8Snorm,
+            .RG8Uint,
+            .RG8Sint,
+            .Depth16Unorm,
+            => 2,
+
+            // 32-bit formats (4 bytes)
+            .R32Float,
+            .R32Uint,
+            .R32Sint,
+            .RG16Unorm,
+            .RG16Snorm,
+            .RG16Uint,
+            .RG16Sint,
+            .RG16Float,
+            .RGBA8Unorm,
+            .RGBA8UnormSrgb,
+            .RGBA8Snorm,
+            .RGBA8Uint,
+            .RGBA8Sint,
+            .BGRA8Unorm,
+            .BGRA8UnormSrgb,
+            .RGB10A2Uint,
+            .RGB10A2Unorm,
+            .RG11B10Ufloat,
+            .RGB9E5Ufloat,
+            .Depth24Plus,
+            .Depth32Float,
+            => 4,
+
+            // 64-bit formats (8 bytes)
+            // NOTE(review): the sizes given for the two combined depth/stencil formats are
+            // this table's assumption about their in-memory layout — confirm before relying on them.
+            .RG32Float,
+            .RG32Uint,
+            .RG32Sint,
+            .RGBA16Unorm,
+            .RGBA16Snorm,
+            .RGBA16Uint,
+            .RGBA16Sint,
+            .RGBA16Float,
+            .Depth24PlusStencil8, // 24-bit depth + 8-bit stencil layout padded to 4+4 or 1+3
+            .Depth32FloatStencil8, // 32-bit float depth + 8-bit stencil (padded to 8 bytes)
+            => 8,
+
+            // 128-bit formats (16 bytes)
+            .RGBA32Float, .RGBA32Uint, .RGBA32Sint => 16,
+
+            // Everything else yields 0: the BC/ETC2/EAC/ASTC formats (no per-texel
+            // size; to be handled separately) as well as `Undefined` and `Force32`.
+            else => 0,
+        };
+    }
+};
+
+/// Texture usage flags. Every tag other than `None` is a distinct single bit, so the
+/// members of a `std.EnumSet(GpuTextureUsage)` can be ORed into one `u64` mask
+/// (as done when textures and texture views are created). `None` contributes no bits.
+pub const GpuTextureUsage = enum(u64) {
+    None = 0x0000000000000000,
+    CopySrc = 0x0000000000000001,
+    CopyDst = 0x0000000000000002,
+    TextureBinding = 0x0000000000000004,
+    StorageBinding = 0x0000000000000008,
+    RenderAttachment = 0x0000000000000010,
+    TransientAttachment = 0x0000000000000020,
+};