diff --git a/.gitignore b/.gitignore
index d8c8979..f85f7a7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 .zig-cache
 zig-out
+examples/mnist
diff --git a/examples/digit.zig b/examples/digit.zig
new file mode 100644
index 0000000..2b9ac7e
--- /dev/null
+++ b/examples/digit.zig
@@ -0,0 +1,67 @@
+// Dataset used: the reduced MNIST dataset, https://www.kaggle.com/datasets/mohamedgamal07/reduced-mnist
+
+const std = @import("std");
+const gpu = @import("gpu");
+const GpuDevice = gpu.GpuDevice;
+const GpuArena = gpu.GpuArena;
+const GpuBuffer = gpu.GpuBuffer;
+const GpuProcess = gpu.GpuProcess;
+
+const BATCHSIZE = 10; // NOTE(review): not referenced anywhere in this file yet; presumably a batch size, confirm
+const EPOCH = 10; // Iteration count of the (currently empty) epoch loop in `main`.
+
+/// Example entry point: adds two 16-element f16 vectors on the GPU with the
+/// embedded `shaders/add.wgsl` pipeline and prints the result; errors propagate.
+pub fn main(init: std.process.Init) !void {
+    const allocator = init.gpa;
+
+    // 1. Open GPU Device
+    const device = try GpuDevice.init(.{});
+    defer device.deinit();
+
+    // 2. Create a GPU Arena to manage VRAM
+    var grena = GpuArena.init(allocator, device);
+    defer grena.deinit();
+    const gloc = grena.gpuAllocator();
+
+    // 3. Load the WGSL compute pipeline
+    const add_process = try GpuProcess.init(device, @embedFile("shaders/add.wgsl"));
+    defer add_process.deinit();
+
+    for (0..EPOCH) |_| {} // TODO: placeholder epoch loop; no per-epoch work yet
+
+    // 4. Setup CPU data
+    const len: usize = 16;
+    const data_a = try allocator.alloc(f16, len);
+    defer allocator.free(data_a);
+    const data_b = try allocator.alloc(f16, len);
+    defer allocator.free(data_b);
+
+    for (0..len) |i| {
+        data_a[i] = @floatFromInt(i);
+        data_b[i] = @floatFromInt(len - 1 - i);
+    }
+
+    // 5. Initialize raw GPU Buffers
+    // We pass the EnumSet inline using `.initMany` since the Enum itself isn't exported
+    const byte_size = len * @sizeOf(f16);
+    const buf_a = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc }));
+    const buf_b = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc }));
+    const buf_out = try GpuBuffer.init(gloc, byte_size, .initMany(&.{ .Storage, .CopyDst, .CopySrc }));
+    // Note: The buffers are safely tied to the GpuArena which will automatically
+    // release them at the end. You can also manually call buf_x.deinit() if desired.
+
+    // 6. Transfer data from CPU slices to GPU Buffers
+    try buf_a.load(f16, data_a);
+    try buf_b.load(f16, data_b);
+
+    // 7. Dispatch the Compute Process
+    // We pass the data type (f16) to allow GpuProcess to calculate chunks correctly
+    try add_process.run(gloc, f16, buf_a, buf_b, buf_out);
+
+    // 8. Map and copy the resulting buffer back to the CPU
+    const out = try buf_out.read(allocator, f16);
+    defer allocator.free(out);
+
+    std.debug.print("Result: {any}\n", .{out});
+}