Mat to Vec + changed how its API works
This commit is contained in:
parent
7b9a7fe7a9
commit
cb19cd8e30
@ -3,60 +3,54 @@ const std = @import("std");
|
|||||||
const c = @import("c.zig").c;
|
const c = @import("c.zig").c;
|
||||||
const GpuAllocator = @import("GpuAllocator.zig");
|
const GpuAllocator = @import("GpuAllocator.zig");
|
||||||
const GpuBuffer = @import("GpuBuffer.zig");
|
const GpuBuffer = @import("GpuBuffer.zig");
|
||||||
|
const GpuDevice = @import("GpuDevice.zig");
|
||||||
const GpuPipeline = @import("GpuPipeline.zig");
|
const GpuPipeline = @import("GpuPipeline.zig");
|
||||||
|
|
||||||
const Mat = @This();
|
const Vec = @This();
|
||||||
|
|
||||||
buf: GpuBuffer,
|
buf: GpuBuffer,
|
||||||
rows: usize,
|
len: usize,
|
||||||
cols: usize,
|
|
||||||
|
|
||||||
pub fn load(
|
pub fn initZero(gloc: *GpuAllocator, len: usize) !Vec {
|
||||||
gloc: *GpuAllocator,
|
return .{
|
||||||
data: []const f32,
|
.buf = try GpuBuffer.init(
|
||||||
rows: usize,
|
gloc,
|
||||||
cols: usize,
|
len * @sizeOf(f32),
|
||||||
) !Mat {
|
c.WGPUBufferUsage_Storage | c.WGPUBufferUsage_CopyDst | c.WGPUBufferUsage_CopySrc,
|
||||||
std.debug.assert(data.len == @as(usize, rows) * cols);
|
),
|
||||||
const bytes = data.len * @sizeOf(f32);
|
.len = len,
|
||||||
|
};
|
||||||
// Uses structural constructor initialization
|
|
||||||
const buf = try GpuBuffer.init(
|
|
||||||
gloc,
|
|
||||||
bytes,
|
|
||||||
c.WGPUBufferUsage_Storage | c.WGPUBufferUsage_CopyDst | c.WGPUBufferUsage_CopySrc,
|
|
||||||
);
|
|
||||||
|
|
||||||
c.wgpuQueueWriteBuffer(gloc.device.queue, buf.raw, 0, data.ptr, bytes);
|
|
||||||
return .{ .buf = buf, .rows = rows, .cols = cols };
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn zeros(gloc: *GpuAllocator, rows: usize, cols: usize) !Mat {
|
pub fn initLoad(gloc: *GpuAllocator, data: []const f32) !Vec {
|
||||||
const bytes: u64 = @as(u64, rows) * cols * @sizeOf(f32);
|
var self = try initZero(gloc, data.len);
|
||||||
const buf = try GpuBuffer.init(
|
try self.load(gloc.device, data);
|
||||||
gloc,
|
return self;
|
||||||
bytes,
|
|
||||||
c.WGPUBufferUsage_Storage | c.WGPUBufferUsage_CopyDst | c.WGPUBufferUsage_CopySrc,
|
|
||||||
);
|
|
||||||
return .{ .buf = buf, .rows = rows, .cols = cols };
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn deinit(self: Mat) void {
|
pub fn deinit(self: Vec) void {
|
||||||
self.buf.deinit();
|
self.buf.deinit();
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn len(self: Mat) usize {
|
/// CPU to GPU.
|
||||||
return self.rows * self.cols;
|
pub fn load(
|
||||||
|
self: Vec,
|
||||||
|
device: GpuDevice,
|
||||||
|
data: []const f32,
|
||||||
|
) !void {
|
||||||
|
std.debug.assert(data.len == self.len);
|
||||||
|
const bytes = data.len * @sizeOf(f32);
|
||||||
|
c.wgpuQueueWriteBuffer(device.queue, self.buf.raw, 0, data.ptr, bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn byteSize(self: Mat) u64 {
|
pub fn byteSize(self: Vec) u64 {
|
||||||
return @as(u64, self.len()) * @sizeOf(f32);
|
return @as(u64, self.len) * @sizeOf(f32);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn run(self: Mat, gloc: *GpuAllocator, other: Mat, pip: GpuPipeline) !Mat {
|
pub fn run(self: Vec, gloc: *GpuAllocator, other: Vec, pip: GpuPipeline) !Vec {
|
||||||
std.debug.assert(self.rows == other.rows and self.cols == other.cols);
|
std.debug.assert(self.len == other.len);
|
||||||
|
|
||||||
const result = try Mat.zeros(gloc, self.rows, self.cols);
|
const result = try Vec.initZero(gloc, self.len);
|
||||||
errdefer result.deinit();
|
errdefer result.deinit();
|
||||||
|
|
||||||
try dispatch2in1out(gloc, pip.raw, self.buf, other.buf, result.buf, self.byteSize());
|
try dispatch2in1out(gloc, pip.raw, self.buf, other.buf, result.buf, self.byteSize());
|
||||||
@ -64,8 +58,9 @@ pub fn run(self: Mat, gloc: *GpuAllocator, other: Mat, pip: GpuPipeline) !Mat {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn read(self: Mat, gloc: *GpuAllocator, alloc: std.mem.Allocator) ![]f32 {
|
/// GPU to CPU.
|
||||||
const out = try alloc.alloc(f32, self.len());
|
pub fn read(self: Vec, gloc: *GpuAllocator, alloc: std.mem.Allocator) ![]f32 {
|
||||||
|
const out = try alloc.alloc(f32, self.len);
|
||||||
const bytes = self.byteSize();
|
const bytes = self.byteSize();
|
||||||
|
|
||||||
const staging = try GpuBuffer.init(
|
const staging = try GpuBuffer.init(
|
||||||
@ -94,7 +89,7 @@ pub fn read(self: Mat, gloc: *GpuAllocator, alloc: std.mem.Allocator) ![]f32 {
|
|||||||
const ptr: [*]const f32 = @ptrCast(@alignCast(
|
const ptr: [*]const f32 = @ptrCast(@alignCast(
|
||||||
staging.getConstMappedRange(0, bytes),
|
staging.getConstMappedRange(0, bytes),
|
||||||
));
|
));
|
||||||
@memcpy(out[0..self.len()], ptr[0..self.len()]);
|
@memcpy(out[0..self.len], ptr[0..self.len]);
|
||||||
staging.unmap();
|
staging.unmap();
|
||||||
|
|
||||||
return out;
|
return out;
|
||||||
@ -110,8 +105,6 @@ fn onMapped(
|
|||||||
flag.* = (status == c.WGPUMapAsyncStatus_Success);
|
flag.* = (status == c.WGPUMapAsyncStatus_Success);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Dispatch helpers ──────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
/// Encode + submit a 2-input, 1-output compute pass (used by add).
|
/// Encode + submit a 2-input, 1-output compute pass (used by add).
|
||||||
fn dispatch2in1out(
|
fn dispatch2in1out(
|
||||||
gloc: *GpuAllocator,
|
gloc: *GpuAllocator,
|
||||||
14
src/main.zig
14
src/main.zig
@ -2,7 +2,7 @@ const std = @import("std");
|
|||||||
const GpuDevice = @import("GpuDevice.zig");
|
const GpuDevice = @import("GpuDevice.zig");
|
||||||
const GpuAllocator = @import("GpuAllocator.zig");
|
const GpuAllocator = @import("GpuAllocator.zig");
|
||||||
const GpuPipeline = @import("GpuPipeline.zig");
|
const GpuPipeline = @import("GpuPipeline.zig");
|
||||||
const Mat = @import("Mat.zig");
|
const Vec = @import("Vec.zig");
|
||||||
|
|
||||||
pub fn main(init: std.process.Init) !void {
|
pub fn main(init: std.process.Init) !void {
|
||||||
const device = try GpuDevice.init();
|
const device = try GpuDevice.init();
|
||||||
@ -52,9 +52,9 @@ pub fn main(init: std.process.Init) !void {
|
|||||||
// Start timing the GPU operations
|
// Start timing the GPU operations
|
||||||
const start = std.Io.Clock.awake.now(init.io);
|
const start = std.Io.Clock.awake.now(init.io);
|
||||||
|
|
||||||
const a = try Mat.load(&gloc, data_a, size, 1);
|
const a = try Vec.initLoad(&gloc, data_a);
|
||||||
defer a.deinit();
|
defer a.deinit();
|
||||||
const b = try Mat.load(&gloc, data_b, size, 1);
|
const b = try Vec.initLoad(&gloc, data_b);
|
||||||
defer b.deinit();
|
defer b.deinit();
|
||||||
|
|
||||||
// a + b
|
// a + b
|
||||||
@ -73,11 +73,3 @@ pub fn main(init: std.process.Init) !void {
|
|||||||
std.debug.print("| {d:12} | {d:8.2} | {d:9.3} | {d:9} |\n", .{ size, mb, ms, ns });
|
std.debug.print("| {d:12} | {d:8.2} | {d:9.3} | {d:9} |\n", .{ size, mb, ms, ns });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn printMat(data: []const f32, rows: u32, cols: u32) void {
|
|
||||||
for (0..rows) |r| {
|
|
||||||
for (0..cols) |col|
|
|
||||||
std.debug.print("{d:6.0}", .{data[r * cols + col]});
|
|
||||||
std.debug.print("\n", .{});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user