Removed GpuPipeline for a GpuProcess
This commit is contained in:
parent
d5e7f60926
commit
97d5f9001f
@ -1,25 +0,0 @@
|
|||||||
const std = @import("std");
|
|
||||||
const GpuDevice = @import("GpuDevice.zig");
|
|
||||||
const c = @import("utils.zig").c;
|
|
||||||
const sv = @import("utils.zig").sv;
|
|
||||||
|
|
||||||
raw: c.WGPUComputePipeline,
|
|
||||||
|
|
||||||
/// Compiles `wgsl` into a shader module and builds a compute pipeline that
/// uses its `main` entry point.
///
/// Returns `error.Shader` when the shader module cannot be created and
/// `error.Pipeline` when the compute pipeline cannot be created.
pub fn init(device: GpuDevice, wgsl: []const u8) !@This() {
    // Chained struct holding the WGSL text; linked in through `nextInChain`.
    var source = c.WGPUShaderSourceWGSL{
        .chain = .{ .sType = c.WGPUSType_ShaderSourceWGSL },
        .code = sv(wgsl),
    };
    const module_desc = c.WGPUShaderModuleDescriptor{
        .nextInChain = @ptrCast(&source),
    };
    const shader_module = c.wgpuDeviceCreateShaderModule(device.device, &module_desc) orelse
        return error.Shader;
    // Released on every exit path, including after the pipeline was built.
    defer c.wgpuShaderModuleRelease(shader_module);

    const pipeline_desc = c.WGPUComputePipelineDescriptor{
        .compute = .{ .module = shader_module, .entryPoint = sv("main") },
    };
    const pipeline = c.wgpuDeviceCreateComputePipeline(device.device, &pipeline_desc) orelse
        return error.Pipeline;
    return .{ .raw = pipeline };
}
|
|
||||||
|
|
||||||
/// Releases the wrapped compute pipeline handle.
pub fn deinit(self: @This()) void {
    const pipeline = self.raw;
    c.wgpuComputePipelineRelease(pipeline);
}
|
|
||||||
118
src/GpuProcess.zig
Normal file
118
src/GpuProcess.zig
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
const std = @import("std");
|
||||||
|
const c = @import("utils.zig").c;
|
||||||
|
const sv = @import("utils.zig").sv;
|
||||||
|
const GpuAllocator = @import("GpuAllocator.zig");
|
||||||
|
const GpuBuffer = @import("GpuBuffer.zig");
|
||||||
|
const GpuDevice = @import("GpuDevice.zig");
|
||||||
|
|
||||||
|
pip: c.WGPUComputePipeline,
|
||||||
|
|
||||||
|
/// Compiles `wgsl` into a shader module and builds a compute pipeline that
/// uses its `main` entry point.
///
/// Returns `error.Shader` when the shader module cannot be created and
/// `error.Pipeline` when the compute pipeline cannot be created.
pub fn init(device: GpuDevice, wgsl: []const u8) !@This() {
    // Chained struct holding the WGSL text; linked in through `nextInChain`.
    var source = c.WGPUShaderSourceWGSL{
        .chain = .{ .sType = c.WGPUSType_ShaderSourceWGSL },
        .code = sv(wgsl),
    };
    const module_desc = c.WGPUShaderModuleDescriptor{
        .nextInChain = @ptrCast(&source),
    };
    const shader_module = c.wgpuDeviceCreateShaderModule(device.device, &module_desc) orelse
        return error.Shader;
    // Released on every exit path, including after the pipeline was built.
    defer c.wgpuShaderModuleRelease(shader_module);

    const pipeline_desc = c.WGPUComputePipelineDescriptor{
        .compute = .{ .module = shader_module, .entryPoint = sv("main") },
    };
    const pipeline = c.wgpuDeviceCreateComputePipeline(device.device, &pipeline_desc) orelse
        return error.Pipeline;
    return .{ .pip = pipeline };
}
|
||||||
|
|
||||||
|
/// Releases the wrapped compute pipeline handle.
pub fn deinit(self: @This()) void {
    const pipeline = self.pip;
    c.wgpuComputePipelineRelease(pipeline);
}
|
||||||
|
|
||||||
|
/// C-callable callback that records whether a map-async request succeeded.
///
/// `userdata1` must be a non-null pointer to a `bool`. It is set to `true`
/// when `status` equals `WGPUMapAsyncStatus_Success` and to `false`
/// otherwise. The string-view and second userdata arguments are ignored.
fn onMapped(
    status: c.WGPUMapAsyncStatus,
    _: c.WGPUStringView,
    userdata1: ?*anyopaque,
    _: ?*anyopaque,
) callconv(.c) void {
    const opaque_flag = userdata1 orelse unreachable;
    const succeeded: *bool = @ptrCast(@alignCast(opaque_flag));
    succeeded.* = status == c.WGPUMapAsyncStatus_Success;
}
|
||||||
|
|
||||||
|
/// Runs this process's compute pipeline over `buf_a` and `buf_b`, writing
/// into `buf_out`.
///
/// The total work is `buf_a.size` bytes, processed in chunks of at most
/// 1 GiB. For each chunk a bind group is built with:
///   - binding 0: `buf_a` at the chunk offset,
///   - binding 1: `buf_b` at the chunk offset,
///   - binding 2: `buf_out` at the chunk offset,
///   - binding 3: a 4-byte uniform holding the chunk's element count,
///     where one element is `@sizeOf(f16)` bytes.
///
/// `gloc` is taken by value (not as `*GpuAllocator`).
///
/// NOTE(review): `buf_b` and `buf_out` are bound with the same offset and
/// size as `buf_a`, so they are presumably at least `buf_a.size` bytes long;
/// this is not checked here — confirm against callers.
///
/// Errors: propagates failures from `GpuBuffer.init` and `submitPass`.
pub fn run(
    self: @This(),
    gloc: GpuAllocator,
    buf_a: GpuBuffer,
    buf_b: GpuBuffer,
    buf_out: GpuBuffer,
) !void {
    const max_chunk_bytes: u64 = 1024 * 1024 * 1024; // 1 GB

    const bytes = buf_a.size;
    var offset: u64 = 0;
    while (offset < bytes) {
        const current_chunk_bytes = @min(max_chunk_bytes, bytes - offset);
        const current_chunk_elements: u32 = @intCast(current_chunk_bytes / @sizeOf(f16));

        // Per-chunk uniform that tells the shader how many elements to process.
        const info_buf = try GpuBuffer.init(
            gloc,
            @sizeOf(u32),
            .initMany(&.{ .Uniform, .CopyDst }),
        );
        defer info_buf.deinit();

        // The address-of operator was garbled to `¤t_chunk_elements` in the
        // scraped text; the uniform is filled from the local element count.
        c.wgpuQueueWriteBuffer(gloc.device.queue, info_buf.raw, 0, &current_chunk_elements, @sizeOf(u32));

        const entries = [_]c.WGPUBindGroupEntry{
            .{ .binding = 0, .buffer = buf_a.raw, .offset = offset, .size = current_chunk_bytes },
            .{ .binding = 1, .buffer = buf_b.raw, .offset = offset, .size = current_chunk_bytes },
            .{ .binding = 2, .buffer = buf_out.raw, .offset = offset, .size = current_chunk_bytes },
            .{ .binding = 3, .buffer = info_buf.raw, .offset = 0, .size = @sizeOf(u32) },
        };

        try submitPass(gloc, self.pip, &entries, current_chunk_elements);

        offset += current_chunk_bytes;
    }
}
|
||||||
|
|
||||||
|
/// Records and submits one compute pass that runs `pipeline` with `entries`
/// bound as bind group 0.
///
/// `n` is the number of elements to cover. The dispatch uses
/// `ceilDiv(n, 256)` workgroups along X, clamped to 65535.
/// NOTE(review): when the clamp applies, fewer than `n` invocations are
/// launched; presumably the shader strides over the remainder — confirm
/// against the WGSL source.
///
/// `gloc` is taken by value (not as `*GpuAllocator`).
///
/// Errors: `error.BindGroupLayout`, `error.BindGroup`, `error.Encoder`,
/// `error.ComputePass` or `error.CommandBuffer` when the corresponding
/// WebGPU call returns a null handle.
fn submitPass(
    gloc: GpuAllocator,
    pipeline: c.WGPUComputePipeline,
    entries: []const c.WGPUBindGroupEntry,
    n: usize,
) !void {
    const bgl = c.wgpuComputePipelineGetBindGroupLayout(pipeline, 0) orelse
        return error.BindGroupLayout;
    defer c.wgpuBindGroupLayoutRelease(bgl);

    const bg = c.wgpuDeviceCreateBindGroup(gloc.device.device, &.{
        .layout = bgl,
        .entries = entries.ptr,
        .entryCount = entries.len,
    }) orelse return error.BindGroup;
    defer c.wgpuBindGroupRelease(bg);

    const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device.device, null) orelse
        return error.Encoder;
    // Registered right after acquisition so later early returns cannot leak it.
    defer c.wgpuCommandEncoderRelease(enc);

    {
        const pass = c.wgpuCommandEncoderBeginComputePass(enc, null) orelse
            return error.ComputePass;
        // The pass is ended and then released before the encoder is finished.
        defer c.wgpuComputePassEncoderRelease(pass);

        c.wgpuComputePassEncoderSetPipeline(pass, pipeline);
        c.wgpuComputePassEncoderSetBindGroup(pass, 0, bg, 0, null);

        const workgroup_size = 256;
        const max_workgroups = 65535;

        const desired_workgroups = ceilDiv(n, workgroup_size);
        const dispatch_count = @min(desired_workgroups, max_workgroups);

        c.wgpuComputePassEncoderDispatchWorkgroups(pass, @intCast(dispatch_count), 1, 1);
        c.wgpuComputePassEncoderEnd(pass);
    }

    // Kept as the optional handle type so `&cmd` matches the C parameter type.
    const cmd = c.wgpuCommandEncoderFinish(enc, null);
    if (cmd == null) return error.CommandBuffer;
    defer c.wgpuCommandBufferRelease(cmd);

    c.wgpuQueueSubmit(gloc.device.queue, 1, &cmd);
}
|
||||||
|
|
||||||
|
/// Returns `n / d` rounded up to the next integer.
///
/// Computed as quotient plus a remainder flag rather than `(n + d - 1) / d`,
/// so the intermediate value cannot overflow for large `n`.
/// `d` must be non-zero (division by zero is illegal behavior).
fn ceilDiv(n: usize, d: usize) usize {
    return n / d + @intFromBool(n % d != 0);
}
|
||||||
93
src/Vec.zig
93
src/Vec.zig
@ -3,7 +3,7 @@ const c = @import("utils.zig").c;
|
|||||||
const GpuAllocator = @import("GpuAllocator.zig");
|
const GpuAllocator = @import("GpuAllocator.zig");
|
||||||
const GpuBuffer = @import("GpuBuffer.zig");
|
const GpuBuffer = @import("GpuBuffer.zig");
|
||||||
const GpuDevice = @import("GpuDevice.zig");
|
const GpuDevice = @import("GpuDevice.zig");
|
||||||
const GpuPipeline = @import("GpuPipeline.zig");
|
const GpuProcess = @import("GpuProcess.zig");
|
||||||
|
|
||||||
const Vec = @This();
|
const Vec = @This();
|
||||||
|
|
||||||
@ -34,10 +34,7 @@ pub fn deinit(self: Vec) void {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// CPU to GPU.
|
/// CPU to GPU.
|
||||||
pub fn load(
|
pub fn load(self: Vec, data: []const f16) !void {
|
||||||
self: Vec,
|
|
||||||
data: []const f16,
|
|
||||||
) !void {
|
|
||||||
try self.buf.load(data);
|
try self.buf.load(data);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -46,14 +43,13 @@ pub fn byteSize(self: Vec) u64 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Changed: gloc is passed by value instead of *GpuAllocator
|
// Changed: gloc is passed by value instead of *GpuAllocator
|
||||||
pub fn run(self: Vec, gloc: GpuAllocator, other: Vec, pip: GpuPipeline) !Vec {
|
pub fn run(self: Vec, gloc: GpuAllocator, other: Vec, process: GpuProcess) !Vec {
|
||||||
std.debug.assert(self.len == other.len);
|
std.debug.assert(self.len == other.len);
|
||||||
|
|
||||||
const result = try Vec.initZero(gloc, self.len);
|
const result = try Vec.initZero(gloc, self.len);
|
||||||
errdefer result.deinit();
|
errdefer result.deinit();
|
||||||
|
|
||||||
try dispatch2in1out(gloc, pip.raw, self.buf, other.buf, result.buf, self.byteSize());
|
try process.run(gloc, self.buf, other.buf, result.buf);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -71,84 +67,3 @@ fn onMapped(
|
|||||||
const flag: *bool = @ptrCast(@alignCast(userdata1.?));
|
const flag: *bool = @ptrCast(@alignCast(userdata1.?));
|
||||||
flag.* = (status == c.WGPUMapAsyncStatus_Success);
|
flag.* = (status == c.WGPUMapAsyncStatus_Success);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Runs `pipeline` over `buf_a` and `buf_b`, writing into `buf_out`.
///
/// `bytes` is the total amount of data to process; it is split into chunks
/// of at most 1 GiB. For each chunk a bind group is built with:
///   - binding 0: `buf_a` at the chunk offset,
///   - binding 1: `buf_b` at the chunk offset,
///   - binding 2: `buf_out` at the chunk offset,
///   - binding 3: a 4-byte uniform holding the chunk's element count,
///     where one element is `@sizeOf(f16)` bytes.
///
/// `gloc` is taken by value (not as `*GpuAllocator`).
///
/// Errors: propagates failures from `GpuBuffer.init` and `submitPass`.
fn dispatch2in1out(
    gloc: GpuAllocator,
    pipeline: c.WGPUComputePipeline,
    buf_a: GpuBuffer,
    buf_b: GpuBuffer,
    buf_out: GpuBuffer,
    bytes: u64,
) !void {
    const max_chunk_bytes: u64 = 1024 * 1024 * 1024; // 1 GB

    var offset: u64 = 0;
    while (offset < bytes) {
        const current_chunk_bytes = @min(max_chunk_bytes, bytes - offset);
        const current_chunk_elements: u32 = @intCast(current_chunk_bytes / @sizeOf(f16));

        // Per-chunk uniform that tells the shader how many elements to process.
        const info_buf = try GpuBuffer.init(
            gloc,
            @sizeOf(u32),
            .initMany(&.{ .Uniform, .CopyDst }),
        );
        defer info_buf.deinit();

        // The address-of operator was garbled to `¤t_chunk_elements` in the
        // scraped text; the uniform is filled from the local element count.
        c.wgpuQueueWriteBuffer(gloc.device.queue, info_buf.raw, 0, &current_chunk_elements, @sizeOf(u32));

        const entries = [_]c.WGPUBindGroupEntry{
            .{ .binding = 0, .buffer = buf_a.raw, .offset = offset, .size = current_chunk_bytes },
            .{ .binding = 1, .buffer = buf_b.raw, .offset = offset, .size = current_chunk_bytes },
            .{ .binding = 2, .buffer = buf_out.raw, .offset = offset, .size = current_chunk_bytes },
            .{ .binding = 3, .buffer = info_buf.raw, .offset = 0, .size = @sizeOf(u32) },
        };

        try submitPass(gloc, pipeline, &entries, current_chunk_elements);

        offset += current_chunk_bytes;
    }
}
|
|
||||||
|
|
||||||
/// Records and submits one compute pass that runs `pipeline` with `entries`
/// bound as bind group 0.
///
/// `n` is the number of elements to cover. The dispatch uses
/// `ceilDiv(n, 256)` workgroups along X, clamped to 65535.
/// NOTE(review): when the clamp applies, fewer than `n` invocations are
/// launched; presumably the shader strides over the remainder — confirm
/// against the WGSL source.
///
/// `gloc` is taken by value (not as `*GpuAllocator`).
///
/// Errors: `error.BindGroupLayout`, `error.BindGroup`, `error.Encoder`,
/// `error.ComputePass` or `error.CommandBuffer` when the corresponding
/// WebGPU call returns a null handle.
fn submitPass(
    gloc: GpuAllocator,
    pipeline: c.WGPUComputePipeline,
    entries: []const c.WGPUBindGroupEntry,
    n: usize,
) !void {
    const bgl = c.wgpuComputePipelineGetBindGroupLayout(pipeline, 0) orelse
        return error.BindGroupLayout;
    defer c.wgpuBindGroupLayoutRelease(bgl);

    const bg = c.wgpuDeviceCreateBindGroup(gloc.device.device, &.{
        .layout = bgl,
        .entries = entries.ptr,
        .entryCount = entries.len,
    }) orelse return error.BindGroup;
    defer c.wgpuBindGroupRelease(bg);

    const enc = c.wgpuDeviceCreateCommandEncoder(gloc.device.device, null) orelse
        return error.Encoder;
    // Registered right after acquisition so later early returns cannot leak it.
    defer c.wgpuCommandEncoderRelease(enc);

    {
        const pass = c.wgpuCommandEncoderBeginComputePass(enc, null) orelse
            return error.ComputePass;
        // The pass is ended and then released before the encoder is finished.
        defer c.wgpuComputePassEncoderRelease(pass);

        c.wgpuComputePassEncoderSetPipeline(pass, pipeline);
        c.wgpuComputePassEncoderSetBindGroup(pass, 0, bg, 0, null);

        const workgroup_size = 256;
        const max_workgroups = 65535;

        const desired_workgroups = ceilDiv(n, workgroup_size);
        const dispatch_count = @min(desired_workgroups, max_workgroups);

        c.wgpuComputePassEncoderDispatchWorkgroups(pass, @intCast(dispatch_count), 1, 1);
        c.wgpuComputePassEncoderEnd(pass);
    }

    // Kept as the optional handle type so `&cmd` matches the C parameter type.
    const cmd = c.wgpuCommandEncoderFinish(enc, null);
    if (cmd == null) return error.CommandBuffer;
    defer c.wgpuCommandBufferRelease(cmd);

    c.wgpuQueueSubmit(gloc.device.queue, 1, &cmd);
}
|
|
||||||
|
|
||||||
/// Returns `n / d` rounded up to the next integer.
///
/// Computed as quotient plus a remainder flag rather than `(n + d - 1) / d`,
/// so the intermediate value cannot overflow for large `n`.
/// `d` must be non-zero (division by zero is illegal behavior).
fn ceilDiv(n: usize, d: usize) usize {
    return n / d + @intFromBool(n % d != 0);
}
|
|
||||||
|
|||||||
@ -2,7 +2,7 @@ const std = @import("std");
|
|||||||
const GpuDevice = @import("GpuDevice.zig");
|
const GpuDevice = @import("GpuDevice.zig");
|
||||||
const GpuAllocator = @import("GpuAllocator.zig");
|
const GpuAllocator = @import("GpuAllocator.zig");
|
||||||
const GpuArena = @import("GpuArena.zig");
|
const GpuArena = @import("GpuArena.zig");
|
||||||
const GpuPipeline = @import("GpuPipeline.zig");
|
const GpuProcess = @import("GpuProcess.zig");
|
||||||
const Vec = @import("Vec.zig");
|
const Vec = @import("Vec.zig");
|
||||||
|
|
||||||
const c = @import("utils.zig").c;
|
const c = @import("utils.zig").c;
|
||||||
@ -18,8 +18,8 @@ pub fn main(init: std.process.Init) !void {
|
|||||||
|
|
||||||
const gloc = grena.gpuAllocator();
|
const gloc = grena.gpuAllocator();
|
||||||
|
|
||||||
const add_pip = try GpuPipeline.init(device, @embedFile("shaders/add.wgsl"));
|
const add = try GpuProcess.init(device, @embedFile("shaders/add.wgsl"));
|
||||||
defer add_pip.deinit();
|
defer add.deinit();
|
||||||
|
|
||||||
const data_a = try allocator.alloc(f16, 16);
|
const data_a = try allocator.alloc(f16, 16);
|
||||||
defer allocator.free(data_a);
|
defer allocator.free(data_a);
|
||||||
@ -36,7 +36,7 @@ pub fn main(init: std.process.Init) !void {
|
|||||||
const b = try Vec.initLoad(gloc, data_b);
|
const b = try Vec.initLoad(gloc, data_b);
|
||||||
defer b.deinit();
|
defer b.deinit();
|
||||||
|
|
||||||
const sum = try a.run(gloc, b, add_pip);
|
const sum = try a.run(gloc, b, add);
|
||||||
// Don't need `sum.deinit()` because grena will deallocate everything when deinit
|
// Don't need `sum.deinit()` because grena will deallocate everything when deinit
|
||||||
|
|
||||||
std.debug.print("Bytes used: {d} (3 * {d})\n", .{ grena.allocated_vram_bytes, a.byteSize() });
|
std.debug.print("Bytes used: {d} (3 * {d})\n", .{ grena.allocated_vram_bytes, a.byteSize() });
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user