mirror of
https://github.com/ziglang/zig.git
synced 2026-02-13 12:59:04 +00:00
nvptx: add implementations for GPU builtins
This commit is contained in:
parent
76f062690c
commit
bcfc7cf13c
@ -26703,7 +26703,7 @@ fn zirWorkItem(
|
||||
|
||||
switch (target.cpu.arch) {
|
||||
// TODO: Allow for other GPU targets.
|
||||
.amdgcn, .spirv, .spirv64, .spirv32 => {},
|
||||
.amdgcn, .spirv, .spirv64, .spirv32, .nvptx, .nvptx64 => {},
|
||||
else => {
|
||||
return sema.fail(block, builtin_src, "builtin only available on GPU targets; targeted architecture is {s}", .{@tagName(target.cpu.arch)});
|
||||
},
|
||||
|
||||
@ -10286,7 +10286,7 @@ pub const FuncGen = struct {
|
||||
return self.wip.cast(.addrspacecast, operand, try o.lowerType(inst_ty), "");
|
||||
}
|
||||
|
||||
fn amdgcnWorkIntrinsic(
|
||||
fn workIntrinsic(
|
||||
self: *FuncGen,
|
||||
dimension: u32,
|
||||
default: u32,
|
||||
@ -10303,44 +10303,60 @@ pub const FuncGen = struct {
|
||||
fn airWorkItemId(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
|
||||
const o = self.ng.object;
|
||||
const target = o.pt.zcu.getTarget();
|
||||
assert(target.cpu.arch == .amdgcn); // TODO is to port this function to other GPU architectures
|
||||
|
||||
const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
|
||||
const dimension = pl_op.payload;
|
||||
return self.amdgcnWorkIntrinsic(dimension, 0, "amdgcn.workitem.id");
|
||||
|
||||
return switch (target.cpu.arch) {
|
||||
.amdgcn => self.workIntrinsic(dimension, 0, "amdgcn.workitem.id"),
|
||||
.nvptx, .nvptx64 => self.workIntrinsic(dimension, 0, "nvvm.read.ptx.sreg.tid"),
|
||||
else => unreachable,
|
||||
};
|
||||
}
|
||||
|
||||
fn airWorkGroupSize(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
|
||||
const o = self.ng.object;
|
||||
const target = o.pt.zcu.getTarget();
|
||||
assert(target.cpu.arch == .amdgcn); // TODO is to port this function to other GPU architectures
|
||||
|
||||
const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
|
||||
const dimension = pl_op.payload;
|
||||
if (dimension >= 3) return .@"1";
|
||||
|
||||
// Fetch the dispatch pointer, which points to this structure:
|
||||
// https://github.com/RadeonOpenCompute/ROCR-Runtime/blob/adae6c61e10d371f7cbc3d0e94ae2c070cab18a4/src/inc/hsa.h#L2913
|
||||
const dispatch_ptr =
|
||||
try self.wip.callIntrinsic(.normal, .none, .@"amdgcn.dispatch.ptr", &.{}, &.{}, "");
|
||||
switch (target.cpu.arch) {
|
||||
.amdgcn => {
|
||||
if (dimension >= 3) return .@"1";
|
||||
|
||||
// Load the work_group_* member from the struct as u16.
|
||||
// Just treat the dispatch pointer as an array of u16 to keep things simple.
|
||||
const workgroup_size_ptr = try self.wip.gep(.inbounds, .i16, dispatch_ptr, &.{
|
||||
try o.builder.intValue(try o.lowerType(Type.usize), 2 + dimension),
|
||||
}, "");
|
||||
const workgroup_size_alignment = comptime Builder.Alignment.fromByteUnits(2);
|
||||
return self.wip.load(.normal, .i16, workgroup_size_ptr, workgroup_size_alignment, "");
|
||||
// Fetch the dispatch pointer, which points to this structure:
|
||||
// https://github.com/RadeonOpenCompute/ROCR-Runtime/blob/adae6c61e10d371f7cbc3d0e94ae2c070cab18a4/src/inc/hsa.h#L2913
|
||||
const dispatch_ptr =
|
||||
try self.wip.callIntrinsic(.normal, .none, .@"amdgcn.dispatch.ptr", &.{}, &.{}, "");
|
||||
|
||||
// Load the work_group_* member from the struct as u16.
|
||||
// Just treat the dispatch pointer as an array of u16 to keep things simple.
|
||||
const workgroup_size_ptr = try self.wip.gep(.inbounds, .i16, dispatch_ptr, &.{
|
||||
try o.builder.intValue(try o.lowerType(Type.usize), 2 + dimension),
|
||||
}, "");
|
||||
const workgroup_size_alignment = comptime Builder.Alignment.fromByteUnits(2);
|
||||
return self.wip.load(.normal, .i16, workgroup_size_ptr, workgroup_size_alignment, "");
|
||||
},
|
||||
.nvptx, .nvptx64 => {
|
||||
return self.workIntrinsic(dimension, 1, "nvvm.read.ptx.sreg.ntid");
|
||||
},
|
||||
else => unreachable,
|
||||
}
|
||||
}
|
||||
|
||||
fn airWorkGroupId(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
|
||||
const o = self.ng.object;
|
||||
const target = o.pt.zcu.getTarget();
|
||||
assert(target.cpu.arch == .amdgcn); // TODO is to port this function to other GPU architectures
|
||||
|
||||
const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
|
||||
const dimension = pl_op.payload;
|
||||
return self.amdgcnWorkIntrinsic(dimension, 0, "amdgcn.workgroup.id");
|
||||
|
||||
return switch (target.cpu.arch) {
|
||||
.amdgcn => self.workIntrinsic(dimension, 0, "amdgcn.workgroup.id"),
|
||||
.nvptx, .nvptx64 => self.workIntrinsic(dimension, 0, "nvvm.read.ptx.sreg.ctaid"),
|
||||
else => unreachable,
|
||||
};
|
||||
}
|
||||
|
||||
fn getErrorNameTable(self: *FuncGen) Allocator.Error!Builder.Variable.Index {
|
||||
|
||||
@ -2729,6 +2729,17 @@ pub const Intrinsic = enum {
|
||||
@"amdgcn.workgroup.id.z",
|
||||
@"amdgcn.dispatch.ptr",
|
||||
|
||||
// NVPTX
|
||||
@"nvvm.read.ptx.sreg.tid.x",
|
||||
@"nvvm.read.ptx.sreg.tid.y",
|
||||
@"nvvm.read.ptx.sreg.tid.z",
|
||||
@"nvvm.read.ptx.sreg.ntid.x",
|
||||
@"nvvm.read.ptx.sreg.ntid.y",
|
||||
@"nvvm.read.ptx.sreg.ntid.z",
|
||||
@"nvvm.read.ptx.sreg.ctaid.x",
|
||||
@"nvvm.read.ptx.sreg.ctaid.y",
|
||||
@"nvvm.read.ptx.sreg.ctaid.z",
|
||||
|
||||
// WebAssembly
|
||||
@"wasm.memory.size",
|
||||
@"wasm.memory.grow",
|
||||
@ -3886,6 +3897,72 @@ pub const Intrinsic = enum {
|
||||
.attrs = &.{ .nocallback, .nofree, .nosync, .nounwind, .speculatable, .willreturn, .{ .memory = Attribute.Memory.all(.none) } },
|
||||
},
|
||||
|
||||
.@"nvvm.read.ptx.sreg.tid.x" = .{
|
||||
.ret_len = 1,
|
||||
.params = &.{
|
||||
.{ .kind = .{ .type = .i32 } },
|
||||
},
|
||||
.attrs = &.{ .nounwind, .readnone },
|
||||
},
|
||||
.@"nvvm.read.ptx.sreg.tid.y" = .{
|
||||
.ret_len = 1,
|
||||
.params = &.{
|
||||
.{ .kind = .{ .type = .i32 } },
|
||||
},
|
||||
.attrs = &.{ .nounwind, .readnone },
|
||||
},
|
||||
.@"nvvm.read.ptx.sreg.tid.z" = .{
|
||||
.ret_len = 1,
|
||||
.params = &.{
|
||||
.{ .kind = .{ .type = .i32 } },
|
||||
},
|
||||
.attrs = &.{ .nounwind, .readnone },
|
||||
},
|
||||
|
||||
.@"nvvm.read.ptx.sreg.ntid.x" = .{
|
||||
.ret_len = 1,
|
||||
.params = &.{
|
||||
.{ .kind = .{ .type = .i32 } },
|
||||
},
|
||||
.attrs = &.{ .nounwind, .readnone },
|
||||
},
|
||||
.@"nvvm.read.ptx.sreg.ntid.y" = .{
|
||||
.ret_len = 1,
|
||||
.params = &.{
|
||||
.{ .kind = .{ .type = .i32 } },
|
||||
},
|
||||
.attrs = &.{ .nounwind, .readnone },
|
||||
},
|
||||
.@"nvvm.read.ptx.sreg.ntid.z" = .{
|
||||
.ret_len = 1,
|
||||
.params = &.{
|
||||
.{ .kind = .{ .type = .i32 } },
|
||||
},
|
||||
.attrs = &.{ .nounwind, .readnone },
|
||||
},
|
||||
|
||||
.@"nvvm.read.ptx.sreg.ctaid.x" = .{
|
||||
.ret_len = 1,
|
||||
.params = &.{
|
||||
.{ .kind = .{ .type = .i32 } },
|
||||
},
|
||||
.attrs = &.{ .nounwind, .readnone },
|
||||
},
|
||||
.@"nvvm.read.ptx.sreg.ctaid.y" = .{
|
||||
.ret_len = 1,
|
||||
.params = &.{
|
||||
.{ .kind = .{ .type = .i32 } },
|
||||
},
|
||||
.attrs = &.{ .nounwind, .readnone },
|
||||
},
|
||||
.@"nvvm.read.ptx.sreg.ctaid.z" = .{
|
||||
.ret_len = 1,
|
||||
.params = &.{
|
||||
.{ .kind = .{ .type = .i32 } },
|
||||
},
|
||||
.attrs = &.{ .nounwind, .readnone },
|
||||
},
|
||||
|
||||
.@"wasm.memory.size" = .{
|
||||
.ret_len = 1,
|
||||
.params = &.{
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user