mirror of
https://github.com/ziglang/zig.git
synced 2025-12-13 17:53:07 +00:00
Merge pull request #12878 from gwenzek/ptx
Update Nvptx backend for Zig 0.10
This commit is contained in:
commit
feab1ebe1b
@ -833,6 +833,7 @@ pub fn default_panic(msg: []const u8, error_return_trace: ?*StackTrace, ret_addr
|
|||||||
// Didn't have boot_services, just fallback to whatever.
|
// Didn't have boot_services, just fallback to whatever.
|
||||||
std.os.abort();
|
std.os.abort();
|
||||||
},
|
},
|
||||||
|
.cuda => std.os.abort(),
|
||||||
else => {
|
else => {
|
||||||
const first_trace_addr = ret_addr orelse @returnAddress();
|
const first_trace_addr = ret_addr orelse @returnAddress();
|
||||||
std.debug.panicImpl(error_return_trace, first_trace_addr, msg);
|
std.debug.panicImpl(error_return_trace, first_trace_addr, msg);
|
||||||
|
|||||||
@ -500,10 +500,16 @@ pub fn abort() noreturn {
|
|||||||
@breakpoint();
|
@breakpoint();
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
if (builtin.os.tag == .cuda) {
|
||||||
|
// TODO: introduce `@trap` instead of abusing https://github.com/ziglang/zig/issues/2291
|
||||||
|
@"llvm.trap"();
|
||||||
|
}
|
||||||
|
|
||||||
system.abort();
|
system.abort();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern fn @"llvm.trap"() noreturn;
|
||||||
|
|
||||||
pub const RaiseError = UnexpectedError;
|
pub const RaiseError = UnexpectedError;
|
||||||
|
|
||||||
pub fn raise(sig: u8) RaiseError!void {
|
pub fn raise(sig: u8) RaiseError!void {
|
||||||
|
|||||||
@ -951,6 +951,13 @@ pub const Target = struct {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn isNvptx(arch: Arch) bool {
|
||||||
|
return switch (arch) {
|
||||||
|
.nvptx, .nvptx64 => true,
|
||||||
|
else => false,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
pub fn parseCpuModel(arch: Arch, cpu_name: []const u8) !*const Cpu.Model {
|
pub fn parseCpuModel(arch: Arch, cpu_name: []const u8) !*const Cpu.Model {
|
||||||
for (arch.allCpuModels()) |cpu| {
|
for (arch.allCpuModels()) |cpu| {
|
||||||
if (mem.eql(u8, cpu_name, cpu.name)) {
|
if (mem.eql(u8, cpu_name, cpu.name)) {
|
||||||
|
|||||||
@ -720,6 +720,15 @@ pub const Decl = struct {
|
|||||||
var buffer = std.ArrayList(u8).init(mod.gpa);
|
var buffer = std.ArrayList(u8).init(mod.gpa);
|
||||||
defer buffer.deinit();
|
defer buffer.deinit();
|
||||||
try decl.renderFullyQualifiedName(mod, buffer.writer());
|
try decl.renderFullyQualifiedName(mod, buffer.writer());
|
||||||
|
|
||||||
|
// Sanitize the name for nvptx which is more restrictive.
|
||||||
|
if (mod.comp.bin_file.options.target.cpu.arch.isNvptx()) {
|
||||||
|
for (buffer.items) |*byte| switch (byte.*) {
|
||||||
|
'{', '}', '*', '[', ']', '(', ')', ',', ' ', '\'' => byte.* = '_',
|
||||||
|
else => {},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
return buffer.toOwnedSliceSentinel(0);
|
return buffer.toOwnedSliceSentinel(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
13
src/Sema.zig
13
src/Sema.zig
@ -18202,12 +18202,6 @@ fn zirAddrSpaceCast(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.Inst
|
|||||||
else
|
else
|
||||||
dest_ptr_ty;
|
dest_ptr_ty;
|
||||||
|
|
||||||
if (try sema.resolveMaybeUndefVal(block, ptr_src, ptr)) |val| {
|
|
||||||
// Pointer value should compatible with both address spaces.
|
|
||||||
// TODO: Figure out why this generates an invalid bitcast.
|
|
||||||
return sema.addConstant(dest_ty, val);
|
|
||||||
}
|
|
||||||
|
|
||||||
try sema.requireRuntimeBlock(block, src, ptr_src);
|
try sema.requireRuntimeBlock(block, src, ptr_src);
|
||||||
// TODO: Address space cast safety?
|
// TODO: Address space cast safety?
|
||||||
|
|
||||||
@ -21397,7 +21391,12 @@ fn validateExternType(
|
|||||||
},
|
},
|
||||||
.Fn => {
|
.Fn => {
|
||||||
if (position != .other) return false;
|
if (position != .other) return false;
|
||||||
return !Type.fnCallingConventionAllowsZigTypes(ty.fnCallingConvention());
|
return switch (ty.fnCallingConvention()) {
|
||||||
|
// For now we want to authorize PTX kernel to use zig objects, even if we end up exposing the ABI.
|
||||||
|
// The goal is to experiment with more integrated CPU/GPU code.
|
||||||
|
.PtxKernel => true,
|
||||||
|
else => !Type.fnCallingConventionAllowsZigTypes(ty.fnCallingConvention()),
|
||||||
|
};
|
||||||
},
|
},
|
||||||
.Enum => {
|
.Enum => {
|
||||||
var buf: Type.Payload.Bits = undefined;
|
var buf: Type.Payload.Bits = undefined;
|
||||||
|
|||||||
@ -28,10 +28,7 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*NvPtx {
|
|||||||
if (!build_options.have_llvm) return error.PtxArchNotSupported;
|
if (!build_options.have_llvm) return error.PtxArchNotSupported;
|
||||||
if (!options.use_llvm) return error.PtxArchNotSupported;
|
if (!options.use_llvm) return error.PtxArchNotSupported;
|
||||||
|
|
||||||
switch (options.target.cpu.arch) {
|
if (!options.target.cpu.arch.isNvptx()) return error.PtxArchNotSupported;
|
||||||
.nvptx, .nvptx64 => {},
|
|
||||||
else => return error.PtxArchNotSupported,
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (options.target.os.tag) {
|
switch (options.target.os.tag) {
|
||||||
// TODO: does it also work with nvcl ?
|
// TODO: does it also work with nvcl ?
|
||||||
@ -59,9 +56,8 @@ pub fn openPath(allocator: Allocator, sub_path: []const u8, options: link.Option
|
|||||||
if (!options.use_llvm) return error.PtxArchNotSupported;
|
if (!options.use_llvm) return error.PtxArchNotSupported;
|
||||||
assert(options.target.ofmt == .nvptx);
|
assert(options.target.ofmt == .nvptx);
|
||||||
|
|
||||||
const nvptx = try createEmpty(allocator, options);
|
log.debug("Opening .ptx target file {s}", .{sub_path});
|
||||||
log.info("Opening .ptx target file {s}", .{sub_path});
|
return createEmpty(allocator, options);
|
||||||
return nvptx;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn deinit(self: *NvPtx) void {
|
pub fn deinit(self: *NvPtx) void {
|
||||||
@ -109,13 +105,19 @@ pub fn flushModule(self: *NvPtx, comp: *Compilation, prog_node: *std.Progress.No
|
|||||||
const tracy = trace(@src());
|
const tracy = trace(@src());
|
||||||
defer tracy.end();
|
defer tracy.end();
|
||||||
|
|
||||||
var hack_comp = comp;
|
const outfile = comp.bin_file.options.emit.?;
|
||||||
if (comp.bin_file.options.emit) |emit| {
|
// We modify 'comp' before passing it to LLVM, but restore value afterwards.
|
||||||
hack_comp.emit_asm = .{
|
// We tell LLVM to not try to build a .o, only an "assembly" file.
|
||||||
.directory = emit.directory,
|
// This is required by the LLVM PTX backend.
|
||||||
|
comp.bin_file.options.emit = null;
|
||||||
|
comp.emit_asm = .{
|
||||||
|
.directory = outfile.directory,
|
||||||
.basename = comp.bin_file.intermediary_basename.?,
|
.basename = comp.bin_file.intermediary_basename.?,
|
||||||
};
|
};
|
||||||
hack_comp.bin_file.options.emit = null;
|
defer {
|
||||||
|
comp.bin_file.options.emit = outfile;
|
||||||
|
comp.emit_asm = null;
|
||||||
}
|
}
|
||||||
return try self.llvm_object.flushModule(hack_comp, prog_node);
|
|
||||||
|
try self.llvm_object.flushModule(comp, prog_node);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -411,7 +411,11 @@ pub fn classifyCompilerRtLibName(target: std.Target, name: []const u8) CompilerR
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn hasDebugInfo(target: std.Target) bool {
|
pub fn hasDebugInfo(target: std.Target) bool {
|
||||||
_ = target;
|
if (target.cpu.arch.isNvptx()) {
|
||||||
|
// TODO: not sure how to test "ptx >= 7.5" with featureset
|
||||||
|
return std.Target.nvptx.featureSetHas(target.cpu.features, .ptx75);
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -651,7 +655,7 @@ pub fn addrSpaceCastIsValid(
|
|||||||
const arch = target.cpu.arch;
|
const arch = target.cpu.arch;
|
||||||
switch (arch) {
|
switch (arch) {
|
||||||
.x86_64, .i386 => return arch.supportsAddressSpace(from) and arch.supportsAddressSpace(to),
|
.x86_64, .i386 => return arch.supportsAddressSpace(from) and arch.supportsAddressSpace(to),
|
||||||
.amdgcn => {
|
.nvptx64, .nvptx, .amdgcn => {
|
||||||
const to_generic = arch.supportsAddressSpace(from) and to == .generic;
|
const to_generic = arch.supportsAddressSpace(from) and to == .generic;
|
||||||
const from_generic = arch.supportsAddressSpace(to) and from == .generic;
|
const from_generic = arch.supportsAddressSpace(to) and from == .generic;
|
||||||
return to_generic or from_generic;
|
return to_generic or from_generic;
|
||||||
|
|||||||
@ -4,6 +4,5 @@ const TestContext = @import("../src/test.zig").TestContext;
|
|||||||
pub fn addCases(ctx: *TestContext) !void {
|
pub fn addCases(ctx: *TestContext) !void {
|
||||||
try @import("compile_errors.zig").addCases(ctx);
|
try @import("compile_errors.zig").addCases(ctx);
|
||||||
try @import("stage2/cbe.zig").addCases(ctx);
|
try @import("stage2/cbe.zig").addCases(ctx);
|
||||||
// https://github.com/ziglang/zig/issues/10968
|
try @import("stage2/nvptx.zig").addCases(ctx);
|
||||||
//try @import("stage2/nvptx.zig").addCases(ctx);
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -23,11 +23,10 @@ pub fn addCases(ctx: *TestContext) !void {
|
|||||||
var case = addPtx(ctx, "nvptx: read special registers");
|
var case = addPtx(ctx, "nvptx: read special registers");
|
||||||
|
|
||||||
case.compiles(
|
case.compiles(
|
||||||
\\fn threadIdX() usize {
|
\\fn threadIdX() u32 {
|
||||||
\\ var tid = asm volatile ("mov.u32 \t$0, %tid.x;"
|
\\ return asm ("mov.u32 \t%[r], %tid.x;"
|
||||||
\\ : [ret] "=r" (-> u32),
|
\\ : [r] "=r" (-> u32),
|
||||||
\\ );
|
\\ );
|
||||||
\\ return @as(usize, tid);
|
|
||||||
\\}
|
\\}
|
||||||
\\
|
\\
|
||||||
\\pub export fn special_reg(a: []const i32, out: []i32) callconv(.PtxKernel) void {
|
\\pub export fn special_reg(a: []const i32, out: []i32) callconv(.PtxKernel) void {
|
||||||
@ -49,6 +48,38 @@ pub fn addCases(ctx: *TestContext) !void {
|
|||||||
\\}
|
\\}
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
var case = addPtx(ctx, "nvptx: reduce in shared mem");
|
||||||
|
case.compiles(
|
||||||
|
\\fn threadIdX() u32 {
|
||||||
|
\\ return asm ("mov.u32 \t%[r], %tid.x;"
|
||||||
|
\\ : [r] "=r" (-> u32),
|
||||||
|
\\ );
|
||||||
|
\\}
|
||||||
|
\\
|
||||||
|
\\ var _sdata: [1024]f32 addrspace(.shared) = undefined;
|
||||||
|
\\ pub export fn reduceSum(d_x: []const f32, out: *f32) callconv(.PtxKernel) void {
|
||||||
|
\\ var sdata = @addrSpaceCast(.generic, &_sdata);
|
||||||
|
\\ const tid: u32 = threadIdX();
|
||||||
|
\\ var sum = d_x[tid];
|
||||||
|
\\ sdata[tid] = sum;
|
||||||
|
\\ asm volatile ("bar.sync \t0;");
|
||||||
|
\\ var s: u32 = 512;
|
||||||
|
\\ while (s > 0) : (s = s >> 1) {
|
||||||
|
\\ if (tid < s) {
|
||||||
|
\\ sum += sdata[tid + s];
|
||||||
|
\\ sdata[tid] = sum;
|
||||||
|
\\ }
|
||||||
|
\\ asm volatile ("bar.sync \t0;");
|
||||||
|
\\ }
|
||||||
|
\\
|
||||||
|
\\ if (tid == 0) {
|
||||||
|
\\ out.* = sum;
|
||||||
|
\\ }
|
||||||
|
\\ }
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const nvptx_target = std.zig.CrossTarget{
|
const nvptx_target = std.zig.CrossTarget{
|
||||||
@ -68,6 +99,8 @@ pub fn addPtx(
|
|||||||
.files = std.ArrayList(TestContext.File).init(ctx.cases.allocator),
|
.files = std.ArrayList(TestContext.File).init(ctx.cases.allocator),
|
||||||
.link_libc = false,
|
.link_libc = false,
|
||||||
.backend = .llvm,
|
.backend = .llvm,
|
||||||
|
// Bug in Debug mode
|
||||||
|
.optimize_mode = .ReleaseSafe,
|
||||||
}) catch @panic("out of memory");
|
}) catch @panic("out of memory");
|
||||||
return &ctx.cases.items[ctx.cases.items.len - 1];
|
return &ctx.cases.items[ctx.cases.items.len - 1];
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user