From 85c6b0f0bcda2e3bb2dc5f3347e5c6712d268a39 Mon Sep 17 00:00:00 2001 From: Guillaume Wenzek Date: Tue, 9 Aug 2022 15:37:08 +0200 Subject: [PATCH 01/11] allow ptx kernel export --- src/Sema.zig | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Sema.zig b/src/Sema.zig index 1b2bf84885..da3dea6125 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -21397,7 +21397,12 @@ fn validateExternType( }, .Fn => { if (position != .other) return false; - return !Type.fnCallingConventionAllowsZigTypes(ty.fnCallingConvention()); + return switch (ty.fnCallingConvention()) { + // For now we want to authorize PTX kernel to use zig objects, even if we end up exposing the ABI. + // The goal is to experiment with more integrated CPU/GPU code. + .PtxKernel => true, + else => !Type.fnCallingConventionAllowsZigTypes(ty.fnCallingConvention()), + }; }, .Enum => { var buf: Type.Payload.Bits = undefined; From b3dc80a1e33cf78205666d01a6dcf53bd5a4698f Mon Sep 17 00:00:00 2001 From: Guillaume Wenzek Date: Thu, 1 Sep 2022 21:47:44 +0200 Subject: [PATCH 02/11] disable debug info for ptx<7.5 --- src/target.zig | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/target.zig b/src/target.zig index 3fbaf6abc4..2c05a80258 100644 --- a/src/target.zig +++ b/src/target.zig @@ -411,8 +411,13 @@ pub fn classifyCompilerRtLibName(target: std.Target, name: []const u8) CompilerR } pub fn hasDebugInfo(target: std.Target) bool { - _ = target; - return true; + return switch (target.cpu.arch) { + .nvptx, .nvptx64 => { + // TODO: not sure to test "ptx >= 7.5" with featureset + return std.Target.nvptx.featureSetHas(target.cpu.features, .ptx75); + }, + else => true + }; } pub fn defaultCompilerRtOptimizeMode(target: std.Target) std.builtin.Mode { From 004fca2c6496b7760b453dabc1d8041621d011ca Mon Sep 17 00:00:00 2001 From: Guillaume Wenzek Date: Wed, 7 Sep 2022 11:16:48 +0200 Subject: [PATCH 03/11] restore comp when leaving flushModule --- src/link/NvPtx.zig | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/link/NvPtx.zig b/src/link/NvPtx.zig index 7bf51c7ad3..eeea371d00 100644 --- a/src/link/NvPtx.zig +++ b/src/link/NvPtx.zig @@ -109,13 +109,19 @@ pub fn flushModule(self: *NvPtx, comp: *Compilation, prog_node: *std.Progress.No const tracy = trace(@src()); defer tracy.end(); - var hack_comp = comp; - if (comp.bin_file.options.emit) |emit| { - hack_comp.emit_asm = .{ - .directory = emit.directory, - .basename = comp.bin_file.intermediary_basename.?, - }; - hack_comp.bin_file.options.emit = null; + const outfile = comp.bin_file.options.emit.?; + // !!! We modify 'comp' before passing it to LLVM, but restore value afterwards + // We tell LLVM to not try to build a .o, only an "assembly" file. + // This is required by the LLVM PTX backend. + comp.bin_file.options.emit = null; + comp.emit_asm = .{ + .directory = outfile.directory, + .basename = comp.bin_file.intermediary_basename.?, + }; + defer { + comp.bin_file.options.emit = outfile; + comp.emit_asm = null; } - return try self.llvm_object.flushModule(hack_comp, prog_node); + + try self.llvm_object.flushModule(comp, prog_node); } From 92a857b76c9a6ff7b885b623ae86844ca77ed646 Mon Sep 17 00:00:00 2001 From: Guillaume Wenzek Date: Wed, 14 Sep 2022 21:47:29 +0200 Subject: [PATCH 04/11] debug --- src/link/NvPtx.zig | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/link/NvPtx.zig b/src/link/NvPtx.zig index eeea371d00..501575fafb 100644 --- a/src/link/NvPtx.zig +++ b/src/link/NvPtx.zig @@ -76,7 +76,15 @@ pub fn updateFunc(self: *NvPtx, module: *Module, func: *Module.Fn, air: Air, liv pub fn updateDecl(self: *NvPtx, module: *Module, decl_index: Module.Decl.Index) !void { if (!build_options.have_llvm) return; + const decl = module.declPtr(decl_index); + log.info("updating {s}", .{decl.name}); return self.llvm_object.updateDecl(module, decl_index); + // const decl_index = func.owner_decl; + // const decl = module.declPtr(decl_index); + + // try mod.decl_exports.ensureUnusedCapacity(gpa, 1); + // try mod.export_owners.ensureUnusedCapacity(gpa, 1); + // mod.decl_exports.getOrPutAssumeCapacity(exported_decl_index); } pub fn updateDeclExports( From aad983cf40dad209ccc79b1e5ef4531e1b4d4ca7 Mon Sep 17 00:00:00 2001 From: Guillaume Wenzek Date: Fri, 16 Sep 2022 22:21:14 +0200 Subject: [PATCH 05/11] sanitize qualified name for nvptx backend --- lib/std/target.zig | 7 +++++++ src/Module.zig | 9 +++++++++ src/link/NvPtx.zig | 20 ++++---------------- src/target.zig | 13 ++++++------- 4 files changed, 26 insertions(+), 23 deletions(-) diff --git a/lib/std/target.zig b/lib/std/target.zig index 201fac222c..5deba28d2c 100644 --- a/lib/std/target.zig +++ b/lib/std/target.zig @@ -951,6 +951,13 @@ pub const Target = struct { }; } + pub fn isNvptx(arch: Arch) bool { + return switch (arch) { + .nvptx, .nvptx64 => true, + else => false, + }; + } + pub fn parseCpuModel(arch: Arch, cpu_name: []const u8) !*const Cpu.Model { for (arch.allCpuModels()) |cpu| { if (mem.eql(u8, cpu_name, cpu.name)) { diff --git a/src/Module.zig b/src/Module.zig index 7d87bdba53..fd5cf29516 100644 --- a/src/Module.zig +++ b/src/Module.zig @@ -720,6 +720,15 @@ pub const Decl = struct { var buffer = std.ArrayList(u8).init(mod.gpa); defer buffer.deinit(); try decl.renderFullyQualifiedName(mod, buffer.writer()); + + // Sanitize the name for nvptx which is more restrictive. + if (mod.comp.bin_file.options.target.cpu.arch.isNvptx()) { + for (buffer.items) |*byte| switch (byte.*) { + '{', '}', '*', '[', ']', '(', ')', ',', ' ', '\'' => byte.* = '_', + else => {}, + }; + } + return buffer.toOwnedSliceSentinel(0); } diff --git a/src/link/NvPtx.zig b/src/link/NvPtx.zig index 501575fafb..4873511d55 100644 --- a/src/link/NvPtx.zig +++ b/src/link/NvPtx.zig @@ -28,10 +28,7 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*NvPtx { if (!build_options.have_llvm) return error.PtxArchNotSupported; if (!options.use_llvm) return error.PtxArchNotSupported; - switch (options.target.cpu.arch) { - .nvptx, .nvptx64 => {}, - else => return error.PtxArchNotSupported, - } + if (!options.target.cpu.arch.isNvptx()) return error.PtxArchNotSupported; switch (options.target.os.tag) { // TODO: does it also work with nvcl ? @@ -59,9 +56,8 @@ pub fn openPath(allocator: Allocator, sub_path: []const u8, options: link.Option if (!options.use_llvm) return error.PtxArchNotSupported; assert(options.target.ofmt == .nvptx); - const nvptx = try createEmpty(allocator, options); - log.info("Opening .ptx target file {s}", .{sub_path}); - return nvptx; + log.debug("Opening .ptx target file {s}", .{sub_path}); + return createEmpty(allocator, options); } pub fn deinit(self: *NvPtx) void { @@ -76,15 +72,7 @@ pub fn updateFunc(self: *NvPtx, module: *Module, func: *Module.Fn, air: Air, liv pub fn updateDecl(self: *NvPtx, module: *Module, decl_index: Module.Decl.Index) !void { if (!build_options.have_llvm) return; - const decl = module.declPtr(decl_index); - log.info("updating {s}", .{decl.name}); return self.llvm_object.updateDecl(module, decl_index); - // const decl_index = func.owner_decl; - // const decl = module.declPtr(decl_index); - - // try mod.decl_exports.ensureUnusedCapacity(gpa, 1); - // try mod.export_owners.ensureUnusedCapacity(gpa, 1); - // mod.decl_exports.getOrPutAssumeCapacity(exported_decl_index); } pub fn updateDeclExports( @@ -118,7 +106,7 @@ pub fn flushModule(self: *NvPtx, comp: *Compilation, prog_node: *std.Progress.No defer tracy.end(); const outfile = comp.bin_file.options.emit.?; - // !!! We modify 'comp' before passing it to LLVM, but restore value afterwards + // We modify 'comp' before passing it to LLVM, but restore value afterwards. // We tell LLVM to not try to build a .o, only an "assembly" file. // This is required by the LLVM PTX backend. comp.bin_file.options.emit = null; diff --git a/src/target.zig b/src/target.zig index 2c05a80258..01db8555da 100644 --- a/src/target.zig +++ b/src/target.zig @@ -411,13 +411,12 @@ pub fn classifyCompilerRtLibName(target: std.Target, name: []const u8) CompilerR } pub fn hasDebugInfo(target: std.Target) bool { - return switch (target.cpu.arch) { - .nvptx, .nvptx64 => { - // TODO: not sure to test "ptx >= 7.5" with featureset - return std.Target.nvptx.featureSetHas(target.cpu.features, .ptx75); - }, - else => true - }; + if (target.cpu.arch.isNvptx()) { + // TODO: not sure how to test "ptx >= 7.5" with featureset + return std.Target.nvptx.featureSetHas(target.cpu.features, .ptx75); + } + + return true; } pub fn defaultCompilerRtOptimizeMode(target: std.Target) std.builtin.Mode { From 577f0aa54b49445d4248cf839346ce3f8632ea39 Mon Sep 17 00:00:00 2001 From: Guillaume Wenzek Date: Sat, 1 Oct 2022 15:51:45 +0200 Subject: [PATCH 06/11] addrSpaceCastIsValid nvptx --- src/target.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/target.zig b/src/target.zig index 01db8555da..9e2d26dac6 100644 --- a/src/target.zig +++ b/src/target.zig @@ -655,7 +655,7 @@ pub fn addrSpaceCastIsValid( const arch = target.cpu.arch; switch (arch) { .x86_64, .i386 => return arch.supportsAddressSpace(from) and arch.supportsAddressSpace(to), - .amdgcn => { + .nvptx64, .nvptx, .amdgcn => { const to_generic = arch.supportsAddressSpace(from) and to == .generic; const from_generic = arch.supportsAddressSpace(to) and from == .generic; return to_generic or from_generic; From b425d887375132a915a5cd2baf7958f273732ee1 Mon Sep 17 00:00:00 2001 From: Guillaume Wenzek Date: Tue, 4 Oct 2022 07:31:36 +0200 Subject: [PATCH 07/11] re-enable nvptx tests --- test/cases.zig | 3 +-- test/stage2/nvptx.zig | 41 ++++++++++++++++++++++++++++++++++++----- 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/test/cases.zig b/test/cases.zig index 65eec90f1b..412b4cb5e2 100644 --- a/test/cases.zig +++ b/test/cases.zig @@ -4,6 +4,5 @@ const TestContext = @import("../src/test.zig").TestContext; pub fn addCases(ctx: *TestContext) !void { try @import("compile_errors.zig").addCases(ctx); try @import("stage2/cbe.zig").addCases(ctx); - // https://github.com/ziglang/zig/issues/10968 - //try @import("stage2/nvptx.zig").addCases(ctx); + try @import("stage2/nvptx.zig").addCases(ctx); } diff --git a/test/stage2/nvptx.zig b/test/stage2/nvptx.zig index 7182092be7..b41a21ed6f 100644 --- a/test/stage2/nvptx.zig +++ b/test/stage2/nvptx.zig @@ -23,11 +23,10 @@ pub fn addCases(ctx: *TestContext) !void { var case = addPtx(ctx, "nvptx: read special registers"); case.compiles( - \\fn threadIdX() usize { - \\ var tid = asm volatile ("mov.u32 \t$0, %tid.x;" - \\ : [ret] "=r" (-> u32), - \\ ); - \\ return @as(usize, tid); + \\fn threadIdX() u32 { + \\ return asm ("mov.u32 \t%[r], %tid.x;" + \\ : [r] "=r" (-> utid), + \\ ); \\} \\ \\pub export fn special_reg(a: []const i32, out: []i32) callconv(.PtxKernel) void { @@ -49,6 +48,38 @@ pub fn addCases(ctx: *TestContext) !void { \\} ); } + + { + var case = addPtx(ctx, "nvptx: reduce in shared mem"); + case.compiles( + \\fn threadIdX() u32 { + \\ return asm ("mov.u32 \t%[r], %tid.x;" + \\ : [r] "=r" (-> utid), + \\ ); + \\} + \\ + \\ var _sdata: [1024]f32 addrspace(.shared) = undefined; + \\ pub export fn reduceSum(d_x: []const f32, out: *f32) callconv(ptx.Kernel) void { + \\ var sdata = @addrSpaceCast(.generic, &_sdata); + \\ const tid: u32 = threadIdX(); + \\ var sum = d_x[tid]; + \\ sdata[tid] = sum; + \\ asm volatile ("bar.sync \t0;"); + \\ var s: u32 = 512; + \\ while (s > 0) : (s = s >> 1) { + \\ if (tid < s) { + \\ sum += sdata[tid + s]; + \\ sdata[tid] = sum; + \\ } + \\ asm volatile ("bar.sync \t0;"); + \\ } + \\ + \\ if (tid == 0) { + \\ out.* = sum; + \\ } + \\ } + ); + } } const nvptx_target = std.zig.CrossTarget{ From ac1f17f63fd833e5b35dc5255c2ca6a5a041c36c Mon Sep 17 00:00:00 2001 From: Guillaume Wenzek Date: Thu, 6 Oct 2022 21:48:35 +0200 Subject: [PATCH 08/11] disable sema.addConstant --- src/Sema.zig | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/Sema.zig b/src/Sema.zig index da3dea6125..08f3b59661 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -18202,12 +18202,6 @@ fn zirAddrSpaceCast(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.Inst else dest_ptr_ty; - if (try sema.resolveMaybeUndefVal(block, ptr_src, ptr)) |val| { - // Pointer value should compatible with both address spaces. - // TODO: Figure out why this generates an invalid bitcast. - return sema.addConstant(dest_ty, val); - } - try sema.requireRuntimeBlock(block, src, ptr_src); // TODO: Address space cast safety? From 419855c4750450b92403300081298ddfdad4be6d Mon Sep 17 00:00:00 2001 From: Guillaume Wenzek Date: Thu, 6 Oct 2022 22:06:20 +0200 Subject: [PATCH 09/11] fix test --- test/stage2/nvptx.zig | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/stage2/nvptx.zig b/test/stage2/nvptx.zig index b41a21ed6f..c87f32add0 100644 --- a/test/stage2/nvptx.zig +++ b/test/stage2/nvptx.zig @@ -25,7 +25,7 @@ pub fn addCases(ctx: *TestContext) !void { case.compiles( \\fn threadIdX() u32 { \\ return asm ("mov.u32 \t%[r], %tid.x;" - \\ : [r] "=r" (-> utid), + \\ : [r] "=r" (-> u32), \\ ); \\} \\ @@ -54,12 +54,12 @@ pub fn addCases(ctx: *TestContext) !void { case.compiles( \\fn threadIdX() u32 { \\ return asm ("mov.u32 \t%[r], %tid.x;" - \\ : [r] "=r" (-> utid), + \\ : [r] "=r" (-> u32), \\ ); \\} \\ \\ var _sdata: [1024]f32 addrspace(.shared) = undefined; - \\ pub export fn reduceSum(d_x: []const f32, out: *f32) callconv(ptx.Kernel) void { + \\ pub export fn reduceSum(d_x: []const f32, out: *f32) callconv(.PtxKernel) void { \\ var sdata = @addrSpaceCast(.generic, &_sdata); \\ const tid: u32 = threadIdX(); \\ var sum = d_x[tid]; @@ -99,6 +99,8 @@ pub fn addPtx( .files = std.ArrayList(TestContext.File).init(ctx.cases.allocator), .link_libc = false, .backend = .llvm, + // Bug in Debug mode + .optimize_mode = .ReleaseSafe, }) catch @panic("out of memory"); return &ctx.cases.items[ctx.cases.items.len - 1]; } From 24c749473a9e2eaeb1de804ec7567a4a933e5993 Mon Sep 17 00:00:00 2001 From: Guillaume Wenzek Date: Fri, 7 Oct 2022 14:31:15 +0200 Subject: [PATCH 10/11] implement os.abort and panic for cuda --- lib/std/builtin.zig | 1 + lib/std/os.zig | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/lib/std/builtin.zig b/lib/std/builtin.zig index 87e8e90df8..c772d8e6f9 100644 --- a/lib/std/builtin.zig +++ b/lib/std/builtin.zig @@ -833,6 +833,7 @@ pub fn default_panic(msg: []const u8, error_return_trace: ?*StackTrace, ret_addr // Didn't have boot_services, just fallback to whatever. std.os.abort(); }, + .cuda => std.os.abort(), else => { const first_trace_addr = ret_addr orelse @returnAddress(); std.debug.panicImpl(error_return_trace, first_trace_addr, msg); diff --git a/lib/std/os.zig b/lib/std/os.zig index 9270a52adb..ba8f523faf 100644 --- a/lib/std/os.zig +++ b/lib/std/os.zig @@ -500,10 +500,15 @@ pub fn abort() noreturn { @breakpoint(); exit(1); } + if (builtin.os.tag == .cuda) { + @"llvm.trap"(); + } system.abort(); } +extern fn @"llvm.trap"() noreturn; + pub const RaiseError = UnexpectedError; pub fn raise(sig: u8) RaiseError!void { From c289794f0db3e06c568450cc6c646a0ba63a73ba Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sat, 15 Oct 2022 10:50:02 -0700 Subject: [PATCH 11/11] nvptx: add TODO comment regarding abuse of llvm builtins --- lib/std/os.zig | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/std/os.zig b/lib/std/os.zig index ba8f523faf..19c9f78d48 100644 --- a/lib/std/os.zig +++ b/lib/std/os.zig @@ -501,6 +501,7 @@ pub fn abort() noreturn { exit(1); } if (builtin.os.tag == .cuda) { + // TODO: introduce `@trap` instead of abusing https://github.com/ziglang/zig/issues/2291 @"llvm.trap"(); }