zig/src/link/NvPtx.zig
Andrew Kelley db33ee45b7 rework generic function calls
Abridged summary:

 * Move `Module.Fn` into `InternPool`.
 * Delete a lot of confusing and problematic `Sema` logic related to
   generic function calls.

This commit removes `Module.Fn` and replaces it with two new
`InternPool.Tag` values:

 * `func_decl` - corresponding to a function declared in the source
   code. This one contains line/column numbers, zir_body_inst, etc.

 * `func_instance` - one for each monomorphization of a generic
   function. Contains a reference to the `func_decl` from whence the
   instantiation came, along with the `comptime` parameter values (or
   types in the case of `anytype`)

Since `InternPool` provides deduplication on these values, these fields
are now deleted from `Module`:

 * `monomorphed_func_keys`
 * `monomorphed_funcs`
 * `align_stack_fns`

Instead of these, Sema logic for generic function instantiation now
unconditionally evaluates the function prototype expression for every
generic callsite. This is technically required in order for type
coercions to work. The previous code had some dubious, probably wrong
hacks to make things work, such as `hashUncoerced`. I'm not 100% sure
how we were able to eliminate that function and still pass all the
behavior tests, but I'm pretty sure things were still broken without
doing type coercion for every generic function call argument.

After the function prototype is evaluated, it produces a deduplicated
`func_instance` `InternPool.Index` which can then be used for the
generic function call.

Some other nice things made by this simplification are the removal of
`comptime_args_fn_inst` and `preallocated_new_func` from `Sema`, and the
messy logic associated with them.

I have not yet been able to measure the perf of this against master
branch. On one hand, it reduces memory usage and pointer chasing of the
most heavily used `InternPool` Tag - function bodies - but on the other
hand, it does evaluate function prototype expressions more than before.
We will soon find out.
2023-07-18 19:02:05 -07:00

129 lines
4.4 KiB
Zig

//! NVidia PTX (Parallel Thread Execution)
//! https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
//! For this we rely on the nvptx backend of LLVM
//! Kernel functions need to be marked both as "export" and "callconv(.Kernel)"
const NvPtx = @This();
const std = @import("std");
const builtin = @import("builtin");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const log = std.log.scoped(.link);
const Module = @import("../Module.zig");
const InternPool = @import("../InternPool.zig");
const Compilation = @import("../Compilation.zig");
const link = @import("../link.zig");
const trace = @import("../tracy.zig").trace;
const build_options = @import("build_options");
const Air = @import("../Air.zig");
const Liveness = @import("../Liveness.zig");
const LlvmObject = @import("../codegen/llvm.zig").Object;
base: link.File,
llvm_object: *LlvmObject,
ptx_file_name: []const u8,
pub fn createEmpty(gpa: Allocator, options: link.Options) !*NvPtx {
if (!build_options.have_llvm) return error.PtxArchNotSupported;
if (!options.use_llvm) return error.PtxArchNotSupported;
if (!options.target.cpu.arch.isNvptx()) return error.PtxArchNotSupported;
switch (options.target.os.tag) {
// TODO: does it also work with nvcl ?
.cuda => {},
else => return error.PtxArchNotSupported,
}
const llvm_object = try LlvmObject.create(gpa, options);
const nvptx = try gpa.create(NvPtx);
nvptx.* = .{
.base = .{
.tag = .nvptx,
.options = options,
.file = null,
.allocator = gpa,
},
.llvm_object = llvm_object,
.ptx_file_name = try std.mem.join(gpa, "", &[_][]const u8{ options.root_name, ".ptx" }),
};
return nvptx;
}
pub fn openPath(allocator: Allocator, sub_path: []const u8, options: link.Options) !*NvPtx {
if (!build_options.have_llvm) @panic("nvptx target requires a zig compiler with llvm enabled.");
if (!options.use_llvm) return error.PtxArchNotSupported;
assert(options.target.ofmt == .nvptx);
log.debug("Opening .ptx target file {s}", .{sub_path});
return createEmpty(allocator, options);
}
pub fn deinit(self: *NvPtx) void {
if (!build_options.have_llvm) return;
self.llvm_object.destroy(self.base.allocator);
self.base.allocator.free(self.ptx_file_name);
}
pub fn updateFunc(self: *NvPtx, module: *Module, func_index: InternPool.Index, air: Air, liveness: Liveness) !void {
if (!build_options.have_llvm) return;
try self.llvm_object.updateFunc(module, func_index, air, liveness);
}
pub fn updateDecl(self: *NvPtx, module: *Module, decl_index: Module.Decl.Index) !void {
if (!build_options.have_llvm) return;
return self.llvm_object.updateDecl(module, decl_index);
}
pub fn updateDeclExports(
self: *NvPtx,
module: *Module,
decl_index: Module.Decl.Index,
exports: []const *Module.Export,
) !void {
if (!build_options.have_llvm) return;
if (build_options.skip_non_native and builtin.object_format != .nvptx) {
@panic("Attempted to compile for object format that was disabled by build configuration");
}
return self.llvm_object.updateDeclExports(module, decl_index, exports);
}
pub fn freeDecl(self: *NvPtx, decl_index: Module.Decl.Index) void {
if (!build_options.have_llvm) return;
return self.llvm_object.freeDecl(decl_index);
}
pub fn flush(self: *NvPtx, comp: *Compilation, prog_node: *std.Progress.Node) link.File.FlushError!void {
return self.flushModule(comp, prog_node);
}
pub fn flushModule(self: *NvPtx, comp: *Compilation, prog_node: *std.Progress.Node) link.File.FlushError!void {
if (!build_options.have_llvm) return;
if (build_options.skip_non_native) {
@panic("Attempted to compile for architecture that was disabled by build configuration");
}
const tracy = trace(@src());
defer tracy.end();
const outfile = comp.bin_file.options.emit.?;
// We modify 'comp' before passing it to LLVM, but restore value afterwards.
// We tell LLVM to not try to build a .o, only an "assembly" file.
// This is required by the LLVM PTX backend.
comp.bin_file.options.emit = null;
comp.emit_asm = .{
// 'null' means using the default cache dir: zig-cache/o/...
.directory = null,
.basename = self.ptx_file_name,
};
defer {
comp.bin_file.options.emit = outfile;
comp.emit_asm = null;
}
try self.llvm_object.flushModule(comp, prog_node);
}