mirror of
https://github.com/ziglang/zig.git
synced 2026-02-05 14:07:04 +00:00
Abridged summary: * Move `Module.Fn` into `InternPool`. * Delete a lot of confusing and problematic `Sema` logic related to generic function calls. This commit removes `Module.Fn` and replaces it with two new `InternPool.Tag` values: * `func_decl` - corresponding to a function declared in the source code. This one contains line/column numbers, zir_body_inst, etc. * `func_instance` - one for each monomorphization of a generic function. Contains a reference to the `func_decl` from whence the instantiation came, along with the `comptime` parameter values (or types in the case of `anytype`) Since `InternPool` provides deduplication on these values, these fields are now deleted from `Module`: * `monomorphed_func_keys` * `monomorphed_funcs` * `align_stack_fns` Instead of these, Sema logic for generic function instantiation now unconditionally evaluates the function prototype expression for every generic callsite. This is technically required in order for type coercions to work. The previous code had some dubious, probably wrong hacks to make things work, such as `hashUncoerced`. I'm not 100% sure how we were able to eliminate that function and still pass all the behavior tests, but I'm pretty sure things were still broken without doing type coercion for every generic function call argument. After the function prototype is evaluated, it produces a deduplicated `func_instance` `InternPool.Index` which can then be used for the generic function call. Some other nice things made by this simplification are the removal of `comptime_args_fn_inst` and `preallocated_new_func` from `Sema`, and the messy logic associated with them. I have not yet been able to measure the perf of this against master branch. On one hand, it reduces memory usage and pointer chasing of the most heavily used `InternPool` Tag - function bodies - but on the other hand, it does evaluate function prototype expressions more than before. We will soon find out.
129 lines
4.4 KiB
Zig
129 lines
4.4 KiB
Zig
//! NVidia PTX (Parallel Thread Execution)
|
|
//! https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
|
|
//! For this we rely on the nvptx backend of LLVM
|
|
//! Kernel functions need to be marked both as "export" and "callconv(.Kernel)"
|
|
|
|
const NvPtx = @This();
|
|
|
|
const std = @import("std");
|
|
const builtin = @import("builtin");
|
|
|
|
const Allocator = std.mem.Allocator;
|
|
const assert = std.debug.assert;
|
|
const log = std.log.scoped(.link);
|
|
|
|
const Module = @import("../Module.zig");
|
|
const InternPool = @import("../InternPool.zig");
|
|
const Compilation = @import("../Compilation.zig");
|
|
const link = @import("../link.zig");
|
|
const trace = @import("../tracy.zig").trace;
|
|
const build_options = @import("build_options");
|
|
const Air = @import("../Air.zig");
|
|
const Liveness = @import("../Liveness.zig");
|
|
const LlvmObject = @import("../codegen/llvm.zig").Object;
|
|
|
|
/// Generic linker-file state shared with `link.File`; its `allocator` is the
/// one used to release everything owned by this struct.
base: link.File,
/// LLVM object that every update/flush call is forwarded to.
/// Created in `createEmpty` and destroyed in `deinit`.
llvm_object: *LlvmObject,
/// Output file name, built as `<root_name>.ptx` in `createEmpty`.
/// Heap-allocated; freed in `deinit`.
ptx_file_name: []const u8,
|
|
|
|
/// Allocates and initializes an `NvPtx` linker backed by a new `LlvmObject`.
///
/// Returns `error.PtxArchNotSupported` when the compiler was built without
/// LLVM, when `options.use_llvm` is false, when the target CPU architecture
/// is not nvptx, or when the target OS is anything other than `.cuda`.
///
/// Allocation failures are propagated to the caller. Anything acquired before
/// the failure is released again, so an error return does not leak.
/// On success the returned pointer and its contents were allocated with `gpa`.
pub fn createEmpty(gpa: Allocator, options: link.Options) !*NvPtx {
    if (!build_options.have_llvm) return error.PtxArchNotSupported;
    if (!options.use_llvm) return error.PtxArchNotSupported;

    if (!options.target.cpu.arch.isNvptx()) return error.PtxArchNotSupported;

    switch (options.target.os.tag) {
        // TODO: does it also work with nvcl ?
        .cuda => {},
        else => return error.PtxArchNotSupported,
    }

    const llvm_object = try LlvmObject.create(gpa, options);
    // Release the LLVM object if any of the allocations below fail.
    errdefer llvm_object.destroy(gpa);

    const nvptx = try gpa.create(NvPtx);
    // Likewise release the struct itself if building the file name fails.
    errdefer gpa.destroy(nvptx);

    nvptx.* = .{
        .base = .{
            .tag = .nvptx,
            .options = options,
            .file = null,
            .allocator = gpa,
        },
        .llvm_object = llvm_object,
        .ptx_file_name = try std.mem.join(gpa, "", &[_][]const u8{ options.root_name, ".ptx" }),
    };

    return nvptx;
}
|
|
|
|
/// Creates an `NvPtx` linker for the output at `sub_path`.
///
/// `sub_path` is only used for the debug log message; the actual construction
/// is delegated to `createEmpty`. Panics when the compiler was built without
/// LLVM, and returns `error.PtxArchNotSupported` when `options.use_llvm` is
/// false. Asserts that the target object format is `.nvptx`.
pub fn openPath(allocator: Allocator, sub_path: []const u8, options: link.Options) !*NvPtx {
    if (!build_options.have_llvm) {
        @panic("nvptx target requires a zig compiler with llvm enabled.");
    }
    if (!options.use_llvm) {
        return error.PtxArchNotSupported;
    }
    assert(options.target.ofmt == .nvptx);

    log.debug("Opening .ptx target file {s}", .{sub_path});

    const nvptx = try createEmpty(allocator, options);
    return nvptx;
}
|
|
|
|
/// Releases the resources owned by this linker: the LLVM object and the
/// `ptx_file_name` buffer, both through `base.allocator`.
/// Does nothing when the compiler was built without LLVM.
pub fn deinit(self: *NvPtx) void {
    if (!build_options.have_llvm) return;

    const gpa = self.base.allocator;
    self.llvm_object.destroy(gpa);
    gpa.free(self.ptx_file_name);
}
|
|
|
|
/// Forwards a function update to the underlying LLVM object.
/// Does nothing when the compiler was built without LLVM.
pub fn updateFunc(self: *NvPtx, module: *Module, func_index: InternPool.Index, air: Air, liveness: Liveness) !void {
    if (build_options.have_llvm) {
        try self.llvm_object.updateFunc(module, func_index, air, liveness);
    }
}
|
|
|
|
/// Forwards a declaration update to the underlying LLVM object.
/// Does nothing when the compiler was built without LLVM.
pub fn updateDecl(self: *NvPtx, module: *Module, decl_index: Module.Decl.Index) !void {
    if (!build_options.have_llvm) return;

    try self.llvm_object.updateDecl(module, decl_index);
}
|
|
|
|
/// Forwards the export list of a declaration to the underlying LLVM object.
///
/// Does nothing when the compiler was built without LLVM. Panics when the
/// build was configured with `skip_non_native` and the host object format is
/// not nvptx.
pub fn updateDeclExports(
    self: *NvPtx,
    module: *Module,
    decl_index: Module.Decl.Index,
    exports: []const *Module.Export,
) !void {
    if (!build_options.have_llvm) return;

    const disabled_by_build = build_options.skip_non_native and builtin.object_format != .nvptx;
    if (disabled_by_build) {
        @panic("Attempted to compile for object format that was disabled by build configuration");
    }

    try self.llvm_object.updateDeclExports(module, decl_index, exports);
}
|
|
|
|
/// Forwards the release of a declaration to the underlying LLVM object.
/// Does nothing when the compiler was built without LLVM.
pub fn freeDecl(self: *NvPtx, decl_index: Module.Decl.Index) void {
    if (!build_options.have_llvm) return;

    self.llvm_object.freeDecl(decl_index);
}
|
|
|
|
/// Flushes the linker output. This is a thin wrapper that delegates all work
/// to `flushModule`.
pub fn flush(self: *NvPtx, comp: *Compilation, prog_node: *std.Progress.Node) link.File.FlushError!void {
    try self.flushModule(comp, prog_node);
}
|
|
|
|
/// Emits the module as a PTX file through the LLVM object.
///
/// Does nothing when the compiler was built without LLVM. Panics when the
/// build was configured with `skip_non_native`.
///
/// While LLVM runs, `comp` is temporarily reconfigured: binary emission is
/// turned off and assembly emission is pointed at `ptx_file_name` in the
/// default cache directory. Both settings are put back to their previous
/// values before returning, whether or not the flush succeeds.
pub fn flushModule(self: *NvPtx, comp: *Compilation, prog_node: *std.Progress.Node) link.File.FlushError!void {
    if (!build_options.have_llvm) return;
    if (build_options.skip_non_native) {
        @panic("Attempted to compile for architecture that was disabled by build configuration");
    }
    const tracy = trace(@src());
    defer tracy.end();

    // Remember the settings we are about to override so they can be restored.
    const outfile = comp.bin_file.options.emit.?;
    const previous_emit_asm = comp.emit_asm;
    defer {
        comp.bin_file.options.emit = outfile;
        // Restore the caller's value instead of unconditionally clearing it.
        comp.emit_asm = previous_emit_asm;
    }

    // We modify 'comp' before passing it to LLVM, but restore value afterwards.
    // We tell LLVM to not try to build a .o, only an "assembly" file.
    // This is required by the LLVM PTX backend.
    comp.bin_file.options.emit = null;
    comp.emit_asm = .{
        // 'null' means using the default cache dir: zig-cache/o/...
        .directory = null,
        .basename = self.ptx_file_name,
    };

    try self.llvm_object.flushModule(comp, prog_node);
}
|