stage2: C backend: re-implement emit-h

and also mark functions as `extern "C"` as appropriate to support C++
compilers.
This commit is contained in:
Andrew Kelley 2021-01-05 17:33:31 -07:00
parent cd95444e47
commit 1a2dd85570
6 changed files with 239 additions and 74 deletions

View File

@ -27,7 +27,6 @@ const Cache = @import("Cache.zig");
const stage1 = @import("stage1.zig");
const translate_c = @import("translate_c.zig");
const c_codegen = @import("codegen/c.zig");
const c_link = @import("link/C.zig");
const ThreadPool = @import("ThreadPool.zig");
const WaitGroup = @import("WaitGroup.zig");
const libtsan = @import("libtsan.zig");
@ -162,6 +161,8 @@ pub const CSourceFile = struct {
const Job = union(enum) {
/// Write the machine code for a Decl to the output file.
codegen_decl: *Module.Decl,
/// Render the .h file snippet for the Decl.
emit_h_decl: *Module.Decl,
/// The Decl needs to be analyzed and possibly export itself.
/// It may have already been analyzed, or it may have been determined
/// to be outdated; in this case perform semantic analysis again.
@ -1312,9 +1313,14 @@ pub fn update(self: *Compilation) !void {
// This is needed before reading the error flags.
try self.bin_file.flush(self);
self.link_error_flags = self.bin_file.errorFlags();
if (!use_stage1) {
if (self.bin_file.options.module) |module| {
try link.File.C.flushEmitH(module);
}
}
// If there are any errors, we anticipate the source files being loaded
// to report error messages. Otherwise we unload all source files to save memory.
if (self.totalErrorCount() == 0 and !self.keep_source_files_loaded) {
@ -1340,7 +1346,8 @@ pub fn totalErrorCount(self: *Compilation) usize {
var total: usize = self.failed_c_objects.items().len;
if (self.bin_file.options.module) |module| {
total += module.failed_decls.items().len +
total += module.failed_decls.count() +
module.emit_h_failed_decls.count() +
module.failed_exports.items().len +
module.failed_files.items().len +
@boolToInt(module.failed_root_src_file != null);
@ -1379,6 +1386,12 @@ pub fn getAllErrorsAlloc(self: *Compilation) !AllErrors {
const source = try decl.scope.getSource(module);
try AllErrors.add(&arena, &errors, decl.scope.subFilePath(), source, err_msg.*);
}
for (module.emit_h_failed_decls.items()) |entry| {
const decl = entry.key;
const err_msg = entry.value;
const source = try decl.scope.getSource(module);
try AllErrors.add(&arena, &errors, decl.scope.subFilePath(), source, err_msg.*);
}
for (module.failed_exports.items()) |entry| {
const decl = entry.key.owner_decl;
const err_msg = entry.value;
@ -1476,27 +1489,68 @@ pub fn performAllTheWork(self: *Compilation) error{ TimerUnsupported, OutOfMemor
assert(decl.typed_value.most_recent.typed_value.ty.hasCodeGenBits());
self.bin_file.updateDecl(module, decl) catch |err| {
switch (err) {
error.OutOfMemory => return error.OutOfMemory,
error.AnalysisFail => {
decl.analysis = .codegen_failure;
},
else => {
try module.failed_decls.ensureCapacity(module.gpa, module.failed_decls.items().len + 1);
module.failed_decls.putAssumeCapacityNoClobber(decl, try ErrorMsg.create(
module.gpa,
decl.src(),
"unable to codegen: {s}",
.{@errorName(err)},
));
decl.analysis = .codegen_failure_retryable;
},
}
return;
self.bin_file.updateDecl(module, decl) catch |err| switch (err) {
error.OutOfMemory => return error.OutOfMemory,
error.AnalysisFail => {
decl.analysis = .codegen_failure;
continue;
},
else => {
try module.failed_decls.ensureCapacity(module.gpa, module.failed_decls.items().len + 1);
module.failed_decls.putAssumeCapacityNoClobber(decl, try ErrorMsg.create(
module.gpa,
decl.src(),
"unable to codegen: {s}",
.{@errorName(err)},
));
decl.analysis = .codegen_failure_retryable;
continue;
},
};
},
},
// Render the .h file snippet for a Decl into its per-Decl `EmitH.fwd_decl` buffer.
.emit_h_decl => |decl| switch (decl.analysis) {
    .unreferenced => unreachable,
    .in_progress => unreachable,
    .outdated => unreachable,

    .sema_failure,
    .dependency_failure,
    .sema_failure_retryable,
    => continue,

    // emit-h only requires semantic analysis of the Decl to be complete,
    // it does not depend on machine code generation to succeed.
    .codegen_failure, .codegen_failure_retryable, .complete => {
        if (build_options.omit_stage2)
            @panic("sadly stage2 is omitted from this build to save memory on the CI server");
        const module = self.bin_file.options.module.?;
        // `getEmitH` asserts that `module.emit_h` is non-null.
        const emit_h = decl.getEmitH(module);
        const fwd_decl = &emit_h.fwd_decl;
        // Discard the previously rendered snippet but keep the allocation for reuse.
        fwd_decl.shrinkRetainingCapacity(0);

        var dg: c_codegen.DeclGen = .{
            .module = module,
            .error_msg = null,
            .decl = decl,
            .fwd_decl = fwd_decl.toManaged(module.gpa),
        };
        // `toManaged` hands the buffer to `dg.fwd_decl` without resetting the
        // unmanaged list. Clear it here so that, if rendering fails and the
        // `defer` below frees the buffer, the Decl is not left holding a
        // dangling pointer (use-after-free on the next render, double free
        // in `Decl.destroy`).
        fwd_decl.* = .{};
        defer dg.fwd_decl.deinit();

        c_codegen.genHeader(&dg) catch |err| switch (err) {
            error.AnalysisFail => {
                // Ownership of the error message moves to `emit_h_failed_decls`.
                try module.emit_h_failed_decls.put(module.gpa, decl, dg.error_msg.?);
                continue;
            },
            else => |e| return e,
        };

        // Success: give the rendered bytes back to the Decl. After this,
        // `dg.fwd_decl` is empty, so the deferred `deinit` frees nothing.
        fwd_decl.* = dg.fwd_decl.moveToUnmanaged();
        // Release excess capacity.
        fwd_decl.shrink(module.gpa, fwd_decl.items.len);
    },
},
.analyze_decl => |decl| {
if (build_options.omit_stage2)
@panic("sadly stage2 is omitted from this build to save memory on the CI server");

View File

@ -57,6 +57,10 @@ decl_table: std.ArrayHashMapUnmanaged(Scope.NameHash, *Decl, Scope.name_hash_has
/// Note that a Decl can succeed but the Fn it represents can fail. In this case,
/// a Decl can have a failed_decls entry but have analysis status of success.
failed_decls: std.AutoArrayHashMapUnmanaged(*Decl, *Compilation.ErrorMsg) = .{},
/// When emit_h is non-null, each Decl gets one more compile error slot for
/// emit-h failing for that Decl. This table is also how we tell if a Decl has
/// failed emit-h or succeeded.
emit_h_failed_decls: std.AutoArrayHashMapUnmanaged(*Decl, *Compilation.ErrorMsg) = .{},
/// Using a map here for consistency with the other fields here.
/// The ErrorMsg memory is owned by the `Scope`, using Module's general purpose allocator.
failed_files: std.AutoArrayHashMapUnmanaged(*Scope, *Compilation.ErrorMsg) = .{},
@ -116,6 +120,13 @@ pub const Export = struct {
},
};
/// When Module emit_h field is non-null, each Decl is allocated via this struct, so that
/// there can be EmitH state attached to each Decl.
/// `allocateNewDecl` returns a pointer to the `decl` field; `Decl.getEmitH` and
/// `Decl.destroy` recover the containing struct from that pointer with `@fieldParentPtr`.
pub const DeclPlusEmitH = struct {
/// The Decl itself. Pointers to this field are what the rest of the Module sees.
decl: Decl,
/// Emit-h state belonging to `decl`.
emit_h: EmitH,
};
pub const Decl = struct {
/// This name is relative to the containing namespace of the decl. It uses a null-termination
/// to save bytes, since there can be a lot of decls in a compilation. The null byte is not allowed
@ -204,14 +215,21 @@ pub const Decl = struct {
/// stage1 compiler giving me: `error: struct 'Module.Decl' depends on itself`
pub const DepsTable = std.ArrayHashMapUnmanaged(*Decl, void, std.array_hash_map.getAutoHashFn(*Decl), std.array_hash_map.getAutoEqlFn(*Decl), false);
pub fn destroy(self: *Decl, gpa: *Allocator) void {
pub fn destroy(self: *Decl, module: *Module) void {
const gpa = module.gpa;
gpa.free(mem.spanZ(self.name));
if (self.typedValueManaged()) |tvm| {
tvm.deinit(gpa);
}
self.dependants.deinit(gpa);
self.dependencies.deinit(gpa);
gpa.destroy(self);
if (module.emit_h != null) {
const decl_plus_emit_h = @fieldParentPtr(DeclPlusEmitH, "decl", self);
decl_plus_emit_h.emit_h.fwd_decl.deinit(gpa);
gpa.destroy(decl_plus_emit_h);
} else {
gpa.destroy(self);
}
}
pub fn src(self: Decl) usize {
@ -277,6 +295,12 @@ pub const Decl = struct {
return self.scope.cast(Scope.Container).?.file_scope;
}
/// Returns the emit-h state stored alongside `decl`.
/// Asserts that `module.emit_h` is non-null; in that configuration every Decl
/// is the `decl` field of a `DeclPlusEmitH`, which is what makes the
/// `@fieldParentPtr` below valid.
pub fn getEmitH(decl: *Decl, module: *Module) *EmitH {
    assert(module.emit_h != null);
    return &@fieldParentPtr(DeclPlusEmitH, "decl", decl).emit_h;
}
fn removeDependant(self: *Decl, other: *Decl) void {
self.dependants.removeAssertDiscard(other);
}
@ -286,6 +310,11 @@ pub const Decl = struct {
}
};
/// This state is attached to every Decl when Module emit_h is non-null.
pub const EmitH = struct {
/// Header text rendered for this Decl by `c_codegen.genHeader`; `flushEmitH`
/// writes every Decl's buffer into the emitted .h file. The memory is freed in
/// `Decl.destroy` using the Module's general purpose allocator.
fwd_decl: std.ArrayListUnmanaged(u8) = .{},
};
/// Fn struct memory is owned by the Decl's TypedValue.Managed arena allocator.
/// Extern functions do not have this data structure; they are represented by
/// the `Decl` only, with a `Value` tag of `extern_fn`.
@ -883,7 +912,7 @@ pub fn deinit(self: *Module) void {
self.deletion_set.deinit(gpa);
for (self.decl_table.items()) |entry| {
entry.value.destroy(gpa);
entry.value.destroy(self);
}
self.decl_table.deinit(gpa);
@ -892,6 +921,11 @@ pub fn deinit(self: *Module) void {
}
self.failed_decls.deinit(gpa);
for (self.emit_h_failed_decls.items()) |entry| {
entry.value.destroy(gpa);
}
self.emit_h_failed_decls.deinit(gpa);
for (self.failed_files.items()) |entry| {
entry.value.destroy(gpa);
}
@ -1150,6 +1184,10 @@ fn astGenAndAnalyzeDecl(self: *Module, decl: *Decl) !bool {
try self.comp.bin_file.allocateDeclIndexes(decl);
try self.comp.work_queue.writeItem(.{ .codegen_decl = decl });
if (type_changed and self.emit_h != null) {
try self.comp.work_queue.writeItem(.{ .emit_h_decl = decl });
}
return type_changed;
};
@ -1269,6 +1307,9 @@ fn astGenAndAnalyzeDecl(self: *Module, decl: *Decl) !bool {
// increasing how many computations can be done in parallel.
try self.comp.bin_file.allocateDeclIndexes(decl);
try self.comp.work_queue.writeItem(.{ .codegen_decl = decl });
if (type_changed and self.emit_h != null) {
try self.comp.work_queue.writeItem(.{ .emit_h_decl = decl });
}
} else if (!prev_is_inline and prev_type_has_bits) {
self.comp.bin_file.freeDecl(decl);
}
@ -1837,9 +1878,13 @@ pub fn deleteDecl(self: *Module, decl: *Decl) !void {
if (self.failed_decls.remove(decl)) |entry| {
entry.value.destroy(self.gpa);
}
if (self.emit_h_failed_decls.remove(decl)) |entry| {
entry.value.destroy(self.gpa);
}
self.deleteDeclExports(decl);
self.comp.bin_file.freeDecl(decl);
decl.destroy(self.gpa);
decl.destroy(self);
}
/// Delete all the Export objects that are caused by this Decl. Re-analysis of
@ -1923,16 +1968,28 @@ fn markOutdatedDecl(self: *Module, decl: *Decl) !void {
if (self.failed_decls.remove(decl)) |entry| {
entry.value.destroy(self.gpa);
}
if (self.emit_h_failed_decls.remove(decl)) |entry| {
entry.value.destroy(self.gpa);
}
decl.analysis = .outdated;
}
fn allocateNewDecl(
self: *Module,
mod: *Module,
scope: *Scope,
src_index: usize,
contents_hash: std.zig.SrcHash,
) !*Decl {
const new_decl = try self.gpa.create(Decl);
// If we have emit-h then we must allocate a bigger structure to store the emit-h state.
const new_decl: *Decl = if (mod.emit_h != null) blk: {
const parent_struct = try mod.gpa.create(DeclPlusEmitH);
parent_struct.* = .{
.emit_h = .{},
.decl = undefined,
};
break :blk &parent_struct.decl;
} else try mod.gpa.create(Decl);
new_decl.* = .{
.name = "",
.scope = scope.namespace(),
@ -1941,14 +1998,14 @@ fn allocateNewDecl(
.analysis = .unreferenced,
.deletion_flag = false,
.contents_hash = contents_hash,
.link = switch (self.comp.bin_file.tag) {
.link = switch (mod.comp.bin_file.tag) {
.coff => .{ .coff = link.File.Coff.TextBlock.empty },
.elf => .{ .elf = link.File.Elf.TextBlock.empty },
.macho => .{ .macho = link.File.MachO.TextBlock.empty },
.c => .{ .c = link.File.C.DeclBlock.empty },
.wasm => .{ .wasm = {} },
},
.fn_link = switch (self.comp.bin_file.tag) {
.fn_link = switch (mod.comp.bin_file.tag) {
.coff => .{ .coff = {} },
.elf => .{ .elf = link.File.Elf.SrcFn.empty },
.macho => .{ .macho = link.File.MachO.SrcFn.empty },

View File

@ -9,6 +9,7 @@ const Compilation = @import("../Compilation.zig");
const Inst = @import("../ir.zig").Inst;
const Value = @import("../value.zig").Value;
const Type = @import("../type.zig").Type;
const TypedValue = @import("../TypedValue.zig");
const C = link.File.C;
const Decl = Module.Decl;
const trace = @import("../tracy.zig").trace;
@ -109,7 +110,7 @@ pub const Object = struct {
};
/// This data is available both when outputting .c code and when outputting an .h file.
const DeclGen = struct {
pub const DeclGen = struct {
module: *Module,
decl: *Decl,
fwd_decl: std.ArrayList(u8),
@ -199,22 +200,11 @@ const DeclGen = struct {
}
}
fn renderFunctionSignature(dg: *DeclGen, w: Writer) !void {
const tv = dg.decl.typed_value.most_recent.typed_value;
// Determine whether the function is globally visible.
const is_global = blk: {
switch (tv.val.tag()) {
.extern_fn => break :blk true,
.function => {
const func = tv.val.castTag(.function).?.data;
break :blk dg.module.decl_exports.contains(func.owner_decl);
},
else => unreachable,
}
};
fn renderFunctionSignature(dg: *DeclGen, w: Writer, is_global: bool) !void {
if (!is_global) {
try w.writeAll("static ");
}
const tv = dg.decl.typed_value.most_recent.typed_value;
try dg.renderType(w, tv.ty.fnReturnType());
const decl_name = mem.span(dg.decl.name);
try w.print(" {s}(", .{decl_name});
@ -302,6 +292,17 @@ const DeclGen = struct {
}),
}
}
/// Reports whether the function value `tv` is globally visible in the
/// generated C: extern functions always are, and functions with a body are
/// when `decl_exports` has an entry for their owner Decl.
/// `tv` must hold an `extern_fn` or `function` value; anything else is unreachable.
fn functionIsGlobal(dg: *DeclGen, tv: TypedValue) bool {
    return switch (tv.val.tag()) {
        .extern_fn => true,
        .function => dg.module.decl_exports.contains(
            tv.val.castTag(.function).?.data.owner_decl,
        ),
        else => unreachable,
    };
}
};
pub fn genDecl(o: *Object) !void {
@ -311,15 +312,19 @@ pub fn genDecl(o: *Object) !void {
const tv = o.dg.decl.typed_value.most_recent.typed_value;
if (tv.val.castTag(.function)) |func_payload| {
const is_global = o.dg.functionIsGlobal(tv);
const fwd_decl_writer = o.dg.fwd_decl.writer();
try o.dg.renderFunctionSignature(fwd_decl_writer);
if (is_global) {
try fwd_decl_writer.writeAll("ZIG_EXTERN_C ");
}
try o.dg.renderFunctionSignature(fwd_decl_writer, is_global);
try fwd_decl_writer.writeAll(";\n");
const func: *Module.Fn = func_payload.data;
const instructions = func.body.instructions;
const writer = o.code.writer();
try writer.writeAll("\n");
try o.dg.renderFunctionSignature(writer);
try o.dg.renderFunctionSignature(writer, is_global);
if (instructions.len == 0) {
try writer.writeAll(" {}\n");
return;
@ -363,7 +368,8 @@ pub fn genDecl(o: *Object) !void {
try writer.writeAll("}\n");
} else if (tv.val.tag() == .extern_fn) {
const writer = o.code.writer();
try o.dg.renderFunctionSignature(writer);
try writer.writeAll("ZIG_EXTERN_C ");
try o.dg.renderFunctionSignature(writer, true);
try writer.writeAll(";\n");
} else {
const writer = o.code.writer();
@ -381,20 +387,20 @@ pub fn genDecl(o: *Object) !void {
}
}
pub fn genHeader(comp: *Compilation, dg: *DeclGen) error{ AnalysisFail, OutOfMemory }!void {
pub fn genHeader(dg: *DeclGen) error{ AnalysisFail, OutOfMemory }!void {
const tracy = trace(@src());
defer tracy.end();
switch (decl.typed_value.most_recent.typed_value.ty.zigTypeTag()) {
const tv = dg.decl.typed_value.most_recent.typed_value;
const writer = dg.fwd_decl.writer();
switch (tv.ty.zigTypeTag()) {
.Fn => {
dg.renderFunctionSignature() catch |err| switch (err) {
error.AnalysisFail => {
try dg.module.failed_decls.put(dg.module.gpa, decl, dg.error_msg.?);
dg.error_msg = null;
return error.AnalysisFail;
},
else => |e| return e,
};
const is_global = dg.functionIsGlobal(tv);
if (is_global) {
try writer.writeAll("ZIG_EXTERN_C ");
}
try dg.renderFunctionSignature(writer, is_global);
try dg.fwd_decl.appendSlice(";\n");
},
else => {},

View File

@ -130,8 +130,10 @@ pub fn flushModule(self: *C, comp: *Compilation) !void {
const tracy = trace(@src());
defer tracy.end();
const module = self.base.options.module orelse
return error.LinkingWithoutZigSourceUnimplemented;
const module = self.base.options.module.?;
// This code path happens exclusively with -ofmt=c. The flush logic for
// emit-h is in `flushEmitH` below.
// We collect a list of buffers to write, and write them all at once with pwritev 😎
var all_buffers = std.ArrayList(std.os.iovec_const).init(comp.gpa);
@ -187,6 +189,46 @@ pub fn flushModule(self: *C, comp: *Compilation) !void {
try file.pwritevAll(all_buffers.items, 0);
}
/// Writes the emit-h output file: the contents of `zig_h` followed by the
/// rendered `fwd_decl` snippet of every Decl in `module.decl_table`.
/// Does nothing when `module.emit_h` is null.
pub fn flushEmitH(module: *Module) !void {
    const tracy = trace(@src());
    defer tracy.end();

    const loc = module.emit_h orelse return;

    // Gather one iovec per chunk so the whole file goes out in a single pwritev:
    // the `zig_h` preamble first, then one entry per Decl.
    var iovecs = std.ArrayList(std.os.iovec_const).init(module.gpa);
    defer iovecs.deinit();
    try iovecs.ensureCapacity(module.decl_table.count() + 1);

    iovecs.appendAssumeCapacity(.{
        .iov_base = zig_h,
        .iov_len = zig_h.len,
    });
    var total_len: u64 = zig_h.len;

    for (module.decl_table.items()) |entry| {
        const snippet = entry.value.getEmitH(module).fwd_decl.items;
        total_len += snippet.len;
        iovecs.appendAssumeCapacity(.{
            .iov_base = snippet.ptr,
            .iov_len = snippet.len,
        });
    }

    // Fall back to the local cache directory when no output directory was given.
    const out_dir = loc.directory orelse module.comp.local_cache_directory;
    const out_file = try out_dir.handle.createFile(loc.basename, .{
        // The end position is set explicitly right after; skipping truncation here
        // possibly lets the file system do one reallocation instead of two.
        .truncate = false,
    });
    defer out_file.close();

    try out_file.setEndPos(total_len);
    try out_file.pwritevAll(iovecs.items, 0);
}
pub fn updateDeclExports(
self: *C,
module: *Module,

View File

@ -23,11 +23,17 @@
#endif
#if __STDC_VERSION__ >= 199901L
#define zig_restrict restrict
#define ZIG_RESTRICT restrict
#elif defined(__GNUC__)
#define zig_restrict __restrict
#define ZIG_RESTRICT __restrict
#else
#define zig_restrict
#define ZIG_RESTRICT
#endif
/* ZIG_EXTERN_C prefixes globally visible declarations: it expands to
 * `extern "C"` when compiled as C++ and to nothing otherwise. */
#ifdef __cplusplus
#define ZIG_EXTERN_C extern "C"
#else
#define ZIG_EXTERN_C
#endif
#if defined(_MSC_VER)
@ -48,5 +54,5 @@
#include <stddef.h>
#define int128_t __int128
#define uint128_t unsigned __int128
void *memcpy (void *zig_restrict, const void *zig_restrict, size_t);
ZIG_EXTERN_C void *memcpy (void *ZIG_RESTRICT, const void *ZIG_RESTRICT, size_t);

View File

@ -180,7 +180,7 @@ pub fn addCases(ctx: *TestContext) !void {
\\ unreachable;
\\}
,
\\zig_noreturn void _start(void);
\\ZIG_EXTERN_C zig_noreturn void _start(void);
\\
\\zig_noreturn void _start(void) {
\\ zig_breakpoint();
@ -191,37 +191,37 @@ pub fn addCases(ctx: *TestContext) !void {
ctx.h("simple header", linux_x64,
\\export fn start() void{}
,
\\void start(void);
\\ZIG_EXTERN_C void start(void);
\\
);
ctx.h("header with single param function", linux_x64,
\\export fn start(a: u8) void{}
,
\\void start(uint8_t arg0);
\\ZIG_EXTERN_C void start(uint8_t a0);
\\
);
ctx.h("header with multiple param function", linux_x64,
\\export fn start(a: u8, b: u8, c: u8) void{}
,
\\void start(uint8_t arg0, uint8_t arg1, uint8_t arg2);
\\ZIG_EXTERN_C void start(uint8_t a0, uint8_t a1, uint8_t a2);
\\
);
ctx.h("header with u32 param function", linux_x64,
\\export fn start(a: u32) void{}
,
\\void start(uint32_t arg0);
\\ZIG_EXTERN_C void start(uint32_t a0);
\\
);
ctx.h("header with usize param function", linux_x64,
\\export fn start(a: usize) void{}
,
\\void start(uintptr_t arg0);
\\ZIG_EXTERN_C void start(uintptr_t a0);
\\
);
ctx.h("header with bool param function", linux_x64,
\\export fn start(a: bool) void{}
,
\\void start(bool arg0);
\\ZIG_EXTERN_C void start(bool a0);
\\
);
ctx.h("header with noreturn function", linux_x64,
@ -229,7 +229,7 @@ pub fn addCases(ctx: *TestContext) !void {
\\ unreachable;
\\}
,
\\zig_noreturn void start(void);
\\ZIG_EXTERN_C zig_noreturn void start(void);
\\
);
ctx.h("header with multiple functions", linux_x64,
@ -237,15 +237,15 @@ pub fn addCases(ctx: *TestContext) !void {
\\export fn b() void{}
\\export fn c() void{}
,
\\void a(void);
\\void b(void);
\\void c(void);
\\ZIG_EXTERN_C void a(void);
\\ZIG_EXTERN_C void b(void);
\\ZIG_EXTERN_C void c(void);
\\
);
ctx.h("header with multiple includes", linux_x64,
\\export fn start(a: u32, b: usize) void{}
,
\\void start(uint32_t arg0, uintptr_t arg1);
\\ZIG_EXTERN_C void start(uint32_t a0, uintptr_t a1);
\\
);
}