From 2c7387aef5c24a463ef9fc15809df570b6463bba Mon Sep 17 00:00:00 2001 From: r00ster91 Date: Thu, 28 Jul 2022 20:49:55 +0200 Subject: [PATCH 01/33] autodoc: better short description algorithm --- lib/docs/main.js | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/lib/docs/main.js b/lib/docs/main.js index 8be263e57f..da1d90fb3e 100644 --- a/lib/docs/main.js +++ b/lib/docs/main.js @@ -2639,14 +2639,23 @@ var zigAnalysis; }); } - + function shortDescMarkdown(docs) { - let parts = docs.trim().split("\n"); - let firstLine = parts[0]; - return markdown(firstLine); + const trimmed_docs = docs.trim(); + let index = trimmed_docs.indexOf('.'); + if (index < 0) { + index = trimmed_docs.indexOf('\n'); + if (index < 0) { + index = trimmed_docs.length; + } + } else { + index += 1; // include the period + } + const slice = trimmed_docs.slice(0, index); + return markdown(slice); } - + function markdown(input) { const raw_lines = input.split('\n'); // zig allows no '\r', so we don't need to split on CR From 921ba6cad4b9f414ab90ab0968acdcf50068bbc5 Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Sat, 30 Jul 2022 17:19:37 -0700 Subject: [PATCH 02/33] autodoc: Add borders to field docs to make it clear which field they are associated with --- lib/docs/index.html | 10 ++++++++++ lib/docs/main.js | 20 ++++++++++++-------- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/lib/docs/index.html b/lib/docs/index.html index 03579d975e..dbcb54288a 100644 --- a/lib/docs/index.html +++ b/lib/docs/index.html @@ -284,6 +284,16 @@ overflow-x: hidden; } + .fieldHasDocs { + margin-bottom: 0; + } + + .fieldDocs { + border: 1px solid #2A2A2A; + border-top: 0px; + padding: 1px 1em; + } + /* help dialog */ .help-modal { display: flex; diff --git a/lib/docs/main.js b/lib/docs/main.js index 8be263e57f..3a16ec7ad2 100644 --- a/lib/docs/main.js +++ b/lib/docs/main.js @@ -618,15 +618,18 @@ var zigAnalysis; for (let i = 0; i < fields.length; i += 1) { let field = fields[i]; let fieldNode = zigAnalysis.astNodes[field]; + let docs = fieldNode.docs; if (fieldNode.docs == null) { continue; } + let docsNonEmpty = docs !== ""; let divDom = domListParams.children[domIndex]; domIndex += 1; let value = typeObj.params[i]; - let html = '
' + escapeHtml((fieldNode.name)) + ": ";
+            let preClass = docsNonEmpty ? ' class="fieldHasDocs"' : "";
+            let html = '<div class="argBreaker"><pre' + preClass + '>' + escapeHtml((fieldNode.name)) + ": ";
             if (isVarArgs && i === typeObj.params.length - 1) {
                 html += '...';
             } else {
@@ -636,9 +639,8 @@ var zigAnalysis;
 
             html += ',
';
-            let docs = fieldNode.docs;
-            if (docs != null) {
-                html += markdown(docs);
+            if (docsNonEmpty) {
+                html += '<div class="fieldDocs">' + markdown(docs) + '</div>
';
             }
             divDom.innerHTML = html;
         }
@@ -2270,8 +2272,11 @@ var zigAnalysis;
                 let fieldNode = zigAnalysis.astNodes[containerNode.fields[i]];
                 let divDom = domListFields.children[i];
                 let fieldName = (fieldNode.name);
+                let docs = fieldNode.docs;
+                let docsNonEmpty = docs != null && docs !== "";
+                let extraPreClass = docsNonEmpty ? " fieldHasDocs" : "";
-                let html = '
' + escapeHtml(fieldName);
+                let html = '
' + escapeHtml(fieldName);
 
                 if (container.kind === typeKinds.Enum) {
                     html += ' = ' + fieldName + '';
@@ -2289,9 +2294,8 @@ var zigAnalysis;
 
                 html += ',
';
-                let docs = fieldNode.docs;
-                if (docs != null) {
-                    html += markdown(docs);
+                if (docsNonEmpty) {
+                    html += '<div class="fieldDocs">' + markdown(docs) + '</div>
'; } divDom.innerHTML = html; } From e7b6a1833106a5d808e4e82a2d61abf417aff407 Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Sat, 30 Jul 2022 23:41:24 -0700 Subject: [PATCH 03/33] std.fs: Split Iterator.next on Linux and WASI to allow for handling platform-specific errors Follow up to #12226, implements the compromise detailed in https://github.com/ziglang/zig/issues/12211#issuecomment-1196011590 --- lib/std/fs.zig | 31 +++++++++++++++++++++++++++++-- lib/std/fs/test.zig | 5 +++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/lib/std/fs.zig b/lib/std/fs.zig index c96a118399..0968e16812 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -595,6 +595,19 @@ pub const IterableDir = struct { /// Memory such as file names referenced in this returned entry becomes invalid /// with subsequent calls to `next`, as well as when this `Dir` is deinitialized. pub fn next(self: *Self) Error!?Entry { + return self.nextLinux() catch |err| switch (err) { + // To be consistent across platforms, iteration ends if the directory being iterated is deleted during iteration. + // This matches the behavior of non-Linux UNIX platforms. + error.DirNotFound => null, + else => |e| return e, + }; + } + + pub const ErrorLinux = error{DirNotFound} || IteratorError; + + /// Implementation of `next` that can return `error.DirNotFound` if the directory being + /// iterated was deleted during iteration (this error is Linux specific). + pub fn nextLinux(self: *Self) ErrorLinux!?Entry { start_over: while (true) { if (self.index >= self.end_index) { if (self.first_iter) { @@ -607,7 +620,7 @@ pub const IterableDir = struct { .BADF => unreachable, // Dir is invalid or was opened without iteration ability .FAULT => unreachable, .NOTDIR => unreachable, - .NOENT => return null, // The directory being iterated was deleted during iteration. + .NOENT => return error.DirNotFound, // The directory being iterated was deleted during iteration. .INVAL => return error.Unexpected, // Linux may in some cases return EINVAL when reading /proc/$PID/net. else => |err| return os.unexpectedErrno(err), } @@ -729,6 +742,20 @@ pub const IterableDir = struct { /// Memory such as file names referenced in this returned entry becomes invalid /// with subsequent calls to `next`, as well as when this `Dir` is deinitialized. pub fn next(self: *Self) Error!?Entry { + return self.nextWasi() catch |err| switch (err) { + // To be consistent across platforms, iteration ends if the directory being iterated is deleted during iteration. + // This matches the behavior of non-Linux UNIX platforms. + error.DirNotFound => null, + else => |e| return e, + }; + } + + pub const ErrorWasi = error{DirNotFound} || IteratorError; + + /// Implementation of `next` that can return platform-dependent errors depending on the host platform. + /// When the host platform is Linux, `error.DirNotFound` can be returned if the directory being + /// iterated was deleted during iteration. + pub fn nextWasi(self: *Self) ErrorWasi!?Entry { // We intentinally use fd_readdir even when linked with libc, // since its implementation is exactly the same as below, // and we avoid the code complexity here. @@ -742,7 +769,7 @@ pub const IterableDir = struct { .FAULT => unreachable, .NOTDIR => unreachable, .INVAL => unreachable, - .NOENT => return null, // The directory being iterated was deleted during iteration. + .NOENT => return error.DirNotFound, // The directory being iterated was deleted during iteration. 
.NOTCAPABLE => return error.AccessDenied, else => |err| return os.unexpectedErrno(err), } diff --git a/lib/std/fs/test.zig b/lib/std/fs/test.zig index bedec7d4ad..538ce1bf5e 100644 --- a/lib/std/fs/test.zig +++ b/lib/std/fs/test.zig @@ -241,6 +241,11 @@ test "Dir.Iterator but dir is deleted during iteration" { // Now, when we try to iterate, the next call should return null immediately. const entry = try iterator.next(); try std.testing.expect(entry == null); + + // On Linux, we can opt-in to receiving a more specific error by calling `nextLinux` + if (builtin.os.tag == .linux) { + try std.testing.expectError(error.DirNotFound, iterator.nextLinux()); + } } fn entryEql(lhs: IterableDir.Entry, rhs: IterableDir.Entry) bool { From 57f9405a8fcaec6043d680fa47ae0e98709160c2 Mon Sep 17 00:00:00 2001 From: Veikka Tuominen Date: Thu, 28 Jul 2022 15:00:40 +0300 Subject: [PATCH 04/33] Sema: validate bitcast operand type --- src/Sema.zig | 86 +++++++++++++++++-- test/behavior/bitcast.zig | 16 ---- .../compile_errors/bitCast_to_enum_type.zig | 2 +- .../intToPtr_with_misaligned_address.zig | 10 +++ ..._3818_bitcast_from_parray-slice_to_u16.zig | 8 +- .../obj/intToPtr_with_misaligned_address.zig | 10 --- 6 files changed, 97 insertions(+), 35 deletions(-) create mode 100644 test/cases/compile_errors/intToPtr_with_misaligned_address.zig rename test/cases/compile_errors/{stage1/obj => }/issue_3818_bitcast_from_parray-slice_to_u16.zig (56%) delete mode 100644 test/cases/compile_errors/stage1/obj/intToPtr_with_misaligned_address.zig diff --git a/src/Sema.zig b/src/Sema.zig index a0829d6eb7..efdc0f8262 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -8288,6 +8288,7 @@ fn zirBitcast(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air const dest_ty = try sema.resolveType(block, dest_ty_src, extra.lhs); const operand = try sema.resolveInst(extra.rhs); + const operand_ty = sema.typeOf(operand); switch (dest_ty.zigTypeTag()) { .AnyFrame, .ComptimeFloat, @@ -8310,8 +8311,8 @@ fn zirBitcast(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air const msg = msg: { const msg = try sema.errMsg(block, dest_ty_src, "cannot @bitCast to '{}'", .{dest_ty.fmt(sema.mod)}); errdefer msg.destroy(sema.gpa); - switch (sema.typeOf(operand).zigTypeTag()) { - .Int, .ComptimeInt => try sema.errNote(block, dest_ty_src, msg, "use @intToEnum for type coercion", .{}), + switch (operand_ty.zigTypeTag()) { + .Int, .ComptimeInt => try sema.errNote(block, dest_ty_src, msg, "use @intToEnum to cast from '{}'", .{operand_ty.fmt(sema.mod)}), else => {}, } @@ -8320,9 +8321,20 @@ fn zirBitcast(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air return sema.failWithOwnedErrorMsg(block, msg); }, - .Pointer => return sema.fail(block, dest_ty_src, "cannot @bitCast to '{}', use @ptrCast to cast to a pointer", .{ - dest_ty.fmt(sema.mod), - }), + .Pointer => { + const msg = msg: { + const msg = try sema.errMsg(block, dest_ty_src, "cannot @bitCast to '{}'", .{dest_ty.fmt(sema.mod)}); + errdefer msg.destroy(sema.gpa); + switch (operand_ty.zigTypeTag()) { + .Int, .ComptimeInt => try sema.errNote(block, dest_ty_src, msg, "use @intToPtr to cast from '{}'", .{operand_ty.fmt(sema.mod)}), + .Pointer => try sema.errNote(block, dest_ty_src, msg, "use @ptrCast to cast from '{}'", .{operand_ty.fmt(sema.mod)}), + else => {}, + } + + break :msg msg; + }; + return sema.failWithOwnedErrorMsg(block, msg); + }, .Struct, .Union => if (dest_ty.containerLayout() == .Auto) { const container = switch (dest_ty.zigTypeTag()) { .Struct => 
"struct", @@ -8342,6 +8354,70 @@ fn zirBitcast(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air .Vector, => {}, } + switch (operand_ty.zigTypeTag()) { + .AnyFrame, + .ComptimeFloat, + .ComptimeInt, + .EnumLiteral, + .ErrorSet, + .ErrorUnion, + .Fn, + .Frame, + .NoReturn, + .Null, + .Opaque, + .Optional, + .Type, + .Undefined, + .Void, + => return sema.fail(block, operand_src, "cannot @bitCast from '{}'", .{operand_ty.fmt(sema.mod)}), + + .Enum => { + const msg = msg: { + const msg = try sema.errMsg(block, operand_src, "cannot @bitCast from '{}'", .{operand_ty.fmt(sema.mod)}); + errdefer msg.destroy(sema.gpa); + switch (dest_ty.zigTypeTag()) { + .Int, .ComptimeInt => try sema.errNote(block, operand_src, msg, "use @enumToInt to cast to '{}'", .{dest_ty.fmt(sema.mod)}), + else => {}, + } + + break :msg msg; + }; + return sema.failWithOwnedErrorMsg(block, msg); + }, + .Pointer => { + const msg = msg: { + const msg = try sema.errMsg(block, operand_src, "cannot @bitCast from '{}'", .{operand_ty.fmt(sema.mod)}); + errdefer msg.destroy(sema.gpa); + switch (dest_ty.zigTypeTag()) { + .Int, .ComptimeInt => try sema.errNote(block, operand_src, msg, "use @ptrToInt to cast to '{}'", .{dest_ty.fmt(sema.mod)}), + .Pointer => try sema.errNote(block, operand_src, msg, "use @ptrCast to cast to '{}'", .{dest_ty.fmt(sema.mod)}), + else => {}, + } + + break :msg msg; + }; + return sema.failWithOwnedErrorMsg(block, msg); + }, + .Struct, .Union => if (operand_ty.containerLayout() == .Auto) { + const container = switch (operand_ty.zigTypeTag()) { + .Struct => "struct", + .Union => "union", + else => unreachable, + }; + return sema.fail(block, operand_src, "cannot @bitCast from '{}', {s} does not have a guaranteed in-memory layout", .{ + operand_ty.fmt(sema.mod), container, + }); + }, + .BoundFn => @panic("TODO remove this type from the language and compiler"), + + .Array, + .Bool, + .Float, + .Int, + .Vector, + => {}, + } return sema.bitCast(block, dest_ty, operand, operand_src); } diff --git a/test/behavior/bitcast.zig b/test/behavior/bitcast.zig index b0c66fd92b..27a0692a44 100644 --- a/test/behavior/bitcast.zig +++ b/test/behavior/bitcast.zig @@ -90,22 +90,6 @@ test "nested bitcast" { comptime try S.foo(42); } -test "@bitCast enum to its integer type" { - const SOCK = enum(c_int) { - A, - B, - - fn testBitCastExternEnum() !void { - var SOCK_DGRAM = @This().B; - var sock_dgram = @bitCast(c_int, SOCK_DGRAM); - try expect(sock_dgram == 1); - } - }; - - try SOCK.testBitCastExternEnum(); - comptime try SOCK.testBitCastExternEnum(); -} - // issue #3010: compiler segfault test "bitcast literal [4]u8 param to u32" { const ip = @bitCast(u32, [_]u8{ 255, 255, 255, 255 }); diff --git a/test/cases/compile_errors/bitCast_to_enum_type.zig b/test/cases/compile_errors/bitCast_to_enum_type.zig index add531627f..a8fedb7d54 100644 --- a/test/cases/compile_errors/bitCast_to_enum_type.zig +++ b/test/cases/compile_errors/bitCast_to_enum_type.zig @@ -9,4 +9,4 @@ export fn entry() void { // target=native // // :3:24: error: cannot @bitCast to 'tmp.entry.E' -// :3:24: note: use @intToEnum for type coercion +// :3:24: note: use @intToEnum to cast from 'u32' diff --git a/test/cases/compile_errors/intToPtr_with_misaligned_address.zig b/test/cases/compile_errors/intToPtr_with_misaligned_address.zig new file mode 100644 index 0000000000..43f89ab3b5 --- /dev/null +++ b/test/cases/compile_errors/intToPtr_with_misaligned_address.zig @@ -0,0 +1,10 @@ +pub export fn entry() void { + var y = @intToPtr([*]align(4) u8, 5); + _ = 
y; +} + +// error +// backend=stage2 +// target=native +// +// :2:39: error: pointer type '[*]align(4) u8' requires aligned address diff --git a/test/cases/compile_errors/stage1/obj/issue_3818_bitcast_from_parray-slice_to_u16.zig b/test/cases/compile_errors/issue_3818_bitcast_from_parray-slice_to_u16.zig similarity index 56% rename from test/cases/compile_errors/stage1/obj/issue_3818_bitcast_from_parray-slice_to_u16.zig rename to test/cases/compile_errors/issue_3818_bitcast_from_parray-slice_to_u16.zig index cdc1def677..874f015ffb 100644 --- a/test/cases/compile_errors/stage1/obj/issue_3818_bitcast_from_parray-slice_to_u16.zig +++ b/test/cases/compile_errors/issue_3818_bitcast_from_parray-slice_to_u16.zig @@ -10,8 +10,10 @@ export fn foo2() void { } // error -// backend=stage1 +// backend=stage2 // target=native // -// tmp.zig:3:42: error: unable to @bitCast from pointer type '*[2]u8' -// tmp.zig:8:32: error: destination type 'u16' has size 2 but source type '[]const u8' has size 16 +// :3:42: error: cannot @bitCast from '*[2]u8' +// :3:42: note: use @ptrToInt to cast to 'u16' +// :8:37: error: cannot @bitCast from '[]const u8' +// :8:37: note: use @ptrToInt to cast to 'u16' diff --git a/test/cases/compile_errors/stage1/obj/intToPtr_with_misaligned_address.zig b/test/cases/compile_errors/stage1/obj/intToPtr_with_misaligned_address.zig deleted file mode 100644 index 16b6bf565e..0000000000 --- a/test/cases/compile_errors/stage1/obj/intToPtr_with_misaligned_address.zig +++ /dev/null @@ -1,10 +0,0 @@ -pub fn main() void { - var y = @intToPtr([*]align(4) u8, 5); - _ = y; -} - -// error -// backend=stage1 -// target=native -// -// tmp.zig:2:13: error: pointer type '[*]align(4) u8' requires aligned address From f1768b40b2468d63355c8cf83d3614ae23a54317 Mon Sep 17 00:00:00 2001 From: Veikka Tuominen Date: Thu, 28 Jul 2022 17:28:37 +0300 Subject: [PATCH 05/33] stage2: better source location for var decls --- src/AstGen.zig | 3 +- src/Module.zig | 107 +++++++++++++++--- src/Sema.zig | 22 ++-- src/Zir.zig | 6 + ...obal_variable_alignment_non_power_of_2.zig | 4 +- .../compile_errors/src_outside_function.zig | 9 ++ .../stage1/obj/src_outside_function.zig | 9 -- .../type_variables_must_be_constant.zig | 4 +- ..._invalid_number_literal_as_array_index.zig | 4 +- .../usingnamespace_with_wrong_type.zig | 2 +- 10 files changed, 125 insertions(+), 45 deletions(-) rename test/cases/compile_errors/{stage1/obj => }/global_variable_alignment_non_power_of_2.zig (63%) create mode 100644 test/cases/compile_errors/src_outside_function.zig delete mode 100644 test/cases/compile_errors/stage1/obj/src_outside_function.zig diff --git a/src/AstGen.zig b/src/AstGen.zig index b6a7450f3a..b2bbb27865 100644 --- a/src/AstGen.zig +++ b/src/AstGen.zig @@ -7424,7 +7424,8 @@ fn builtinCall( const token_starts = tree.tokens.items(.start); const node_start = token_starts[tree.firstToken(node)]; astgen.advanceSourceCursor(node_start); - const result = try gz.addExtendedPayload(.builtin_src, Zir.Inst.LineColumn{ + const result = try gz.addExtendedPayload(.builtin_src, Zir.Inst.Src{ + .node = gz.nodeIndexToRelative(node), .line = astgen.source_line, .column = astgen.source_column, }); diff --git a/src/Module.zig b/src/Module.zig index 4ac2775515..6122b417e4 100644 --- a/src/Module.zig +++ b/src/Module.zig @@ -2161,6 +2161,10 @@ pub const SrcLoc = struct { .local_var_decl => tree.localVarDecl(node), .simple_var_decl => tree.simpleVarDecl(node), .aligned_var_decl => tree.alignedVarDecl(node), + .@"usingnamespace" => { + const node_data = 
tree.nodes.items(.data); + return nodeToSpan(tree, node_data[node].lhs); + }, else => unreachable, }; if (full.ast.type_node != 0) { @@ -2171,6 +2175,58 @@ pub const SrcLoc = struct { const end = start + @intCast(u32, tree.tokenSlice(tok_index).len); return Span{ .start = start, .end = end, .main = start }; }, + .node_offset_var_decl_align => |node_off| { + const tree = try src_loc.file_scope.getTree(gpa); + const node = src_loc.declRelativeToNodeIndex(node_off); + const node_tags = tree.nodes.items(.tag); + const full: Ast.full.VarDecl = switch (node_tags[node]) { + .global_var_decl => tree.globalVarDecl(node), + .local_var_decl => tree.localVarDecl(node), + .simple_var_decl => tree.simpleVarDecl(node), + .aligned_var_decl => tree.alignedVarDecl(node), + else => unreachable, + }; + return nodeToSpan(tree, full.ast.align_node); + }, + .node_offset_var_decl_section => |node_off| { + const tree = try src_loc.file_scope.getTree(gpa); + const node = src_loc.declRelativeToNodeIndex(node_off); + const node_tags = tree.nodes.items(.tag); + const full: Ast.full.VarDecl = switch (node_tags[node]) { + .global_var_decl => tree.globalVarDecl(node), + .local_var_decl => tree.localVarDecl(node), + .simple_var_decl => tree.simpleVarDecl(node), + .aligned_var_decl => tree.alignedVarDecl(node), + else => unreachable, + }; + return nodeToSpan(tree, full.ast.section_node); + }, + .node_offset_var_decl_addrspace => |node_off| { + const tree = try src_loc.file_scope.getTree(gpa); + const node = src_loc.declRelativeToNodeIndex(node_off); + const node_tags = tree.nodes.items(.tag); + const full: Ast.full.VarDecl = switch (node_tags[node]) { + .global_var_decl => tree.globalVarDecl(node), + .local_var_decl => tree.localVarDecl(node), + .simple_var_decl => tree.simpleVarDecl(node), + .aligned_var_decl => tree.alignedVarDecl(node), + else => unreachable, + }; + return nodeToSpan(tree, full.ast.addrspace_node); + }, + .node_offset_var_decl_init => |node_off| { + const tree = try src_loc.file_scope.getTree(gpa); + const node = src_loc.declRelativeToNodeIndex(node_off); + const node_tags = tree.nodes.items(.tag); + const full: Ast.full.VarDecl = switch (node_tags[node]) { + .global_var_decl => tree.globalVarDecl(node), + .local_var_decl => tree.localVarDecl(node), + .simple_var_decl => tree.simpleVarDecl(node), + .aligned_var_decl => tree.alignedVarDecl(node), + else => unreachable, + }; + return nodeToSpan(tree, full.ast.init_node); + }, .node_offset_builtin_call_arg0 => |n| return src_loc.byteOffsetBuiltinCallArg(gpa, n, 0), .node_offset_builtin_call_arg1 => |n| return src_loc.byteOffsetBuiltinCallArg(gpa, n, 1), .node_offset_builtin_call_arg2 => |n| return src_loc.byteOffsetBuiltinCallArg(gpa, n, 2), @@ -2857,6 +2913,18 @@ pub const LazySrcLoc = union(enum) { /// to the type expression. /// The Decl is determined contextually. node_offset_var_decl_ty: i32, + /// The source location points to the alignment expression of a var decl. + /// The Decl is determined contextually. + node_offset_var_decl_align: i32, + /// The source location points to the linksection expression of a var decl. + /// The Decl is determined contextually. + node_offset_var_decl_section: i32, + /// The source location points to the addrspace expression of a var decl. + /// The Decl is determined contextually. + node_offset_var_decl_addrspace: i32, + /// The source location points to the initializer of a var decl. + /// The Decl is determined contextually. 
+ node_offset_var_decl_init: i32, /// The source location points to a for loop condition expression, /// found by taking this AST node index offset from the containing /// Decl AST node, which points to a for loop AST node. Next, navigate @@ -3098,6 +3166,10 @@ pub const LazySrcLoc = union(enum) { .node_offset, .node_offset_initializer, .node_offset_var_decl_ty, + .node_offset_var_decl_align, + .node_offset_var_decl_section, + .node_offset_var_decl_addrspace, + .node_offset_var_decl_init, .node_offset_for_cond, .node_offset_builtin_call_arg0, .node_offset_builtin_call_arg1, @@ -4414,17 +4486,26 @@ fn semaDecl(mod: *Module, decl_index: Decl.Index) !bool { const body = zir.extra[extra.end..][0..extra.data.body_len]; const result_ref = (try sema.analyzeBodyBreak(&block_scope, body)).?.operand; try wip_captures.finalize(); - const src = LazySrcLoc.nodeOffset(0); - const decl_tv = try sema.resolveInstValue(&block_scope, .unneeded, result_ref, undefined); + const align_src: LazySrcLoc = .{ .node_offset_var_decl_align = 0 }; + const section_src: LazySrcLoc = .{ .node_offset_var_decl_section = 0 }; + const address_space_src: LazySrcLoc = .{ .node_offset_var_decl_addrspace = 0 }; + const ty_src: LazySrcLoc = .{ .node_offset_var_decl_ty = 0 }; + const init_src: LazySrcLoc = .{ .node_offset_var_decl_init = 0 }; + const decl_tv = try sema.resolveInstValue(&block_scope, init_src, result_ref, undefined); const decl_align: u32 = blk: { const align_ref = decl.zirAlignRef(); if (align_ref == .none) break :blk 0; - break :blk try sema.resolveAlign(&block_scope, src, align_ref); + break :blk try sema.resolveAlign(&block_scope, align_src, align_ref); }; const decl_linksection: ?[*:0]const u8 = blk: { const linksection_ref = decl.zirLinksectionRef(); if (linksection_ref == .none) break :blk null; - const bytes = try sema.resolveConstString(&block_scope, src, linksection_ref, "linksection must be comptime known"); + const bytes = try sema.resolveConstString(&block_scope, section_src, linksection_ref, "linksection must be comptime known"); + if (mem.indexOfScalar(u8, bytes, 0) != null) { + return sema.fail(&block_scope, section_src, "linksection cannot contain null bytes", .{}); + } else if (bytes.len == 0) { + return sema.fail(&block_scope, section_src, "linksection cannot be empty", .{}); + } break :blk (try decl_arena_allocator.dupeZ(u8, bytes)).ptr; }; const target = sema.mod.getTarget(); @@ -4442,27 +4523,27 @@ fn semaDecl(mod: *Module, decl_index: Decl.Index) !bool { .constant => target_util.defaultAddressSpace(target, .global_constant), else => unreachable, }, - else => |addrspace_ref| try sema.analyzeAddrspace(&block_scope, src, addrspace_ref, addrspace_ctx), + else => |addrspace_ref| try sema.analyzeAddrspace(&block_scope, address_space_src, addrspace_ref, addrspace_ctx), }; }; // Note this resolves the type of the Decl, not the value; if this Decl // is a struct, for example, this resolves `type` (which needs no resolution), // not the struct itself. 
- try sema.resolveTypeLayout(&block_scope, src, decl_tv.ty); + try sema.resolveTypeLayout(&block_scope, ty_src, decl_tv.ty); const decl_arena_state = try decl_arena_allocator.create(std.heap.ArenaAllocator.State); if (decl.is_usingnamespace) { if (!decl_tv.ty.eql(Type.type, mod)) { - return sema.fail(&block_scope, src, "expected type, found {}", .{ + return sema.fail(&block_scope, ty_src, "expected type, found {}", .{ decl_tv.ty.fmt(mod), }); } var buffer: Value.ToTypeBuffer = undefined; const ty = try decl_tv.val.toType(&buffer).copy(decl_arena_allocator); if (ty.getNamespace() == null) { - return sema.fail(&block_scope, src, "type {} has no namespace", .{ty.fmt(mod)}); + return sema.fail(&block_scope, ty_src, "type {} has no namespace", .{ty.fmt(mod)}); } decl.ty = Type.type; @@ -4508,7 +4589,7 @@ fn semaDecl(mod: *Module, decl_index: Decl.Index) !bool { decl.analysis = .complete; decl.generation = mod.generation; - const has_runtime_bits = try sema.fnHasRuntimeBits(&block_scope, src, decl.ty); + const has_runtime_bits = try sema.fnHasRuntimeBits(&block_scope, ty_src, decl.ty); if (has_runtime_bits) { // We don't fully codegen the decl until later, but we do need to reserve a global @@ -4525,7 +4606,7 @@ fn semaDecl(mod: *Module, decl_index: Decl.Index) !bool { const is_inline = decl.ty.fnCallingConvention() == .Inline; if (decl.is_exported) { - const export_src = src; // TODO make this point at `export` token + const export_src: LazySrcLoc = .{ .token_offset = @boolToInt(decl.is_pub) }; if (is_inline) { return sema.fail(&block_scope, export_src, "export of inline function", .{}); } @@ -4588,14 +4669,14 @@ fn semaDecl(mod: *Module, decl_index: Decl.Index) !bool { decl.generation = mod.generation; const has_runtime_bits = is_extern or - (queue_linker_work and try sema.typeHasRuntimeBits(&block_scope, src, decl.ty)); + (queue_linker_work and try sema.typeHasRuntimeBits(&block_scope, ty_src, decl.ty)); if (has_runtime_bits) { log.debug("queue linker work for {*} ({s})", .{ decl, decl.name }); // Needed for codegen_decl which will call updateDecl and then the // codegen backend wants full access to the Decl Type. - try sema.resolveTypeFully(&block_scope, src, decl.ty); + try sema.resolveTypeFully(&block_scope, ty_src, decl.ty); try mod.comp.bin_file.allocateDeclIndexes(decl_index); try mod.comp.work_queue.writeItem(.{ .codegen_decl = decl_index }); @@ -4606,7 +4687,7 @@ fn semaDecl(mod: *Module, decl_index: Decl.Index) !bool { } if (decl.is_exported) { - const export_src = src; // TODO point to the export token + const export_src: LazySrcLoc = .{ .token_offset = @boolToInt(decl.is_pub) }; // The scope needs to have the decl in it. 
const options: std.builtin.ExportOptions = .{ .name = mem.sliceTo(decl.name, 0) }; try sema.analyzeExport(&block_scope, export_src, options, decl_index); diff --git a/src/Sema.zig b/src/Sema.zig index efdc0f8262..22d0c572fc 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -2971,7 +2971,7 @@ fn zirAllocExtended( const extra = sema.code.extraData(Zir.Inst.AllocExtended, extended.operand); const src = LazySrcLoc.nodeOffset(extra.data.src_node); const ty_src: LazySrcLoc = .{ .node_offset_var_decl_ty = extra.data.src_node }; - const align_src = src; // TODO better source location + const align_src: LazySrcLoc = .{ .node_offset_var_decl_align = extra.data.src_node }; const small = @bitCast(Zir.Inst.AllocExtended.Small, extended.small); var extra_index: usize = extra.end; @@ -8046,7 +8046,7 @@ fn zirPtrToInt(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Ai if (try sema.resolveMaybeUndefValIntable(block, ptr_src, ptr)) |ptr_val| { return sema.addConstant(Type.usize, ptr_val); } - try sema.requireRuntimeBlock(block, ptr_src, ptr_src); + try sema.requireRuntimeBlock(block, inst_data.src(), ptr_src); return block.addUnOp(.ptrtoint, ptr); } @@ -13174,8 +13174,8 @@ fn zirBuiltinSrc( const tracy = trace(@src()); defer tracy.end(); - const src = sema.src; // TODO better source location - const extra = sema.code.extraData(Zir.Inst.LineColumn, extended.operand).data; + const extra = sema.code.extraData(Zir.Inst.Src, extended.operand).data; + const src = LazySrcLoc.nodeOffset(extra.node); const func = sema.func orelse return sema.fail(block, src, "@src outside function", .{}); const fn_owner_decl = sema.mod.declPtr(func.owner_decl); @@ -18959,10 +18959,8 @@ fn zirVarExtended( extended: Zir.Inst.Extended.InstData, ) CompileError!Air.Inst.Ref { const extra = sema.code.extraData(Zir.Inst.ExtendedVar, extended.operand); - const src = sema.src; - const ty_src: LazySrcLoc = src; // TODO add a LazySrcLoc that points at type - const name_src: LazySrcLoc = src; // TODO add a LazySrcLoc that points at the name token - const init_src: LazySrcLoc = src; // TODO add a LazySrcLoc that points at init expr + const ty_src: LazySrcLoc = .{ .node_offset_var_decl_ty = 0 }; + const init_src: LazySrcLoc = .{ .node_offset_var_decl_init = 0 }; const small = @bitCast(Zir.Inst.ExtendedVar.Small, extended.small); var extra_index: usize = extra.end; @@ -18976,12 +18974,6 @@ fn zirVarExtended( // ZIR supports encoding this information but it is not used; the information // is encoded via the Decl entry. 
assert(!small.has_align); - //const align_val: Value = if (small.has_align) blk: { - // const align_ref = @intToEnum(Zir.Inst.Ref, sema.code.extra[extra_index]); - // extra_index += 1; - // const align_tv = try sema.resolveInstConst(block, align_src, align_ref); - // break :blk align_tv.val; - //} else Value.@"null"; const uncasted_init: Air.Inst.Ref = if (small.has_init) blk: { const init_ref = @intToEnum(Zir.Inst.Ref, sema.code.extra[extra_index]); @@ -19005,7 +18997,7 @@ fn zirVarExtended( return sema.failWithNeededComptime(block, init_src, "container level variable initializers must be comptime known"); } else Value.initTag(.unreachable_value); - try sema.validateVarType(block, name_src, var_ty, small.is_extern); + try sema.validateVarType(block, ty_src, var_ty, small.is_extern); const new_var = try sema.gpa.create(Module.Var); errdefer sema.gpa.destroy(new_var); diff --git a/src/Zir.zig b/src/Zir.zig index ccd677df0b..3aa2378697 100644 --- a/src/Zir.zig +++ b/src/Zir.zig @@ -3548,6 +3548,12 @@ pub const Inst = struct { ty: Ref, init_count: u32, }; + + pub const Src = struct { + node: i32, + line: u32, + column: u32, + }; }; pub const SpecialProng = enum { none, @"else", under }; diff --git a/test/cases/compile_errors/stage1/obj/global_variable_alignment_non_power_of_2.zig b/test/cases/compile_errors/global_variable_alignment_non_power_of_2.zig similarity index 63% rename from test/cases/compile_errors/stage1/obj/global_variable_alignment_non_power_of_2.zig rename to test/cases/compile_errors/global_variable_alignment_non_power_of_2.zig index f23f340b16..b88d8aaf70 100644 --- a/test/cases/compile_errors/stage1/obj/global_variable_alignment_non_power_of_2.zig +++ b/test/cases/compile_errors/global_variable_alignment_non_power_of_2.zig @@ -2,7 +2,7 @@ const some_data: [100]u8 align(3) = undefined; export fn entry() usize { return @sizeOf(@TypeOf(some_data)); } // error -// backend=stage1 +// backend=stage2 // target=native // -// tmp.zig:1:32: error: alignment value 3 is not a power of 2 +// :1:32: error: alignment value '3' is not a power of two diff --git a/test/cases/compile_errors/src_outside_function.zig b/test/cases/compile_errors/src_outside_function.zig new file mode 100644 index 0000000000..8b66ba730b --- /dev/null +++ b/test/cases/compile_errors/src_outside_function.zig @@ -0,0 +1,9 @@ +comptime { + @src(); +} + +// error +// backend=stage2 +// target=native +// +// :2:5: error: @src outside function diff --git a/test/cases/compile_errors/stage1/obj/src_outside_function.zig b/test/cases/compile_errors/stage1/obj/src_outside_function.zig deleted file mode 100644 index 7f8c7ae72f..0000000000 --- a/test/cases/compile_errors/stage1/obj/src_outside_function.zig +++ /dev/null @@ -1,9 +0,0 @@ -comptime { - @src(); -} - -// error -// backend=stage1 -// target=native -// -// tmp.zig:2:5: error: @src outside function diff --git a/test/cases/compile_errors/type_variables_must_be_constant.zig b/test/cases/compile_errors/type_variables_must_be_constant.zig index 35e94f3641..1dbddc126c 100644 --- a/test/cases/compile_errors/type_variables_must_be_constant.zig +++ b/test/cases/compile_errors/type_variables_must_be_constant.zig @@ -7,5 +7,5 @@ export fn entry() foo { // backend=stage2 // target=native // -// :1:1: error: variable of type 'type' must be const or comptime -// :1:1: note: types are not available at runtime +// :1:5: error: variable of type 'type' must be const or comptime +// :1:5: note: types are not available at runtime diff --git 
a/test/cases/compile_errors/use_invalid_number_literal_as_array_index.zig b/test/cases/compile_errors/use_invalid_number_literal_as_array_index.zig index 120ba5c588..c52f614657 100644 --- a/test/cases/compile_errors/use_invalid_number_literal_as_array_index.zig +++ b/test/cases/compile_errors/use_invalid_number_literal_as_array_index.zig @@ -8,5 +8,5 @@ export fn entry() void { // backend=stage2 // target=native // -// :1:1: error: variable of type 'comptime_int' must be const or comptime -// :1:1: note: to modify this variable at runtime, it must be given an explicit fixed-size number type +// :1:5: error: variable of type 'comptime_int' must be const or comptime +// :1:5: note: to modify this variable at runtime, it must be given an explicit fixed-size number type diff --git a/test/cases/compile_errors/usingnamespace_with_wrong_type.zig b/test/cases/compile_errors/usingnamespace_with_wrong_type.zig index 4ec4bf4838..294fd8c012 100644 --- a/test/cases/compile_errors/usingnamespace_with_wrong_type.zig +++ b/test/cases/compile_errors/usingnamespace_with_wrong_type.zig @@ -4,4 +4,4 @@ usingnamespace void; // backend=stage2 // target=native // -// :1:1: error: type void has no namespace +// :1:16: error: type void has no namespace From e47706f34454c46dd00fe10fdf9252657117a00d Mon Sep 17 00:00:00 2001 From: Veikka Tuominen Date: Thu, 28 Jul 2022 21:03:10 +0300 Subject: [PATCH 06/33] Sema: validate packed struct field types --- src/Sema.zig | 186 +++++++++++++----- test/behavior/packed-struct.zig | 28 +-- test/behavior/sizeof_and_typeof.zig | 2 + test/behavior/struct.zig | 6 +- ...truct_with_fields_of_not_allowed_types.zig | 84 ++++++++ ...truct_with_fields_of_not_allowed_types.zig | 74 ------- 6 files changed, 232 insertions(+), 148 deletions(-) create mode 100644 test/cases/compile_errors/packed_struct_with_fields_of_not_allowed_types.zig delete mode 100644 test/cases/compile_errors/stage1/test/packed_struct_with_fields_of_not_allowed_types.zig diff --git a/src/Sema.zig b/src/Sema.zig index 22d0c572fc..71ab59d7f9 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -5049,13 +5049,13 @@ pub fn analyzeExport( try mod.ensureDeclAnalyzed(exported_decl_index); const exported_decl = mod.declPtr(exported_decl_index); - if (!(try sema.validateExternType(exported_decl.ty, .other))) { + if (!sema.validateExternType(exported_decl.ty, .other)) { const msg = msg: { const msg = try sema.errMsg(block, src, "unable to export type '{}'", .{exported_decl.ty.fmt(sema.mod)}); errdefer msg.destroy(sema.gpa); const src_decl = sema.mod.declPtr(block.src_decl); - try sema.explainWhyTypeIsNotExtern(block, src, msg, src.toSrcLoc(src_decl), exported_decl.ty, .other); + try sema.explainWhyTypeIsNotExtern(msg, src.toSrcLoc(src_decl), exported_decl.ty, .other); try sema.addDeclaredHereNote(msg, exported_decl.ty); break :msg msg; @@ -7634,7 +7634,7 @@ fn funcCommon( }; return sema.failWithOwnedErrorMsg(block, msg); } - if (!Type.fnCallingConventionAllowsZigTypes(cc_workaround) and !(try sema.validateExternType(return_type, .ret_ty))) { + if (!Type.fnCallingConventionAllowsZigTypes(cc_workaround) and !sema.validateExternType(return_type, .ret_ty)) { const msg = msg: { const msg = try sema.errMsg(block, ret_ty_src, "return type '{}' not allowed in function with calling convention '{s}'", .{ return_type.fmt(sema.mod), @tagName(cc_workaround), @@ -7642,7 +7642,7 @@ fn funcCommon( errdefer msg.destroy(sema.gpa); const src_decl = sema.mod.declPtr(block.src_decl); - try sema.explainWhyTypeIsNotExtern(block, ret_ty_src, msg, 
ret_ty_src.toSrcLoc(src_decl), return_type, .ret_ty); + try sema.explainWhyTypeIsNotExtern(msg, ret_ty_src.toSrcLoc(src_decl), return_type, .ret_ty); try sema.addDeclaredHereNote(msg, return_type); break :msg msg; @@ -7830,7 +7830,7 @@ fn analyzeParameter( }; return sema.failWithOwnedErrorMsg(block, msg); } - if (!Type.fnCallingConventionAllowsZigTypes(cc) and !(try sema.validateExternType(param.ty, .param_ty))) { + if (!Type.fnCallingConventionAllowsZigTypes(cc) and !sema.validateExternType(param.ty, .param_ty)) { const msg = msg: { const msg = try sema.errMsg(block, param_src, "parameter of type '{}' not allowed in function with calling convention '{s}'", .{ param.ty.fmt(sema.mod), @tagName(cc), @@ -7838,7 +7838,7 @@ fn analyzeParameter( errdefer msg.destroy(sema.gpa); const src_decl = sema.mod.declPtr(block.src_decl); - try sema.explainWhyTypeIsNotExtern(block, param_src, msg, param_src.toSrcLoc(src_decl), param.ty, .param_ty); + try sema.explainWhyTypeIsNotExtern(msg, param_src.toSrcLoc(src_decl), param.ty, .param_ty); try sema.addDeclaredHereNote(msg, param.ty); break :msg msg; @@ -14866,13 +14866,13 @@ fn zirPtrType(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air } else if (inst_data.size == .Many and elem_ty.zigTypeTag() == .Opaque) { return sema.fail(block, elem_ty_src, "unknown-length pointer to opaque not allowed", .{}); } else if (inst_data.size == .C) { - if (!(try sema.validateExternType(elem_ty, .other))) { + if (!sema.validateExternType(elem_ty, .other)) { const msg = msg: { const msg = try sema.errMsg(block, elem_ty_src, "C pointers cannot point to non-C-ABI-compatible type '{}'", .{elem_ty.fmt(sema.mod)}); errdefer msg.destroy(sema.gpa); const src_decl = sema.mod.declPtr(block.src_decl); - try sema.explainWhyTypeIsNotExtern(block, elem_ty_src, msg, elem_ty_src.toSrcLoc(src_decl), elem_ty, .other); + try sema.explainWhyTypeIsNotExtern(msg, elem_ty_src.toSrcLoc(src_decl), elem_ty, .other); try sema.addDeclaredHereNote(msg, elem_ty); break :msg msg; @@ -15950,13 +15950,13 @@ fn zirReify(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.I } else if (ptr_size == .Many and elem_ty.zigTypeTag() == .Opaque) { return sema.fail(block, src, "unknown-length pointer to opaque not allowed", .{}); } else if (ptr_size == .C) { - if (!(try sema.validateExternType(elem_ty, .other))) { + if (!sema.validateExternType(elem_ty, .other)) { const msg = msg: { const msg = try sema.errMsg(block, src, "C pointers cannot point to non-C-ABI-compatible type '{}'", .{elem_ty.fmt(sema.mod)}); errdefer msg.destroy(sema.gpa); const src_decl = sema.mod.declPtr(block.src_decl); - try sema.explainWhyTypeIsNotExtern(block, src, msg, src.toSrcLoc(src_decl), elem_ty, .other); + try sema.explainWhyTypeIsNotExtern(msg, src.toSrcLoc(src_decl), elem_ty, .other); try sema.addDeclaredHereNote(msg, elem_ty); break :msg msg; @@ -19736,7 +19736,7 @@ const ExternPosition = enum { /// Returns true if `ty` is allowed in extern types. /// Does *NOT* require `ty` to be resolved in any way. 
-fn validateExternType(sema: *Sema, ty: Type, position: ExternPosition) CompileError!bool { +fn validateExternType(sema: *Sema, ty: Type, position: ExternPosition) bool { switch (ty.zigTypeTag()) { .Type, .ComptimeFloat, @@ -19781,8 +19781,6 @@ fn validateExternType(sema: *Sema, ty: Type, position: ExternPosition) CompileEr fn explainWhyTypeIsNotExtern( sema: *Sema, - block: *Block, - src: LazySrcLoc, msg: *Module.ErrorMsg, src_loc: Module.SrcLoc, ty: Type, @@ -19826,7 +19824,7 @@ fn explainWhyTypeIsNotExtern( var buf: Type.Payload.Bits = undefined; const tag_ty = ty.intTagType(&buf); try mod.errNoteNonLazy(src_loc, msg, "enum tag type '{}' is not extern compatible", .{tag_ty.fmt(sema.mod)}); - try sema.explainWhyTypeIsNotExtern(block, src, msg, src_loc, tag_ty, position); + try sema.explainWhyTypeIsNotExtern(msg, src_loc, tag_ty, position); }, .Struct => try mod.errNoteNonLazy(src_loc, msg, "only structs with packed or extern layout are extern compatible", .{}), .Union => try mod.errNoteNonLazy(src_loc, msg, "only unions with packed or extern layout are extern compatible", .{}), @@ -19836,13 +19834,87 @@ fn explainWhyTypeIsNotExtern( } else if (position == .param_ty) { return mod.errNoteNonLazy(src_loc, msg, "arrays are not allowed as a parameter type", .{}); } - try sema.explainWhyTypeIsNotExtern(block, src, msg, src_loc, ty.elemType2(), position); + try sema.explainWhyTypeIsNotExtern(msg, src_loc, ty.elemType2(), position); }, - .Vector => try sema.explainWhyTypeIsNotExtern(block, src, msg, src_loc, ty.elemType2(), position), + .Vector => try sema.explainWhyTypeIsNotExtern(msg, src_loc, ty.elemType2(), position), .Optional => try mod.errNoteNonLazy(src_loc, msg, "only pointer like optionals are extern compatible", .{}), } } +/// Returns true if `ty` is allowed in packed types. +/// Does *NOT* require `ty` to be resolved in any way. 
+fn validatePackedType(ty: Type) bool { + switch (ty.zigTypeTag()) { + .Type, + .ComptimeFloat, + .ComptimeInt, + .EnumLiteral, + .Undefined, + .Null, + .ErrorUnion, + .ErrorSet, + .BoundFn, + .Frame, + .NoReturn, + .Opaque, + .AnyFrame, + .Fn, + .Array, + .Optional, + => return false, + .Void, + .Bool, + .Float, + .Pointer, + .Int, + .Vector, + .Enum, + => return true, + .Struct, .Union => return ty.containerLayout() == .Packed, + } +} + +fn explainWhyTypeIsNotPacked( + sema: *Sema, + msg: *Module.ErrorMsg, + src_loc: Module.SrcLoc, + ty: Type, +) CompileError!void { + const mod = sema.mod; + switch (ty.zigTypeTag()) { + .Void, + .Bool, + .Float, + .Pointer, + .Int, + .Vector, + .Enum, + => return, + .Type, + .ComptimeFloat, + .ComptimeInt, + .EnumLiteral, + .Undefined, + .Null, + .BoundFn, + .Frame, + .NoReturn, + .Opaque, + .ErrorUnion, + .ErrorSet, + .AnyFrame, + .Optional, + .Array, + => try mod.errNoteNonLazy(src_loc, msg, "type has no guaranteed in-memory representation", .{}), + .Fn => { + try mod.errNoteNonLazy(src_loc, msg, "type has no guaranteed in-memory representation", .{}); + try mod.errNoteNonLazy(src_loc, msg, "use '*const ' to make a function pointer type", .{}); + }, + .Struct => try mod.errNoteNonLazy(src_loc, msg, "only packed structs layout are allowed in packed types", .{}), + .Union => try mod.errNoteNonLazy(src_loc, msg, "only packed unions layout are allowed in packed types", .{}), + } +} + pub const PanicId = enum { unreach, unwrap_null, @@ -26919,20 +26991,6 @@ fn semaStructFields(mod: *Module, struct_obj: *Module.Struct) CompileError!void const field = &struct_obj.fields.values()[i]; field.ty = try field_ty.copy(decl_arena_allocator); - if (struct_obj.layout == .Extern and !(try sema.validateExternType(field.ty, .other))) { - const msg = msg: { - const tree = try sema.getAstTree(&block_scope); - const fields_src = enumFieldSrcLoc(decl, tree.*, struct_obj.node_offset, i); - const msg = try sema.errMsg(&block_scope, fields_src, "extern structs cannot contain fields of type '{}'", .{field.ty.fmt(sema.mod)}); - errdefer msg.destroy(sema.gpa); - - try sema.explainWhyTypeIsNotExtern(&block_scope, fields_src, msg, fields_src.toSrcLoc(decl), field.ty, .other); - - try sema.addDeclaredHereNote(msg, field.ty); - break :msg msg; - }; - return sema.failWithOwnedErrorMsg(&block_scope, msg); - } if (field_ty.zigTypeTag() == .Opaque) { const msg = msg: { const tree = try sema.getAstTree(&block_scope); @@ -26945,6 +27003,33 @@ fn semaStructFields(mod: *Module, struct_obj: *Module.Struct) CompileError!void }; return sema.failWithOwnedErrorMsg(&block_scope, msg); } + if (struct_obj.layout == .Extern and !sema.validateExternType(field.ty, .other)) { + const msg = msg: { + const tree = try sema.getAstTree(&block_scope); + const fields_src = enumFieldSrcLoc(decl, tree.*, struct_obj.node_offset, i); + const msg = try sema.errMsg(&block_scope, fields_src, "extern structs cannot contain fields of type '{}'", .{field.ty.fmt(sema.mod)}); + errdefer msg.destroy(sema.gpa); + + try sema.explainWhyTypeIsNotExtern(msg, fields_src.toSrcLoc(decl), field.ty, .other); + + try sema.addDeclaredHereNote(msg, field.ty); + break :msg msg; + }; + return sema.failWithOwnedErrorMsg(&block_scope, msg); + } else if (struct_obj.layout == .Packed and !(validatePackedType(field.ty))) { + const msg = msg: { + const tree = try sema.getAstTree(&block_scope); + const fields_src = enumFieldSrcLoc(decl, tree.*, struct_obj.node_offset, i); + const msg = try sema.errMsg(&block_scope, fields_src, "packed structs 
cannot contain fields of type '{}'", .{field.ty.fmt(sema.mod)}); + errdefer msg.destroy(sema.gpa); + + try sema.explainWhyTypeIsNotPacked(msg, fields_src.toSrcLoc(decl), field.ty); + + try sema.addDeclaredHereNote(msg, field.ty); + break :msg msg; + }; + return sema.failWithOwnedErrorMsg(&block_scope, msg); + } if (zir_field.align_body_len > 0) { const body = zir.extra[extra_index..][0..zir_field.align_body_len]; @@ -27243,20 +27328,6 @@ fn semaUnionFields(mod: *Module, union_obj: *Module.Union) CompileError!void { } } - if (union_obj.layout == .Extern and !(try sema.validateExternType(field_ty, .union_field))) { - const msg = msg: { - const tree = try sema.getAstTree(&block_scope); - const field_src = enumFieldSrcLoc(decl, tree.*, union_obj.node_offset, field_i); - const msg = try sema.errMsg(&block_scope, field_src, "extern unions cannot contain fields of type '{}'", .{field_ty.fmt(sema.mod)}); - errdefer msg.destroy(sema.gpa); - - try sema.explainWhyTypeIsNotExtern(&block_scope, field_src, msg, field_src.toSrcLoc(decl), field_ty, .union_field); - - try sema.addDeclaredHereNote(msg, field_ty); - break :msg msg; - }; - return sema.failWithOwnedErrorMsg(&block_scope, msg); - } if (field_ty.zigTypeTag() == .Opaque) { const msg = msg: { const tree = try sema.getAstTree(&block_scope); @@ -27269,6 +27340,33 @@ fn semaUnionFields(mod: *Module, union_obj: *Module.Union) CompileError!void { }; return sema.failWithOwnedErrorMsg(&block_scope, msg); } + if (union_obj.layout == .Extern and !sema.validateExternType(field_ty, .union_field)) { + const msg = msg: { + const tree = try sema.getAstTree(&block_scope); + const field_src = enumFieldSrcLoc(decl, tree.*, union_obj.node_offset, field_i); + const msg = try sema.errMsg(&block_scope, field_src, "extern unions cannot contain fields of type '{}'", .{field_ty.fmt(sema.mod)}); + errdefer msg.destroy(sema.gpa); + + try sema.explainWhyTypeIsNotExtern(msg, field_src.toSrcLoc(decl), field_ty, .union_field); + + try sema.addDeclaredHereNote(msg, field_ty); + break :msg msg; + }; + return sema.failWithOwnedErrorMsg(&block_scope, msg); + } else if (union_obj.layout == .Packed and !(validatePackedType(field_ty))) { + const msg = msg: { + const tree = try sema.getAstTree(&block_scope); + const fields_src = enumFieldSrcLoc(decl, tree.*, union_obj.node_offset, field_i); + const msg = try sema.errMsg(&block_scope, fields_src, "packed unions cannot contain fields of type '{}'", .{field_ty.fmt(sema.mod)}); + errdefer msg.destroy(sema.gpa); + + try sema.explainWhyTypeIsNotPacked(msg, fields_src.toSrcLoc(decl), field_ty); + + try sema.addDeclaredHereNote(msg, field_ty); + break :msg msg; + }; + return sema.failWithOwnedErrorMsg(&block_scope, msg); + } gop.value_ptr.* = .{ .ty = try field_ty.copy(decl_arena_allocator), diff --git a/test/behavior/packed-struct.zig b/test/behavior/packed-struct.zig index 2dea485bf5..8c34f5741b 100644 --- a/test/behavior/packed-struct.zig +++ b/test/behavior/packed-struct.zig @@ -6,6 +6,8 @@ const expectEqual = std.testing.expectEqual; const native_endian = builtin.cpu.arch.endian(); test "correct size of packed structs" { + // Stage2 has different packed struct semantics. 
+ if (builtin.zig_backend != .stage1) return error.SkipZigTest; const T1 = packed struct { one: u8, three: [3]u8 }; try expectEqual(4, @sizeOf(T1)); @@ -118,18 +120,6 @@ test "flags in packed structs" { try expectEqual(32, @bitSizeOf(Flags3)); } -test "arrays in packed structs" { - if (builtin.zig_backend == .stage1) return error.SkipZigTest; - - const T1 = packed struct { array: [3][3]u8 }; - const T2 = packed struct { array: [9]u8 }; - - try expectEqual(@sizeOf(u72), @sizeOf(T1)); - try expectEqual(72, @bitSizeOf(T1)); - try expectEqual(@sizeOf(u72), @sizeOf(T2)); - try expectEqual(72, @bitSizeOf(T2)); -} - test "consistent size of packed structs" { if (builtin.zig_backend == .stage1) return error.SkipZigTest; @@ -145,23 +135,15 @@ test "consistent size of packed structs" { try expectEqual(register_size_bits, @bitSizeOf(TxData2)); try expectEqual(register_size_bytes, @sizeOf(TxData2)); - const TxData3 = packed struct { a: u32, b: [3]u8 }; const TxData4 = packed struct { a: u32, b: u24 }; - const TxData5 = packed struct { a: [3]u8, b: u32 }; const TxData6 = packed struct { a: u24, b: u32 }; const expectedBitSize = 56; const expectedByteSize = @sizeOf(u56); - try expectEqual(expectedBitSize, @bitSizeOf(TxData3)); - try expectEqual(expectedByteSize, @sizeOf(TxData3)); - try expectEqual(expectedBitSize, @bitSizeOf(TxData4)); try expectEqual(expectedByteSize, @sizeOf(TxData4)); - try expectEqual(expectedBitSize, @bitSizeOf(TxData5)); - try expectEqual(expectedByteSize, @sizeOf(TxData5)); - try expectEqual(expectedBitSize, @bitSizeOf(TxData6)); try expectEqual(expectedByteSize, @sizeOf(TxData6)); } @@ -234,12 +216,6 @@ test "correct sizeOf and offsets in packed structs" { try expectEqual(@as(u7, 0b1111010), s2.y); try expectEqual(@as(u24, 0xd5c71f), s2.z); } - - const S = packed struct { a: u32, pad: [3]u32, b: u32 }; - - try expectEqual(16, @offsetOf(S, "b")); - try expectEqual(128, @bitOffsetOf(S, "b")); - try expectEqual(@sizeOf(u160), @sizeOf(S)); } test "nested packed structs" { diff --git a/test/behavior/sizeof_and_typeof.zig b/test/behavior/sizeof_and_typeof.zig index 6c7e16b502..83c5d977be 100644 --- a/test/behavior/sizeof_and_typeof.zig +++ b/test/behavior/sizeof_and_typeof.zig @@ -105,6 +105,8 @@ test "@offsetOf" { } test "@offsetOf packed struct, array length not power of 2 or multiple of native pointer width in bytes" { + // Stage2 has different packed struct semantics. + if (builtin.zig_backend != .stage1) return error.SkipZigTest; const p3a_len = 3; const P3 = packed struct { a: [p3a_len]u8, diff --git a/test/behavior/struct.zig b/test/behavior/struct.zig index 8fac5697ec..377cbb56f4 100644 --- a/test/behavior/struct.zig +++ b/test/behavior/struct.zig @@ -704,10 +704,8 @@ const FooArray24Bits = packed struct { }; test "aligned array of packed struct" { - if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO + // Stage2 has different packed struct semantics. 
+ if (builtin.zig_backend != .stage1) return error.SkipZigTest; comptime { try expect(@sizeOf(FooStructAligned) == 2); diff --git a/test/cases/compile_errors/packed_struct_with_fields_of_not_allowed_types.zig b/test/cases/compile_errors/packed_struct_with_fields_of_not_allowed_types.zig new file mode 100644 index 0000000000..8fcd300629 --- /dev/null +++ b/test/cases/compile_errors/packed_struct_with_fields_of_not_allowed_types.zig @@ -0,0 +1,84 @@ +export fn entry1() void { + _ = @sizeOf(packed struct { + x: anyerror, + }); +} +export fn entry2() void { + _ = @sizeOf(packed struct { + x: [2]u24, + }); +} +export fn entry3() void { + _ = @sizeOf(packed struct { + x: anyerror!u32, + }); +} +export fn entry4() void { + _ = @sizeOf(packed struct { + x: S, + }); +} +export fn entry5() void { + _ = @sizeOf(packed struct { + x: U, + }); +} +export fn entry6() void { + _ = @sizeOf(packed struct { + x: ?anyerror, + }); +} +export fn entry7() void { + _ = @sizeOf(packed struct { + x: enum { A, B }, + }); +} +export fn entry8() void { + _ = @sizeOf(packed struct { + x: fn () void, + }); +} +export fn entry9() void { + _ = @sizeOf(packed struct { + x: *const fn () void, + }); +} +export fn entry10() void { + _ = @sizeOf(packed struct { + x: packed struct { x: i32 }, + }); +} +export fn entry11() void { + _ = @sizeOf(packed struct { + x: packed union { A: i32, B: u32 }, + }); +} +const S = struct { + x: i32, +}; +const U = extern union { + A: i32, + B: u32, +}; + +// error +// backend=llvm +// target=native +// +// :3:9: error: packed structs cannot contain fields of type 'anyerror' +// :3:9: note: type has no guaranteed in-memory representation +// :8:9: error: packed structs cannot contain fields of type '[2]u24' +// :8:9: note: type has no guaranteed in-memory representation +// :13:9: error: packed structs cannot contain fields of type 'anyerror!u32' +// :13:9: note: type has no guaranteed in-memory representation +// :18:9: error: packed structs cannot contain fields of type 'tmp.S' +// :18:9: note: only packed structs layout are allowed in packed types +// :56:11: note: struct declared here +// :23:9: error: packed structs cannot contain fields of type 'tmp.U' +// :23:9: note: only packed unions layout are allowed in packed types +// :59:18: note: union declared here +// :28:9: error: packed structs cannot contain fields of type '?anyerror' +// :28:9: note: type has no guaranteed in-memory representation +// :38:9: error: packed structs cannot contain fields of type 'fn() void' +// :38:9: note: type has no guaranteed in-memory representation +// :38:9: note: use '*const ' to make a function pointer type diff --git a/test/cases/compile_errors/stage1/test/packed_struct_with_fields_of_not_allowed_types.zig b/test/cases/compile_errors/stage1/test/packed_struct_with_fields_of_not_allowed_types.zig deleted file mode 100644 index 2951d26c52..0000000000 --- a/test/cases/compile_errors/stage1/test/packed_struct_with_fields_of_not_allowed_types.zig +++ /dev/null @@ -1,74 +0,0 @@ -const A = packed struct { - x: anyerror, -}; -const B = packed struct { - x: [2]u24, -}; -const C = packed struct { - x: [1]anyerror, -}; -const D = packed struct { - x: [1]S, -}; -const E = packed struct { - x: [1]U, -}; -const F = packed struct { - x: ?anyerror, -}; -const G = packed struct { - x: Enum, -}; -export fn entry1() void { - var a: A = undefined; - _ = a; -} -export fn entry2() void { - var b: B = undefined; - _ = b; -} -export fn entry3() void { - var r: C = undefined; - _ = r; -} -export fn entry4() void { - var d: 
D = undefined; - _ = d; -} -export fn entry5() void { - var e: E = undefined; - _ = e; -} -export fn entry6() void { - var f: F = undefined; - _ = f; -} -export fn entry7() void { - var g: G = undefined; - _ = g; -} -const S = struct { - x: i32, -}; -const U = struct { - A: i32, - B: u32, -}; -const Enum = enum { - A, - B, -}; - -// error -// backend=stage1 -// target=native -// is_test=1 -// -// tmp.zig:2:5: error: type 'anyerror' not allowed in packed struct; no guaranteed in-memory representation -// tmp.zig:5:5: error: array of 'u24' not allowed in packed struct due to padding bits (must be padded from 48 to 64 bits) -// tmp.zig:8:5: error: type 'anyerror' not allowed in packed struct; no guaranteed in-memory representation -// tmp.zig:11:5: error: non-packed, non-extern struct 'S' not allowed in packed struct; no guaranteed in-memory representation -// tmp.zig:14:5: error: non-packed, non-extern struct 'U' not allowed in packed struct; no guaranteed in-memory representation -// tmp.zig:17:5: error: type '?anyerror' not allowed in packed struct; no guaranteed in-memory representation -// tmp.zig:20:5: error: type 'Enum' not allowed in packed struct; no guaranteed in-memory representation -// tmp.zig:57:14: note: enum declaration does not specify an integer tag type From 292906fb2378bd37e02abf09e2be7f86ee657fac Mon Sep 17 00:00:00 2001 From: Veikka Tuominen Date: Mon, 1 Aug 2022 17:45:47 +0300 Subject: [PATCH 07/33] Sema: fix reify Fn alignment and args --- src/Sema.zig | 18 +++++++++++-- test/behavior/type.zig | 57 +++++++++++++++++++++++++++++------------- 2 files changed, 56 insertions(+), 19 deletions(-) diff --git a/src/Sema.zig b/src/Sema.zig index 71ab59d7f9..67f04daf83 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -16429,10 +16429,24 @@ fn zirReify(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.I return sema.fail(block, src, "varargs functions must have C calling convention", .{}); } - const alignment = @intCast(u29, alignment_val.toUnsignedInt(target)); // TODO: Validate this value. 
+ const alignment = alignment: { + if (!try sema.intFitsInType(block, src, alignment_val, Type.u32, null)) { + return sema.fail(block, src, "alignment must fit in 'u32'", .{}); + } + const alignment = @intCast(u29, alignment_val.toUnsignedInt(target)); + if (alignment == target_util.defaultFunctionAlignment(target)) { + break :alignment 0; + } else { + break :alignment alignment; + } + }; var buf: Value.ToTypeBuffer = undefined; - const args: []Value = if (args_val.castTag(.aggregate)) |some| some.data else &.{}; + const args_slice_val = args_val.castTag(.slice).?.data; + const args_decl_index = args_slice_val.ptr.pointerDecl().?; + try sema.ensureDeclAnalyzed(args_decl_index); + const args_decl = mod.declPtr(args_decl_index); + const args: []Value = if (args_decl.val.castTag(.aggregate)) |some| some.data else &.{}; var param_types = try sema.arena.alloc(Type, args.len); var comptime_params = try sema.arena.alloc(bool, args.len); var noalias_bits: u32 = 0; diff --git a/test/behavior/type.zig b/test/behavior/type.zig index 16a79a5122..4aec553527 100644 --- a/test/behavior/type.zig +++ b/test/behavior/type.zig @@ -247,26 +247,17 @@ fn add(a: i32, b: i32) i32 { } test "Type.ErrorSet" { + if (builtin.zig_backend == .stage1) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO + try testing.expect(@Type(.{ .ErrorSet = null }) == anyerror); // error sets don't compare equal so just check if they compile - _ = @Type(@typeInfo(error{})); - _ = @Type(@typeInfo(error{A})); - _ = @Type(@typeInfo(error{ A, B, C })); - _ = @Type(.{ - .ErrorSet = &[_]Type.Error{ - .{ .name = "A" }, - .{ .name = "B" }, - .{ .name = "C" }, - }, - }); - _ = @Type(.{ - .ErrorSet = &.{ - .{ .name = "C" }, - .{ .name = "B" }, - .{ .name = "A" }, - }, - }); + inline for (.{ error{}, error{A}, error{ A, B, C } }) |T| { + const info = @typeInfo(T); + const T2 = @Type(info); + try testing.expect(T == T2); + } } test "Type.Struct" { @@ -517,3 +508,35 @@ test "Type.Union from regular enum" { _ = T; _ = @typeInfo(T).Union; } + +test "Type.Fn" { + if (builtin.zig_backend == .stage1) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO + + const some_opaque = opaque {}; + const some_ptr = *some_opaque; + const T = fn (c_int, some_ptr) callconv(.C) void; + + { + const fn_info = std.builtin.Type{ .Fn = .{ + .calling_convention = .C, + .alignment = 0, + .is_generic = false, + .is_var_args = false, + .return_type = void, + .args = &.{ + .{ .is_generic = false, .is_noalias = false, .arg_type = c_int }, + .{ .is_generic = false, .is_noalias = false, .arg_type = some_ptr }, + }, + } }; + + const fn_type = @Type(fn_info); + try std.testing.expectEqual(T, fn_type); + } + + { + const fn_info = @typeInfo(T); + const fn_type = @Type(fn_info); + try std.testing.expectEqual(T, fn_type); + } +} From 14f0b70570aa1c50c7316851293899000615bc94 Mon Sep 17 00:00:00 2001 From: Veikka Tuominen Date: Mon, 1 Aug 2022 19:33:18 +0300 Subject: [PATCH 08/33] Sema: add safety for sentinel slice --- lib/std/builtin.zig | 11 +++ src/Sema.zig | 86 ++++++++++++++++++- ...or_tagged_union_with_extra_union_field.zig | 2 +- ...ray slice sentinel mismatch non-scalar.zig | 21 +++++ .../array slice sentinel mismatch vector.zig | 19 ++++ .../safety/array slice sentinel mismatch.zig | 7 +- 6 files changed, 140 insertions(+), 6 deletions(-) create mode 100644 test/cases/safety/array slice sentinel mismatch non-scalar.zig create mode 100644 test/cases/safety/array slice sentinel 
mismatch vector.zig diff --git a/lib/std/builtin.zig b/lib/std/builtin.zig index c38eb543ed..047c65439c 100644 --- a/lib/std/builtin.zig +++ b/lib/std/builtin.zig @@ -846,6 +846,17 @@ pub fn default_panic(msg: []const u8, error_return_trace: ?*StackTrace) noreturn } } +pub fn checkNonScalarSentinel(expected: anytype, actual: @TypeOf(expected)) void { + if (!std.meta.eql(expected, actual)) { + panicSentinelMismatch(expected, actual); + } +} + +pub fn panicSentinelMismatch(expected: anytype, actual: @TypeOf(expected)) noreturn { + @setCold(true); + std.debug.panic("sentinel mismatch: expected {any}, found {any}", .{ expected, actual }); +} + pub fn panicUnwrapError(st: ?*StackTrace, err: anyerror) noreturn { @setCold(true); std.debug.panicExtra(st, "attempt to unwrap error: {s}", .{@errorName(err)}); diff --git a/src/Sema.zig b/src/Sema.zig index 67f04daf83..e41602a037 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -20148,6 +20148,77 @@ fn panicIndexOutOfBounds( try sema.addSafetyCheckExtra(parent_block, ok, &fail_block); } +fn panicSentinelMismatch( + sema: *Sema, + parent_block: *Block, + src: LazySrcLoc, + maybe_sentinel: ?Value, + sentinel_ty: Type, + ptr: Air.Inst.Ref, + sentinel_index: Air.Inst.Ref, +) !void { + const expected_sentinel_val = maybe_sentinel orelse return; + const expected_sentinel = try sema.addConstant(sentinel_ty, expected_sentinel_val); + + const ptr_ty = sema.typeOf(ptr); + const actual_sentinel = if (ptr_ty.isSlice()) + try parent_block.addBinOp(.slice_elem_val, ptr, sentinel_index) + else blk: { + const elem_ptr_ty = try sema.elemPtrType(ptr_ty, null); + const sentinel_ptr = try parent_block.addPtrElemPtr(ptr, sentinel_index, elem_ptr_ty); + break :blk try parent_block.addTyOp(.load, sentinel_ty, sentinel_ptr); + }; + + const ok = if (sentinel_ty.zigTypeTag() == .Vector) ok: { + const eql = + try parent_block.addCmpVector(expected_sentinel, actual_sentinel, .eq, try sema.addType(sentinel_ty)); + break :ok try parent_block.addInst(.{ + .tag = .reduce, + .data = .{ .reduce = .{ + .operand = eql, + .operation = .And, + } }, + }); + } else if (sentinel_ty.isSelfComparable(true)) + try parent_block.addBinOp(.cmp_eq, expected_sentinel, actual_sentinel) + else { + const panic_fn = try sema.getBuiltin(parent_block, src, "checkNonScalarSentinel"); + const args: [2]Air.Inst.Ref = .{ expected_sentinel, actual_sentinel }; + _ = try sema.analyzeCall(parent_block, panic_fn, src, src, .auto, false, &args, null); + return; + }; + const gpa = sema.gpa; + + var fail_block: Block = .{ + .parent = parent_block, + .sema = sema, + .src_decl = parent_block.src_decl, + .namespace = parent_block.namespace, + .wip_capture_scope = parent_block.wip_capture_scope, + .instructions = .{}, + .inlining = parent_block.inlining, + .is_comptime = parent_block.is_comptime, + }; + + defer fail_block.instructions.deinit(gpa); + + { + const this_feature_is_implemented_in_the_backend = + sema.mod.comp.bin_file.options.use_llvm; + + if (!this_feature_is_implemented_in_the_backend) { + // TODO implement this feature in all the backends and then delete this branch + _ = try fail_block.addNoOp(.breakpoint); + _ = try fail_block.addNoOp(.unreach); + } else { + const panic_fn = try sema.getBuiltin(&fail_block, src, "panicSentinelMismatch"); + const args: [2]Air.Inst.Ref = .{ expected_sentinel, actual_sentinel }; + _ = try sema.analyzeCall(&fail_block, panic_fn, src, src, .auto, false, &args, null); + } + } + try sema.addSafetyCheckExtra(parent_block, ok, &fail_block); +} + fn safetyPanic( sema: *Sema, block: 
*Block, @@ -25368,6 +25439,7 @@ fn analyzeSlice( } break :s null; }; + const slice_sentinel = if (sentinel_opt != .none) sentinel else null; // requirement: start <= end if (try sema.resolveDefinedValue(block, end_src, end)) |end_val| { @@ -25447,7 +25519,12 @@ fn analyzeSlice( const opt_new_ptr_val = try sema.resolveMaybeUndefVal(block, ptr_src, new_ptr); const new_ptr_val = opt_new_ptr_val orelse { - return block.addBitCast(return_ty, new_ptr); + const result = try block.addBitCast(return_ty, new_ptr); + if (block.wantSafety()) { + // requirement: result[new_len] == slice_sentinel + try sema.panicSentinelMismatch(block, src, slice_sentinel, elem_ty, result, new_len); + } + return result; }; if (!new_ptr_val.isUndef()) { @@ -25511,7 +25588,7 @@ fn analyzeSlice( // requirement: start <= end try sema.panicIndexOutOfBounds(block, src, start, end, .cmp_lte); } - return block.addInst(.{ + const result = try block.addInst(.{ .tag = .slice, .data = .{ .ty_pl = .{ .ty = try sema.addType(return_ty), @@ -25521,6 +25598,11 @@ fn analyzeSlice( }), } }, }); + if (block.wantSafety()) { + // requirement: result[new_len] == slice_sentinel + try sema.panicSentinelMismatch(block, src, slice_sentinel, elem_ty, result, new_len); + } + return result; } /// Asserts that lhs and rhs types are both numeric. diff --git a/test/cases/compile_errors/reify_type_for_tagged_union_with_extra_union_field.zig b/test/cases/compile_errors/reify_type_for_tagged_union_with_extra_union_field.zig index 414bf2ce5e..ee557cd6c2 100644 --- a/test/cases/compile_errors/reify_type_for_tagged_union_with_extra_union_field.zig +++ b/test/cases/compile_errors/reify_type_for_tagged_union_with_extra_union_field.zig @@ -31,5 +31,5 @@ export fn entry() void { // backend=stage2 // target=native // -// :13:16: error: no field named 'arst' in enum 'tmp.Tag__enum_264' +// :13:16: error: no field named 'arst' in enum 'tmp.Tag__enum_266' // :1:13: note: enum declared here diff --git a/test/cases/safety/array slice sentinel mismatch non-scalar.zig b/test/cases/safety/array slice sentinel mismatch non-scalar.zig new file mode 100644 index 0000000000..e773b165e8 --- /dev/null +++ b/test/cases/safety/array slice sentinel mismatch non-scalar.zig @@ -0,0 +1,21 @@ +const std = @import("std"); + +pub fn panic(message: []const u8, stack_trace: ?*std.builtin.StackTrace) noreturn { + _ = stack_trace; + if (std.mem.eql(u8, message, "sentinel mismatch: expected tmp.main.S{ .a = 1 }, found tmp.main.S{ .a = 2 }")) { + std.process.exit(0); + } + std.process.exit(1); +} + +pub fn main() !void { + const S = struct { a: u32 }; + var arr = [_]S{ .{ .a = 1 }, .{ .a = 2 } }; + var s = arr[0..1 :.{ .a = 1 }]; + _ = s; + return error.TestFailed; +} + +// run +// backend=llvm +// target=native diff --git a/test/cases/safety/array slice sentinel mismatch vector.zig b/test/cases/safety/array slice sentinel mismatch vector.zig new file mode 100644 index 0000000000..38d020d84a --- /dev/null +++ b/test/cases/safety/array slice sentinel mismatch vector.zig @@ -0,0 +1,19 @@ +const std = @import("std"); + +pub fn panic(message: []const u8, stack_trace: ?*std.builtin.StackTrace) noreturn { + _ = stack_trace; + if (std.mem.eql(u8, message, "sentinel mismatch: expected { 0, 0 }, found { 4, 4 }")) { + std.process.exit(0); + } + std.process.exit(1); +} + +pub fn main() !void { + var buf: [4]@Vector(2, u32) = .{ .{ 1, 1 }, .{ 2, 2 }, .{ 3, 3 }, .{ 4, 4 } }; + const slice = buf[0..3 :.{ 0, 0 }]; + _ = slice; + return error.TestFailed; +} +// run +// backend=llvm +// target=native diff 
--git a/test/cases/safety/array slice sentinel mismatch.zig b/test/cases/safety/array slice sentinel mismatch.zig index 3aca5b9610..8f54b4cc53 100644 --- a/test/cases/safety/array slice sentinel mismatch.zig +++ b/test/cases/safety/array slice sentinel mismatch.zig @@ -2,17 +2,18 @@ const std = @import("std"); pub fn panic(message: []const u8, stack_trace: ?*std.builtin.StackTrace) noreturn { _ = stack_trace; - if (std.mem.eql(u8, message, "sentinel mismatch")) { + if (std.mem.eql(u8, message, "sentinel mismatch: expected 0, found 4")) { std.process.exit(0); } std.process.exit(1); } + pub fn main() !void { - var buf: [4]u8 = undefined; + var buf: [4]u8 = .{ 1, 2, 3, 4 }; const slice = buf[0..3 :0]; _ = slice; return error.TestFailed; } // run -// backend=stage1 +// backend=llvm // target=native From 622714b76b6c3a21a5806e894151d6de8a7e6866 Mon Sep 17 00:00:00 2001 From: Loris Cro Date: Tue, 2 Aug 2022 17:15:56 +0200 Subject: [PATCH 09/33] autodoc: fix frontend crash while rendering std.mem Previously we expected to only find decl refs in a `foo.bar.baz` type of expression. This would crash when trying to render something like `@typeInfo(T).Int.bits`. We now properly account for builtins and other components. --- lib/docs/main.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/docs/main.js b/lib/docs/main.js index a6b75eb1fc..31f91a8445 100644 --- a/lib/docs/main.js +++ b/lib/docs/main.js @@ -931,8 +931,7 @@ var zigAnalysis; return exprName(switchIndex, opts); } case "refPath" : { - const declRef = expr.refPath[0].declRef; - let name = zigAnalysis.decls[declRef].name; + let name = exprName(expr.refPath[0]); for (let i = 1; i < expr.refPath.length; i++) { let component = undefined; if ("string" in expr.refPath[i]) { From c5afefec427f2f2aeb25fe8f7114b5a0a6aa1129 Mon Sep 17 00:00:00 2001 From: Loris Cro Date: Tue, 2 Aug 2022 17:24:19 +0200 Subject: [PATCH 10/33] autodoc: fix autodoc analysis for `@typeInfo` We were previously erroneously saving it as a `@TypeOf`. 
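As an illustration (a minimal sketch, not taken from this patch; the declaration is hypothetical), a doc'd declaration of the following shape was previously recorded by the walker as a `typeOf` expression, so the frontend rendered it as `@TypeOf(...)` instead of `@typeInfo(...)`:

    pub const Example = struct {
        /// Autodoc renders this initializer in the generated docs; the
        /// analysis now tags the call as `typeInfo`, so it is printed
        /// back as `@typeInfo(u32).Int.bits` rather than `@TypeOf(...)`.
        pub const bits = @typeInfo(u32).Int.bits;
    };
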
--- lib/docs/main.js | 5 +++++ src/Autodoc.zig | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/docs/main.js b/lib/docs/main.js index 31f91a8445..5435efd647 100644 --- a/lib/docs/main.js +++ b/lib/docs/main.js @@ -1403,6 +1403,11 @@ var zigAnalysis; let payloadHtml = "@TypeOf(" + exprName(typeRefArg, {wantHtml: true, wantLink:true}) + ")"; return payloadHtml; } + case "typeInfo": { + const typeRefArg = zigAnalysis.exprs[expr.typeInfo]; + let payloadHtml = "@typeInfo(" + exprName(typeRefArg, {wantHtml: true, wantLink:true}) + ")"; + return payloadHtml; + } case "null": { return "null"; } diff --git a/src/Autodoc.zig b/src/Autodoc.zig index 2d163bf4f0..1ed2694553 100644 --- a/src/Autodoc.zig +++ b/src/Autodoc.zig @@ -580,6 +580,7 @@ const DocData = struct { enumLiteral: []const u8, // direct value alignOf: usize, // index in `exprs` typeOf: usize, // index in `exprs` + typeInfo: usize, // index in `exprs` typeOf_peer: []usize, errorUnion: usize, // index in `exprs` as: As, @@ -1788,7 +1789,7 @@ fn walkInstruction( return DocData.WalkResult{ .typeRef = operand.typeRef, - .expr = .{ .typeOf = operand_index }, + .expr = .{ .typeInfo = operand_index }, }; }, .as_node => { From 447a4cc115f5d105d49fcb3e970082b89e5a46dd Mon Sep 17 00:00:00 2001 From: Loris Cro Date: Tue, 2 Aug 2022 17:48:15 +0200 Subject: [PATCH 11/33] autodoc: fix off-by-1 error in analysis of pointer types --- src/Autodoc.zig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Autodoc.zig b/src/Autodoc.zig index 1ed2694553..35f9dc7dea 100644 --- a/src/Autodoc.zig +++ b/src/Autodoc.zig @@ -1399,7 +1399,6 @@ fn walkInstruction( const extra = file.zir.extraData(Zir.Inst.PtrType, ptr.payload_index); var extra_index = extra.end; - const type_slot_index = self.types.items.len; const elem_type_ref = try self.walkRef( file, parent_scope, @@ -1446,6 +1445,7 @@ fn walkInstruction( host_size = ref_result.expr; } + const type_slot_index = self.types.items.len; try self.types.append(self.arena, .{ .Pointer = .{ .size = ptr.size, @@ -2994,7 +2994,7 @@ fn tryResolveRefPath( "TODO: handle `{s}`in tryResolveRefPath\nInfo: {}", .{ @tagName(resolved_parent), resolved_parent }, ); - path[i + 1] = (try self.cteTodo("match failure")).expr; + path[i + 1] = (try self.cteTodo("")).expr; continue :outer; }, .comptimeExpr, .call, .typeOf => { From fa321a07cd985c672879c091db6dd0aa6b66f0b7 Mon Sep 17 00:00:00 2001 From: Frank Denis <124872+jedisct1@users.noreply.github.com> Date: Wed, 3 Aug 2022 15:25:15 +0200 Subject: [PATCH 12/33] crypto.sign.ed25519: include a context string in blind key signatures (#12316) The next revision of the specification is going to include a context string in the way blinded scalars are computed. See: https://github.com/cfrg/draft-irtf-cfrg-signature-key-blinding/issues/30#issuecomment-1180516152 https://github.com/cfrg/draft-irtf-cfrg-signature-key-blinding/pull/37 --- lib/std/crypto/25519/ed25519.zig | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/lib/std/crypto/25519/ed25519.zig b/lib/std/crypto/25519/ed25519.zig index 83a6c2389e..2a7671863e 100644 --- a/lib/std/crypto/25519/ed25519.zig +++ b/lib/std/crypto/25519/ed25519.zig @@ -229,15 +229,14 @@ pub const Ed25519 = struct { blind_secret_key: BlindSecretKey, }; - /// Blind an existing key pair with a blinding seed. - pub fn blind(key_pair: Ed25519.KeyPair, blind_seed: [blind_seed_length]u8) !BlindKeyPair { + /// Blind an existing key pair with a blinding seed and a context. 
+ pub fn blind(key_pair: Ed25519.KeyPair, blind_seed: [blind_seed_length]u8, ctx: []const u8) !BlindKeyPair { var h: [Sha512.digest_length]u8 = undefined; Sha512.hash(key_pair.secret_key[0..32], &h, .{}); Curve.scalar.clamp(h[0..32]); const scalar = Curve.scalar.reduce(h[0..32].*); - var blind_h: [Sha512.digest_length]u8 = undefined; - Sha512.hash(blind_seed[0..], &blind_h, .{}); + const blind_h = blindCtx(blind_seed, ctx); const blind_factor = Curve.scalar.reduce(blind_h[0..32].*); const blind_scalar = Curve.scalar.mul(scalar, blind_factor); @@ -259,9 +258,8 @@ pub const Ed25519 = struct { } /// Recover a public key from a blind version of it. - pub fn unblindPublicKey(blind_public_key: [public_length]u8, blind_seed: [blind_seed_length]u8) ![public_length]u8 { - var blind_h: [Sha512.digest_length]u8 = undefined; - Sha512.hash(&blind_seed, &blind_h, .{}); + pub fn unblindPublicKey(blind_public_key: [public_length]u8, blind_seed: [blind_seed_length]u8, ctx: []const u8) ![public_length]u8 { + const blind_h = blindCtx(blind_seed, ctx); const inv_blind_factor = Scalar.fromBytes(blind_h[0..32].*).invert().toBytes(); const public_key = try (try Curve.fromBytes(blind_public_key)).mul(inv_blind_factor); return public_key.toBytes(); @@ -297,6 +295,17 @@ pub const Ed25519 = struct { mem.copy(u8, sig[32..], s[0..]); return sig; } + + /// Compute a blind context from a blinding seed and a context. + fn blindCtx(blind_seed: [blind_seed_length]u8, ctx: []const u8) [Sha512.digest_length]u8 { + var blind_h: [Sha512.digest_length]u8 = undefined; + var hx = Sha512.init(.{}); + hx.update(&blind_seed); + hx.update(&[1]u8{0}); + hx.update(ctx); + hx.final(&blind_h); + return blind_h; + } }; }; @@ -458,7 +467,7 @@ test "ed25519 with blind keys" { crypto.random.bytes(&blind); // Blind the key pair - const blind_kp = try BlindKeySignatures.blind(kp, blind); + const blind_kp = try BlindKeySignatures.blind(kp, blind, "ctx"); // Sign a message and check that it can be verified with the blind public key const msg = "test"; @@ -466,6 +475,6 @@ test "ed25519 with blind keys" { try Ed25519.verify(sig, msg, blind_kp.blind_public_key); // Unblind the public key - const pk = try BlindKeySignatures.unblindPublicKey(blind_kp.blind_public_key, blind); + const pk = try BlindKeySignatures.unblindPublicKey(blind_kp.blind_public_key, blind, "ctx"); try std.testing.expectEqualSlices(u8, &pk, &kp.public_key); } From 6547c3887eb604cf4d494d307927a50a12117708 Mon Sep 17 00:00:00 2001 From: Veikka Tuominen Date: Tue, 2 Aug 2022 19:03:38 +0300 Subject: [PATCH 13/33] Sema: add error for closure capture at runtime --- src/Sema.zig | 30 +++++++++++++++++++ ..._runtime_parameter_from_outer_function.zig | 9 +++--- 2 files changed, 34 insertions(+), 5 deletions(-) rename test/cases/compile_errors/{stage1/obj => }/accessing_runtime_parameter_from_outer_function.zig (52%) diff --git a/src/Sema.zig b/src/Sema.zig index e41602a037..7b5a7cdf26 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -13143,6 +13143,36 @@ fn zirClosureGet( scope = scope.parent.?; } else unreachable; + if (tv.val.tag() == .generic_poison and !block.is_typeof and !block.is_comptime and sema.func != null) { + const msg = msg: { + const name = name: { + const file = sema.owner_decl.getFileScope(); + const tree = file.getTree(sema.mod.gpa) catch |err| { + // In this case we emit a warning + a less precise source location. 
+ log.warn("unable to load {s}: {s}", .{ + file.sub_file_path, @errorName(err), + }); + break :name null; + }; + const node = sema.owner_decl.relativeToNodeIndex(inst_data.src_node); + const token = tree.nodes.items(.main_token)[node]; + break :name tree.tokenSlice(token); + }; + + const msg = if (name) |some| + try sema.errMsg(block, inst_data.src(), "'{s}' not accessible from inner function", .{some}) + else + try sema.errMsg(block, inst_data.src(), "variable not accessible from inner function", .{}); + errdefer msg.destroy(sema.gpa); + + try sema.errNote(block, LazySrcLoc.nodeOffset(0), msg, "crossed function definition here", .{}); + + // TODO add "declared here" note + break :msg msg; + }; + return sema.failWithOwnedErrorMsg(block, msg); + } + return sema.addConstant(tv.ty, tv.val); } diff --git a/test/cases/compile_errors/stage1/obj/accessing_runtime_parameter_from_outer_function.zig b/test/cases/compile_errors/accessing_runtime_parameter_from_outer_function.zig similarity index 52% rename from test/cases/compile_errors/stage1/obj/accessing_runtime_parameter_from_outer_function.zig rename to test/cases/compile_errors/accessing_runtime_parameter_from_outer_function.zig index 73a9e94d44..49ef3b4d4d 100644 --- a/test/cases/compile_errors/stage1/obj/accessing_runtime_parameter_from_outer_function.zig +++ b/test/cases/compile_errors/accessing_runtime_parameter_from_outer_function.zig @@ -1,4 +1,4 @@ -fn outer(y: u32) fn (u32) u32 { +fn outer(y: u32) *const fn (u32) u32 { const st = struct { fn get(z: u32) u32 { return z + y; @@ -13,9 +13,8 @@ export fn entry() void { } // error -// backend=stage1 +// backend=stage2 // target=native // -// tmp.zig:4:24: error: 'y' not accessible from inner function -// tmp.zig:3:28: note: crossed function definition here -// tmp.zig:1:10: note: declared here +// :4:24: error: 'y' not accessible from inner function +// :3:9: note: crossed function definition here From 02112f88364d0564346a71d4f9fcd1936547d725 Mon Sep 17 00:00:00 2001 From: Veikka Tuominen Date: Tue, 2 Aug 2022 19:15:41 +0300 Subject: [PATCH 14/33] AstGen: add error for break/continue out of defer expression --- src/AstGen.zig | 46 ++++++++++++++++++- .../cannot_break_out_of_defer_expression.zig | 5 +- ...annot_continue_out_of_defer_expression.zig | 5 +- 3 files changed, 51 insertions(+), 5 deletions(-) rename test/cases/compile_errors/{stage1/obj => }/cannot_break_out_of_defer_expression.zig (53%) rename test/cases/compile_errors/{stage1/obj => }/cannot_continue_out_of_defer_expression.zig (53%) diff --git a/src/AstGen.zig b/src/AstGen.zig index b2bbb27865..051f1dace8 100644 --- a/src/AstGen.zig +++ b/src/AstGen.zig @@ -1899,6 +1899,17 @@ fn breakExpr(parent_gz: *GenZir, parent_scope: *Scope, node: Ast.Node.Index) Inn .local_ptr => scope = scope.cast(Scope.LocalPtr).?.parent, .namespace => break, .defer_normal, .defer_error => scope = scope.cast(Scope.Defer).?.parent, + .defer_gen => { + const defer_gen = scope.cast(Scope.DeferGen).?; + + return astgen.failNodeNotes(node, "cannot break out of defer expression", .{}, &.{ + try astgen.errNoteNode( + defer_gen.defer_node, + "defer expression here", + .{}, + ), + }); + }, .top => unreachable, } } @@ -1958,6 +1969,17 @@ fn continueExpr(parent_gz: *GenZir, parent_scope: *Scope, node: Ast.Node.Index) try unusedResultDeferExpr(parent_gz, defer_scope, defer_scope.parent, expr_node); }, .defer_error => scope = scope.cast(Scope.Defer).?.parent, + .defer_gen => { + const defer_gen = scope.cast(Scope.DeferGen).?; + + return astgen.failNodeNotes(node, 
"cannot continue out of defer expression", .{}, &.{ + try astgen.errNoteNode( + defer_gen.defer_node, + "defer expression here", + .{}, + ), + }); + }, .namespace => break, .top => unreachable, } @@ -2022,6 +2044,7 @@ fn checkLabelRedefinition(astgen: *AstGen, parent_scope: *Scope, label: Ast.Toke .local_val => scope = scope.cast(Scope.LocalVal).?.parent, .local_ptr => scope = scope.cast(Scope.LocalPtr).?.parent, .defer_normal, .defer_error => scope = scope.cast(Scope.Defer).?.parent, + .defer_gen => scope = scope.cast(Scope.DeferGen).?.parent, .namespace => break, .top => unreachable, } @@ -2206,7 +2229,13 @@ fn unusedResultDeferExpr(gz: *GenZir, defer_scope: *Scope.Defer, expr_scope: *Sc astgen.source_offset = defer_scope.source_offset; astgen.source_line = defer_scope.source_line; astgen.source_column = defer_scope.source_column; - _ = try unusedResultExpr(gz, expr_scope, expr_node); + + var defer_gen: Scope.DeferGen = .{ + .parent = expr_scope, + .defer_node = defer_scope.defer_node, + }; + + _ = try unusedResultExpr(gz, &defer_gen.base, expr_node); } /// Returns AST source node of the thing that is noreturn if the statement is @@ -2553,6 +2582,7 @@ fn countDefers(astgen: *AstGen, outer_scope: *Scope, inner_scope: *Scope) struct .gen_zir => scope = scope.cast(GenZir).?.parent, .local_val => scope = scope.cast(Scope.LocalVal).?.parent, .local_ptr => scope = scope.cast(Scope.LocalPtr).?.parent, + .defer_gen => scope = scope.cast(Scope.DeferGen).?.parent, .defer_normal => { const defer_scope = scope.cast(Scope.Defer).?; scope = defer_scope.parent; @@ -2602,6 +2632,7 @@ fn genDefers( .gen_zir => scope = scope.cast(GenZir).?.parent, .local_val => scope = scope.cast(Scope.LocalVal).?.parent, .local_ptr => scope = scope.cast(Scope.LocalPtr).?.parent, + .defer_gen => scope = scope.cast(Scope.DeferGen).?.parent, .defer_normal => { const defer_scope = scope.cast(Scope.Defer).?; scope = defer_scope.parent; @@ -2681,6 +2712,7 @@ fn checkUsed( scope = s.parent; }, .defer_normal, .defer_error => scope = scope.cast(Scope.Defer).?.parent, + .defer_gen => scope = scope.cast(Scope.DeferGen).?.parent, .namespace => unreachable, .top => unreachable, } @@ -4040,6 +4072,7 @@ fn testDecl( .local_val, .local_ptr => unreachable, // a test cannot be in a local scope .gen_zir => s = s.cast(GenZir).?.parent, .defer_normal, .defer_error => s = s.cast(Scope.Defer).?.parent, + .defer_gen => s = s.cast(Scope.DeferGen).?.parent, .namespace => { const ns = s.cast(Scope.Namespace).?; if (ns.decls.get(name_str_index)) |i| { @@ -6730,6 +6763,7 @@ fn localVarRef( }, .gen_zir => s = s.cast(GenZir).?.parent, .defer_normal, .defer_error => s = s.cast(Scope.Defer).?.parent, + .defer_gen => s = s.cast(Scope.DeferGen).?.parent, .namespace => { const ns = s.cast(Scope.Namespace).?; if (ns.decls.get(name_str_index)) |i| { @@ -7351,6 +7385,7 @@ fn builtinCall( }, .gen_zir => s = s.cast(GenZir).?.parent, .defer_normal, .defer_error => s = s.cast(Scope.Defer).?.parent, + .defer_gen => s = s.cast(Scope.DeferGen).?.parent, .namespace => { const ns = s.cast(Scope.Namespace).?; if (ns.decls.get(decl_name)) |i| { @@ -9808,6 +9843,7 @@ const Scope = struct { local_ptr, defer_normal, defer_error, + defer_gen, namespace, top, }; @@ -9905,6 +9941,13 @@ const Scope = struct { const base_tag: Scope.Tag = .top; base: Scope = Scope{ .tag = base_tag }, }; + + const DeferGen = struct { + const base_tag: Scope.Tag = .defer_gen; + base: Scope = Scope{ .tag = base_tag }, + parent: *Scope, + defer_node: Ast.Node.Index, + }; }; /// This is a temporary 
structure; references to it are valid only @@ -11415,6 +11458,7 @@ fn detectLocalShadowing( }, .gen_zir => s = s.cast(GenZir).?.parent, .defer_normal, .defer_error => s = s.cast(Scope.Defer).?.parent, + .defer_gen => s = s.cast(Scope.DeferGen).?.parent, .top => break, }; } diff --git a/test/cases/compile_errors/stage1/obj/cannot_break_out_of_defer_expression.zig b/test/cases/compile_errors/cannot_break_out_of_defer_expression.zig similarity index 53% rename from test/cases/compile_errors/stage1/obj/cannot_break_out_of_defer_expression.zig rename to test/cases/compile_errors/cannot_break_out_of_defer_expression.zig index 3c7ae4fa2f..454309eb89 100644 --- a/test/cases/compile_errors/stage1/obj/cannot_break_out_of_defer_expression.zig +++ b/test/cases/compile_errors/cannot_break_out_of_defer_expression.zig @@ -7,7 +7,8 @@ export fn foo() void { } // error -// backend=stage1 +// backend=stage2 // target=native // -// tmp.zig:4:13: error: cannot break out of defer expression +// :4:13: error: cannot break out of defer expression +// :3:9: note: defer expression here diff --git a/test/cases/compile_errors/stage1/obj/cannot_continue_out_of_defer_expression.zig b/test/cases/compile_errors/cannot_continue_out_of_defer_expression.zig similarity index 53% rename from test/cases/compile_errors/stage1/obj/cannot_continue_out_of_defer_expression.zig rename to test/cases/compile_errors/cannot_continue_out_of_defer_expression.zig index 56b8ced05b..74d11528eb 100644 --- a/test/cases/compile_errors/stage1/obj/cannot_continue_out_of_defer_expression.zig +++ b/test/cases/compile_errors/cannot_continue_out_of_defer_expression.zig @@ -7,7 +7,8 @@ export fn foo() void { } // error -// backend=stage1 +// backend=stage2 // target=native // -// tmp.zig:4:13: error: cannot continue out of defer expression +// :4:13: error: cannot continue out of defer expression +// :3:9: note: defer expression here From 797ded47f05ce033be58d3fb78d777ec3218048b Mon Sep 17 00:00:00 2001 From: Veikka Tuominen Date: Tue, 2 Aug 2022 19:54:13 +0300 Subject: [PATCH 15/33] Sema: move last error message from `Inlining` to `Sema` --- src/Sema.zig | 218 +++++++++++++++++++++++++-------------------------- 1 file changed, 109 insertions(+), 109 deletions(-) diff --git a/src/Sema.zig b/src/Sema.zig index 7b5a7cdf26..a4d815ea3c 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -74,6 +74,8 @@ types_to_resolve: std.ArrayListUnmanaged(Air.Inst.Ref) = .{}, /// Sema must convert comptime control flow to runtime control flow, which means /// breaking from a block. post_hoc_blocks: std.AutoHashMapUnmanaged(Air.Inst.Index, *LabeledBlock) = .{}, +/// Populated with the last compile error created. 
+err: ?*Module.ErrorMsg = null, const std = @import("std"); const mem = std.mem; @@ -174,7 +176,6 @@ pub const Block = struct { pub const Inlining = struct { comptime_result: Air.Inst.Ref, merges: Merges, - err: ?*Module.ErrorMsg = null, }; pub const Merges = struct { @@ -1159,7 +1160,7 @@ fn analyzeBodyInner( try sema.errNote(block, runtime_src, msg, "runtime control flow here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } i += 1; @@ -1738,7 +1739,7 @@ fn failWithNeededComptime(sema: *Sema, block: *Block, src: LazySrcLoc, reason: [ try sema.errNote(block, src, msg, "{s}", .{reason}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } fn failWithUseOfUndef(sema: *Sema, block: *Block, src: LazySrcLoc) CompileError { @@ -1770,7 +1771,7 @@ fn failWithArrayInitNotSupported(sema: *Sema, block: *Block, src: LazySrcLoc, ty } break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } fn failWithStructInitNotSupported(sema: *Sema, block: *Block, src: LazySrcLoc, ty: Type) CompileError { @@ -1801,7 +1802,7 @@ fn failWithIntegerOverflow(sema: *Sema, block: *Block, src: LazySrcLoc, int_ty: try sema.errNote(block, src, msg, "when computing vector element at index '{d}'", .{vector_index}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } return sema.fail(block, src, "overflow of integer type '{}' with value '{}'", .{ int_ty.fmt(sema.mod), val.fmtValue(int_ty, sema.mod), @@ -1823,7 +1824,7 @@ fn failWithInvalidComptimeFieldStore(sema: *Sema, block: *Block, init_src: LazyS try sema.errNote(block, default_value_src, msg, "default value set here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } /// We don't return a pointer to the new error note because the pointer @@ -1878,10 +1879,10 @@ pub fn fail( args: anytype, ) CompileError { const err_msg = try sema.errMsg(block, src, format, args); - return sema.failWithOwnedErrorMsg(block, err_msg); + return sema.failWithOwnedErrorMsg(err_msg); } -fn failWithOwnedErrorMsg(sema: *Sema, block: *Block, err_msg: *Module.ErrorMsg) CompileError { +fn failWithOwnedErrorMsg(sema: *Sema, err_msg: *Module.ErrorMsg) CompileError { @setCold(true); if (crash_report.is_enabled and sema.mod.comp.debug_compile_errors) { @@ -1894,7 +1895,7 @@ fn failWithOwnedErrorMsg(sema: *Sema, block: *Block, err_msg: *Module.ErrorMsg) } const mod = sema.mod; - if (block.inlining) |some| some.err = err_msg; + sema.err = err_msg; { errdefer err_msg.destroy(mod.gpa); @@ -2591,7 +2592,7 @@ fn zirEnumDecl( try sema.errNote(block, other_tag_src, msg, "other field here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } if (has_tag_value) { @@ -2886,7 +2887,7 @@ fn ensureResultUsed( try sema.errNote(block, src, msg, "consider using `try`, `catch`, or `if`", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }, else => { const msg = msg: { @@ -2896,7 +2897,7 @@ fn ensureResultUsed( try sema.errNote(block, src, msg, "this error can be suppressed by assigning the value to '_'", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }, } } @@ -2917,7 +2918,7 @@ fn zirEnsureResultNonError(sema: 
*Sema, block: *Block, inst: Zir.Inst.Index) Com try sema.errNote(block, src, msg, "consider using `try`, `catch`, or `if`", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }, else => return, } @@ -2957,7 +2958,7 @@ fn zirIndexablePtrLen(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileE ); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } return sema.fieldVal(block, src, object, "len", src); @@ -3615,7 +3616,7 @@ fn validateUnionInit( try sema.addDeclaredHereNote(msg, union_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } if ((is_comptime or block.is_comptime) and @@ -3747,7 +3748,7 @@ fn validateStructInit( try sema.errNote(block, other_field_src, msg, "other field here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } found_fields[field_index] = field_ptr; } @@ -3808,7 +3809,7 @@ fn validateStructInit( .{fqn}, ); } - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } return; @@ -3938,7 +3939,7 @@ fn validateStructInit( .{fqn}, ); } - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } if (struct_is_comptime) { @@ -4000,7 +4001,7 @@ fn zirValidateArrayInit( } if (root_msg) |msg| { - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } @@ -4180,7 +4181,7 @@ fn zirValidateDeref(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileErr try sema.explainWhyTypeIsComptime(block, src, msg, src.toSrcLoc(src_decl), elem_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } @@ -4206,7 +4207,7 @@ fn failWithBadMemberAccess( try sema.addDeclaredHereNote(msg, agg_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } fn failWithBadStructFieldAccess( @@ -4232,7 +4233,7 @@ fn failWithBadStructFieldAccess( try sema.mod.errNoteNonLazy(struct_obj.srcLoc(sema.mod), msg, "struct declared here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } fn failWithBadUnionFieldAccess( @@ -4258,7 +4259,7 @@ fn failWithBadUnionFieldAccess( try sema.mod.errNoteNonLazy(union_obj.srcLoc(sema.mod), msg, "union declared here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } fn addDeclaredHereNote(sema: *Sema, parent: *Module.ErrorMsg, decl_ty: Type) !void { @@ -4747,7 +4748,7 @@ fn zirCImport(sema: *Sema, parent_block: *Block, inst: Zir.Inst.Index) CompileEr @import("clang.zig").Stage2ErrorMsg.delete(c_import_res.errors.ptr, c_import_res.errors.len); break :msg msg; }; - return sema.failWithOwnedErrorMsg(parent_block, msg); + return sema.failWithOwnedErrorMsg(msg); } const c_import_pkg = Package.create( sema.gpa, @@ -4921,7 +4922,7 @@ fn analyzeBlockBody( break :msg msg; }; - return sema.failWithOwnedErrorMsg(child_block, msg); + return sema.failWithOwnedErrorMsg(msg); } const ty_inst = try sema.addType(resolved_ty); try sema.air_extra.ensureUnusedCapacity(gpa, @typeInfo(Air.Block).Struct.fields.len + @@ -5060,7 +5061,7 @@ pub fn analyzeExport( try sema.addDeclaredHereNote(msg, exported_decl.ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return 
sema.failWithOwnedErrorMsg(msg); } const gpa = mod.gpa; @@ -5150,7 +5151,7 @@ fn zirSetAlignStack(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.Inst try sema.errNote(block, gop.value_ptr.src, msg, "other instance here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } gop.value_ptr.* = .{ .alignment = alignment, .src = src }; } @@ -5413,7 +5414,7 @@ fn lookupInNamespace( } break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }, } } else if (namespace.decls.getKeyAdapted(ident_name, Module.DeclAdapter{ .mod = mod })) |decl_index| { @@ -5872,9 +5873,8 @@ fn analyzeCall( sema.analyzeBody(&child_block, fn_info.body) catch |err| switch (err) { error.ComptimeReturn => break :result inlining.comptime_result, error.AnalysisFail => { - const err_msg = inlining.err orelse return err; + const err_msg = sema.err orelse return err; try sema.errNote(block, call_src, err_msg, "called from here", .{}); - if (block.inlining) |some| some.err = err_msg; return err; }, else => |e| return e, @@ -6802,7 +6802,7 @@ fn zirMergeErrorSets(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileEr try sema.errNote(block, src, msg, "'||' merges error sets; 'or' performs boolean OR", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } const lhs_ty = try sema.analyzeAsType(block, lhs_src, lhs); const rhs_ty = try sema.analyzeAsType(block, rhs_src, rhs); @@ -6927,7 +6927,7 @@ fn zirIntToEnum(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!A try sema.addDeclaredHereNote(msg, dest_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } return sema.addConstant(dest_ty, int_val); } @@ -7632,7 +7632,7 @@ fn funcCommon( try sema.addDeclaredHereNote(msg, bare_return_type); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } if (!Type.fnCallingConventionAllowsZigTypes(cc_workaround) and !sema.validateExternType(return_type, .ret_ty)) { const msg = msg: { @@ -7647,7 +7647,7 @@ fn funcCommon( try sema.addDeclaredHereNote(msg, return_type); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } const arch = sema.mod.getTarget().cpu.arch; @@ -7812,7 +7812,7 @@ fn analyzeParameter( try sema.errNote(block, param_src, msg, "function is generic because of this parameter", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } if (this_generic and !Type.fnCallingConventionAllowsZigTypes(cc)) { return sema.fail(block, param_src, "generic parameters not allowed in function with calling convention '{s}'", .{@tagName(cc)}); @@ -7828,7 +7828,7 @@ fn analyzeParameter( try sema.addDeclaredHereNote(msg, param.ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } if (!Type.fnCallingConventionAllowsZigTypes(cc) and !sema.validateExternType(param.ty, .param_ty)) { const msg = msg: { @@ -7843,7 +7843,7 @@ fn analyzeParameter( try sema.addDeclaredHereNote(msg, param.ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } if (requires_comptime and !param.is_comptime) { const msg = msg: { @@ -7855,7 +7855,7 @@ fn analyzeParameter( try sema.addDeclaredHereNote(msg, param.ty); 
break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } @@ -8318,7 +8318,7 @@ fn zirBitcast(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }, .Pointer => { @@ -8333,7 +8333,7 @@ fn zirBitcast(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }, .Struct, .Union => if (dest_ty.containerLayout() == .Auto) { const container = switch (dest_ty.zigTypeTag()) { @@ -8383,7 +8383,7 @@ fn zirBitcast(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }, .Pointer => { const msg = msg: { @@ -8397,7 +8397,7 @@ fn zirBitcast(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }, .Struct, .Union => if (operand_ty.containerLayout() == .Auto) { const container = switch (operand_ty.zigTypeTag()) { @@ -8663,7 +8663,7 @@ fn zirSwitchCapture( try sema.errNote(block, item_src, msg, "type '{}' here", .{field.ty.fmt(sema.mod)}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } @@ -8786,7 +8786,7 @@ fn zirSwitchCond( } break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }; return sema.unionToTag(block, enum_ty, operand, src); }, @@ -8875,7 +8875,7 @@ fn zirSwitchBlock(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError ); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } const target = sema.mod.getTarget(); @@ -8979,7 +8979,7 @@ fn zirSwitchBlock(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError ); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } else if (special_prong == .none and operand_ty.isNonexhaustiveEnum() and !union_originally) { return sema.fail( block, @@ -9079,7 +9079,7 @@ fn zirSwitchBlock(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError if (maybe_msg) |msg| { maybe_msg = null; try sema.addDeclaredHereNote(msg, operand_ty); - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } if (special_prong == .@"else" and seen_errors.count() == operand_ty.errorSetNames().len) { @@ -9888,7 +9888,7 @@ fn validateSwitchDupe( ); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } fn validateSwitchItemBool( @@ -9958,7 +9958,7 @@ fn validateSwitchNoRange( ); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } fn zirHasField(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref { @@ -12882,7 +12882,7 @@ fn analyzeCmpUnionTag( try sema.mod.errNoteNonLazy(union_ty.declSrcLoc(sema.mod), msg, "union '{}' is not a tagged union", .{union_ty.fmt(sema.mod)}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }; // Coerce both the union and the tag to the union's tag type, and then execute the // enum comparison codepath. 
@@ -13170,7 +13170,7 @@ fn zirClosureGet( // TODO add "declared here" note break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } return sema.addConstant(tv.ty, tv.val); @@ -14907,7 +14907,7 @@ fn zirPtrType(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air try sema.addDeclaredHereNote(msg, elem_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } if (elem_ty.zigTypeTag() == .Opaque) { return sema.fail(block, elem_ty_src, "C pointers cannot point to opaque types", .{}); @@ -15078,7 +15078,7 @@ fn zirStructInit( try sema.errNote(block, other_field_src, msg, "other field here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } found_fields[field_index] = item.data.field_type; field_inits[field_index] = try sema.resolveInst(item.data.init); @@ -15219,7 +15219,7 @@ fn finishStructInit( .{fqn}, ); } - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } const is_comptime = for (field_inits) |field_init| { @@ -15294,7 +15294,7 @@ fn zirStructInitAnon( try sema.errNote(block, prev_source, msg, "other field here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } gop.value_ptr.* = @intCast(u32, i); @@ -15310,7 +15310,7 @@ fn zirStructInitAnon( try sema.addDeclaredHereNote(msg, types[i]); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } const init_src = src; // TODO better source location if (try sema.resolveMaybeUndefVal(block, init_src, init)) |init_val| { @@ -15508,7 +15508,7 @@ fn zirArrayInitAnon( try sema.addDeclaredHereNote(msg, types[i]); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } if (try sema.resolveMaybeUndefVal(block, operand_src, elem)) |val| { values[i] = val; @@ -15822,7 +15822,7 @@ fn zirTagName(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air try sema.addDeclaredHereNote(msg, operand_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }, else => return sema.fail(block, operand_src, "expected enum or union; found '{}'", .{ operand_ty.fmt(mod), @@ -15841,7 +15841,7 @@ fn zirTagName(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air try mod.errNoteNonLazy(enum_decl.srcLoc(), msg, "declared here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }; const field_name = enum_ty.enumFieldName(field_index); return sema.addStrLit(block, field_name); @@ -15991,7 +15991,7 @@ fn zirReify(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.I try sema.addDeclaredHereNote(msg, elem_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } if (elem_ty.zigTypeTag() == .Opaque) { return sema.fail(block, src, "C pointers cannot point to opaque types", .{}); @@ -16394,7 +16394,7 @@ fn zirReify(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.I try sema.addDeclaredHereNote(msg, union_obj.tag_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } @@ -16428,7 +16428,7 @@ fn zirReify(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.I try 
sema.addDeclaredHereNote(msg, union_obj.tag_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } @@ -16898,7 +16898,7 @@ fn zirErrSetCast(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.InstDat try sema.addDeclaredHereNote(msg, dest_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } if (maybe_operand_val) |val| { @@ -16916,7 +16916,7 @@ fn zirErrSetCast(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.InstDat try sema.addDeclaredHereNote(msg, dest_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } @@ -17054,7 +17054,7 @@ fn zirTruncate(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Ai }); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } @@ -17345,7 +17345,7 @@ fn bitOffsetOf(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!u6 try sema.addDeclaredHereNote(msg, ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }, } @@ -17449,7 +17449,7 @@ fn checkPtrOperand( break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }, .Optional => if (ty.isPtrLikeOptional()) return, else => {}, @@ -17479,7 +17479,7 @@ fn checkPtrType( break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }, .Optional => if (ty.isPtrLikeOptional()) return, else => {}, @@ -17615,7 +17615,7 @@ fn checkComptimeVarStore( try sema.errNote(block, cond_src, msg, "runtime condition here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } if (block.runtime_loop) |loop_src| { const msg = msg: { @@ -17624,7 +17624,7 @@ fn checkComptimeVarStore( try sema.errNote(block, loop_src, msg, "non-inline loop here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } unreachable; } @@ -17761,7 +17761,7 @@ fn checkVectorizableBinaryOperands( try sema.errNote(block, rhs_src, msg, "length {d} here", .{rhs_len}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } else { const msg = msg: { @@ -17778,7 +17778,7 @@ fn checkVectorizableBinaryOperands( } break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } @@ -18178,7 +18178,7 @@ fn analyzeShuffle( break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } @@ -18774,7 +18774,7 @@ fn zirFieldParentPtr(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileEr try sema.addDeclaredHereNote(msg, struct_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } return sema.addConstant(result_ptr, payload.data.container_ptr); } @@ -19574,7 +19574,7 @@ fn requireRuntimeBlock(sema: *Sema, block: *Block, src: LazySrcLoc, runtime_src: } break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } try sema.requireFunctionBlock(block, src); } @@ -19603,7 +19603,7 @@ fn validateVarType( break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } fn validateRunTimeType( @@ 
-20305,7 +20305,7 @@ fn emitBackwardBranch(sema: *Sema, block: *Block, src: LazySrcLoc) !void { "use @setEvalBranchQuota() to raise the branch limit from {d}", .{sema.branch_quota}, ); - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } @@ -20413,7 +20413,7 @@ fn fieldVal( try sema.addDeclaredHereNote(msg, child_type); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } else (try sema.mod.getErrorValue(field_name)).key; return sema.addConstant( @@ -20468,7 +20468,7 @@ fn fieldVal( if (child_type.zigTypeTag() == .Array) try sema.errNote(block, src, msg, "array values have 'len' member", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }, } }, @@ -20806,7 +20806,7 @@ fn fieldCallBind( try sema.addDeclaredHereNote(msg, concrete_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } fn finishFieldCallBind( @@ -20861,7 +20861,7 @@ fn namespaceLookup( try sema.mod.errNoteNonLazy(decl.srcLoc(), msg, "declared here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } return decl_index; } @@ -21194,7 +21194,7 @@ fn unionFieldPtr( try sema.addDeclaredHereNote(msg, union_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } }, .Packed, .Extern => {}, @@ -21264,7 +21264,7 @@ fn unionFieldVal( try sema.addDeclaredHereNote(msg, union_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } }, .Packed, .Extern => { @@ -21438,7 +21438,7 @@ fn validateRuntimeElemAccess( break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } @@ -22214,7 +22214,7 @@ fn coerceExtra( try sema.addDeclaredHereNote(msg, dest_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }; return sema.addConstant( dest_ty, @@ -22342,7 +22342,7 @@ fn coerceExtra( try sema.mod.errNoteNonLazy(ret_ty_src.toSrcLoc(src_decl), msg, "'noreturn' declared here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } const msg = msg: { @@ -22383,7 +22383,7 @@ fn coerceExtra( break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } const InMemoryCoercionResult = union(enum) { @@ -24448,7 +24448,7 @@ fn coerceEnumToUnion( try sema.addDeclaredHereNote(msg, union_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }; const enum_tag = try sema.coerce(block, tag_ty, inst, inst_src); @@ -24463,7 +24463,7 @@ fn coerceEnumToUnion( try sema.addDeclaredHereNote(msg, union_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }; const field = union_obj.fields.values()[field_index]; const field_ty = try sema.resolveTypeFields(block, inst_src, field.ty); @@ -24479,7 +24479,7 @@ fn coerceEnumToUnion( try sema.addDeclaredHereNote(msg, union_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }; return sema.addConstant(union_ty, try Value.Tag.@"union".create(sema.arena, .{ @@ -24499,7 +24499,7 @@ fn coerceEnumToUnion( try 
sema.addDeclaredHereNote(msg, tag_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } // If the union has all fields 0 bits, the union value is just the enum value. @@ -24528,7 +24528,7 @@ fn coerceEnumToUnion( try sema.addDeclaredHereNote(msg, union_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } fn coerceAnonStructToUnion( @@ -24557,7 +24557,7 @@ fn coerceAnonStructToUnion( try sema.addDeclaredHereNote(msg, union_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } const field_name = anon_struct.names[0]; @@ -24617,7 +24617,7 @@ fn coerceArrayLike( try sema.errNote(block, inst_src, msg, "source has length {d}", .{inst_len}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } const dest_elem_ty = dest_ty.childType(); @@ -24689,7 +24689,7 @@ fn coerceTupleToArray( try sema.errNote(block, inst_src, msg, "source has length {d}", .{inst_len}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } const dest_elems = try sema.usizeCast(block, dest_ty_src, dest_ty.arrayLenIncludingSentinel()); @@ -24852,7 +24852,7 @@ fn coerceTupleToStruct( if (root_msg) |msg| { try sema.addDeclaredHereNote(msg, struct_ty); - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } if (runtime_src) |rs| { @@ -24956,7 +24956,7 @@ fn coerceTupleToTuple( if (root_msg) |msg| { try sema.addDeclaredHereNote(msg, tuple_ty); - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } if (runtime_src) |rs| { @@ -25521,7 +25521,7 @@ fn analyzeSlice( break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } } @@ -26470,7 +26470,7 @@ fn resolvePeerTypes( break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } const chosen_ty = sema.typeOf(chosen); @@ -27068,7 +27068,7 @@ fn semaStructFields(mod: *Module, struct_obj: *Module.Struct) CompileError!void try sema.errNote(&block_scope, src, msg, "struct declared here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(&block_scope, msg); + return sema.failWithOwnedErrorMsg(msg); } gop.value_ptr.* = .{ .ty = Type.initTag(.noreturn), @@ -27127,7 +27127,7 @@ fn semaStructFields(mod: *Module, struct_obj: *Module.Struct) CompileError!void try sema.addDeclaredHereNote(msg, field_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(&block_scope, msg); + return sema.failWithOwnedErrorMsg(msg); } if (struct_obj.layout == .Extern and !sema.validateExternType(field.ty, .other)) { const msg = msg: { @@ -27141,7 +27141,7 @@ fn semaStructFields(mod: *Module, struct_obj: *Module.Struct) CompileError!void try sema.addDeclaredHereNote(msg, field.ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(&block_scope, msg); + return sema.failWithOwnedErrorMsg(msg); } else if (struct_obj.layout == .Packed and !(validatePackedType(field.ty))) { const msg = msg: { const tree = try sema.getAstTree(&block_scope); @@ -27154,7 +27154,7 @@ fn semaStructFields(mod: *Module, struct_obj: *Module.Struct) CompileError!void try sema.addDeclaredHereNote(msg, field.ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(&block_scope, msg); + return sema.failWithOwnedErrorMsg(msg); } if 
(zir_field.align_body_len > 0) { @@ -27436,7 +27436,7 @@ fn semaUnionFields(mod: *Module, union_obj: *Module.Union) CompileError!void { try sema.errNote(&block_scope, src, msg, "union declared here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(&block_scope, msg); + return sema.failWithOwnedErrorMsg(msg); } if (tag_ty_field_names) |*names| { @@ -27450,7 +27450,7 @@ fn semaUnionFields(mod: *Module, union_obj: *Module.Union) CompileError!void { try sema.addDeclaredHereNote(msg, union_obj.tag_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(&block_scope, msg); + return sema.failWithOwnedErrorMsg(msg); } } @@ -27464,7 +27464,7 @@ fn semaUnionFields(mod: *Module, union_obj: *Module.Union) CompileError!void { try sema.addDeclaredHereNote(msg, field_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(&block_scope, msg); + return sema.failWithOwnedErrorMsg(msg); } if (union_obj.layout == .Extern and !sema.validateExternType(field_ty, .union_field)) { const msg = msg: { @@ -27478,7 +27478,7 @@ fn semaUnionFields(mod: *Module, union_obj: *Module.Union) CompileError!void { try sema.addDeclaredHereNote(msg, field_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(&block_scope, msg); + return sema.failWithOwnedErrorMsg(msg); } else if (union_obj.layout == .Packed and !(validatePackedType(field_ty))) { const msg = msg: { const tree = try sema.getAstTree(&block_scope); @@ -27491,7 +27491,7 @@ fn semaUnionFields(mod: *Module, union_obj: *Module.Union) CompileError!void { try sema.addDeclaredHereNote(msg, field_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(&block_scope, msg); + return sema.failWithOwnedErrorMsg(msg); } gop.value_ptr.* = .{ @@ -27523,7 +27523,7 @@ fn semaUnionFields(mod: *Module, union_obj: *Module.Union) CompileError!void { try sema.addDeclaredHereNote(msg, union_obj.tag_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(&block_scope, msg); + return sema.failWithOwnedErrorMsg(msg); } } } From aa78ebaf95af5a3587194d8dbcb101a49c0bb898 Mon Sep 17 00:00:00 2001 From: Veikka Tuominen Date: Tue, 2 Aug 2022 19:54:54 +0300 Subject: [PATCH 16/33] Sema: improve circular dependency errors --- src/Sema.zig | 109 +++++++++++++----- .../compile_errors/direct_struct_loop.zig | 9 ++ .../compile_errors/indirect_struct_loop.zig | 13 +++ ...an_invalid_struct_that_contains_itself.zig | 5 +- ..._an_invalid_union_that_contains_itself.zig | 16 +++ .../stage1/obj/direct_struct_loop.zig | 8 -- .../stage1/obj/indirect_struct_loop.zig | 10 -- ...t_depends_on_itself_via_optional_field.zig | 8 +- 8 files changed, 128 insertions(+), 50 deletions(-) create mode 100644 test/cases/compile_errors/direct_struct_loop.zig create mode 100644 test/cases/compile_errors/indirect_struct_loop.zig rename test/cases/compile_errors/{stage1/obj => }/instantiating_an_undefined_value_for_an_invalid_struct_that_contains_itself.zig (58%) create mode 100644 test/cases/compile_errors/instantiating_an_undefined_value_for_an_invalid_union_that_contains_itself.zig delete mode 100644 test/cases/compile_errors/stage1/obj/direct_struct_loop.zig delete mode 100644 test/cases/compile_errors/stage1/obj/indirect_struct_loop.zig rename test/cases/compile_errors/{stage1/obj => }/struct_depends_on_itself_via_optional_field.zig (61%) diff --git a/src/Sema.zig b/src/Sema.zig index a4d815ea3c..6b376f3d6c 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -26629,21 +26629,41 @@ fn resolveStructLayout( switch (struct_obj.status) { .none, .have_field_types => {}, .field_types_wip, .layout_wip => { 
- return sema.fail(block, src, "struct '{}' depends on itself", .{ty.fmt(sema.mod)}); + const msg = try Module.ErrorMsg.create( + sema.gpa, + struct_obj.srcLoc(sema.mod), + "struct '{}' depends on itself", + .{ty.fmt(sema.mod)}, + ); + return sema.failWithOwnedErrorMsg(msg); }, .have_layout, .fully_resolved_wip, .fully_resolved => return, } struct_obj.status = .layout_wip; - for (struct_obj.fields.values()) |field| { - try sema.resolveTypeLayout(block, src, field.ty); + for (struct_obj.fields.values()) |field, i| { + sema.resolveTypeLayout(block, src, field.ty) catch |err| switch (err) { + error.AnalysisFail => { + const msg = sema.err orelse return err; + try sema.addFieldErrNote(block, ty, i, msg, "while checking this field", .{}); + return err; + }, + else => return err, + }; } struct_obj.status = .have_layout; // In case of querying the ABI alignment of this struct, we will ask // for hasRuntimeBits() of each field, so we need "requires comptime" // to be known already before this function returns. - for (struct_obj.fields.values()) |field| { - _ = try sema.typeRequiresComptime(block, src, field.ty); + for (struct_obj.fields.values()) |field, i| { + _ = sema.typeRequiresComptime(block, src, field.ty) catch |err| switch (err) { + error.AnalysisFail => { + const msg = sema.err orelse return err; + try sema.addFieldErrNote(block, ty, i, msg, "while checking this field", .{}); + return err; + }, + else => return err, + }; } } // otherwise it's a tuple; no need to resolve anything @@ -26660,13 +26680,26 @@ fn resolveUnionLayout( switch (union_obj.status) { .none, .have_field_types => {}, .field_types_wip, .layout_wip => { - return sema.fail(block, src, "union '{}' depends on itself", .{ty.fmt(sema.mod)}); + const msg = try Module.ErrorMsg.create( + sema.gpa, + union_obj.srcLoc(sema.mod), + "union '{}' depends on itself", + .{ty.fmt(sema.mod)}, + ); + return sema.failWithOwnedErrorMsg(msg); }, .have_layout, .fully_resolved_wip, .fully_resolved => return, } union_obj.status = .layout_wip; - for (union_obj.fields.values()) |field| { - try sema.resolveTypeLayout(block, src, field.ty); + for (union_obj.fields.values()) |field, i| { + sema.resolveTypeLayout(block, src, field.ty) catch |err| switch (err) { + error.AnalysisFail => { + const msg = sema.err orelse return err; + try sema.addFieldErrNote(block, ty, i, msg, "while checking this field", .{}); + return err; + }, + else => return err, + }; } union_obj.status = .have_layout; } @@ -26794,12 +26827,12 @@ pub fn resolveTypeFields(sema: *Sema, block: *Block, src: LazySrcLoc, ty: Type) switch (ty.tag()) { .@"struct" => { const struct_obj = ty.castTag(.@"struct").?.data; - try sema.resolveTypeFieldsStruct(block, src, ty, struct_obj); + try sema.resolveTypeFieldsStruct(ty, struct_obj); return ty; }, .@"union", .union_safety_tagged, .union_tagged => { const union_obj = ty.cast(Type.Payload.Union).?.data; - try sema.resolveTypeFieldsUnion(block, src, ty, union_obj); + try sema.resolveTypeFieldsUnion(ty, union_obj); return ty; }, .type_info => return sema.resolveBuiltinTypeFields(block, src, "Type"), @@ -26820,15 +26853,19 @@ pub fn resolveTypeFields(sema: *Sema, block: *Block, src: LazySrcLoc, ty: Type) fn resolveTypeFieldsStruct( sema: *Sema, - block: *Block, - src: LazySrcLoc, ty: Type, struct_obj: *Module.Struct, ) CompileError!void { switch (struct_obj.status) { .none => {}, .field_types_wip => { - return sema.fail(block, src, "struct '{}' depends on itself", .{ty.fmt(sema.mod)}); + const msg = try Module.ErrorMsg.create( + sema.gpa, + 
struct_obj.srcLoc(sema.mod), + "struct '{}' depends on itself", + .{ty.fmt(sema.mod)}, + ); + return sema.failWithOwnedErrorMsg(msg); }, .have_field_types, .have_layout, @@ -26842,17 +26879,17 @@ fn resolveTypeFieldsStruct( try semaStructFields(sema.mod, struct_obj); } -fn resolveTypeFieldsUnion( - sema: *Sema, - block: *Block, - src: LazySrcLoc, - ty: Type, - union_obj: *Module.Union, -) CompileError!void { +fn resolveTypeFieldsUnion(sema: *Sema, ty: Type, union_obj: *Module.Union) CompileError!void { switch (union_obj.status) { .none => {}, .field_types_wip => { - return sema.fail(block, src, "union '{}' depends on itself", .{ty.fmt(sema.mod)}); + const msg = try Module.ErrorMsg.create( + sema.gpa, + union_obj.srcLoc(sema.mod), + "union '{}' depends on itself", + .{ty.fmt(sema.mod)}, + ); + return sema.failWithOwnedErrorMsg(msg); }, .have_field_types, .have_layout, @@ -27786,9 +27823,19 @@ pub fn typeHasOnePossibleValue( .@"struct" => { const resolved_ty = try sema.resolveTypeFields(block, src, ty); const s = resolved_ty.castTag(.@"struct").?.data; - for (s.fields.values()) |value| { - if (value.is_comptime) continue; - if ((try sema.typeHasOnePossibleValue(block, src, value.ty)) == null) { + for (s.fields.values()) |field, i| { + if (field.is_comptime) continue; + if (field.ty.eql(resolved_ty, sema.mod)) { + const msg = try Module.ErrorMsg.create( + sema.gpa, + s.srcLoc(sema.mod), + "struct '{}' depends on itself", + .{ty.fmt(sema.mod)}, + ); + try sema.addFieldErrNote(block, resolved_ty, i, msg, "while checking this field", .{}); + return sema.failWithOwnedErrorMsg(msg); + } + if ((try sema.typeHasOnePossibleValue(block, src, field.ty)) == null) { return null; } } @@ -27854,6 +27901,16 @@ pub fn typeHasOnePossibleValue( const tag_val = (try sema.typeHasOnePossibleValue(block, src, union_obj.tag_ty)) orelse return null; const only_field = union_obj.fields.values()[0]; + if (only_field.ty.eql(resolved_ty, sema.mod)) { + const msg = try Module.ErrorMsg.create( + sema.gpa, + union_obj.srcLoc(sema.mod), + "union '{}' depends on itself", + .{ty.fmt(sema.mod)}, + ); + try sema.addFieldErrNote(block, resolved_ty, 0, msg, "while checking this field", .{}); + return sema.failWithOwnedErrorMsg(msg); + } const val_val = (try sema.typeHasOnePossibleValue(block, src, only_field.ty)) orelse return null; // TODO make this not allocate. 
The function in `Type.onePossibleValue` @@ -28493,7 +28550,7 @@ pub fn typeRequiresComptime(sema: *Sema, block: *Block, src: LazySrcLoc, ty: Typ if (struct_obj.status == .field_types_wip) return false; - try sema.resolveTypeFieldsStruct(block, src, ty, struct_obj); + try sema.resolveTypeFieldsStruct(ty, struct_obj); struct_obj.requires_comptime = .wip; for (struct_obj.fields.values()) |field| { @@ -28518,7 +28575,7 @@ pub fn typeRequiresComptime(sema: *Sema, block: *Block, src: LazySrcLoc, ty: Typ if (union_obj.status == .field_types_wip) return false; - try sema.resolveTypeFieldsUnion(block, src, ty, union_obj); + try sema.resolveTypeFieldsUnion(ty, union_obj); union_obj.requires_comptime = .wip; for (union_obj.fields.values()) |field| { diff --git a/test/cases/compile_errors/direct_struct_loop.zig b/test/cases/compile_errors/direct_struct_loop.zig new file mode 100644 index 0000000000..0abc1a4f73 --- /dev/null +++ b/test/cases/compile_errors/direct_struct_loop.zig @@ -0,0 +1,9 @@ +const A = struct { a : A, }; +export fn entry() usize { return @sizeOf(A); } + +// error +// backend=stage2 +// target=native +// +// :1:11: error: struct 'tmp.A' depends on itself +// :1:20: note: while checking this field diff --git a/test/cases/compile_errors/indirect_struct_loop.zig b/test/cases/compile_errors/indirect_struct_loop.zig new file mode 100644 index 0000000000..dca2b9c3f6 --- /dev/null +++ b/test/cases/compile_errors/indirect_struct_loop.zig @@ -0,0 +1,13 @@ +const A = struct { b : B, }; +const B = struct { c : C, }; +const C = struct { a : A, }; +export fn entry() usize { return @sizeOf(A); } + +// error +// backend=stage2 +// target=native +// +// :1:11: error: struct 'tmp.A' depends on itself +// :3:20: note: while checking this field +// :2:20: note: while checking this field +// :1:20: note: while checking this field diff --git a/test/cases/compile_errors/stage1/obj/instantiating_an_undefined_value_for_an_invalid_struct_that_contains_itself.zig b/test/cases/compile_errors/instantiating_an_undefined_value_for_an_invalid_struct_that_contains_itself.zig similarity index 58% rename from test/cases/compile_errors/stage1/obj/instantiating_an_undefined_value_for_an_invalid_struct_that_contains_itself.zig rename to test/cases/compile_errors/instantiating_an_undefined_value_for_an_invalid_struct_that_contains_itself.zig index dd6909b1c2..74cafabe7c 100644 --- a/test/cases/compile_errors/stage1/obj/instantiating_an_undefined_value_for_an_invalid_struct_that_contains_itself.zig +++ b/test/cases/compile_errors/instantiating_an_undefined_value_for_an_invalid_struct_that_contains_itself.zig @@ -9,7 +9,8 @@ export fn entry() usize { } // error -// backend=stage1 +// backend=stage2 // target=native // -// tmp.zig:1:13: error: struct 'Foo' depends on itself +// :1:13: error: struct 'tmp.Foo' depends on itself +// :2:5: note: while checking this field diff --git a/test/cases/compile_errors/instantiating_an_undefined_value_for_an_invalid_union_that_contains_itself.zig b/test/cases/compile_errors/instantiating_an_undefined_value_for_an_invalid_union_that_contains_itself.zig new file mode 100644 index 0000000000..6030ca4d3e --- /dev/null +++ b/test/cases/compile_errors/instantiating_an_undefined_value_for_an_invalid_union_that_contains_itself.zig @@ -0,0 +1,16 @@ +const Foo = union { + x: Foo, +}; + +var foo: Foo = undefined; + +export fn entry() usize { + return @sizeOf(@TypeOf(foo.x)); +} + +// error +// backend=stage2 +// target=native +// +// :1:13: error: union 'tmp.Foo' depends on itself +// :2:5: note: 
while checking this field diff --git a/test/cases/compile_errors/stage1/obj/direct_struct_loop.zig b/test/cases/compile_errors/stage1/obj/direct_struct_loop.zig deleted file mode 100644 index 3062e617d6..0000000000 --- a/test/cases/compile_errors/stage1/obj/direct_struct_loop.zig +++ /dev/null @@ -1,8 +0,0 @@ -const A = struct { a : A, }; -export fn entry() usize { return @sizeOf(A); } - -// error -// backend=stage1 -// target=native -// -// tmp.zig:1:11: error: struct 'A' depends on itself diff --git a/test/cases/compile_errors/stage1/obj/indirect_struct_loop.zig b/test/cases/compile_errors/stage1/obj/indirect_struct_loop.zig deleted file mode 100644 index 12214923d0..0000000000 --- a/test/cases/compile_errors/stage1/obj/indirect_struct_loop.zig +++ /dev/null @@ -1,10 +0,0 @@ -const A = struct { b : B, }; -const B = struct { c : C, }; -const C = struct { a : A, }; -export fn entry() usize { return @sizeOf(A); } - -// error -// backend=stage1 -// target=native -// -// tmp.zig:1:11: error: struct 'A' depends on itself diff --git a/test/cases/compile_errors/stage1/obj/struct_depends_on_itself_via_optional_field.zig b/test/cases/compile_errors/struct_depends_on_itself_via_optional_field.zig similarity index 61% rename from test/cases/compile_errors/stage1/obj/struct_depends_on_itself_via_optional_field.zig rename to test/cases/compile_errors/struct_depends_on_itself_via_optional_field.zig index 46086172f7..cad779e3d7 100644 --- a/test/cases/compile_errors/stage1/obj/struct_depends_on_itself_via_optional_field.zig +++ b/test/cases/compile_errors/struct_depends_on_itself_via_optional_field.zig @@ -11,9 +11,9 @@ export fn entry() void { } // error -// backend=stage1 +// backend=stage2 // target=native // -// tmp.zig:1:17: error: struct 'LhsExpr' depends on itself -// tmp.zig:5:5: note: while checking this field -// tmp.zig:2:5: note: while checking this field +// :1:17: error: struct 'tmp.LhsExpr' depends on itself +// :5:5: note: while checking this field +// :2:5: note: while checking this field From d1d24b426dd8f12e6d643f45fcb6bb11dddaa8ef Mon Sep 17 00:00:00 2001 From: Veikka Tuominen Date: Tue, 2 Aug 2022 20:44:14 +0300 Subject: [PATCH 17/33] AstGen: check loop bodies and else branches for unused result --- src/AstGen.zig | 67 +++++++++++++++---- .../for_loop_body_expression_ignored.zig | 35 ++++++++++ .../obj/for_loop_body_expression_ignored.zig | 18 ----- .../while_loop_body_expression_ignored.zig | 22 ------ .../while_loop_body_expression_ignored.zig | 43 ++++++++++++ 5 files changed, 133 insertions(+), 52 deletions(-) create mode 100644 test/cases/compile_errors/for_loop_body_expression_ignored.zig delete mode 100644 test/cases/compile_errors/stage1/obj/for_loop_body_expression_ignored.zig delete mode 100644 test/cases/compile_errors/stage1/obj/while_loop_body_expression_ignored.zig create mode 100644 test/cases/compile_errors/while_loop_body_expression_ignored.zig diff --git a/src/AstGen.zig b/src/AstGen.zig index 051f1dace8..3850fe84a7 100644 --- a/src/AstGen.zig +++ b/src/AstGen.zig @@ -768,12 +768,12 @@ fn expr(gz: *GenZir, scope: *Scope, rl: ResultLoc, node: Ast.Node.Index) InnerEr .if_simple => return ifExpr(gz, scope, rl.br(), node, tree.ifSimple(node)), .@"if" => return ifExpr(gz, scope, rl.br(), node, tree.ifFull(node)), - .while_simple => return whileExpr(gz, scope, rl.br(), node, tree.whileSimple(node)), - .while_cont => return whileExpr(gz, scope, rl.br(), node, tree.whileCont(node)), - .@"while" => return whileExpr(gz, scope, rl.br(), node, tree.whileFull(node)), + 
.while_simple => return whileExpr(gz, scope, rl.br(), node, tree.whileSimple(node), false), + .while_cont => return whileExpr(gz, scope, rl.br(), node, tree.whileCont(node), false), + .@"while" => return whileExpr(gz, scope, rl.br(), node, tree.whileFull(node), false), - .for_simple => return forExpr(gz, scope, rl.br(), node, tree.forSimple(node)), - .@"for" => return forExpr(gz, scope, rl.br(), node, tree.forFull(node)), + .for_simple => return forExpr(gz, scope, rl.br(), node, tree.forSimple(node), false), + .@"for" => return forExpr(gz, scope, rl.br(), node, tree.forFull(node), false), .slice_open => { const lhs = try expr(gz, scope, .ref, node_datas[node].lhs); @@ -2152,6 +2152,7 @@ fn blockExprStmts(gz: *GenZir, parent_scope: *Scope, statements: []const Ast.Nod const astgen = gz.astgen; const tree = astgen.tree; const node_tags = tree.nodes.items(.tag); + const node_data = tree.nodes.items(.data); if (statements.len == 0) return; @@ -2178,8 +2179,10 @@ fn blockExprStmts(gz: *GenZir, parent_scope: *Scope, statements: []const Ast.Nod }, ); } - switch (node_tags[statement]) { - // zig fmt: off + var inner_node = statement; + while (true) { + switch (node_tags[inner_node]) { + // zig fmt: off .global_var_decl => scope = try varDecl(gz, scope, statement, block_arena_allocator, tree.globalVarDecl(statement)), .local_var_decl => scope = try varDecl(gz, scope, statement, block_arena_allocator, tree.localVarDecl(statement)), .simple_var_decl => scope = try varDecl(gz, scope, statement, block_arena_allocator, tree.simpleVarDecl(statement)), @@ -2204,9 +2207,23 @@ fn blockExprStmts(gz: *GenZir, parent_scope: *Scope, statements: []const Ast.Nod .assign_add_wrap => try assignOp(gz, scope, statement, .addwrap), .assign_mul => try assignOp(gz, scope, statement, .mul), .assign_mul_wrap => try assignOp(gz, scope, statement, .mulwrap), + + .grouped_expression => { + inner_node = node_data[statement].lhs; + continue; + }, - else => noreturn_src_node = try unusedResultExpr(gz, scope, statement), + .while_simple => _ = try whileExpr(gz, scope, .discard, inner_node, tree.whileSimple(inner_node), true), + .while_cont => _ = try whileExpr(gz, scope, .discard, inner_node, tree.whileCont(inner_node), true), + .@"while" => _ = try whileExpr(gz, scope, .discard, inner_node, tree.whileFull(inner_node), true), + + .for_simple => _ = try forExpr(gz, scope, .discard, inner_node, tree.forSimple(inner_node), true), + .@"for" => _ = try forExpr(gz, scope, .discard, inner_node, tree.forFull(inner_node), true), + + else => noreturn_src_node = try unusedResultExpr(gz, scope, inner_node), // zig fmt: on + } + break; } } @@ -2245,6 +2262,10 @@ fn unusedResultExpr(gz: *GenZir, scope: *Scope, statement: Ast.Node.Index) Inner // We need to emit an error if the result is not `noreturn` or `void`, but // we want to avoid adding the ZIR instruction if possible for performance. 
const maybe_unused_result = try expr(gz, scope, .none, statement); + return addEnsureResult(gz, maybe_unused_result, statement); +} + +fn addEnsureResult(gz: *GenZir, maybe_unused_result: Zir.Inst.Ref, statement: Ast.Node.Index) InnerError!Ast.Node.Index { var noreturn_src_node: Ast.Node.Index = 0; const elide_check = if (refToIndex(maybe_unused_result)) |inst| b: { // Note that this array becomes invalid after appending more items to it @@ -5648,6 +5669,7 @@ fn whileExpr( rl: ResultLoc, node: Ast.Node.Index, while_full: Ast.full.While, + is_statement: bool, ) InnerError!Zir.Inst.Ref { const astgen = parent_gz.astgen; const tree = astgen.tree; @@ -5818,6 +5840,8 @@ fn whileExpr( try then_scope.addDbgVar(.dbg_var_val, some, dbg_var_inst); } const then_result = try expr(&then_scope, then_sub_scope, loop_scope.break_result_loc, while_full.ast.then_expr); + _ = try addEnsureResult(&then_scope, then_result, while_full.ast.then_expr); + try checkUsed(parent_gz, &then_scope.base, then_sub_scope); try then_scope.addDbgBlockEnd(); @@ -5860,7 +5884,11 @@ fn whileExpr( // control flow apply to outer loops; not this one. loop_scope.continue_block = 0; loop_scope.break_block = 0; - const e = try expr(&else_scope, sub_scope, loop_scope.break_result_loc, else_node); + const else_result = try expr(&else_scope, sub_scope, loop_scope.break_result_loc, else_node); + if (is_statement) { + _ = try addEnsureResult(&else_scope, else_result, else_node); + } + if (!else_scope.endsWithNoReturn()) { loop_scope.break_count += 1; } @@ -5868,7 +5896,7 @@ fn whileExpr( try else_scope.addDbgBlockEnd(); break :blk .{ .src = else_node, - .result = e, + .result = else_result, }; } else .{ .src = while_full.ast.then_expr, @@ -5881,7 +5909,7 @@ fn whileExpr( } } const break_tag: Zir.Inst.Tag = if (is_inline) .break_inline else .@"break"; - return finishThenElseBlock( + const result = try finishThenElseBlock( parent_gz, rl, node, @@ -5896,6 +5924,10 @@ fn whileExpr( cond_block, break_tag, ); + if (is_statement) { + _ = try parent_gz.addUnNode(.ensure_result_used, result, node); + } + return result; } fn forExpr( @@ -5904,6 +5936,7 @@ fn forExpr( rl: ResultLoc, node: Ast.Node.Index, for_full: Ast.full.While, + is_statement: bool, ) InnerError!Zir.Inst.Ref { const astgen = parent_gz.astgen; @@ -6047,6 +6080,8 @@ fn forExpr( }; const then_result = try expr(&then_scope, then_sub_scope, loop_scope.break_result_loc, for_full.ast.then_expr); + _ = try addEnsureResult(&then_scope, then_result, for_full.ast.then_expr); + try checkUsed(parent_gz, &then_scope.base, then_sub_scope); try then_scope.addDbgBlockEnd(); @@ -6064,6 +6099,10 @@ fn forExpr( loop_scope.continue_block = 0; loop_scope.break_block = 0; const else_result = try expr(&else_scope, sub_scope, loop_scope.break_result_loc, else_node); + if (is_statement) { + _ = try addEnsureResult(&else_scope, else_result, else_node); + } + if (!else_scope.endsWithNoReturn()) { loop_scope.break_count += 1; } @@ -6082,7 +6121,7 @@ fn forExpr( } } const break_tag: Zir.Inst.Tag = if (is_inline) .break_inline else .@"break"; - return finishThenElseBlock( + const result = try finishThenElseBlock( parent_gz, rl, node, @@ -6097,6 +6136,10 @@ fn forExpr( cond_block, break_tag, ); + if (is_statement) { + _ = try parent_gz.addUnNode(.ensure_result_used, result, node); + } + return result; } fn switchExpr( diff --git a/test/cases/compile_errors/for_loop_body_expression_ignored.zig b/test/cases/compile_errors/for_loop_body_expression_ignored.zig new file mode 100644 index 0000000000..3ce73a9fab --- 
/dev/null +++ b/test/cases/compile_errors/for_loop_body_expression_ignored.zig @@ -0,0 +1,35 @@ +fn returns() usize { + return 2; +} +export fn f1() void { + for ("hello") |_| returns(); +} +export fn f2() void { + var x: anyerror!i32 = error.Bad; + for ("hello") |_| returns() else unreachable; + _ = x; +} +export fn f3() void { + for ("hello") |_| {} else true; +} +export fn f4() void { + const foo = for ("hello") |_| returns() else true; + _ = foo; +} + +// error +// backend=stage2 +// target=native +// +// :5:30: error: value of type 'usize' ignored +// :5:30: note: all non-void values must be used +// :5:30: note: this error can be suppressed by assigning the value to '_' +// :9:30: error: value of type 'usize' ignored +// :9:30: note: all non-void values must be used +// :9:30: note: this error can be suppressed by assigning the value to '_' +// :13:31: error: value of type 'bool' ignored +// :13:31: note: all non-void values must be used +// :13:31: note: this error can be suppressed by assigning the value to '_' +// :16:42: error: value of type 'usize' ignored +// :16:42: note: all non-void values must be used +// :16:42: note: this error can be suppressed by assigning the value to '_' diff --git a/test/cases/compile_errors/stage1/obj/for_loop_body_expression_ignored.zig b/test/cases/compile_errors/stage1/obj/for_loop_body_expression_ignored.zig deleted file mode 100644 index 6281d4b276..0000000000 --- a/test/cases/compile_errors/stage1/obj/for_loop_body_expression_ignored.zig +++ /dev/null @@ -1,18 +0,0 @@ -fn returns() usize { - return 2; -} -export fn f1() void { - for ("hello") |_| returns(); -} -export fn f2() void { - var x: anyerror!i32 = error.Bad; - for ("hello") |_| returns() else unreachable; - _ = x; -} - -// error -// backend=stage1 -// target=native -// -// tmp.zig:5:30: error: expression value is ignored -// tmp.zig:9:30: error: expression value is ignored diff --git a/test/cases/compile_errors/stage1/obj/while_loop_body_expression_ignored.zig b/test/cases/compile_errors/stage1/obj/while_loop_body_expression_ignored.zig deleted file mode 100644 index 9542cbc62f..0000000000 --- a/test/cases/compile_errors/stage1/obj/while_loop_body_expression_ignored.zig +++ /dev/null @@ -1,22 +0,0 @@ -fn returns() usize { - return 2; -} -export fn f1() void { - while (true) returns(); -} -export fn f2() void { - var x: ?i32 = null; - while (x) |_| returns(); -} -export fn f3() void { - var x: anyerror!i32 = error.Bad; - while (x) |_| returns() else |_| unreachable; -} - -// error -// backend=stage1 -// target=native -// -// tmp.zig:5:25: error: expression value is ignored -// tmp.zig:9:26: error: expression value is ignored -// tmp.zig:13:26: error: expression value is ignored diff --git a/test/cases/compile_errors/while_loop_body_expression_ignored.zig b/test/cases/compile_errors/while_loop_body_expression_ignored.zig new file mode 100644 index 0000000000..e33f48e6a5 --- /dev/null +++ b/test/cases/compile_errors/while_loop_body_expression_ignored.zig @@ -0,0 +1,43 @@ +fn returns() usize { + return 2; +} +export fn f1() void { + while (true) returns(); +} +export fn f2() void { + var x: ?i32 = null; + while (x) |_| returns(); +} +export fn f3() void { + var x: anyerror!i32 = error.Bad; + while (x) |_| returns() else |_| unreachable; +} +export fn f4() void { + var a = true; + while (a) {} else true; +} +export fn f5() void { + var a = true; + const foo = while (a) returns() else true; + _ = foo; +} + +// error +// backend=stage2 +// target=native +// +// :5:25: error: value of type 
'usize' ignored +// :5:25: note: all non-void values must be used +// :5:25: note: this error can be suppressed by assigning the value to '_' +// :9:26: error: value of type 'usize' ignored +// :9:26: note: all non-void values must be used +// :9:26: note: this error can be suppressed by assigning the value to '_' +// :13:26: error: value of type 'usize' ignored +// :13:26: note: all non-void values must be used +// :13:26: note: this error can be suppressed by assigning the value to '_' +// :17:23: error: value of type 'bool' ignored +// :17:23: note: all non-void values must be used +// :17:23: note: this error can be suppressed by assigning the value to '_' +// :21:34: error: value of type 'usize' ignored +// :21:34: note: all non-void values must be used +// :21:34: note: this error can be suppressed by assigning the value to '_' From b79929b2eaa634b756fe374372d59718f4f8479a Mon Sep 17 00:00:00 2001 From: Veikka Tuominen Date: Tue, 2 Aug 2022 21:01:20 +0300 Subject: [PATCH 18/33] AstGen: better source location for if/while condition unwrapping --- src/AstGen.zig | 21 ++++++++++--------- .../packed_union_given_enum_tag_type.zig | 4 ++-- ...cked_union_with_automatic_layout_field.zig | 6 ++++-- .../specify_non-integer_enum_tag_type.zig | 4 ++-- .../unused_variable_error_on_errdefer.zig | 4 ++-- .../obj => }/vector_index_out_of_bounds.zig | 4 ++-- .../while_expected_error_union_got_bool.zig | 4 ++-- ...hile_expected_error_union_got_optional.zig | 4 ++-- .../while_expected_optional_got_bool.zig | 4 ++-- ...hile_expected_optional_got_error_union.zig | 4 ++-- 10 files changed, 31 insertions(+), 28 deletions(-) rename test/cases/compile_errors/{stage1/obj => }/packed_union_given_enum_tag_type.zig (72%) rename test/cases/compile_errors/{stage1/obj => }/packed_union_with_automatic_layout_field.zig (51%) rename test/cases/compile_errors/{stage1/obj => }/specify_non-integer_enum_tag_type.zig (67%) rename test/cases/compile_errors/{stage1/obj => }/unused_variable_error_on_errdefer.zig (71%) rename test/cases/compile_errors/{stage1/obj => }/vector_index_out_of_bounds.zig (64%) rename test/cases/compile_errors/{stage1/obj => }/while_expected_error_union_got_bool.zig (62%) rename test/cases/compile_errors/{stage1/obj => }/while_expected_error_union_got_optional.zig (62%) rename test/cases/compile_errors/{stage1/obj => }/while_expected_optional_got_bool.zig (59%) rename test/cases/compile_errors/{stage1/obj => }/while_expected_optional_got_error_union.zig (58%) diff --git a/src/AstGen.zig b/src/AstGen.zig index 3850fe84a7..e30913ac76 100644 --- a/src/AstGen.zig +++ b/src/AstGen.zig @@ -2696,6 +2696,7 @@ fn genDefers( break :blk &local_val_scope.base; }; try unusedResultDeferExpr(gz, defer_scope, sub_scope, expr_node); + try checkUsed(gz, scope, sub_scope); try gz.addDbgBlockEnd(); }, .normal_only => continue, @@ -5384,7 +5385,7 @@ fn ifExpr( const tag: Zir.Inst.Tag = if (payload_is_ref) .is_non_err_ptr else .is_non_err; break :c .{ .inst = err_union, - .bool_bit = try block_scope.addUnNode(tag, err_union, node), + .bool_bit = try block_scope.addUnNode(tag, err_union, if_full.ast.cond_expr), }; } else if (if_full.payload_token) |_| { const cond_rl: ResultLoc = if (payload_is_ref) .ref else .none; @@ -5392,7 +5393,7 @@ fn ifExpr( const tag: Zir.Inst.Tag = if (payload_is_ref) .is_non_null_ptr else .is_non_null; break :c .{ .inst = optional, - .bool_bit = try block_scope.addUnNode(tag, optional, node), + .bool_bit = try block_scope.addUnNode(tag, optional, if_full.ast.cond_expr), }; } else { const cond = 
try expr(&block_scope, &block_scope.base, bool_rl, if_full.ast.cond_expr); @@ -5423,7 +5424,7 @@ fn ifExpr( .err_union_payload_unsafe_ptr else .err_union_payload_unsafe; - const payload_inst = try then_scope.addUnNode(tag, cond.inst, node); + const payload_inst = try then_scope.addUnNode(tag, cond.inst, if_full.ast.then_expr); const token_name_index = payload_token + @boolToInt(payload_is_ref); const ident_name = try astgen.identAsString(token_name_index); const token_name_str = tree.tokenSlice(token_name_index); @@ -5452,7 +5453,7 @@ fn ifExpr( const ident_bytes = tree.tokenSlice(ident_token); if (mem.eql(u8, "_", ident_bytes)) break :s &then_scope.base; - const payload_inst = try then_scope.addUnNode(tag, cond.inst, node); + const payload_inst = try then_scope.addUnNode(tag, cond.inst, if_full.ast.then_expr); const ident_name = try astgen.identAsString(ident_token); try astgen.detectLocalShadowing(&then_scope.base, ident_name, ident_token, ident_bytes); payload_val_scope = .{ @@ -5495,7 +5496,7 @@ fn ifExpr( .err_union_code_ptr else .err_union_code; - const payload_inst = try else_scope.addUnNode(tag, cond.inst, node); + const payload_inst = try else_scope.addUnNode(tag, cond.inst, if_full.ast.cond_expr); const ident_name = try astgen.identAsString(error_token); const error_token_str = tree.tokenSlice(error_token); if (mem.eql(u8, "_", error_token_str)) @@ -5709,7 +5710,7 @@ fn whileExpr( const tag: Zir.Inst.Tag = if (payload_is_ref) .is_non_err_ptr else .is_non_err; break :c .{ .inst = err_union, - .bool_bit = try continue_scope.addUnNode(tag, err_union, node), + .bool_bit = try continue_scope.addUnNode(tag, err_union, while_full.ast.then_expr), }; } else if (while_full.payload_token) |_| { const cond_rl: ResultLoc = if (payload_is_ref) .ref else .none; @@ -5717,7 +5718,7 @@ fn whileExpr( const tag: Zir.Inst.Tag = if (payload_is_ref) .is_non_null_ptr else .is_non_null; break :c .{ .inst = optional, - .bool_bit = try continue_scope.addUnNode(tag, optional, node), + .bool_bit = try continue_scope.addUnNode(tag, optional, while_full.ast.then_expr), }; } else { const cond = try expr(&continue_scope, &continue_scope.base, bool_rl, while_full.ast.cond_expr); @@ -5755,7 +5756,7 @@ fn whileExpr( else .err_union_payload_unsafe; // will add this instruction to then_scope.instructions below - payload_inst = try then_scope.makeUnNode(tag, cond.inst, node); + payload_inst = try then_scope.makeUnNode(tag, cond.inst, while_full.ast.cond_expr); const ident_token = if (payload_is_ref) payload_token + 1 else payload_token; const ident_bytes = tree.tokenSlice(ident_token); if (mem.eql(u8, "_", ident_bytes)) @@ -5784,7 +5785,7 @@ fn whileExpr( else .optional_payload_unsafe; // will add this instruction to then_scope.instructions below - payload_inst = try then_scope.makeUnNode(tag, cond.inst, node); + payload_inst = try then_scope.makeUnNode(tag, cond.inst, while_full.ast.cond_expr); const ident_name = try astgen.identAsString(ident_token); const ident_bytes = tree.tokenSlice(ident_token); if (mem.eql(u8, "_", ident_bytes)) @@ -5860,7 +5861,7 @@ fn whileExpr( .err_union_code_ptr else .err_union_code; - const else_payload_inst = try else_scope.addUnNode(tag, cond.inst, node); + const else_payload_inst = try else_scope.addUnNode(tag, cond.inst, while_full.ast.cond_expr); const ident_name = try astgen.identAsString(error_token); const ident_bytes = tree.tokenSlice(error_token); if (mem.eql(u8, ident_bytes, "_")) diff --git a/test/cases/compile_errors/stage1/obj/packed_union_given_enum_tag_type.zig 
b/test/cases/compile_errors/packed_union_given_enum_tag_type.zig similarity index 72% rename from test/cases/compile_errors/stage1/obj/packed_union_given_enum_tag_type.zig rename to test/cases/compile_errors/packed_union_given_enum_tag_type.zig index fceb7af65c..03aaef0d8c 100644 --- a/test/cases/compile_errors/stage1/obj/packed_union_given_enum_tag_type.zig +++ b/test/cases/compile_errors/packed_union_given_enum_tag_type.zig @@ -14,7 +14,7 @@ export fn entry() void { } // error -// backend=stage1 +// backend=stage2 // target=native // -// tmp.zig:6:30: error: packed union does not support enum tag type +// :6:30: error: packed union does not support enum tag type diff --git a/test/cases/compile_errors/stage1/obj/packed_union_with_automatic_layout_field.zig b/test/cases/compile_errors/packed_union_with_automatic_layout_field.zig similarity index 51% rename from test/cases/compile_errors/stage1/obj/packed_union_with_automatic_layout_field.zig rename to test/cases/compile_errors/packed_union_with_automatic_layout_field.zig index 99ad6ca306..97771e9b78 100644 --- a/test/cases/compile_errors/stage1/obj/packed_union_with_automatic_layout_field.zig +++ b/test/cases/compile_errors/packed_union_with_automatic_layout_field.zig @@ -12,7 +12,9 @@ export fn entry() void { } // error -// backend=stage1 +// backend=stage2 // target=native // -// tmp.zig:6:5: error: non-packed, non-extern struct 'Foo' not allowed in packed union; no guaranteed in-memory representation +// :6:5: error: packed unions cannot contain fields of type 'tmp.Foo' +// :6:5: note: only packed structs layout are allowed in packed types +// :1:13: note: struct declared here diff --git a/test/cases/compile_errors/stage1/obj/specify_non-integer_enum_tag_type.zig b/test/cases/compile_errors/specify_non-integer_enum_tag_type.zig similarity index 67% rename from test/cases/compile_errors/stage1/obj/specify_non-integer_enum_tag_type.zig rename to test/cases/compile_errors/specify_non-integer_enum_tag_type.zig index 333647e1e3..f2ff3e2cd1 100644 --- a/test/cases/compile_errors/stage1/obj/specify_non-integer_enum_tag_type.zig +++ b/test/cases/compile_errors/specify_non-integer_enum_tag_type.zig @@ -10,7 +10,7 @@ export fn entry() void { } // error -// backend=stage1 +// backend=stage2 // target=native // -// tmp.zig:1:21: error: expected integer, found 'f32' +// :1:21: error: expected integer tag type, found 'f32' diff --git a/test/cases/compile_errors/stage1/obj/unused_variable_error_on_errdefer.zig b/test/cases/compile_errors/unused_variable_error_on_errdefer.zig similarity index 71% rename from test/cases/compile_errors/stage1/obj/unused_variable_error_on_errdefer.zig rename to test/cases/compile_errors/unused_variable_error_on_errdefer.zig index b85d5729dc..4c37af04eb 100644 --- a/test/cases/compile_errors/stage1/obj/unused_variable_error_on_errdefer.zig +++ b/test/cases/compile_errors/unused_variable_error_on_errdefer.zig @@ -7,7 +7,7 @@ export fn entry() void { } // error -// backend=stage1 +// backend=stage2 // target=native // -// tmp.zig:2:15: error: unused variable: 'a' +// :2:15: error: unused capture diff --git a/test/cases/compile_errors/stage1/obj/vector_index_out_of_bounds.zig b/test/cases/compile_errors/vector_index_out_of_bounds.zig similarity index 64% rename from test/cases/compile_errors/stage1/obj/vector_index_out_of_bounds.zig rename to test/cases/compile_errors/vector_index_out_of_bounds.zig index fdffd8b455..ed1a25a321 100644 --- a/test/cases/compile_errors/stage1/obj/vector_index_out_of_bounds.zig +++ 
b/test/cases/compile_errors/vector_index_out_of_bounds.zig @@ -4,7 +4,7 @@ export fn entry() void { } // error -// backend=stage1 +// backend=stage2 // target=native // -// tmp.zig:2:62: error: index 3 outside vector of size 3 +// :2:49: error: expected 3 vector elements; found 4 diff --git a/test/cases/compile_errors/stage1/obj/while_expected_error_union_got_bool.zig b/test/cases/compile_errors/while_expected_error_union_got_bool.zig similarity index 62% rename from test/cases/compile_errors/stage1/obj/while_expected_error_union_got_bool.zig rename to test/cases/compile_errors/while_expected_error_union_got_bool.zig index b8a72e9793..f7960437ec 100644 --- a/test/cases/compile_errors/stage1/obj/while_expected_error_union_got_bool.zig +++ b/test/cases/compile_errors/while_expected_error_union_got_bool.zig @@ -4,7 +4,7 @@ export fn foo() void { fn bar() bool { return true; } // error -// backend=stage1 +// backend=stage2 // target=native // -// tmp.zig:2:15: error: expected error union type, found 'bool' +// :2:15: error: expected error union type, found 'bool' diff --git a/test/cases/compile_errors/stage1/obj/while_expected_error_union_got_optional.zig b/test/cases/compile_errors/while_expected_error_union_got_optional.zig similarity index 62% rename from test/cases/compile_errors/stage1/obj/while_expected_error_union_got_optional.zig rename to test/cases/compile_errors/while_expected_error_union_got_optional.zig index c933dc9509..5cabd76fce 100644 --- a/test/cases/compile_errors/stage1/obj/while_expected_error_union_got_optional.zig +++ b/test/cases/compile_errors/while_expected_error_union_got_optional.zig @@ -4,7 +4,7 @@ export fn foo() void { fn bar() ?i32 { return 1; } // error -// backend=stage1 +// backend=stage2 // target=native // -// tmp.zig:2:15: error: expected error union type, found '?i32' +// :2:15: error: expected error union type, found '?i32' diff --git a/test/cases/compile_errors/stage1/obj/while_expected_optional_got_bool.zig b/test/cases/compile_errors/while_expected_optional_got_bool.zig similarity index 59% rename from test/cases/compile_errors/stage1/obj/while_expected_optional_got_bool.zig rename to test/cases/compile_errors/while_expected_optional_got_bool.zig index 0458d1ba01..22b8c1e58c 100644 --- a/test/cases/compile_errors/stage1/obj/while_expected_optional_got_bool.zig +++ b/test/cases/compile_errors/while_expected_optional_got_bool.zig @@ -4,7 +4,7 @@ export fn foo() void { fn bar() bool { return true; } // error -// backend=stage1 +// backend=stage2 // target=native // -// tmp.zig:2:15: error: expected optional type, found 'bool' +// :2:15: error: expected optional type, found 'bool' diff --git a/test/cases/compile_errors/stage1/obj/while_expected_optional_got_error_union.zig b/test/cases/compile_errors/while_expected_optional_got_error_union.zig similarity index 58% rename from test/cases/compile_errors/stage1/obj/while_expected_optional_got_error_union.zig rename to test/cases/compile_errors/while_expected_optional_got_error_union.zig index 7cdbd2cccf..38a8a0dd20 100644 --- a/test/cases/compile_errors/stage1/obj/while_expected_optional_got_error_union.zig +++ b/test/cases/compile_errors/while_expected_optional_got_error_union.zig @@ -4,7 +4,7 @@ export fn foo() void { fn bar() anyerror!i32 { return 1; } // error -// backend=stage1 +// backend=stage2 // target=native // -// tmp.zig:2:15: error: expected optional type, found 'anyerror!i32' +// :2:15: error: expected optional type, found 'anyerror!i32' From 4ab60dc18b44cffad88d470c567920bcc34b1214 Mon Sep 17 
00:00:00 2001 From: Veikka Tuominen Date: Wed, 3 Aug 2022 17:10:39 +0300 Subject: [PATCH 19/33] Sema: add error for dependency loops --- src/Sema.zig | 33 +++++++++++++++++-- .../top_level_decl_dependency_loop.zig | 5 +-- 2 files changed, 33 insertions(+), 5 deletions(-) rename test/cases/compile_errors/{stage1/obj => }/top_level_decl_dependency_loop.zig (60%) diff --git a/src/Sema.zig b/src/Sema.zig index 6b376f3d6c..2721ed5179 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -5312,7 +5312,14 @@ fn zirDeclRef(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air const src = inst_data.src(); const decl_name = inst_data.get(sema.code); const decl_index = try sema.lookupIdentifier(block, src, decl_name); - return sema.analyzeDeclRef(decl_index); + return sema.analyzeDeclRef(decl_index) catch |err| switch (err) { + error.AnalysisFail => { + const msg = sema.err orelse return err; + try sema.errNote(block, src, msg, "referenced here", .{}); + return err; + }, + else => return err, + }; } fn zirDeclVal(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref { @@ -20876,7 +20883,14 @@ fn namespaceLookupRef( decl_name: []const u8, ) CompileError!?Air.Inst.Ref { const decl = (try sema.namespaceLookup(block, src, namespace, decl_name)) orelse return null; - return try sema.analyzeDeclRef(decl); + return sema.analyzeDeclRef(decl) catch |err| switch (err) { + error.AnalysisFail => { + const msg = sema.err orelse return err; + try sema.errNote(block, src, msg, "referenced here", .{}); + return err; + }, + else => return err, + }; } fn namespaceLookupVal( @@ -24979,7 +24993,14 @@ fn analyzeDeclVal( if (sema.decl_val_table.get(decl_index)) |result| { return result; } - const decl_ref = try sema.analyzeDeclRef(decl_index); + const decl_ref = sema.analyzeDeclRef(decl_index) catch |err| switch (err) { + error.AnalysisFail => { + const msg = sema.err orelse return err; + try sema.errNote(block, src, msg, "referenced here", .{}); + return err; + }, + else => return err, + }; const result = try sema.analyzeLoad(block, src, decl_ref, src); if (Air.refToIndex(result)) |index| { if (sema.air_instructions.items(.tag)[index] == .constant and !block.is_typeof) { @@ -24990,6 +25011,12 @@ fn analyzeDeclVal( } fn ensureDeclAnalyzed(sema: *Sema, decl_index: Decl.Index) CompileError!void { + const decl = sema.mod.declPtr(decl_index); + if (decl.analysis == .in_progress) { + const msg = try Module.ErrorMsg.create(sema.gpa, decl.srcLoc(), "dependency loop detected", .{}); + return sema.failWithOwnedErrorMsg(msg); + } + sema.mod.ensureDeclAnalyzed(decl_index) catch |err| { if (sema.owner_func) |owner_func| { owner_func.state = .dependency_failure; diff --git a/test/cases/compile_errors/stage1/obj/top_level_decl_dependency_loop.zig b/test/cases/compile_errors/top_level_decl_dependency_loop.zig similarity index 60% rename from test/cases/compile_errors/stage1/obj/top_level_decl_dependency_loop.zig rename to test/cases/compile_errors/top_level_decl_dependency_loop.zig index ac70285c9c..3b0e60ac02 100644 --- a/test/cases/compile_errors/stage1/obj/top_level_decl_dependency_loop.zig +++ b/test/cases/compile_errors/top_level_decl_dependency_loop.zig @@ -6,7 +6,8 @@ export fn entry() void { } // error -// backend=stage1 +// backend=stage2 // target=native // -// tmp.zig:2:19: error: dependency loop detected +// :1:1: error: dependency loop detected +// :2:19: note: referenced here From c85bdbffa976fd9bd7a9512d5a4706b2df31627b Mon Sep 17 00:00:00 2001 From: Loris Cro Date: Wed, 3 Aug 2022 16:20:27 
+0200 Subject: [PATCH 20/33] Update issue templates --- .github/ISSUE_TEMPLATE/autodoc-issue.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/autodoc-issue.md diff --git a/.github/ISSUE_TEMPLATE/autodoc-issue.md b/.github/ISSUE_TEMPLATE/autodoc-issue.md new file mode 100644 index 0000000000..bdf4e784b8 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/autodoc-issue.md @@ -0,0 +1,12 @@ +--- +name: Autodoc Issue +about: Issues with automatically generated docs, including stdlib docs. +title: 'Autodoc: {your issue}' +labels: autodoc +assignees: kristoff-it + +--- + +Autodoc is still work in progress and as such many bugs and missing features are already known. + +# Please report only regressions, i.e. things that worked in a previous build of new Autodoc (orange banner) that now don't work any more. From ecccf1f91f70e6d844eecfd848ddfa9951d9e6b1 Mon Sep 17 00:00:00 2001 From: Loris Cro Date: Wed, 3 Aug 2022 16:26:53 +0200 Subject: [PATCH 21/33] Add codeowners file for automated PR assignment Added myself for Autodoc related files. --- .github/CODEOWNERS | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .github/CODEOWNERS diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000000..0eafbafbce --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,3 @@ +# Autodoc +/src/Autodoc.zig @kristoff-it +/lib/docs/* @kristoff-it \ No newline at end of file From 4c750016eb9b1c0831cbb0398a4d6ee9dbdc932e Mon Sep 17 00:00:00 2001 From: Loris Cro Date: Wed, 3 Aug 2022 17:21:56 +0200 Subject: [PATCH 22/33] autodoc: inferred error unions in function return values --- lib/docs/main.js | 14 +++++++++++++- src/Autodoc.zig | 20 ++++++++++++++++---- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/lib/docs/main.js b/lib/docs/main.js index 5435efd647..5ee39ec893 100644 --- a/lib/docs/main.js +++ b/lib/docs/main.js @@ -826,7 +826,13 @@ var zigAnalysis; function exprName(expr, opts) { switch (Object.keys(expr)[0]) { - default: throw "oh no"; + default: throw "this expression is not implemented yet"; + case "bool": { + if (expr.bool) { + return "true"; + } + return "false"; + } case "&": { return "&" + exprName(zigAnalysis.exprs[expr["&"]]); } @@ -1699,6 +1705,12 @@ var zigAnalysis; let rhs = exprName(errUnionObj.rhs, opts); return lhs + "!" + rhs; } + case typeKinds.InferredErrorUnion: + { + let errUnionObj = (typeObj); + let payload = exprName(errUnionObj.payload, opts); + return "!" + payload; + } case typeKinds.Fn: { let fnObj = (typeObj); diff --git a/src/Autodoc.zig b/src/Autodoc.zig index 35f9dc7dea..a98ad7aee0 100644 --- a/src/Autodoc.zig +++ b/src/Autodoc.zig @@ -468,7 +468,7 @@ const DocData = struct { child: Expr, }, ErrorUnion: struct { lhs: Expr, rhs: Expr }, - // ErrorUnion: struct { name: []const u8 }, + InferredErrorUnion: struct { payload: Expr }, ErrorSet: struct { name: []const u8, fields: ?[]const Field = null, @@ -582,7 +582,7 @@ const DocData = struct { typeOf: usize, // index in `exprs` typeInfo: usize, // index in `exprs` typeOf_peer: []usize, - errorUnion: usize, // index in `exprs` + errorUnion: usize, // index in `types` as: As, sizeOf: usize, // index in `exprs` bitSizeOf: usize, // index in `exprs` @@ -1929,7 +1929,7 @@ fn walkInstruction( .comptimeExpr = self.comptime_exprs.items.len, } }; try self.comptime_exprs.append(self.arena, .{ - .code = "if(banana) 1 else 0", + .code = "if (...) { ... 
}", }); return res; }, @@ -2119,6 +2119,7 @@ fn walkInstruction( inst_index, self_ast_node_index, type_slot_index, + tags[inst_index] == .func_inferred, ); return result; @@ -3416,6 +3417,7 @@ fn analyzeFunction( inst_index: usize, self_ast_node_index: usize, type_slot_index: usize, + ret_is_inferred_error_set: bool, ) AutodocErrors!DocData.WalkResult { const tags = file.zir.instructions.items(.tag); const data = file.zir.instructions.items(.data); @@ -3522,13 +3524,23 @@ fn analyzeFunction( else => null, }; + const ret_type: DocData.Expr = blk: { + if (ret_is_inferred_error_set) { + const ret_type_slot_index = self.types.items.len; + try self.types.append(self.arena, .{ + .InferredErrorUnion = .{ .payload = ret_type_ref }, + }); + break :blk .{ .type = ret_type_slot_index }; + } else break :blk ret_type_ref; + }; + self.ast_nodes.items[self_ast_node_index].fields = param_ast_indexes.items; self.types.items[type_slot_index] = .{ .Fn = .{ .name = "todo_name func", .src = self_ast_node_index, .params = param_type_refs.items, - .ret = ret_type_ref, + .ret = ret_type, .generic_ret = generic_ret, }, }; From f26d5ee7ea97c8fd6e5b2655f845be7e4293930e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 31 Jul 2022 18:19:17 +0200 Subject: [PATCH 23/33] macho: sync with zld gitrev a2c32e972f8c5adfcda8ed2d99379ae868f59c24 https://github.com/kubkon/zld/commit/a2c32e972f8c5adfcda8ed2d99379ae868f59c24 --- lib/std/build/CheckObjectStep.zig | 89 +- lib/std/macho.zig | 479 +--- src/link/Dwarf.zig | 44 +- src/link/MachO.zig | 3463 +++++++++++++---------------- src/link/MachO/Archive.zig | 58 +- src/link/MachO/Atom.zig | 35 +- src/link/MachO/CodeSignature.zig | 12 +- src/link/MachO/DebugSymbols.zig | 506 ++--- src/link/MachO/Dylib.zig | 159 +- src/link/MachO/Object.zig | 308 ++- src/link/MachO/dead_strip.zig | 48 +- src/link/MachO/fat.zig | 4 +- 12 files changed, 2128 insertions(+), 3077 deletions(-) diff --git a/lib/std/build/CheckObjectStep.zig b/lib/std/build/CheckObjectStep.zig index 0525bbf034..cc0982ec08 100644 --- a/lib/std/build/CheckObjectStep.zig +++ b/lib/std/build/CheckObjectStep.zig @@ -283,7 +283,14 @@ fn make(step: *Step) !void { const gpa = self.builder.allocator; const src_path = self.source.getPath(self.builder); - const contents = try fs.cwd().readFileAlloc(gpa, src_path, self.max_bytes); + const contents = try fs.cwd().readFileAllocOptions( + gpa, + src_path, + self.max_bytes, + null, + @alignOf(u64), + null, + ); const output = switch (self.obj_format) { .macho => try MachODumper.parseAndDump(contents, .{ @@ -370,9 +377,10 @@ const Opts = struct { }; const MachODumper = struct { + const LoadCommandIterator = macho.LoadCommandIterator; const symtab_label = "symtab"; - fn parseAndDump(bytes: []const u8, opts: Opts) ![]const u8 { + fn parseAndDump(bytes: []align(@alignOf(u64)) const u8, opts: Opts) ![]const u8 { const gpa = opts.gpa orelse unreachable; // MachO dumper requires an allocator var stream = std.io.fixedBufferStream(bytes); const reader = stream.reader(); @@ -385,55 +393,54 @@ const MachODumper = struct { var output = std.ArrayList(u8).init(gpa); const writer = output.writer(); - var load_commands = std.ArrayList(macho.LoadCommand).init(gpa); - try load_commands.ensureTotalCapacity(hdr.ncmds); - - var sections = std.ArrayList(struct { seg: u16, sect: u16 }).init(gpa); - var imports = std.ArrayList(u16).init(gpa); - - var symtab_cmd: ?u16 = null; - var i: u16 = 0; - while (i < hdr.ncmds) : (i += 1) { - var cmd = try macho.LoadCommand.read(gpa, reader); - 
load_commands.appendAssumeCapacity(cmd); + var symtab: []const macho.nlist_64 = undefined; + var strtab: []const u8 = undefined; + var sections = std.ArrayList(macho.section_64).init(gpa); + var imports = std.ArrayList([]const u8).init(gpa); + var it = LoadCommandIterator{ + .ncmds = hdr.ncmds, + .buffer = bytes[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], + }; + var i: usize = 0; + while (it.next()) |cmd| { switch (cmd.cmd()) { .SEGMENT_64 => { - const seg = cmd.segment; - for (seg.sections.items) |_, j| { - try sections.append(.{ .seg = i, .sect = @intCast(u16, j) }); + const seg = cmd.cast(macho.segment_command_64).?; + try sections.ensureUnusedCapacity(seg.nsects); + for (cmd.getSections()) |sect| { + sections.appendAssumeCapacity(sect); } }, - .SYMTAB => { - symtab_cmd = i; + .SYMTAB => if (opts.dump_symtab) { + const lc = cmd.cast(macho.symtab_command).?; + symtab = @ptrCast( + [*]const macho.nlist_64, + @alignCast(@alignOf(macho.nlist_64), &bytes[lc.symoff]), + )[0..lc.nsyms]; + strtab = bytes[lc.stroff..][0..lc.strsize]; }, .LOAD_DYLIB, .LOAD_WEAK_DYLIB, .REEXPORT_DYLIB, => { - try imports.append(i); + try imports.append(cmd.getDylibPathName()); }, else => {}, } try dumpLoadCommand(cmd, i, writer); try writer.writeByte('\n'); + + i += 1; } if (opts.dump_symtab) { - const cmd = load_commands.items[symtab_cmd.?].symtab; - try writer.writeAll(symtab_label ++ "\n"); - const strtab = bytes[cmd.stroff..][0..cmd.strsize]; - const raw_symtab = bytes[cmd.symoff..][0 .. cmd.nsyms * @sizeOf(macho.nlist_64)]; - const symtab = mem.bytesAsSlice(macho.nlist_64, raw_symtab); - for (symtab) |sym| { if (sym.stab()) continue; const sym_name = mem.sliceTo(@ptrCast([*:0]const u8, strtab.ptr + sym.n_strx), 0); if (sym.sect()) { - const map = sections.items[sym.n_sect - 1]; - const seg = load_commands.items[map.seg].segment; - const sect = seg.sections.items[map.sect]; + const sect = sections.items[sym.n_sect - 1]; try writer.print("{x} ({s},{s})", .{ sym.n_value, sect.segName(), @@ -455,9 +462,7 @@ const MachODumper = struct { break :blk "flat lookup"; unreachable; } - const import_id = imports.items[@bitCast(u16, ordinal) - 1]; - const import = load_commands.items[import_id].dylib; - const full_path = mem.sliceTo(import.data, 0); + const full_path = imports.items[@bitCast(u16, ordinal) - 1]; const basename = fs.path.basename(full_path); assert(basename.len > 0); const ext = mem.lastIndexOfScalar(u8, basename, '.') orelse basename.len; @@ -481,7 +486,7 @@ const MachODumper = struct { return output.toOwnedSlice(); } - fn dumpLoadCommand(lc: macho.LoadCommand, index: u16, writer: anytype) !void { + fn dumpLoadCommand(lc: macho.LoadCommandIterator.LoadCommand, index: usize, writer: anytype) !void { // print header first try writer.print( \\LC {d} @@ -491,8 +496,7 @@ const MachODumper = struct { switch (lc.cmd()) { .SEGMENT_64 => { - // TODO dump section headers - const seg = lc.segment.inner; + const seg = lc.cast(macho.segment_command_64).?; try writer.writeByte('\n'); try writer.print( \\segname {s} @@ -508,7 +512,7 @@ const MachODumper = struct { seg.filesize, }); - for (lc.segment.sections.items) |sect| { + for (lc.getSections()) |sect| { try writer.writeByte('\n'); try writer.print( \\sectname {s} @@ -531,7 +535,7 @@ const MachODumper = struct { .LOAD_WEAK_DYLIB, .REEXPORT_DYLIB, => { - const dylib = lc.dylib.inner.dylib; + const dylib = lc.cast(macho.dylib_command).?; try writer.writeByte('\n'); try writer.print( \\name {s} @@ -539,19 +543,20 @@ const MachODumper = struct { \\current version 
{x} \\compatibility version {x} , .{ - mem.sliceTo(lc.dylib.data, 0), - dylib.timestamp, - dylib.current_version, - dylib.compatibility_version, + lc.getDylibPathName(), + dylib.dylib.timestamp, + dylib.dylib.current_version, + dylib.dylib.compatibility_version, }); }, .MAIN => { + const main = lc.cast(macho.entry_point_command).?; try writer.writeByte('\n'); try writer.print( \\entryoff {x} \\stacksize {x} - , .{ lc.main.entryoff, lc.main.stacksize }); + , .{ main.entryoff, main.stacksize }); }, .RPATH => { @@ -559,7 +564,7 @@ const MachODumper = struct { try writer.print( \\path {s} , .{ - mem.sliceTo(lc.rpath.data, 0), + lc.getRpathPathName(), }); }, diff --git a/lib/std/macho.zig b/lib/std/macho.zig index cd4bfa37fb..9334f79dc5 100644 --- a/lib/std/macho.zig +++ b/lib/std/macho.zig @@ -1835,429 +1835,70 @@ pub const data_in_code_entry = extern struct { kind: u16, }; -/// A Zig wrapper for all known MachO load commands. -/// Provides interface to read and write the load command data to a buffer. -pub const LoadCommand = union(enum) { - segment: SegmentCommand, - dyld_info_only: dyld_info_command, - symtab: symtab_command, - dysymtab: dysymtab_command, - dylinker: GenericCommandWithData(dylinker_command), - dylib: GenericCommandWithData(dylib_command), - main: entry_point_command, - version_min: version_min_command, - source_version: source_version_command, - build_version: GenericCommandWithData(build_version_command), - uuid: uuid_command, - linkedit_data: linkedit_data_command, - rpath: GenericCommandWithData(rpath_command), - unknown: GenericCommandWithData(load_command), +pub const LoadCommandIterator = struct { + ncmds: usize, + buffer: []align(@alignOf(u64)) const u8, + index: usize = 0, - pub fn read(allocator: Allocator, reader: anytype) !LoadCommand { - const header = try reader.readStruct(load_command); - var buffer = try allocator.alloc(u8, header.cmdsize); - defer allocator.free(buffer); - mem.copy(u8, buffer, mem.asBytes(&header)); - try reader.readNoEof(buffer[@sizeOf(load_command)..]); - var stream = io.fixedBufferStream(buffer); + pub const LoadCommand = struct { + hdr: load_command, + data: []const u8, - return switch (header.cmd) { - .SEGMENT_64 => LoadCommand{ - .segment = try SegmentCommand.read(allocator, stream.reader()), - }, - .DYLD_INFO, .DYLD_INFO_ONLY => LoadCommand{ - .dyld_info_only = try stream.reader().readStruct(dyld_info_command), - }, - .SYMTAB => LoadCommand{ - .symtab = try stream.reader().readStruct(symtab_command), - }, - .DYSYMTAB => LoadCommand{ - .dysymtab = try stream.reader().readStruct(dysymtab_command), - }, - .ID_DYLINKER, .LOAD_DYLINKER, .DYLD_ENVIRONMENT => LoadCommand{ - .dylinker = try GenericCommandWithData(dylinker_command).read(allocator, stream.reader()), - }, - .ID_DYLIB, .LOAD_WEAK_DYLIB, .LOAD_DYLIB, .REEXPORT_DYLIB => LoadCommand{ - .dylib = try GenericCommandWithData(dylib_command).read(allocator, stream.reader()), - }, - .MAIN => LoadCommand{ - .main = try stream.reader().readStruct(entry_point_command), - }, - .VERSION_MIN_MACOSX, .VERSION_MIN_IPHONEOS, .VERSION_MIN_WATCHOS, .VERSION_MIN_TVOS => LoadCommand{ - .version_min = try stream.reader().readStruct(version_min_command), - }, - .SOURCE_VERSION => LoadCommand{ - .source_version = try stream.reader().readStruct(source_version_command), - }, - .BUILD_VERSION => LoadCommand{ - .build_version = try GenericCommandWithData(build_version_command).read(allocator, stream.reader()), - }, - .UUID => LoadCommand{ - .uuid = try stream.reader().readStruct(uuid_command), - }, - 
.FUNCTION_STARTS, .DATA_IN_CODE, .CODE_SIGNATURE => LoadCommand{ - .linkedit_data = try stream.reader().readStruct(linkedit_data_command), - }, - .RPATH => LoadCommand{ - .rpath = try GenericCommandWithData(rpath_command).read(allocator, stream.reader()), - }, - else => LoadCommand{ - .unknown = try GenericCommandWithData(load_command).read(allocator, stream.reader()), - }, + pub fn cmd(lc: LoadCommand) LC { + return lc.hdr.cmd; + } + + pub fn cmdsize(lc: LoadCommand) u32 { + return lc.hdr.cmdsize; + } + + pub fn cast(lc: LoadCommand, comptime Cmd: type) ?Cmd { + if (lc.data.len < @sizeOf(Cmd)) return null; + return @ptrCast(*const Cmd, @alignCast(@alignOf(Cmd), &lc.data[0])).*; + } + + /// Asserts LoadCommand is of type segment_command_64. + pub fn getSections(lc: LoadCommand) []const section_64 { + const segment_lc = lc.cast(segment_command_64).?; + if (segment_lc.nsects == 0) return &[0]section_64{}; + const data = lc.data[@sizeOf(segment_command_64)..]; + const sections = @ptrCast( + [*]const section_64, + @alignCast(@alignOf(section_64), &data[0]), + )[0..segment_lc.nsects]; + return sections; + } + + /// Asserts LoadCommand is of type dylib_command. + pub fn getDylibPathName(lc: LoadCommand) []const u8 { + const dylib_lc = lc.cast(dylib_command).?; + const data = lc.data[dylib_lc.dylib.name..]; + return mem.sliceTo(data, 0); + } + + /// Asserts LoadCommand is of type rpath_command. + pub fn getRpathPathName(lc: LoadCommand) []const u8 { + const rpath_lc = lc.cast(rpath_command).?; + const data = lc.data[rpath_lc.path..]; + return mem.sliceTo(data, 0); + } + }; + + pub fn next(it: *LoadCommandIterator) ?LoadCommand { + if (it.index >= it.ncmds) return null; + + const hdr = @ptrCast( + *const load_command, + @alignCast(@alignOf(load_command), &it.buffer[0]), + ).*; + const cmd = LoadCommand{ + .hdr = hdr, + .data = it.buffer[0..hdr.cmdsize], }; - } - pub fn write(self: LoadCommand, writer: anytype) !void { - return switch (self) { - .dyld_info_only => |x| writeStruct(x, writer), - .symtab => |x| writeStruct(x, writer), - .dysymtab => |x| writeStruct(x, writer), - .main => |x| writeStruct(x, writer), - .version_min => |x| writeStruct(x, writer), - .source_version => |x| writeStruct(x, writer), - .uuid => |x| writeStruct(x, writer), - .linkedit_data => |x| writeStruct(x, writer), - .segment => |x| x.write(writer), - .dylinker => |x| x.write(writer), - .dylib => |x| x.write(writer), - .rpath => |x| x.write(writer), - .build_version => |x| x.write(writer), - .unknown => |x| x.write(writer), - }; - } + it.buffer = it.buffer[hdr.cmdsize..]; + it.index += 1; - pub fn cmd(self: LoadCommand) LC { - return switch (self) { - .dyld_info_only => |x| x.cmd, - .symtab => |x| x.cmd, - .dysymtab => |x| x.cmd, - .main => |x| x.cmd, - .version_min => |x| x.cmd, - .source_version => |x| x.cmd, - .uuid => |x| x.cmd, - .linkedit_data => |x| x.cmd, - .segment => |x| x.inner.cmd, - .dylinker => |x| x.inner.cmd, - .dylib => |x| x.inner.cmd, - .rpath => |x| x.inner.cmd, - .build_version => |x| x.inner.cmd, - .unknown => |x| x.inner.cmd, - }; - } - - pub fn cmdsize(self: LoadCommand) u32 { - return switch (self) { - .dyld_info_only => |x| x.cmdsize, - .symtab => |x| x.cmdsize, - .dysymtab => |x| x.cmdsize, - .main => |x| x.cmdsize, - .version_min => |x| x.cmdsize, - .source_version => |x| x.cmdsize, - .linkedit_data => |x| x.cmdsize, - .uuid => |x| x.cmdsize, - .segment => |x| x.inner.cmdsize, - .dylinker => |x| x.inner.cmdsize, - .dylib => |x| x.inner.cmdsize, - .rpath => |x| x.inner.cmdsize, - .build_version 
=> |x| x.inner.cmdsize, - .unknown => |x| x.inner.cmdsize, - }; - } - - pub fn deinit(self: *LoadCommand, allocator: Allocator) void { - return switch (self.*) { - .segment => |*x| x.deinit(allocator), - .dylinker => |*x| x.deinit(allocator), - .dylib => |*x| x.deinit(allocator), - .rpath => |*x| x.deinit(allocator), - .build_version => |*x| x.deinit(allocator), - .unknown => |*x| x.deinit(allocator), - else => {}, - }; - } - - fn writeStruct(command: anytype, writer: anytype) !void { - return writer.writeAll(mem.asBytes(&command)); - } - - pub fn eql(self: LoadCommand, other: LoadCommand) bool { - if (@as(meta.Tag(LoadCommand), self) != @as(meta.Tag(LoadCommand), other)) return false; - return switch (self) { - .dyld_info_only => |x| meta.eql(x, other.dyld_info_only), - .symtab => |x| meta.eql(x, other.symtab), - .dysymtab => |x| meta.eql(x, other.dysymtab), - .main => |x| meta.eql(x, other.main), - .version_min => |x| meta.eql(x, other.version_min), - .source_version => |x| meta.eql(x, other.source_version), - .build_version => |x| x.eql(other.build_version), - .uuid => |x| meta.eql(x, other.uuid), - .linkedit_data => |x| meta.eql(x, other.linkedit_data), - .segment => |x| x.eql(other.segment), - .dylinker => |x| x.eql(other.dylinker), - .dylib => |x| x.eql(other.dylib), - .rpath => |x| x.eql(other.rpath), - .unknown => |x| x.eql(other.unknown), - }; + return cmd; } }; - -/// A Zig wrapper for segment_command_64. -/// Encloses the extern struct together with a list of sections for this segment. -pub const SegmentCommand = struct { - inner: segment_command_64, - sections: std.ArrayListUnmanaged(section_64) = .{}, - - pub fn read(allocator: Allocator, reader: anytype) !SegmentCommand { - const inner = try reader.readStruct(segment_command_64); - var segment = SegmentCommand{ - .inner = inner, - }; - try segment.sections.ensureTotalCapacityPrecise(allocator, inner.nsects); - - var i: usize = 0; - while (i < inner.nsects) : (i += 1) { - const sect = try reader.readStruct(section_64); - segment.sections.appendAssumeCapacity(sect); - } - - return segment; - } - - pub fn write(self: SegmentCommand, writer: anytype) !void { - try writer.writeAll(mem.asBytes(&self.inner)); - for (self.sections.items) |sect| { - try writer.writeAll(mem.asBytes(&sect)); - } - } - - pub fn deinit(self: *SegmentCommand, allocator: Allocator) void { - self.sections.deinit(allocator); - } - - pub fn eql(self: SegmentCommand, other: SegmentCommand) bool { - if (!meta.eql(self.inner, other.inner)) return false; - const lhs = self.sections.items; - const rhs = other.sections.items; - var i: usize = 0; - while (i < self.inner.nsects) : (i += 1) { - if (!meta.eql(lhs[i], rhs[i])) return false; - } - return true; - } -}; - -pub fn emptyGenericCommandWithData(cmd: anytype) GenericCommandWithData(@TypeOf(cmd)) { - return .{ .inner = cmd }; -} - -/// A Zig wrapper for a generic load command with variable-length data. -pub fn GenericCommandWithData(comptime Cmd: type) type { - return struct { - inner: Cmd, - /// This field remains undefined until `read` is called.
- data: []u8 = undefined, - - const Self = @This(); - - pub fn read(allocator: Allocator, reader: anytype) !Self { - const inner = try reader.readStruct(Cmd); - var data = try allocator.alloc(u8, inner.cmdsize - @sizeOf(Cmd)); - errdefer allocator.free(data); - try reader.readNoEof(data); - return Self{ - .inner = inner, - .data = data, - }; - } - - pub fn write(self: Self, writer: anytype) !void { - try writer.writeAll(mem.asBytes(&self.inner)); - try writer.writeAll(self.data); - } - - pub fn deinit(self: *Self, allocator: Allocator) void { - allocator.free(self.data); - } - - pub fn eql(self: Self, other: Self) bool { - if (!meta.eql(self.inner, other.inner)) return false; - return mem.eql(u8, self.data, other.data); - } - }; -} - -pub fn createLoadDylibCommand( - allocator: Allocator, - cmd_id: LC, - name: []const u8, - timestamp: u32, - current_version: u32, - compatibility_version: u32, -) !GenericCommandWithData(dylib_command) { - assert(cmd_id == .LOAD_DYLIB or cmd_id == .LOAD_WEAK_DYLIB or cmd_id == .REEXPORT_DYLIB or cmd_id == .ID_DYLIB); - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(dylib_command) + name.len + 1, // +1 for nul - @sizeOf(u64), - )); - - var dylib_cmd = emptyGenericCommandWithData(dylib_command{ - .cmd = cmd_id, - .cmdsize = cmdsize, - .dylib = .{ - .name = @sizeOf(dylib_command), - .timestamp = timestamp, - .current_version = current_version, - .compatibility_version = compatibility_version, - }, - }); - dylib_cmd.data = try allocator.alloc(u8, cmdsize - dylib_cmd.inner.dylib.name); - - mem.set(u8, dylib_cmd.data, 0); - mem.copy(u8, dylib_cmd.data, name); - - return dylib_cmd; -} - -fn testRead(allocator: Allocator, buffer: []const u8, expected: anytype) !void { - var stream = io.fixedBufferStream(buffer); - var given = try LoadCommand.read(allocator, stream.reader()); - defer given.deinit(allocator); - try testing.expect(expected.eql(given)); -} - -fn testWrite(buffer: []u8, cmd: LoadCommand, expected: []const u8) !void { - var stream = io.fixedBufferStream(buffer); - try cmd.write(stream.writer()); - try testing.expect(mem.eql(u8, expected, buffer[0..expected.len])); -} - -fn makeStaticString(bytes: []const u8) [16]u8 { - var buf = [_]u8{0} ** 16; - assert(bytes.len <= buf.len); - mem.copy(u8, &buf, bytes); - return buf; -} - -test "read-write segment command" { - // TODO compiling for macOS from big-endian arch - if (builtin.target.cpu.arch.endian() != .Little) return error.SkipZigTest; - - var gpa = testing.allocator; - const in_buffer = &[_]u8{ - 0x19, 0x00, 0x00, 0x00, // cmd - 0x98, 0x00, 0x00, 0x00, // cmdsize - 0x5f, 0x5f, 0x54, 0x45, 0x58, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // segname - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, // vmaddr - 0x00, 0x80, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, // vmsize - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // fileoff - 0x00, 0x80, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, // filesize - 0x07, 0x00, 0x00, 0x00, // maxprot - 0x05, 0x00, 0x00, 0x00, // initprot - 0x01, 0x00, 0x00, 0x00, // nsects - 0x00, 0x00, 0x00, 0x00, // flags - 0x5f, 0x5f, 0x74, 0x65, 0x78, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // sectname - 0x5f, 0x5f, 0x54, 0x45, 0x58, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // segname - 0x00, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, // address - 0xc0, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // size - 0x00, 0x40, 0x00, 0x00, // offset - 0x02, 0x00, 0x00, 0x00, // alignment - 0x00, 0x00, 0x00, 
0x00, // reloff - 0x00, 0x00, 0x00, 0x00, // nreloc - 0x00, 0x04, 0x00, 0x80, // flags - 0x00, 0x00, 0x00, 0x00, // reserved1 - 0x00, 0x00, 0x00, 0x00, // reserved2 - 0x00, 0x00, 0x00, 0x00, // reserved3 - }; - var cmd = SegmentCommand{ - .inner = .{ - .cmdsize = 152, - .segname = makeStaticString("__TEXT"), - .vmaddr = 4294967296, - .vmsize = 294912, - .filesize = 294912, - .maxprot = PROT.READ | PROT.WRITE | PROT.EXEC, - .initprot = PROT.EXEC | PROT.READ, - .nsects = 1, - }, - }; - try cmd.sections.append(gpa, .{ - .sectname = makeStaticString("__text"), - .segname = makeStaticString("__TEXT"), - .addr = 4294983680, - .size = 448, - .offset = 16384, - .@"align" = 2, - .flags = S_REGULAR | S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS, - }); - defer cmd.deinit(gpa); - try testRead(gpa, in_buffer, LoadCommand{ .segment = cmd }); - - var out_buffer: [in_buffer.len]u8 = undefined; - try testWrite(&out_buffer, LoadCommand{ .segment = cmd }, in_buffer); -} - -test "read-write generic command with data" { - // TODO compiling for macOS from big-endian arch - if (builtin.target.cpu.arch.endian() != .Little) return error.SkipZigTest; - - var gpa = testing.allocator; - const in_buffer = &[_]u8{ - 0x0c, 0x00, 0x00, 0x00, // cmd - 0x20, 0x00, 0x00, 0x00, // cmdsize - 0x18, 0x00, 0x00, 0x00, // name - 0x02, 0x00, 0x00, 0x00, // timestamp - 0x00, 0x00, 0x00, 0x00, // current_version - 0x00, 0x00, 0x00, 0x00, // compatibility_version - 0x2f, 0x75, 0x73, 0x72, 0x00, 0x00, 0x00, 0x00, // data - }; - var cmd = GenericCommandWithData(dylib_command){ - .inner = .{ - .cmd = .LOAD_DYLIB, - .cmdsize = 32, - .dylib = .{ - .name = 24, - .timestamp = 2, - .current_version = 0, - .compatibility_version = 0, - }, - }, - }; - cmd.data = try gpa.alloc(u8, 8); - defer gpa.free(cmd.data); - cmd.data[0] = 0x2f; - cmd.data[1] = 0x75; - cmd.data[2] = 0x73; - cmd.data[3] = 0x72; - cmd.data[4] = 0x0; - cmd.data[5] = 0x0; - cmd.data[6] = 0x0; - cmd.data[7] = 0x0; - try testRead(gpa, in_buffer, LoadCommand{ .dylib = cmd }); - - var out_buffer: [in_buffer.len]u8 = undefined; - try testWrite(&out_buffer, LoadCommand{ .dylib = cmd }, in_buffer); -} - -test "read-write C struct command" { - // TODO compiling for macOS from big-endian arch - if (builtin.target.cpu.arch.endian() != .Little) return error.SkipZigTest; - - var gpa = testing.allocator; - const in_buffer = &[_]u8{ - 0x28, 0x00, 0x00, 0x80, // cmd - 0x18, 0x00, 0x00, 0x00, // cmdsize - 0x04, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // entryoff - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // stacksize - }; - const cmd = .{ - .cmd = .MAIN, - .cmdsize = 24, - .entryoff = 16644, - .stacksize = 0, - }; - try testRead(gpa, in_buffer, LoadCommand{ .main = cmd }); - - var out_buffer: [in_buffer.len]u8 = undefined; - try testWrite(&out_buffer, LoadCommand{ .main = cmd }, in_buffer); -} diff --git a/src/link/Dwarf.zig b/src/link/Dwarf.zig index 03ba53801b..627f946e36 100644 --- a/src/link/Dwarf.zig +++ b/src/link/Dwarf.zig @@ -853,8 +853,7 @@ pub fn commitDeclState( .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_segment = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_line_sect = &dwarf_segment.sections.items[d_sym.debug_line_section_index.?]; + const debug_line_sect = &d_sym.sections.items[d_sym.debug_line_section_index.?]; const file_pos = debug_line_sect.offset + src_fn.off; try pwriteDbgLineNops(d_sym.file, file_pos, 0, &[0]u8{}, src_fn.len); }, @@ -933,8 +932,8 @@ pub fn 
commitDeclState( .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_segment = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_line_sect = &dwarf_segment.sections.items[d_sym.debug_line_section_index.?]; + const dwarf_segment = d_sym.segments.items[d_sym.dwarf_segment_cmd_index.?]; + const debug_line_sect = &d_sym.sections.items[d_sym.debug_line_section_index.?]; if (needed_size != debug_line_sect.size) { if (needed_size > d_sym.allocatedSize(debug_line_sect.offset)) { const new_offset = d_sym.findFreeSpace(needed_size, 1); @@ -955,10 +954,9 @@ pub fn commitDeclState( ); debug_line_sect.offset = @intCast(u32, new_offset); - debug_line_sect.addr = dwarf_segment.inner.vmaddr + new_offset - dwarf_segment.inner.fileoff; + debug_line_sect.addr = dwarf_segment.vmaddr + new_offset - dwarf_segment.fileoff; } debug_line_sect.size = needed_size; - d_sym.load_commands_dirty = true; // TODO look into making only the one section dirty d_sym.debug_line_header_dirty = true; } const file_pos = debug_line_sect.offset + src_fn.off; @@ -1137,8 +1135,7 @@ fn updateDeclDebugInfoAllocation(self: *Dwarf, file: *File, atom: *Atom, len: u3 .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_segment = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_info_sect = &dwarf_segment.sections.items[d_sym.debug_info_section_index.?]; + const debug_info_sect = &d_sym.sections.items[d_sym.debug_info_section_index.?]; const file_pos = debug_info_sect.offset + atom.off; try pwriteDbgInfoNops(d_sym.file, file_pos, 0, &[0]u8{}, atom.len, false); }, @@ -1235,8 +1232,8 @@ fn writeDeclDebugInfo(self: *Dwarf, file: *File, atom: *Atom, dbg_info_buf: []co .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_segment = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_info_sect = &dwarf_segment.sections.items[d_sym.debug_info_section_index.?]; + const dwarf_segment = d_sym.segments.items[d_sym.dwarf_segment_cmd_index.?]; + const debug_info_sect = &d_sym.sections.items[d_sym.debug_info_section_index.?]; if (needed_size != debug_info_sect.size) { if (needed_size > d_sym.allocatedSize(debug_info_sect.offset)) { const new_offset = d_sym.findFreeSpace(needed_size, 1); @@ -1257,10 +1254,9 @@ fn writeDeclDebugInfo(self: *Dwarf, file: *File, atom: *Atom, dbg_info_buf: []co ); debug_info_sect.offset = @intCast(u32, new_offset); - debug_info_sect.addr = dwarf_segment.inner.vmaddr + new_offset - dwarf_segment.inner.fileoff; + debug_info_sect.addr = dwarf_segment.vmaddr + new_offset - dwarf_segment.fileoff; } debug_info_sect.size = needed_size; - d_sym.load_commands_dirty = true; // TODO look into making only the one section dirty d_sym.debug_line_header_dirty = true; } const file_pos = debug_info_sect.offset + atom.off; @@ -1330,8 +1326,7 @@ pub fn updateDeclLineNumber(self: *Dwarf, file: *File, decl: *const Module.Decl) .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = macho_file.d_sym.?; - const dwarf_seg = d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const sect = dwarf_seg.sections.items[d_sym.debug_line_section_index.?]; + const sect = d_sym.sections.items[d_sym.debug_line_section_index.?]; const file_pos = sect.offset + decl.fn_link.macho.off + self.getRelocDbgLineOff(); try d_sym.file.pwriteAll(&data, file_pos); }, @@ -1557,14 +1552,14 @@ pub 
fn writeDbgAbbrev(self: *Dwarf, file: *File) !void { .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_segment = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_abbrev_sect = &dwarf_segment.sections.items[d_sym.debug_abbrev_section_index.?]; + const dwarf_segment = d_sym.segments.items[d_sym.dwarf_segment_cmd_index.?]; + const debug_abbrev_sect = &d_sym.sections.items[d_sym.debug_abbrev_section_index.?]; const allocated_size = d_sym.allocatedSize(debug_abbrev_sect.offset); if (needed_size > allocated_size) { debug_abbrev_sect.size = 0; // free the space const offset = d_sym.findFreeSpace(needed_size, 1); debug_abbrev_sect.offset = @intCast(u32, offset); - debug_abbrev_sect.addr = dwarf_segment.inner.vmaddr + offset - dwarf_segment.inner.fileoff; + debug_abbrev_sect.addr = dwarf_segment.vmaddr + offset - dwarf_segment.fileoff; } debug_abbrev_sect.size = needed_size; log.debug("__debug_abbrev start=0x{x} end=0x{x}", .{ @@ -1681,8 +1676,7 @@ pub fn writeDbgInfoHeader(self: *Dwarf, file: *File, module: *Module, low_pc: u6 .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_seg = d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_info_sect = dwarf_seg.sections.items[d_sym.debug_info_section_index.?]; + const debug_info_sect = d_sym.sections.items[d_sym.debug_info_section_index.?]; const file_pos = debug_info_sect.offset; try pwriteDbgInfoNops(d_sym.file, file_pos, 0, di_buf.items, jmp_amt, false); }, @@ -1998,13 +1992,13 @@ pub fn writeDbgAranges(self: *Dwarf, file: *File, addr: u64, size: u64) !void { .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_seg = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_aranges_sect = &dwarf_seg.sections.items[d_sym.debug_aranges_section_index.?]; + const dwarf_seg = d_sym.segments.items[d_sym.dwarf_segment_cmd_index.?]; + const debug_aranges_sect = &d_sym.sections.items[d_sym.debug_aranges_section_index.?]; const allocated_size = d_sym.allocatedSize(debug_aranges_sect.offset); if (needed_size > allocated_size) { debug_aranges_sect.size = 0; // free the space const new_offset = d_sym.findFreeSpace(needed_size, 16); - debug_aranges_sect.addr = dwarf_seg.inner.vmaddr + new_offset - dwarf_seg.inner.fileoff; + debug_aranges_sect.addr = dwarf_seg.vmaddr + new_offset - dwarf_seg.fileoff; debug_aranges_sect.offset = @intCast(u32, new_offset); } debug_aranges_sect.size = needed_size; @@ -2134,8 +2128,7 @@ pub fn writeDbgLineHeader(self: *Dwarf, file: *File, module: *Module) !void { .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_seg = d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_line_sect = dwarf_seg.sections.items[d_sym.debug_line_section_index.?]; + const debug_line_sect = d_sym.sections.items[d_sym.debug_line_section_index.?]; const file_pos = debug_line_sect.offset; try pwriteDbgLineNops(d_sym.file, file_pos, 0, di_buf.items, jmp_amt); }, @@ -2264,8 +2257,7 @@ pub fn flushModule(self: *Dwarf, file: *File, module: *Module) !void { .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_segment = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_info_sect = &dwarf_segment.sections.items[d_sym.debug_info_section_index.?]; + const 
debug_info_sect = &d_sym.sections.items[d_sym.debug_info_section_index.?]; break :blk debug_info_sect.offset; }, // for wasm, the offset is always 0 as we write to memory first diff --git a/src/link/MachO.zig b/src/link/MachO.zig index a247b3e6c5..b912130957 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -17,6 +17,7 @@ const aarch64 = @import("../arch/aarch64/bits.zig"); const bind = @import("MachO/bind.zig"); const codegen = @import("../codegen.zig"); const dead_strip = @import("MachO/dead_strip.zig"); +const fat = @import("MachO/fat.zig"); const link = @import("../link.zig"); const llvm_backend = @import("../codegen/llvm.zig"); const target_util = @import("../target.zig"); @@ -60,6 +61,29 @@ const SystemLib = struct { weak: bool = false, }; +const Section = struct { + header: macho.section_64, + segment_index: u8, + last_atom: ?*Atom = null, // TODO temporary hack; we really should shrink section to 0 + + /// A list of atoms that have surplus capacity. This list can have false + /// positives, as functions grow and shrink over time, only sometimes being added + /// or removed from the freelist. + /// + /// An atom has surplus capacity when its overcapacity value is greater than + /// padToIdeal(minimum_atom_size). That is, when it has so + /// much extra capacity, that we could fit a small new symbol in it, itself with + /// ideal_capacity or more. + /// + /// Ideal capacity is defined by size + (size / ideal_factor). + /// + /// Overcapacity is measured by actual_capacity - ideal_capacity. Note that + /// overcapacity can be negative. A simple way to have negative overcapacity is to + /// allocate a fresh atom, which will have ideal capacity, and then grow it + /// by 1 byte. It will then have -1 overcapacity. + free_list: std.ArrayListUnmanaged(*Atom) = .{}, +}; + base: File, /// If this is not null, an object file is created by LLVM and linked with LLD afterwards. @@ -77,80 +101,67 @@ page_size: u16, /// fashion (default for LLVM backend). mode: enum { incremental, one_shot }, -/// The absolute address of the entry point. 
-entry_addr: ?u64 = null, - -/// Code signature (if any) -code_signature: ?CodeSignature = null, +uuid: macho.uuid_command = .{ + .cmdsize = @sizeOf(macho.uuid_command), + .uuid = undefined, +}, objects: std.ArrayListUnmanaged(Object) = .{}, archives: std.ArrayListUnmanaged(Archive) = .{}, - dylibs: std.ArrayListUnmanaged(Dylib) = .{}, dylibs_map: std.StringHashMapUnmanaged(u16) = .{}, referenced_dylibs: std.AutoArrayHashMapUnmanaged(u16, void) = .{}, -load_commands: std.ArrayListUnmanaged(macho.LoadCommand) = .{}, +segments: std.ArrayListUnmanaged(macho.segment_command_64) = .{}, +sections: std.MultiArrayList(Section) = .{}, -pagezero_segment_cmd_index: ?u16 = null, -text_segment_cmd_index: ?u16 = null, -data_const_segment_cmd_index: ?u16 = null, -data_segment_cmd_index: ?u16 = null, -linkedit_segment_cmd_index: ?u16 = null, -dyld_info_cmd_index: ?u16 = null, -symtab_cmd_index: ?u16 = null, -dysymtab_cmd_index: ?u16 = null, -dylinker_cmd_index: ?u16 = null, -data_in_code_cmd_index: ?u16 = null, -function_starts_cmd_index: ?u16 = null, -main_cmd_index: ?u16 = null, -dylib_id_cmd_index: ?u16 = null, -source_version_cmd_index: ?u16 = null, -build_version_cmd_index: ?u16 = null, -uuid_cmd_index: ?u16 = null, -code_signature_cmd_index: ?u16 = null, +pagezero_segment_cmd_index: ?u8 = null, +text_segment_cmd_index: ?u8 = null, +data_const_segment_cmd_index: ?u8 = null, +data_segment_cmd_index: ?u8 = null, +linkedit_segment_cmd_index: ?u8 = null, // __TEXT segment sections -text_section_index: ?u16 = null, -stubs_section_index: ?u16 = null, -stub_helper_section_index: ?u16 = null, -text_const_section_index: ?u16 = null, -cstring_section_index: ?u16 = null, -ustring_section_index: ?u16 = null, -gcc_except_tab_section_index: ?u16 = null, -unwind_info_section_index: ?u16 = null, -eh_frame_section_index: ?u16 = null, +text_section_index: ?u8 = null, +stubs_section_index: ?u8 = null, +stub_helper_section_index: ?u8 = null, +text_const_section_index: ?u8 = null, +cstring_section_index: ?u8 = null, +ustring_section_index: ?u8 = null, +gcc_except_tab_section_index: ?u8 = null, +unwind_info_section_index: ?u8 = null, +eh_frame_section_index: ?u8 = null, -objc_methlist_section_index: ?u16 = null, -objc_methname_section_index: ?u16 = null, -objc_methtype_section_index: ?u16 = null, -objc_classname_section_index: ?u16 = null, +objc_methlist_section_index: ?u8 = null, +objc_methname_section_index: ?u8 = null, +objc_methtype_section_index: ?u8 = null, +objc_classname_section_index: ?u8 = null, // __DATA_CONST segment sections -got_section_index: ?u16 = null, -mod_init_func_section_index: ?u16 = null, -mod_term_func_section_index: ?u16 = null, -data_const_section_index: ?u16 = null, +got_section_index: ?u8 = null, +mod_init_func_section_index: ?u8 = null, +mod_term_func_section_index: ?u8 = null, +data_const_section_index: ?u8 = null, -objc_cfstring_section_index: ?u16 = null, -objc_classlist_section_index: ?u16 = null, -objc_imageinfo_section_index: ?u16 = null, +objc_cfstring_section_index: ?u8 = null, +objc_classlist_section_index: ?u8 = null, +objc_imageinfo_section_index: ?u8 = null, // __DATA segment sections -tlv_section_index: ?u16 = null, -tlv_data_section_index: ?u16 = null, -tlv_bss_section_index: ?u16 = null, -tlv_ptrs_section_index: ?u16 = null, -la_symbol_ptr_section_index: ?u16 = null, -data_section_index: ?u16 = null, -bss_section_index: ?u16 = null, +tlv_section_index: ?u8 = null, +tlv_data_section_index: ?u8 = null, +tlv_bss_section_index: ?u8 = null, +tlv_ptrs_section_index: ?u8 = null, 
+la_symbol_ptr_section_index: ?u8 = null, +data_section_index: ?u8 = null, +bss_section_index: ?u8 = null, -objc_const_section_index: ?u16 = null, -objc_selrefs_section_index: ?u16 = null, -objc_classrefs_section_index: ?u16 = null, -objc_data_section_index: ?u16 = null, +objc_const_section_index: ?u8 = null, +objc_selrefs_section_index: ?u8 = null, +objc_classrefs_section_index: ?u8 = null, +objc_data_section_index: ?u8 = null, -rustc_section_index: ?u16 = null, +rustc_section_index: ?u8 = null, rustc_section_size: u64 = 0, locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, @@ -188,37 +199,12 @@ stubs_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, error_flags: File.ErrorFlags = File.ErrorFlags{}, -load_commands_dirty: bool = false, -sections_order_dirty: bool = false, - /// A helper var to indicate if we are at the start of the incremental updates, or /// already somewhere further along the update-and-run chain. /// TODO once we add opening a prelinked output binary from file, this will become /// obsolete as we will carry on where we left off. cold_start: bool = true, -section_ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}, - -/// A list of atoms that have surplus capacity. This list can have false -/// positives, as functions grow and shrink over time, only sometimes being added -/// or removed from the freelist. -/// -/// An atom has surplus capacity when its overcapacity value is greater than -/// padToIdeal(minimum_atom_size). That is, when it has so -/// much extra capacity, that we could fit a small new symbol in it, itself with -/// ideal_capacity or more. -/// -/// Ideal capacity is defined by size + (size / ideal_factor). -/// -/// Overcapacity is measured by actual_capacity - ideal_capacity. Note that -/// overcapacity can be negative. A simple way to have negative overcapacity is to -/// allocate a fresh atom, which will have ideal capacity, and then grow it -/// by 1 byte. It will then have -1 overcapacity. -atom_free_lists: std.AutoHashMapUnmanaged(MatchingSection, std.ArrayListUnmanaged(*Atom)) = .{}, - -/// Pointer to the last allocated atom -atoms: std.AutoHashMapUnmanaged(MatchingSection, *Atom) = .{}, - /// List of atoms that are either synthetic or map directly to the Zig source program. managed_atoms: std.ArrayListUnmanaged(*Atom) = .{}, @@ -250,7 +236,7 @@ unnamed_const_atoms: UnnamedConstTable = .{}, /// We store them here so that we can properly dispose of any allocated /// memory within the atom in the incremental linker. /// TODO consolidate this. -decls: std.AutoArrayHashMapUnmanaged(Module.Decl.Index, ?MatchingSection) = .{}, +decls: std.AutoArrayHashMapUnmanaged(Module.Decl.Index, ?u8) = .{}, const Entry = struct { target: SymbolWithLoc, @@ -408,12 +394,7 @@ pub fn openPath(allocator: Allocator, options: link.Options) !*MachO { pub fn createEmpty(gpa: Allocator, options: link.Options) !*MachO { const cpu_arch = options.target.cpu.arch; - const os_tag = options.target.os.tag; - const abi = options.target.abi; const page_size: u16 = if (cpu_arch == .aarch64) 0x4000 else 0x1000; - // Adhoc code signature is required when targeting aarch64-macos either directly or indirectly via the simulator - // ABI such as aarch64-ios-simulator, etc. 
- const requires_adhoc_codesig = cpu_arch == .aarch64 and (os_tag == .macos or abi == .simulator); const use_llvm = build_options.have_llvm and options.use_llvm; const use_stage1 = build_options.is_stage1 and options.use_stage1; @@ -428,10 +409,6 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*MachO { .file = null, }, .page_size = page_size, - .code_signature = if (requires_adhoc_codesig) - CodeSignature.init(page_size) - else - null, .mode = if (use_stage1 or use_llvm or options.module == null or options.cache_mode == .whole) .one_shot else @@ -562,8 +539,8 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No var dependent_libs = std.fifo.LinearFifo(struct { id: Dylib.Id, parent: u16, - }, .Dynamic).init(self.base.allocator); - defer dependent_libs.deinit(); + }, .Dynamic).init(arena); + try self.parseLibs(libs.keys(), libs.values(), self.base.options.sysroot, &dependent_libs); try self.parseDependentLibs(self.base.options.sysroot, &dependent_libs); } @@ -573,7 +550,6 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try self.createDyldPrivateAtom(); try self.createStubHelperPreambleAtom(); try self.resolveSymbolsInDylibs(); - try self.addCodeSignatureLC(); if (self.unresolved.count() > 0) { return error.UndefinedSymbolReference; @@ -583,66 +559,90 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No if (build_options.enable_logging) { self.logSymtab(); - self.logSectionOrdinals(); self.logAtoms(); } try self.writeAtomsIncremental(); - try self.setEntryPoint(); - try self.updateSectionOrdinals(); - try self.writeLinkeditSegment(); + var lc_buffer = std.ArrayList(u8).init(arena); + const lc_writer = lc_buffer.writer(); + var ncmds: u32 = 0; + + try self.writeLinkeditSegmentData(&ncmds, lc_writer); + try writeDylinkerLC(&ncmds, lc_writer); + + self.writeMainLC(&ncmds, lc_writer) catch |err| switch (err) { + error.MissingMainEntrypoint => { + self.error_flags.no_entry_point_found = true; + }, + else => |e| return e, + }; + + try self.writeDylibIdLC(&ncmds, lc_writer); + try self.writeRpathLCs(&ncmds, lc_writer); + + { + try lc_writer.writeStruct(macho.source_version_command{ + .cmdsize = @sizeOf(macho.source_version_command), + .version = 0x0, + }); + ncmds += 1; + } + + try self.writeBuildVersionLC(&ncmds, lc_writer); + + { + std.crypto.random.bytes(&self.uuid.uuid); + try lc_writer.writeStruct(self.uuid); + ncmds += 1; + } + + try self.writeLoadDylibLCs(&ncmds, lc_writer); + + const target = self.base.options.target; + const requires_codesig = blk: { + if (self.base.options.entitlements) |_| break :blk true; + if (target.cpu.arch == .aarch64 and (target.os.tag == .macos or target.abi == .simulator)) + break :blk true; + break :blk false; + }; + var codesig_offset: ?u32 = null; + var codesig: ?CodeSignature = if (requires_codesig) blk: { + // Preallocate space for the code signature. + // We need to do this at this stage so that we have the load commands with proper values + // written out to the file. + // The most important here is to have the correct vm and filesize of the __LINKEDIT segment + // where the code signature goes into. 
+ var codesig = CodeSignature.init(self.page_size); + codesig.code_directory.ident = self.base.options.emit.?.sub_path; + if (self.base.options.entitlements) |path| { + try codesig.addEntitlements(arena, path); + } + codesig_offset = try self.writeCodeSignaturePadding(&codesig, &ncmds, lc_writer); + break :blk codesig; + } else null; + + var headers_buf = std.ArrayList(u8).init(arena); + try self.writeSegmentHeaders(0, self.segments.items.len, &ncmds, headers_buf.writer()); + + try self.base.file.?.pwriteAll(headers_buf.items, @sizeOf(macho.mach_header_64)); + try self.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len); + + try self.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len + headers_buf.items.len)); + + if (codesig) |*csig| { + try self.writeCodeSignature(csig, codesig_offset.?); // code signing always comes last + } if (self.d_sym) |*d_sym| { // Flush debug symbols bundle. try d_sym.flushModule(self.base.allocator, self.base.options); } - // code signature and entitlements - if (self.base.options.entitlements) |path| { - if (self.code_signature) |*csig| { - try csig.addEntitlements(self.base.allocator, path); - csig.code_directory.ident = self.base.options.emit.?.sub_path; - } else { - var csig = CodeSignature.init(self.page_size); - try csig.addEntitlements(self.base.allocator, path); - csig.code_directory.ident = self.base.options.emit.?.sub_path; - self.code_signature = csig; - } - } - - if (self.code_signature) |*csig| { - csig.clear(self.base.allocator); - csig.code_directory.ident = self.base.options.emit.?.sub_path; - // Preallocate space for the code signature. - // We need to do this at this stage so that we have the load commands with proper values - // written out to the file. - // The most important here is to have the correct vm and filesize of the __LINKEDIT segment - // where the code signature goes into. - try self.writeCodeSignaturePadding(csig); - } - - try self.writeLoadCommands(); - try self.writeHeader(); - - if (self.entry_addr == null and self.base.options.output_mode == .Exe) { - log.debug("flushing. no_entry_point_found = true", .{}); - self.error_flags.no_entry_point_found = true; - } else { - log.debug("flushing. no_entry_point_found = false", .{}); - self.error_flags.no_entry_point_found = false; - } - - assert(!self.load_commands_dirty); - - if (self.code_signature) |*csig| { - try self.writeCodeSignature(csig); // code signing always comes last - } - - if (build_options.enable_link_snapshots) { - if (self.base.options.enable_link_snapshots) - try self.snapshotState(); - } + // if (build_options.enable_link_snapshots) { + // if (self.base.options.enable_link_snapshots) + // try self.snapshotState(); + // } if (cache_miss) { // Update the file with the digest. 
If it fails we can continue; it only @@ -708,6 +708,9 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) sub_prog_node.context.refresh(); defer sub_prog_node.end(); + const cpu_arch = self.base.options.target.cpu.arch; + const os_tag = self.base.options.target.os.tag; + const abi = self.base.options.target.abi; const is_lib = self.base.options.output_mode == .Lib; const is_dyn_lib = self.base.options.link_mode == .Dynamic and is_lib; const is_exe_or_dyn_lib = is_dyn_lib or self.base.options.output_mode == .Exe; @@ -990,40 +993,6 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) } } - // rpaths - var rpath_table = std.StringArrayHashMap(void).init(arena); - for (self.base.options.rpath_list) |rpath| { - if (rpath_table.contains(rpath)) continue; - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.rpath_command) + rpath.len + 1, - @sizeOf(u64), - )); - var rpath_cmd = macho.emptyGenericCommandWithData(macho.rpath_command{ - .cmdsize = cmdsize, - .path = @sizeOf(macho.rpath_command), - }); - rpath_cmd.data = try gpa.alloc(u8, cmdsize - rpath_cmd.inner.path); - mem.set(u8, rpath_cmd.data, 0); - mem.copy(u8, rpath_cmd.data, rpath); - try self.load_commands.append(gpa, .{ .rpath = rpath_cmd }); - try rpath_table.putNoClobber(rpath, {}); - self.load_commands_dirty = true; - } - - // code signature and entitlements - if (self.base.options.entitlements) |path| { - if (self.code_signature) |*csig| { - try csig.addEntitlements(gpa, path); - csig.code_directory.ident = self.base.options.emit.?.sub_path; - } else { - var csig = CodeSignature.init(self.page_size); - try csig.addEntitlements(gpa, path); - csig.code_directory.ident = self.base.options.emit.?.sub_path; - self.code_signature = csig; - } - } - if (self.base.options.verbose_link) { var argv = std.ArrayList([]const u8).init(arena); @@ -1048,7 +1017,7 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) try argv.append(syslibroot); } - for (rpath_table.keys()) |rpath| { + for (self.base.options.rpath_list) |rpath| { try argv.append("-rpath"); try argv.append(rpath); } @@ -1157,15 +1126,15 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) var dependent_libs = std.fifo.LinearFifo(struct { id: Dylib.Id, parent: u16, - }, .Dynamic).init(gpa); - defer dependent_libs.deinit(); + }, .Dynamic).init(arena); + try self.parseInputFiles(positionals.items, self.base.options.sysroot, &dependent_libs); try self.parseAndForceLoadStaticArchives(must_link_archives.keys()); try self.parseLibs(libs.keys(), libs.values(), self.base.options.sysroot, &dependent_libs); try self.parseDependentLibs(self.base.options.sysroot, &dependent_libs); - for (self.objects.items) |*object, object_id| { - try self.resolveSymbolsInObject(object, @intCast(u16, object_id)); + for (self.objects.items) |_, object_id| { + try self.resolveSymbolsInObject(@intCast(u16, object_id)); } try self.resolveSymbolsInArchives(); @@ -1175,7 +1144,6 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) try self.resolveSymbolsInDylibs(); try self.createMhExecuteHeaderSymbol(); try self.createDsoHandleSymbol(); - try self.addCodeSignatureLC(); try self.resolveSymbolsAtLoading(); if (self.unresolved.count() > 0) { @@ -1206,41 +1174,79 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) if (build_options.enable_logging) { self.logSymtab(); - self.logSectionOrdinals(); self.logAtoms(); } try 
self.writeAtomsOneShot(); if (self.rustc_section_index) |id| { - const sect = self.getSectionPtr(.{ - .seg = self.data_segment_cmd_index.?, - .sect = id, - }); - sect.size = self.rustc_section_size; + const header = &self.sections.items(.header)[id]; + header.size = self.rustc_section_size; } - try self.setEntryPoint(); - try self.writeLinkeditSegment(); + var lc_buffer = std.ArrayList(u8).init(arena); + const lc_writer = lc_buffer.writer(); + var ncmds: u32 = 0; - if (self.code_signature) |*csig| { - csig.clear(gpa); - csig.code_directory.ident = self.base.options.emit.?.sub_path; + try self.writeLinkeditSegmentData(&ncmds, lc_writer); + try writeDylinkerLC(&ncmds, lc_writer); + try self.writeMainLC(&ncmds, lc_writer); + try self.writeDylibIdLC(&ncmds, lc_writer); + try self.writeRpathLCs(&ncmds, lc_writer); + + { + try lc_writer.writeStruct(macho.source_version_command{ + .cmdsize = @sizeOf(macho.source_version_command), + .version = 0x0, + }); + ncmds += 1; + } + + try self.writeBuildVersionLC(&ncmds, lc_writer); + + { + var uuid_lc = macho.uuid_command{ + .cmdsize = @sizeOf(macho.uuid_command), + .uuid = undefined, + }; + std.crypto.random.bytes(&uuid_lc.uuid); + try lc_writer.writeStruct(uuid_lc); + ncmds += 1; + } + + try self.writeLoadDylibLCs(&ncmds, lc_writer); + + const requires_codesig = blk: { + if (self.base.options.entitlements) |_| break :blk true; + if (cpu_arch == .aarch64 and (os_tag == .macos or abi == .simulator)) break :blk true; + break :blk false; + }; + var codesig_offset: ?u32 = null; + var codesig: ?CodeSignature = if (requires_codesig) blk: { // Preallocate space for the code signature. // We need to do this at this stage so that we have the load commands with proper values // written out to the file. // The most important here is to have the correct vm and filesize of the __LINKEDIT segment // where the code signature goes into. 
- try self.writeCodeSignaturePadding(csig); - } + var codesig = CodeSignature.init(self.page_size); + codesig.code_directory.ident = self.base.options.emit.?.sub_path; + if (self.base.options.entitlements) |path| { + try codesig.addEntitlements(arena, path); + } + codesig_offset = try self.writeCodeSignaturePadding(&codesig, &ncmds, lc_writer); + break :blk codesig; + } else null; - try self.writeLoadCommands(); - try self.writeHeader(); + var headers_buf = std.ArrayList(u8).init(arena); + try self.writeSegmentHeaders(0, self.segments.items.len, &ncmds, headers_buf.writer()); - assert(!self.load_commands_dirty); + try self.base.file.?.pwriteAll(headers_buf.items, @sizeOf(macho.mach_header_64)); + try self.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len); - if (self.code_signature) |*csig| { - try self.writeCodeSignature(csig); // code signing always comes last + try self.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len + headers_buf.items.len)); + + if (codesig) |*csig| { + try self.writeCodeSignature(csig, codesig_offset.?); // code signing always comes last } } @@ -1395,66 +1401,77 @@ fn resolveFramework( } fn parseObject(self: *MachO, path: []const u8) !bool { + const gpa = self.base.allocator; const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { error.FileNotFound => return false, else => |e| return e, }; - errdefer file.close(); - - const name = try self.base.allocator.dupe(u8, path); - errdefer self.base.allocator.free(name); + defer file.close(); + const name = try gpa.dupe(u8, path); + errdefer gpa.free(name); + const cpu_arch = self.base.options.target.cpu.arch; const mtime: u64 = mtime: { const stat = file.stat() catch break :mtime 0; break :mtime @intCast(u64, @divFloor(stat.mtime, 1_000_000_000)); }; + const file_stat = try file.stat(); + const file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; + const contents = try file.readToEndAllocOptions(gpa, file_size, file_size, @alignOf(u64), null); var object = Object{ .name = name, - .file = file, .mtime = mtime, + .contents = contents, }; - object.parse(self.base.allocator, self.base.options.target.cpu.arch) catch |err| switch (err) { + object.parse(gpa, cpu_arch) catch |err| switch (err) { error.EndOfStream, error.NotObject => { - object.deinit(self.base.allocator); + object.deinit(gpa); return false; }, else => |e| return e, }; - try self.objects.append(self.base.allocator, object); + try self.objects.append(gpa, object); return true; } fn parseArchive(self: *MachO, path: []const u8, force_load: bool) !bool { + const gpa = self.base.allocator; const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { error.FileNotFound => return false, else => |e| return e, }; errdefer file.close(); - const name = try self.base.allocator.dupe(u8, path); - errdefer self.base.allocator.free(name); + const name = try gpa.dupe(u8, path); + errdefer gpa.free(name); + const cpu_arch = self.base.options.target.cpu.arch; + const reader = file.reader(); + const fat_offset = try fat.getLibraryOffset(reader, cpu_arch); + try reader.context.seekTo(fat_offset); var archive = Archive{ .name = name, + .fat_offset = fat_offset, .file = file, }; - archive.parse(self.base.allocator, self.base.options.target.cpu.arch) catch |err| switch (err) { + archive.parse(gpa, reader) catch |err| switch (err) { error.EndOfStream, error.NotArchive => { - archive.deinit(self.base.allocator); + archive.deinit(gpa); return false; }, else => |e| return e, }; if (force_load) { - defer 
archive.deinit(self.base.allocator); + defer archive.deinit(gpa); + defer file.close(); // Get all offsets from the ToC - var offsets = std.AutoArrayHashMap(u32, void).init(self.base.allocator); + var offsets = std.AutoArrayHashMap(u32, void).init(gpa); defer offsets.deinit(); for (archive.toc.values()) |offs| { for (offs.items) |off| { @@ -1462,15 +1479,11 @@ fn parseArchive(self: *MachO, path: []const u8, force_load: bool) !bool { } } for (offsets.keys()) |off| { - const object = try self.objects.addOne(self.base.allocator); - object.* = try archive.parseObject( - self.base.allocator, - self.base.options.target.cpu.arch, - off, - ); + const object = try archive.parseObject(gpa, cpu_arch, off); + try self.objects.append(gpa, object); } } else { - try self.archives.append(self.base.allocator, archive); + try self.archives.append(gpa, archive); } return true; @@ -1481,6 +1494,7 @@ const ParseDylibError = error{ EmptyStubFile, MismatchedCpuArchitecture, UnsupportedCpuArchitecture, + EndOfStream, } || fs.File.OpenError || std.os.PReadError || Dylib.Id.ParseError; const DylibCreateOpts = struct { @@ -1497,43 +1511,52 @@ pub fn parseDylib( dependent_libs: anytype, opts: DylibCreateOpts, ) ParseDylibError!bool { + const gpa = self.base.allocator; const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { error.FileNotFound => return false, else => |e| return e, }; - errdefer file.close(); + defer file.close(); - const name = try self.base.allocator.dupe(u8, path); - errdefer self.base.allocator.free(name); + const cpu_arch = self.base.options.target.cpu.arch; + const file_stat = try file.stat(); + var file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; + + const reader = file.reader(); + const fat_offset = try fat.getLibraryOffset(reader, cpu_arch); + try file.seekTo(fat_offset); + file_size -= fat_offset; + + const contents = try file.readToEndAllocOptions(gpa, file_size, file_size, @alignOf(u64), null); + defer gpa.free(contents); const dylib_id = @intCast(u16, self.dylibs.items.len); - var dylib = Dylib{ - .name = name, - .file = file, - .weak = opts.weak, - }; + var dylib = Dylib{ .weak = opts.weak }; - dylib.parse( - self.base.allocator, - self.base.options.target.cpu.arch, + dylib.parseFromBinary( + gpa, + cpu_arch, dylib_id, dependent_libs, + path, + contents, ) catch |err| switch (err) { error.EndOfStream, error.NotDylib => { try file.seekTo(0); - var lib_stub = LibStub.loadFromFile(self.base.allocator, file) catch { - dylib.deinit(self.base.allocator); + var lib_stub = LibStub.loadFromFile(gpa, file) catch { + dylib.deinit(gpa); return false; }; defer lib_stub.deinit(); try dylib.parseFromStub( - self.base.allocator, + gpa, self.base.options.target, lib_stub, dylib_id, dependent_libs, + path, ); }, else => |e| return e, @@ -1547,13 +1570,13 @@ pub fn parseDylib( log.warn(" dylib version: {}", .{dylib.id.?.current_version}); // TODO maybe this should be an error and facilitate auto-cleanup? 
- dylib.deinit(self.base.allocator); + dylib.deinit(gpa); return false; } } - try self.dylibs.append(self.base.allocator, dylib); - try self.dylibs_map.putNoClobber(self.base.allocator, dylib.id.?.name, dylib_id); + try self.dylibs.append(gpa, dylib); + try self.dylibs_map.putNoClobber(gpa, dylib.id.?.name, dylib_id); const should_link_dylib_even_if_unreachable = blk: { if (self.base.options.dead_strip_dylibs and !opts.needed) break :blk false; @@ -1561,8 +1584,7 @@ pub fn parseDylib( }; if (should_link_dylib_even_if_unreachable) { - try self.addLoadDylibLC(dylib_id); - try self.referenced_dylibs.putNoClobber(self.base.allocator, dylib_id, {}); + try self.referenced_dylibs.putNoClobber(gpa, dylib_id, {}); } return true; @@ -1572,10 +1594,8 @@ fn parseInputFiles(self: *MachO, files: []const []const u8, syslibroot: ?[]const for (files) |file_name| { const full_path = full_path: { var buffer: [fs.MAX_PATH_BYTES]u8 = undefined; - const path = try fs.realpath(file_name, &buffer); - break :full_path try self.base.allocator.dupe(u8, path); + break :full_path try fs.realpath(file_name, &buffer); }; - defer self.base.allocator.free(full_path); log.debug("parsing input file path '{s}'", .{full_path}); if (try self.parseObject(full_path)) continue; @@ -1592,10 +1612,8 @@ fn parseAndForceLoadStaticArchives(self: *MachO, files: []const []const u8) !voi for (files) |file_name| { const full_path = full_path: { var buffer: [fs.MAX_PATH_BYTES]u8 = undefined; - const path = try fs.realpath(file_name, &buffer); - break :full_path try self.base.allocator.dupe(u8, path); + break :full_path try fs.realpath(file_name, &buffer); }; - defer self.base.allocator.free(full_path); log.debug("parsing and force loading static archive '{s}'", .{full_path}); if (try self.parseArchive(full_path, true)) continue; @@ -1669,24 +1687,10 @@ fn parseDependentLibs(self: *MachO, syslibroot: ?[]const u8, dependent_libs: any } } -pub const MatchingSection = struct { - seg: u16, - sect: u16, - - pub fn eql(this: MatchingSection, other: struct { - seg: ?u16, - sect: ?u16, - }) bool { - const seg = other.seg orelse return false; - const sect = other.sect orelse return false; - return this.seg == seg and this.sect == sect; - } -}; - -pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSection { +pub fn getOutputSection(self: *MachO, sect: macho.section_64) !?u8 { const segname = sect.segName(); const sectname = sect.sectName(); - const res: ?MatchingSection = blk: { + const res: ?u8 = blk: { switch (sect.type_()) { macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => { if (self.text_const_section_index == null) { @@ -1698,11 +1702,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_const_section_index.?, - }; + break :blk self.text_const_section_index.?; }, macho.S_CSTRING_LITERALS => { if (mem.eql(u8, sectname, "__objc_methname")) { @@ -1717,11 +1717,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_methname_section_index.?, - }; + break :blk self.objc_methname_section_index.?; } else if (mem.eql(u8, sectname, "__objc_methtype")) { if (self.objc_methtype_section_index == null) { self.objc_methtype_section_index = try self.initSection( @@ -1732,11 +1728,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - 
break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_methtype_section_index.?, - }; + break :blk self.objc_methtype_section_index.?; } else if (mem.eql(u8, sectname, "__objc_classname")) { if (self.objc_classname_section_index == null) { self.objc_classname_section_index = try self.initSection( @@ -1747,11 +1739,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_classname_section_index.?, - }; + break :blk self.objc_classname_section_index.?; } if (self.cstring_section_index == null) { @@ -1765,11 +1753,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.cstring_section_index.?, - }; + break :blk self.cstring_section_index.?; }, macho.S_LITERAL_POINTERS => { if (mem.eql(u8, segname, "__DATA") and mem.eql(u8, sectname, "__objc_selrefs")) { @@ -1784,11 +1768,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.objc_selrefs_section_index.?, - }; + break :blk self.objc_selrefs_section_index.?; } else { // TODO investigate break :blk null; @@ -1806,11 +1786,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.mod_init_func_section_index.?, - }; + break :blk self.mod_init_func_section_index.?; }, macho.S_MOD_TERM_FUNC_POINTERS => { if (self.mod_term_func_section_index == null) { @@ -1824,11 +1800,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.mod_term_func_section_index.?, - }; + break :blk self.mod_term_func_section_index.?; }, macho.S_ZEROFILL => { if (self.bss_section_index == null) { @@ -1842,11 +1814,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.bss_section_index.?, - }; + break :blk self.bss_section_index.?; }, macho.S_THREAD_LOCAL_VARIABLES => { if (self.tlv_section_index == null) { @@ -1860,11 +1828,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.tlv_section_index.?, - }; + break :blk self.tlv_section_index.?; }, macho.S_THREAD_LOCAL_VARIABLE_POINTERS => { if (self.tlv_ptrs_section_index == null) { @@ -1878,11 +1842,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.tlv_ptrs_section_index.?, - }; + break :blk self.tlv_ptrs_section_index.?; }, macho.S_THREAD_LOCAL_REGULAR => { if (self.tlv_data_section_index == null) { @@ -1896,11 +1856,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.tlv_data_section_index.?, - }; + break :blk self.tlv_data_section_index.?; }, macho.S_THREAD_LOCAL_ZEROFILL => { if (self.tlv_bss_section_index == null) { @@ -1914,11 +1870,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = 
self.tlv_bss_section_index.?, - }; + break :blk self.tlv_bss_section_index.?; }, macho.S_COALESCED => { if (mem.eql(u8, "__TEXT", segname) and mem.eql(u8, "__eh_frame", sectname)) { @@ -1933,11 +1885,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.eh_frame_section_index.?, - }; + break :blk self.eh_frame_section_index.?; } // TODO audit this: is this the right mapping? @@ -1951,10 +1899,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio ); } - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; + break :blk self.data_const_section_index.?; }, macho.S_REGULAR => { if (sect.isCode()) { @@ -1971,11 +1916,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_section_index.?, - }; + break :blk self.text_section_index.?; } if (sect.isDebug()) { // TODO debug attributes @@ -1998,11 +1939,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.ustring_section_index.?, - }; + break :blk self.ustring_section_index.?; } else if (mem.eql(u8, sectname, "__gcc_except_tab")) { if (self.gcc_except_tab_section_index == null) { self.gcc_except_tab_section_index = try self.initSection( @@ -2013,11 +1950,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.gcc_except_tab_section_index.?, - }; + break :blk self.gcc_except_tab_section_index.?; } else if (mem.eql(u8, sectname, "__objc_methlist")) { if (self.objc_methlist_section_index == null) { self.objc_methlist_section_index = try self.initSection( @@ -2028,11 +1961,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_methlist_section_index.?, - }; + break :blk self.objc_methlist_section_index.?; } else if (mem.eql(u8, sectname, "__rodata") or mem.eql(u8, sectname, "__typelink") or mem.eql(u8, sectname, "__itablink") or @@ -2048,11 +1977,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; + break :blk self.data_const_section_index.?; } else { if (self.text_const_section_index == null) { self.text_const_section_index = try self.initSection( @@ -2063,11 +1988,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_const_section_index.?, - }; + break :blk self.text_const_section_index.?; } } @@ -2081,11 +2002,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; + break :blk self.data_const_section_index.?; } if (mem.eql(u8, segname, "__DATA")) { @@ -2099,11 +2016,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; + break 
:blk self.data_const_section_index.?; } else if (mem.eql(u8, sectname, "__cfstring")) { if (self.objc_cfstring_section_index == null) { self.objc_cfstring_section_index = try self.initSection( @@ -2114,11 +2027,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.objc_cfstring_section_index.?, - }; + break :blk self.objc_cfstring_section_index.?; } else if (mem.eql(u8, sectname, "__objc_classlist")) { if (self.objc_classlist_section_index == null) { self.objc_classlist_section_index = try self.initSection( @@ -2129,11 +2038,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.objc_classlist_section_index.?, - }; + break :blk self.objc_classlist_section_index.?; } else if (mem.eql(u8, sectname, "__objc_imageinfo")) { if (self.objc_imageinfo_section_index == null) { self.objc_imageinfo_section_index = try self.initSection( @@ -2144,11 +2049,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.objc_imageinfo_section_index.?, - }; + break :blk self.objc_imageinfo_section_index.?; } else if (mem.eql(u8, sectname, "__objc_const")) { if (self.objc_const_section_index == null) { self.objc_const_section_index = try self.initSection( @@ -2159,11 +2060,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.objc_const_section_index.?, - }; + break :blk self.objc_const_section_index.?; } else if (mem.eql(u8, sectname, "__objc_classrefs")) { if (self.objc_classrefs_section_index == null) { self.objc_classrefs_section_index = try self.initSection( @@ -2174,11 +2071,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.objc_classrefs_section_index.?, - }; + break :blk self.objc_classrefs_section_index.?; } else if (mem.eql(u8, sectname, "__objc_data")) { if (self.objc_data_section_index == null) { self.objc_data_section_index = try self.initSection( @@ -2189,11 +2082,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.objc_data_section_index.?, - }; + break :blk self.objc_data_section_index.?; } else if (mem.eql(u8, sectname, ".rustc")) { if (self.rustc_section_index == null) { self.rustc_section_index = try self.initSection( @@ -2207,11 +2096,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio // decompress the metadata. 
self.rustc_section_size = sect.size; } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.rustc_section_index.?, - }; + break :blk self.rustc_section_index.?; } else { if (self.data_section_index == null) { self.data_section_index = try self.initSection( @@ -2222,11 +2107,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.data_section_index.?, - }; + break :blk self.data_section_index.?; } } @@ -2259,30 +2140,33 @@ pub fn createEmptyAtom(gpa: Allocator, sym_index: u32, size: u64, alignment: u32 return atom; } -pub fn writeAtom(self: *MachO, atom: *Atom, match: MatchingSection) !void { - const sect = self.getSection(match); +pub fn writeAtom(self: *MachO, atom: *Atom, sect_id: u8) !void { + const section = self.sections.get(sect_id); const sym = atom.getSymbol(self); - const file_offset = sect.offset + sym.n_value - sect.addr; + const file_offset = section.header.offset + sym.n_value - section.header.addr; try atom.resolveRelocs(self); log.debug("writing atom for symbol {s} at file offset 0x{x}", .{ atom.getName(self), file_offset }); try self.base.file.?.pwriteAll(atom.code.items, file_offset); } fn allocateSymbols(self: *MachO) !void { - var it = self.atoms.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var atom = entry.value_ptr.*; + const slice = self.sections.slice(); + for (slice.items(.last_atom)) |last_atom, sect_id| { + const header = slice.items(.header)[sect_id]; + var atom = last_atom orelse continue; while (atom.prev) |prev| { atom = prev; } - const n_sect = self.getSectionOrdinal(match); - const sect = self.getSection(match); - var base_vaddr = sect.addr; + const n_sect = @intCast(u8, sect_id + 1); + var base_vaddr = header.addr; - log.debug("allocating local symbols in sect({d}, '{s},{s}')", .{ n_sect, sect.segName(), sect.sectName() }); + log.debug("allocating local symbols in sect({d}, '{s},{s}')", .{ + n_sect, + header.segName(), + header.sectName(), + }); while (true) { const alignment = try math.powi(u32, 2, atom.alignment); @@ -2296,7 +2180,10 @@ fn allocateSymbols(self: *MachO) !void { // Update each symbol contained within the atom for (atom.contained.items) |sym_at_off| { - const contained_sym = self.getSymbolPtr(.{ .sym_index = sym_at_off.sym_index, .file = atom.file }); + const contained_sym = self.getSymbolPtr(.{ + .sym_index = sym_at_off.sym_index, + .file = atom.file, + }); contained_sym.n_value = base_vaddr + sym_at_off.offset; contained_sym.n_sect = n_sect; } @@ -2310,15 +2197,18 @@ fn allocateSymbols(self: *MachO) !void { } } -fn shiftLocalsByOffset(self: *MachO, match: MatchingSection, offset: i64) !void { - var atom = self.atoms.get(match) orelse return; +fn shiftLocalsByOffset(self: *MachO, sect_id: u8, offset: i64) !void { + var atom = self.sections.items(.last_atom)[sect_id] orelse return; while (true) { const atom_sym = atom.getSymbolPtr(self); atom_sym.n_value = @intCast(u64, @intCast(i64, atom_sym.n_value) + offset); for (atom.contained.items) |sym_at_off| { - const contained_sym = self.getSymbolPtr(.{ .sym_index = sym_at_off.sym_index, .file = atom.file }); + const contained_sym = self.getSymbolPtr(.{ + .sym_index = sym_at_off.sym_index, + .file = atom.file, + }); contained_sym.n_value = @intCast(u64, @intCast(i64, contained_sym.n_value) + offset); } @@ -2336,16 +2226,13 @@ fn allocateSpecialSymbols(self: *MachO) !void { const global = self.globals.get(name) orelse continue; if 
(global.file != null) continue; const sym = self.getSymbolPtr(global); - const seg = self.load_commands.items[self.text_segment_cmd_index.?].segment; - sym.n_sect = self.getSectionOrdinal(.{ - .seg = self.text_segment_cmd_index.?, - .sect = 0, - }); - sym.n_value = seg.inner.vmaddr; + const seg = self.segments.items[self.text_segment_cmd_index.?]; + sym.n_sect = 1; + sym.n_value = seg.vmaddr; log.debug("allocating {s} at the start of {s}", .{ name, - seg.inner.segName(), + seg.segName(), }); } } @@ -2353,18 +2240,20 @@ fn allocateSpecialSymbols(self: *MachO) !void { fn writeAtomsOneShot(self: *MachO) !void { assert(self.mode == .one_shot); - var it = self.atoms.iterator(); - while (it.next()) |entry| { - const sect = self.getSection(entry.key_ptr.*); - var atom: *Atom = entry.value_ptr.*; + const gpa = self.base.allocator; + const slice = self.sections.slice(); - if (sect.flags == macho.S_ZEROFILL or sect.flags == macho.S_THREAD_LOCAL_ZEROFILL) continue; + for (slice.items(.last_atom)) |last_atom, sect_id| { + const header = slice.items(.header)[sect_id]; + var atom = last_atom.?; - var buffer = std.ArrayList(u8).init(self.base.allocator); + if (header.flags == macho.S_ZEROFILL or header.flags == macho.S_THREAD_LOCAL_ZEROFILL) continue; + + var buffer = std.ArrayList(u8).init(gpa); defer buffer.deinit(); - try buffer.ensureTotalCapacity(math.cast(usize, sect.size) orelse return error.Overflow); + try buffer.ensureTotalCapacity(math.cast(usize, header.size) orelse return error.Overflow); - log.debug("writing atoms in {s},{s}", .{ sect.segName(), sect.sectName() }); + log.debug("writing atoms in {s},{s}", .{ header.segName(), header.sectName() }); while (atom.prev) |prev| { atom = prev; @@ -2399,18 +2288,18 @@ fn writeAtomsOneShot(self: *MachO) !void { if (atom.next) |next| { atom = next; } else { - assert(buffer.items.len == sect.size); - log.debug(" (writing at file offset 0x{x})", .{sect.offset}); - try self.base.file.?.pwriteAll(buffer.items, sect.offset); + assert(buffer.items.len == header.size); + log.debug(" (writing at file offset 0x{x})", .{header.offset}); + try self.base.file.?.pwriteAll(buffer.items, header.offset); break; } } } } -fn writePadding(self: *MachO, match: MatchingSection, size: usize, writer: anytype) !void { - const is_code = match.seg == self.text_segment_cmd_index.? 
and match.sect == self.text_section_index.?; - const min_alignment: u3 = if (!is_code) +fn writePadding(self: *MachO, sect_id: u8, size: usize, writer: anytype) !void { + const header = self.sections.items(.header)[sect_id]; + const min_alignment: u3 = if (!header.isCode()) 1 else switch (self.base.options.target.cpu.arch) { .aarch64 => @sizeOf(u32), @@ -2421,7 +2310,7 @@ fn writePadding(self: *MachO, match: MatchingSection, size: usize, writer: anyty const len = @divExact(size, min_alignment); var i: usize = 0; while (i < len) : (i += 1) { - if (!is_code) { + if (!header.isCode()) { try writer.writeByte(0); } else switch (self.base.options.target.cpu.arch) { .aarch64 => { @@ -2439,20 +2328,20 @@ fn writePadding(self: *MachO, match: MatchingSection, size: usize, writer: anyty fn writeAtomsIncremental(self: *MachO) !void { assert(self.mode == .incremental); - var it = self.atoms.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - const sect = self.getSection(match); - var atom: *Atom = entry.value_ptr.*; + const slice = self.sections.slice(); + for (slice.items(.last_atom)) |last, i| { + var atom: *Atom = last orelse continue; + const sect_i = @intCast(u8, i); + const header = slice.items(.header)[sect_i]; // TODO handle zerofill in stage2 // if (sect.flags == macho.S_ZEROFILL or sect.flags == macho.S_THREAD_LOCAL_ZEROFILL) continue; - log.debug("writing atoms in {s},{s}", .{ sect.segName(), sect.sectName() }); + log.debug("writing atoms in {s},{s}", .{ header.segName(), header.sectName() }); while (true) { if (atom.dirty) { - try self.writeAtom(atom, match); + try self.writeAtom(atom, sect_i); atom.dirty = false; } @@ -2503,10 +2392,7 @@ pub fn createGotAtom(self: *MachO, target: SymbolWithLoc) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - try self.allocateAtomCommon(atom, .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.got_section_index.?, - }); + try self.allocateAtomCommon(atom, self.got_section_index.?); return atom; } @@ -2535,7 +2421,7 @@ pub fn createTlvPtrAtom(self: *MachO, target: SymbolWithLoc) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - const match = (try self.getMatchingSection(.{ + const match = (try self.getOutputSection(.{ .segname = makeStaticString("__DATA"), .sectname = makeStaticString("__thread_ptrs"), .flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS, @@ -2561,10 +2447,7 @@ fn createDyldPrivateAtom(self: *MachO) !void { const atom = try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3); self.dyld_private_atom = atom; - try self.allocateAtomCommon(atom, .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.data_section_index.?, - }); + try self.allocateAtomCommon(atom, self.data_section_index.?); try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); @@ -2692,10 +2575,7 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { } self.stub_helper_preamble_atom = atom; - try self.allocateAtomCommon(atom, .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stub_helper_section_index.?, - }); + try self.allocateAtomCommon(atom, self.stub_helper_section_index.?); try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); @@ -2771,10 +2651,7 @@ pub fn createStubHelperAtom(self: *MachO) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, 
sym_index, atom); - try self.allocateAtomCommon(atom, .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stub_helper_section_index.?, - }); + try self.allocateAtomCommon(atom, self.stub_helper_section_index.?); return atom; } @@ -2814,10 +2691,7 @@ pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, target: SymbolWi try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - try self.allocateAtomCommon(atom, .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.la_symbol_ptr_section_index.?, - }); + try self.allocateAtomCommon(atom, self.la_symbol_ptr_section_index.?); return atom; } @@ -2896,10 +2770,7 @@ pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - try self.allocateAtomCommon(atom, .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stubs_section_index.?, - }); + try self.allocateAtomCommon(atom, self.stubs_section_index.?); return atom; } @@ -2917,12 +2788,6 @@ fn createTentativeDefAtoms(self: *MachO) !void { // Convert any tentative definition into a regular symbol and allocate // text blocks for each tentative definition. - const match = MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.bss_section_index.?, - }; - _ = try self.section_ordinals.getOrPut(gpa, match); - const size = sym.n_value; const alignment = (sym.n_desc >> 8) & 0x0f; @@ -2937,7 +2802,7 @@ fn createTentativeDefAtoms(self: *MachO) !void { const atom = try MachO.createEmptyAtom(gpa, global.sym_index, size, alignment); atom.file = global.file; - try self.allocateAtomCommon(atom, match); + try self.allocateAtomCommon(atom, self.bss_section_index.?); if (global.file) |file| { const object = &self.objects.items[file]; @@ -3060,7 +2925,8 @@ fn resolveGlobalSymbol(self: *MachO, current: SymbolWithLoc) !void { gop.value_ptr.* = current; } -fn resolveSymbolsInObject(self: *MachO, object: *Object, object_id: u16) !void { +fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void { + const object = &self.objects.items[object_id]; log.debug("resolving symbols in '{s}'", .{object.name}); for (object.symtab.items) |sym, index| { @@ -3115,6 +2981,8 @@ fn resolveSymbolsInObject(self: *MachO, object: *Object, object_id: u16) !void { fn resolveSymbolsInArchives(self: *MachO) !void { if (self.archives.items.len == 0) return; + const gpa = self.base.allocator; + const cpu_arch = self.base.options.target.cpu.arch; var next_sym: usize = 0; loop: while (next_sym < self.unresolved.count()) { const global = self.globals.values()[self.unresolved.keys()[next_sym]]; @@ -3129,13 +2997,9 @@ fn resolveSymbolsInArchives(self: *MachO) !void { assert(offsets.items.len > 0); const object_id = @intCast(u16, self.objects.items.len); - const object = try self.objects.addOne(self.base.allocator); - object.* = try archive.parseObject( - self.base.allocator, - self.base.options.target.cpu.arch, - offsets.items[0], - ); - try self.resolveSymbolsInObject(object, object_id); + const object = try archive.parseObject(gpa, cpu_arch, offsets.items[0]); + try self.objects.append(gpa, object); + try self.resolveSymbolsInObject(object_id); continue :loop; } @@ -3159,7 +3023,6 @@ fn resolveSymbolsInDylibs(self: *MachO) !void { const dylib_id = @intCast(u16, id); if (!self.referenced_dylibs.contains(dylib_id)) { - try self.addLoadDylibLC(dylib_id); try self.referenced_dylibs.putNoClobber(self.base.allocator, dylib_id, {}); } @@ -3257,7 
+3120,6 @@ fn resolveDyldStubBinder(self: *MachO) !void { const dylib_id = @intCast(u16, id); if (!self.referenced_dylibs.contains(dylib_id)) { - try self.addLoadDylibLC(dylib_id); try self.referenced_dylibs.putNoClobber(self.base.allocator, dylib_id, {}); } @@ -3280,47 +3142,192 @@ fn resolveDyldStubBinder(self: *MachO) !void { self.got_entries.items[got_index].sym_index = got_atom.sym_index; } -fn addLoadDylibLC(self: *MachO, id: u16) !void { - const dylib = self.dylibs.items[id]; - const dylib_id = dylib.id orelse unreachable; - var dylib_cmd = try macho.createLoadDylibCommand( - self.base.allocator, - if (dylib.weak) .LOAD_WEAK_DYLIB else .LOAD_DYLIB, - dylib_id.name, - dylib_id.timestamp, - dylib_id.current_version, - dylib_id.compatibility_version, - ); - errdefer dylib_cmd.deinit(self.base.allocator); - try self.load_commands.append(self.base.allocator, .{ .dylib = dylib_cmd }); - self.load_commands_dirty = true; -} - -fn addCodeSignatureLC(self: *MachO) !void { - if (self.code_signature_cmd_index != null or self.code_signature == null) return; - self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .linkedit_data = .{ - .cmd = .CODE_SIGNATURE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, - }, +fn writeDylinkerLC(ncmds: *u32, lc_writer: anytype) !void { + const name_len = mem.sliceTo(default_dyld_path, 0).len; + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.dylinker_command) + name_len, + @sizeOf(u64), + )); + try lc_writer.writeStruct(macho.dylinker_command{ + .cmd = .LOAD_DYLINKER, + .cmdsize = cmdsize, + .name = @sizeOf(macho.dylinker_command), }); - self.load_commands_dirty = true; + try lc_writer.writeAll(mem.sliceTo(default_dyld_path, 0)); + const padding = cmdsize - @sizeOf(macho.dylinker_command) - name_len; + if (padding > 0) { + try lc_writer.writeByteNTimes(0, padding); + } + ncmds.* += 1; } -fn setEntryPoint(self: *MachO) !void { +fn writeMainLC(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { if (self.base.options.output_mode != .Exe) return; - - const seg = self.load_commands.items[self.text_segment_cmd_index.?].segment; + const seg = self.segments.items[self.text_segment_cmd_index.?]; const global = try self.getEntryPoint(); const sym = self.getSymbol(global); - const ec = &self.load_commands.items[self.main_cmd_index.?].main; - ec.entryoff = @intCast(u32, sym.n_value - seg.inner.vmaddr); - ec.stacksize = self.base.options.stack_size_override orelse 0; - self.entry_addr = sym.n_value; - self.load_commands_dirty = true; + try lc_writer.writeStruct(macho.entry_point_command{ + .cmd = .MAIN, + .cmdsize = @sizeOf(macho.entry_point_command), + .entryoff = @intCast(u32, sym.n_value - seg.vmaddr), + .stacksize = self.base.options.stack_size_override orelse 0, + }); + ncmds.* += 1; +} + +const WriteDylibLCCtx = struct { + cmd: macho.LC, + name: []const u8, + timestamp: u32 = 2, + current_version: u32 = 0x10000, + compatibility_version: u32 = 0x10000, +}; + +fn writeDylibLC(ctx: WriteDylibLCCtx, ncmds: *u32, lc_writer: anytype) !void { + const name_len = ctx.name.len + 1; + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.dylib_command) + name_len, + @sizeOf(u64), + )); + try lc_writer.writeStruct(macho.dylib_command{ + .cmd = ctx.cmd, + .cmdsize = cmdsize, + .dylib = .{ + .name = @sizeOf(macho.dylib_command), + .timestamp = ctx.timestamp, + .current_version = ctx.current_version, + 
.compatibility_version = ctx.compatibility_version, + }, + }); + try lc_writer.writeAll(ctx.name); + try lc_writer.writeByte(0); + const padding = cmdsize - @sizeOf(macho.dylib_command) - name_len; + if (padding > 0) { + try lc_writer.writeByteNTimes(0, padding); + } + ncmds.* += 1; +} + +fn writeDylibIdLC(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { + if (self.base.options.output_mode != .Lib) return; + const install_name = self.base.options.install_name orelse self.base.options.emit.?.sub_path; + const curr = self.base.options.version orelse std.builtin.Version{ + .major = 1, + .minor = 0, + .patch = 0, + }; + const compat = self.base.options.compatibility_version orelse std.builtin.Version{ + .major = 1, + .minor = 0, + .patch = 0, + }; + try writeDylibLC(.{ + .cmd = .ID_DYLIB, + .name = install_name, + .current_version = curr.major << 16 | curr.minor << 8 | curr.patch, + .compatibility_version = compat.major << 16 | compat.minor << 8 | compat.patch, + }, ncmds, lc_writer); +} + +const RpathIterator = struct { + buffer: []const []const u8, + table: std.StringHashMap(void), + count: usize = 0, + + fn init(gpa: Allocator, rpaths: []const []const u8) RpathIterator { + return .{ .buffer = rpaths, .table = std.StringHashMap(void).init(gpa) }; + } + + fn deinit(it: *RpathIterator) void { + it.table.deinit(); + } + + fn next(it: *RpathIterator) !?[]const u8 { + while (true) { + if (it.count >= it.buffer.len) return null; + const rpath = it.buffer[it.count]; + it.count += 1; + const gop = try it.table.getOrPut(rpath); + if (gop.found_existing) continue; + return rpath; + } + } +}; + +fn writeRpathLCs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { + const gpa = self.base.allocator; + + var it = RpathIterator.init(gpa, self.base.options.rpath_list); + defer it.deinit(); + + while (try it.next()) |rpath| { + const rpath_len = rpath.len + 1; + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.rpath_command) + rpath_len, + @sizeOf(u64), + )); + try lc_writer.writeStruct(macho.rpath_command{ + .cmdsize = cmdsize, + .path = @sizeOf(macho.rpath_command), + }); + try lc_writer.writeAll(rpath); + try lc_writer.writeByte(0); + const padding = cmdsize - @sizeOf(macho.rpath_command) - rpath_len; + if (padding > 0) { + try lc_writer.writeByteNTimes(0, padding); + } + ncmds.* += 1; + } +} + +fn writeBuildVersionLC(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { + const cmdsize = @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); + const platform_version = blk: { + const ver = self.base.options.target.os.version_range.semver.min; + const platform_version = ver.major << 16 | ver.minor << 8; + break :blk platform_version; + }; + const sdk_version = if (self.base.options.native_darwin_sdk) |sdk| blk: { + const ver = sdk.version; + const sdk_version = ver.major << 16 | ver.minor << 8; + break :blk sdk_version; + } else platform_version; + const is_simulator_abi = self.base.options.target.abi == .simulator; + try lc_writer.writeStruct(macho.build_version_command{ + .cmdsize = cmdsize, + .platform = switch (self.base.options.target.os.tag) { + .macos => .MACOS, + .ios => if (is_simulator_abi) macho.PLATFORM.IOSSIMULATOR else macho.PLATFORM.IOS, + .watchos => if (is_simulator_abi) macho.PLATFORM.WATCHOSSIMULATOR else macho.PLATFORM.WATCHOS, + .tvos => if (is_simulator_abi) macho.PLATFORM.TVOSSIMULATOR else macho.PLATFORM.TVOS, + else => unreachable, + }, + .minos = platform_version, + .sdk = sdk_version, + .ntools = 1, + }); + try 
lc_writer.writeAll(mem.asBytes(&macho.build_tool_version{ + .tool = .LD, + .version = 0x0, + })); + ncmds.* += 1; +} + +fn writeLoadDylibLCs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { + for (self.referenced_dylibs.keys()) |id| { + const dylib = self.dylibs.items[id]; + const dylib_id = dylib.id orelse unreachable; + try writeDylibLC(.{ + .cmd = if (dylib.weak) .LOAD_WEAK_DYLIB else .LOAD_DYLIB, + .name = dylib_id.name, + .timestamp = dylib_id.timestamp, + .current_version = dylib_id.current_version, + .compatibility_version = dylib_id.compatibility_version, + }, ncmds, lc_writer); + } } pub fn deinit(self: *MachO) void { @@ -3334,7 +3341,6 @@ pub fn deinit(self: *MachO) void { d_sym.deinit(gpa); } - self.section_ordinals.deinit(gpa); self.tlv_ptr_entries.deinit(gpa); self.tlv_ptr_entries_free_list.deinit(gpa); self.tlv_ptr_entries_table.deinit(gpa); @@ -3371,24 +3377,19 @@ pub fn deinit(self: *MachO) void { self.dylibs_map.deinit(gpa); self.referenced_dylibs.deinit(gpa); - for (self.load_commands.items) |*lc| { - lc.deinit(gpa); + self.segments.deinit(gpa); + + for (self.sections.items(.free_list)) |*list| { + list.deinit(gpa); } - self.load_commands.deinit(gpa); + self.sections.deinit(gpa); for (self.managed_atoms.items) |atom| { atom.deinit(gpa); gpa.destroy(atom); } self.managed_atoms.deinit(gpa); - self.atoms.deinit(gpa); - { - var it = self.atom_free_lists.valueIterator(); - while (it.next()) |free_list| { - free_list.deinit(gpa); - } - self.atom_free_lists.deinit(gpa); - } + if (self.base.options.module) |mod| { for (self.decls.keys()) |decl_index| { const decl = mod.declPtr(decl_index); @@ -3408,34 +3409,24 @@ pub fn deinit(self: *MachO) void { } self.atom_by_index_table.deinit(gpa); - - if (self.code_signature) |*csig| { - csig.deinit(gpa); - } } pub fn closeFiles(self: MachO) void { - for (self.objects.items) |object| { - object.file.close(); - } for (self.archives.items) |archive| { archive.file.close(); } - for (self.dylibs.items) |dylib| { - dylib.file.close(); - } if (self.d_sym) |ds| { ds.file.close(); } } -fn freeAtom(self: *MachO, atom: *Atom, match: MatchingSection, owns_atom: bool) void { +fn freeAtom(self: *MachO, atom: *Atom, sect_id: u8, owns_atom: bool) void { log.debug("freeAtom {*}", .{atom}); if (!owns_atom) { atom.deinit(self.base.allocator); } - const free_list = self.atom_free_lists.getPtr(match).?; + const free_list = &self.sections.items(.free_list)[sect_id]; var already_have_free_list_node = false; { var i: usize = 0; @@ -3452,13 +3443,14 @@ fn freeAtom(self: *MachO, atom: *Atom, match: MatchingSection, owns_atom: bool) } } - if (self.atoms.getPtr(match)) |last_atom| { - if (last_atom.* == atom) { + const maybe_last_atom = &self.sections.items(.last_atom)[sect_id]; + if (maybe_last_atom.*) |last_atom| { + if (last_atom == atom) { if (atom.prev) |prev| { // TODO shrink the section size here - last_atom.* = prev; + maybe_last_atom.* = prev; } else { - _ = self.atoms.fetchRemove(match); + maybe_last_atom.* = null; } } } @@ -3486,21 +3478,21 @@ fn freeAtom(self: *MachO, atom: *Atom, match: MatchingSection, owns_atom: bool) } } -fn shrinkAtom(self: *MachO, atom: *Atom, new_block_size: u64, match: MatchingSection) void { +fn shrinkAtom(self: *MachO, atom: *Atom, new_block_size: u64, sect_id: u8) void { _ = self; _ = atom; _ = new_block_size; - _ = match; + _ = sect_id; // TODO check the new capacity, and if it crosses the size threshold into a big enough // capacity, insert a free list node for it. 
} -fn growAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, match: MatchingSection) !u64 { +fn growAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, sect_id: u8) !u64 { const sym = atom.getSymbol(self); const align_ok = mem.alignBackwardGeneric(u64, sym.n_value, alignment) == sym.n_value; const need_realloc = !align_ok or new_atom_size > atom.capacity(self); if (!need_realloc) return sym.n_value; - return self.allocateAtom(atom, new_atom_size, alignment, match); + return self.allocateAtom(atom, new_atom_size, alignment, sect_id); } fn allocateSymbol(self: *MachO) !u32 { @@ -3671,10 +3663,11 @@ pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liv } pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Module.Decl.Index) !u32 { - var code_buffer = std.ArrayList(u8).init(self.base.allocator); + const gpa = self.base.allocator; + + var code_buffer = std.ArrayList(u8).init(gpa); defer code_buffer.deinit(); - const gpa = self.base.allocator; const module = self.base.options.module.?; const gop = try self.unnamed_const_atoms.getOrPut(gpa, decl_index); if (!gop.found_existing) { @@ -3725,25 +3718,25 @@ pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Modu atom.code.clearRetainingCapacity(); try atom.code.appendSlice(gpa, code); - const match = try self.getMatchingSectionAtom( + const sect_id = try self.getOutputSectionAtom( atom, decl_name, typed_value.ty, typed_value.val, required_alignment, ); - const addr = try self.allocateAtom(atom, code.len, required_alignment, match); + const addr = try self.allocateAtom(atom, code.len, required_alignment, sect_id); log.debug("allocated atom for {?s} at 0x{x}", .{ name, addr }); log.debug(" (required alignment 0x{x})", .{required_alignment}); - errdefer self.freeAtom(atom, match, true); + errdefer self.freeAtom(atom, sect_id, true); const symbol = atom.getSymbolPtr(self); symbol.* = .{ .n_strx = name_str_index, .n_type = macho.N_SECT, - .n_sect = self.getSectionOrdinal(match), + .n_sect = sect_id + 1, .n_desc = 0, .n_value = addr, }; @@ -3894,44 +3887,35 @@ fn needsPointerRebase(ty: Type, val: Value, mod: *Module) bool { } } -fn getMatchingSectionAtom( +fn getOutputSectionAtom( self: *MachO, atom: *Atom, name: []const u8, ty: Type, val: Value, alignment: u32, -) !MatchingSection { +) !u8 { const code = atom.code.items; const mod = self.base.options.module.?; const align_log_2 = math.log2(alignment); const zig_ty = ty.zigTypeTag(); const mode = self.base.options.optimize_mode; - const match: MatchingSection = blk: { + const sect_id: u8 = blk: { // TODO finish and audit this function if (val.isUndefDeep()) { if (mode == .ReleaseFast or mode == .ReleaseSmall) { - break :blk MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.bss_section_index.?, - }; + break :blk self.bss_section_index.?; } else { - break :blk MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.data_section_index.?, - }; + break :blk self.data_section_index.?; } } if (val.castTag(.variable)) |_| { - break :blk MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.data_section_index.?, - }; + break :blk self.data_section_index.?; } if (needsPointerRebase(ty, val, mod)) { - break :blk (try self.getMatchingSection(.{ + break :blk (try self.getOutputSection(.{ .segname = makeStaticString("__DATA_CONST"), .sectname = makeStaticString("__const"), .size = code.len, @@ -3941,10 +3925,7 @@ fn getMatchingSectionAtom( switch (zig_ty) { 
.Fn => { - break :blk MatchingSection{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_section_index.?, - }; + break :blk self.text_section_index.?; }, .Array => { if (val.tag() == .bytes) { @@ -3953,7 +3934,7 @@ fn getMatchingSectionAtom( .const_slice_u8_sentinel_0, .manyptr_const_u8_sentinel_0, => { - break :blk (try self.getMatchingSection(.{ + break :blk (try self.getOutputSection(.{ .segname = makeStaticString("__TEXT"), .sectname = makeStaticString("__cstring"), .flags = macho.S_CSTRING_LITERALS, @@ -3967,22 +3948,21 @@ fn getMatchingSectionAtom( }, else => {}, } - break :blk (try self.getMatchingSection(.{ + break :blk (try self.getOutputSection(.{ .segname = makeStaticString("__TEXT"), .sectname = makeStaticString("__const"), .size = code.len, .@"align" = align_log_2, })).?; }; - const sect = self.getSection(match); - log.debug(" allocating atom '{s}' in '{s},{s}' ({d},{d})", .{ + const header = self.sections.items(.header)[sect_id]; + log.debug(" allocating atom '{s}' in '{s},{s}', ord({d})", .{ name, - sect.segName(), - sect.sectName(), - match.seg, - match.sect, + header.segName(), + header.sectName(), + sect_id, }); - return match; + return sect_id; } fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !u64 { @@ -3996,7 +3976,7 @@ fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !u64 const decl_ptr = self.decls.getPtr(decl_index).?; if (decl_ptr.* == null) { - decl_ptr.* = try self.getMatchingSectionAtom( + decl_ptr.* = try self.getOutputSectionAtom( &decl.link.macho, sym_name, decl.ty, @@ -4045,7 +4025,7 @@ fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !u64 symbol.* = .{ .n_strx = name_str_index, .n_type = macho.N_SECT, - .n_sect = self.getSectionOrdinal(match), + .n_sect = match + 1, .n_desc = 0, .n_value = addr, }; @@ -4134,10 +4114,7 @@ pub fn updateDeclExports( sym.* = .{ .n_strx = try self.strtab.insert(gpa, exp_name), .n_type = macho.N_SECT | macho.N_EXT, - .n_sect = self.getSectionOrdinal(.{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_section_index.?, // TODO what if we export a variable? - }), + .n_sect = self.text_section_index.? + 1, // TODO what if we export a variable? 
.n_desc = 0, .n_value = decl_sym.n_value, }; @@ -4208,10 +4185,7 @@ pub fn deleteExport(self: *MachO, exp: Export) void { fn freeUnnamedConsts(self: *MachO, decl_index: Module.Decl.Index) void { const unnamed_consts = self.unnamed_const_atoms.getPtr(decl_index) orelse return; for (unnamed_consts.items) |atom| { - self.freeAtom(atom, .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_const_section_index.?, - }, true); + self.freeAtom(atom, self.text_const_section_index.?, true); self.locals_free_list.append(self.base.allocator, atom.sym_index) catch {}; self.locals.items[atom.sym_index].n_type = 0; _ = self.atom_by_index_table.remove(atom.sym_index); @@ -4294,6 +4268,7 @@ pub fn getDeclVAddr(self: *MachO, decl_index: Module.Decl.Index, reloc_info: Fil } fn populateMissingMetadata(self: *MachO) !void { + const gpa = self.base.allocator; const cpu_arch = self.base.options.target.cpu.arch; const pagezero_vmsize = self.base.options.pagezero_size orelse default_pagezero_vmsize; const aligned_pagezero_vmsize = mem.alignBackwardGeneric(u64, pagezero_vmsize, self.page_size); @@ -4305,21 +4280,16 @@ fn populateMissingMetadata(self: *MachO) !void { log.warn("requested __PAGEZERO size (0x{x}) is not page aligned", .{pagezero_vmsize}); log.warn(" rounding down to 0x{x}", .{aligned_pagezero_vmsize}); } - self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .segment = .{ - .inner = .{ - .segname = makeStaticString("__PAGEZERO"), - .vmsize = aligned_pagezero_vmsize, - .cmdsize = @sizeOf(macho.segment_command_64), - }, - }, + self.pagezero_segment_cmd_index = @intCast(u8, self.segments.items.len); + try self.segments.append(gpa, .{ + .segname = makeStaticString("__PAGEZERO"), + .vmsize = aligned_pagezero_vmsize, + .cmdsize = @sizeOf(macho.segment_command_64), }); - self.load_commands_dirty = true; } if (self.text_segment_cmd_index == null) { - self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + self.text_segment_cmd_index = @intCast(u8, self.segments.items.len); const needed_size = if (self.mode == .incremental) blk: { const headerpad_size = @maximum(self.base.options.headerpad_size orelse 0, default_headerpad_size); const program_code_size_hint = self.base.options.program_code_size_hint; @@ -4329,20 +4299,15 @@ fn populateMissingMetadata(self: *MachO) !void { log.debug("found __TEXT segment free space 0x{x} to 0x{x}", .{ 0, needed_size }); break :blk needed_size; } else 0; - try self.load_commands.append(self.base.allocator, .{ - .segment = .{ - .inner = .{ - .segname = makeStaticString("__TEXT"), - .vmaddr = aligned_pagezero_vmsize, - .vmsize = needed_size, - .filesize = needed_size, - .maxprot = macho.PROT.READ | macho.PROT.EXEC, - .initprot = macho.PROT.READ | macho.PROT.EXEC, - .cmdsize = @sizeOf(macho.segment_command_64), - }, - }, + try self.segments.append(gpa, .{ + .segname = makeStaticString("__TEXT"), + .vmaddr = aligned_pagezero_vmsize, + .vmsize = needed_size, + .filesize = needed_size, + .maxprot = macho.PROT.READ | macho.PROT.EXEC, + .initprot = macho.PROT.READ | macho.PROT.EXEC, + .cmdsize = @sizeOf(macho.segment_command_64), }); - self.load_commands_dirty = true; } if (self.text_section_index == null) { @@ -4419,7 +4384,7 @@ fn populateMissingMetadata(self: *MachO) !void { } if (self.data_const_segment_cmd_index == null) { - self.data_const_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + self.data_const_segment_cmd_index = @intCast(u8, 
self.segments.items.len); var vmaddr: u64 = 0; var fileoff: u64 = 0; var needed_size: u64 = 0; @@ -4434,21 +4399,16 @@ fn populateMissingMetadata(self: *MachO) !void { fileoff + needed_size, }); } - try self.load_commands.append(self.base.allocator, .{ - .segment = .{ - .inner = .{ - .segname = makeStaticString("__DATA_CONST"), - .vmaddr = vmaddr, - .vmsize = needed_size, - .fileoff = fileoff, - .filesize = needed_size, - .maxprot = macho.PROT.READ | macho.PROT.WRITE, - .initprot = macho.PROT.READ | macho.PROT.WRITE, - .cmdsize = @sizeOf(macho.segment_command_64), - }, - }, + try self.segments.append(gpa, .{ + .segname = makeStaticString("__DATA_CONST"), + .vmaddr = vmaddr, + .vmsize = needed_size, + .fileoff = fileoff, + .filesize = needed_size, + .maxprot = macho.PROT.READ | macho.PROT.WRITE, + .initprot = macho.PROT.READ | macho.PROT.WRITE, + .cmdsize = @sizeOf(macho.segment_command_64), }); - self.load_commands_dirty = true; } if (self.got_section_index == null) { @@ -4469,7 +4429,7 @@ fn populateMissingMetadata(self: *MachO) !void { } if (self.data_segment_cmd_index == null) { - self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + self.data_segment_cmd_index = @intCast(u8, self.segments.items.len); var vmaddr: u64 = 0; var fileoff: u64 = 0; var needed_size: u64 = 0; @@ -4484,21 +4444,16 @@ fn populateMissingMetadata(self: *MachO) !void { fileoff + needed_size, }); } - try self.load_commands.append(self.base.allocator, .{ - .segment = .{ - .inner = .{ - .segname = makeStaticString("__DATA"), - .vmaddr = vmaddr, - .vmsize = needed_size, - .fileoff = fileoff, - .filesize = needed_size, - .maxprot = macho.PROT.READ | macho.PROT.WRITE, - .initprot = macho.PROT.READ | macho.PROT.WRITE, - .cmdsize = @sizeOf(macho.segment_command_64), - }, - }, + try self.segments.append(gpa, .{ + .segname = makeStaticString("__DATA"), + .vmaddr = vmaddr, + .vmsize = needed_size, + .fileoff = fileoff, + .filesize = needed_size, + .maxprot = macho.PROT.READ | macho.PROT.WRITE, + .initprot = macho.PROT.READ | macho.PROT.WRITE, + .cmdsize = @sizeOf(macho.segment_command_64), }); - self.load_commands_dirty = true; } if (self.la_symbol_ptr_section_index == null) { @@ -4602,7 +4557,7 @@ fn populateMissingMetadata(self: *MachO) !void { } if (self.linkedit_segment_cmd_index == null) { - self.linkedit_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + self.linkedit_segment_cmd_index = @intCast(u8, self.segments.items.len); var vmaddr: u64 = 0; var fileoff: u64 = 0; if (self.mode == .incremental) { @@ -4611,249 +4566,113 @@ fn populateMissingMetadata(self: *MachO) !void { fileoff = base.fileoff; log.debug("found __LINKEDIT segment free space at 0x{x}", .{fileoff}); } - try self.load_commands.append(self.base.allocator, .{ - .segment = .{ - .inner = .{ - .segname = makeStaticString("__LINKEDIT"), - .vmaddr = vmaddr, - .fileoff = fileoff, - .maxprot = macho.PROT.READ, - .initprot = macho.PROT.READ, - .cmdsize = @sizeOf(macho.segment_command_64), - }, - }, + try self.segments.append(gpa, .{ + .segname = makeStaticString("__LINKEDIT"), + .vmaddr = vmaddr, + .fileoff = fileoff, + .maxprot = macho.PROT.READ, + .initprot = macho.PROT.READ, + .cmdsize = @sizeOf(macho.segment_command_64), }); - self.load_commands_dirty = true; - } - - if (self.dyld_info_cmd_index == null) { - self.dyld_info_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .dyld_info_only = .{ - .cmd = .DYLD_INFO_ONLY, - .cmdsize = 
@sizeOf(macho.dyld_info_command), - .rebase_off = 0, - .rebase_size = 0, - .bind_off = 0, - .bind_size = 0, - .weak_bind_off = 0, - .weak_bind_size = 0, - .lazy_bind_off = 0, - .lazy_bind_size = 0, - .export_off = 0, - .export_size = 0, - }, - }); - self.load_commands_dirty = true; - } - - if (self.symtab_cmd_index == null) { - self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .symtab = .{ - .cmdsize = @sizeOf(macho.symtab_command), - .symoff = 0, - .nsyms = 0, - .stroff = 0, - .strsize = 0, - }, - }); - self.load_commands_dirty = true; - } - - if (self.dysymtab_cmd_index == null) { - self.dysymtab_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .dysymtab = .{ - .cmdsize = @sizeOf(macho.dysymtab_command), - .ilocalsym = 0, - .nlocalsym = 0, - .iextdefsym = 0, - .nextdefsym = 0, - .iundefsym = 0, - .nundefsym = 0, - .tocoff = 0, - .ntoc = 0, - .modtaboff = 0, - .nmodtab = 0, - .extrefsymoff = 0, - .nextrefsyms = 0, - .indirectsymoff = 0, - .nindirectsyms = 0, - .extreloff = 0, - .nextrel = 0, - .locreloff = 0, - .nlocrel = 0, - }, - }); - self.load_commands_dirty = true; - } - - if (self.dylinker_cmd_index == null) { - self.dylinker_cmd_index = @intCast(u16, self.load_commands.items.len); - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.dylinker_command) + mem.sliceTo(default_dyld_path, 0).len, - @sizeOf(u64), - )); - var dylinker_cmd = macho.emptyGenericCommandWithData(macho.dylinker_command{ - .cmd = .LOAD_DYLINKER, - .cmdsize = cmdsize, - .name = @sizeOf(macho.dylinker_command), - }); - dylinker_cmd.data = try self.base.allocator.alloc(u8, cmdsize - dylinker_cmd.inner.name); - mem.set(u8, dylinker_cmd.data, 0); - mem.copy(u8, dylinker_cmd.data, mem.sliceTo(default_dyld_path, 0)); - try self.load_commands.append(self.base.allocator, .{ .dylinker = dylinker_cmd }); - self.load_commands_dirty = true; - } - - if (self.main_cmd_index == null and self.base.options.output_mode == .Exe) { - self.main_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .main = .{ - .cmdsize = @sizeOf(macho.entry_point_command), - .entryoff = 0x0, - .stacksize = 0, - }, - }); - self.load_commands_dirty = true; - } - - if (self.dylib_id_cmd_index == null and self.base.options.output_mode == .Lib) { - self.dylib_id_cmd_index = @intCast(u16, self.load_commands.items.len); - const install_name = self.base.options.install_name orelse self.base.options.emit.?.sub_path; - const current_version = self.base.options.version orelse - std.builtin.Version{ .major = 1, .minor = 0, .patch = 0 }; - const compat_version = self.base.options.compatibility_version orelse - std.builtin.Version{ .major = 1, .minor = 0, .patch = 0 }; - var dylib_cmd = try macho.createLoadDylibCommand( - self.base.allocator, - .ID_DYLIB, - install_name, - 2, - current_version.major << 16 | current_version.minor << 8 | current_version.patch, - compat_version.major << 16 | compat_version.minor << 8 | compat_version.patch, - ); - errdefer dylib_cmd.deinit(self.base.allocator); - try self.load_commands.append(self.base.allocator, .{ .dylib = dylib_cmd }); - self.load_commands_dirty = true; - } - - if (self.source_version_cmd_index == null) { - self.source_version_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .source_version = .{ - .cmdsize = 
@sizeOf(macho.source_version_command), - .version = 0x0, - }, - }); - self.load_commands_dirty = true; - } - - if (self.build_version_cmd_index == null) { - self.build_version_cmd_index = @intCast(u16, self.load_commands.items.len); - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version), - @sizeOf(u64), - )); - const platform_version = blk: { - const ver = self.base.options.target.os.version_range.semver.min; - const platform_version = ver.major << 16 | ver.minor << 8; - break :blk platform_version; - }; - const sdk_version = if (self.base.options.native_darwin_sdk) |sdk| blk: { - const ver = sdk.version; - const sdk_version = ver.major << 16 | ver.minor << 8; - break :blk sdk_version; - } else platform_version; - const is_simulator_abi = self.base.options.target.abi == .simulator; - var cmd = macho.emptyGenericCommandWithData(macho.build_version_command{ - .cmdsize = cmdsize, - .platform = switch (self.base.options.target.os.tag) { - .macos => .MACOS, - .ios => if (is_simulator_abi) macho.PLATFORM.IOSSIMULATOR else macho.PLATFORM.IOS, - .watchos => if (is_simulator_abi) macho.PLATFORM.WATCHOSSIMULATOR else macho.PLATFORM.WATCHOS, - .tvos => if (is_simulator_abi) macho.PLATFORM.TVOSSIMULATOR else macho.PLATFORM.TVOS, - else => unreachable, - }, - .minos = platform_version, - .sdk = sdk_version, - .ntools = 1, - }); - const ld_ver = macho.build_tool_version{ - .tool = .LD, - .version = 0x0, - }; - cmd.data = try self.base.allocator.alloc(u8, cmdsize - @sizeOf(macho.build_version_command)); - mem.set(u8, cmd.data, 0); - mem.copy(u8, cmd.data, mem.asBytes(&ld_ver)); - try self.load_commands.append(self.base.allocator, .{ .build_version = cmd }); - self.load_commands_dirty = true; - } - - if (self.uuid_cmd_index == null) { - self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len); - var uuid_cmd: macho.uuid_command = .{ - .cmdsize = @sizeOf(macho.uuid_command), - .uuid = undefined, - }; - std.crypto.random.bytes(&uuid_cmd.uuid); - try self.load_commands.append(self.base.allocator, .{ .uuid = uuid_cmd }); - self.load_commands_dirty = true; - } - - if (self.function_starts_cmd_index == null) { - self.function_starts_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .linkedit_data = .{ - .cmd = .FUNCTION_STARTS, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, - }, - }); - self.load_commands_dirty = true; - } - - if (self.data_in_code_cmd_index == null) { - self.data_in_code_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .linkedit_data = .{ - .cmd = .DATA_IN_CODE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, - }, - }); - self.load_commands_dirty = true; } } -fn calcMinHeaderpad(self: *MachO) u64 { - var sizeofcmds: u32 = 0; - for (self.load_commands.items) |lc| { - if (lc.cmd() == .NONE) continue; - sizeofcmds += lc.cmdsize(); +inline fn calcInstallNameLen(cmd_size: u64, name: []const u8, assume_max_path_len: bool) u64 { + const name_len = if (assume_max_path_len) std.os.PATH_MAX else std.mem.len(name) + 1; + return mem.alignForwardGeneric(u64, cmd_size + name_len, @alignOf(u64)); +} + +fn calcLCsSize(self: *MachO, assume_max_path_len: bool) !u32 { + const gpa = self.base.allocator; + var sizeofcmds: u64 = 0; + for (self.segments.items) |seg| { + sizeofcmds += seg.nsects * @sizeOf(macho.section_64) + 
@sizeOf(macho.segment_command_64); } - var padding: u32 = sizeofcmds + (self.base.options.headerpad_size orelse 0); + // LC_DYLD_INFO_ONLY + sizeofcmds += @sizeOf(macho.dyld_info_command); + // LC_FUNCTION_STARTS + if (self.text_section_index != null) { + sizeofcmds += @sizeOf(macho.linkedit_data_command); + } + // LC_DATA_IN_CODE + sizeofcmds += @sizeOf(macho.linkedit_data_command); + // LC_SYMTAB + sizeofcmds += @sizeOf(macho.symtab_command); + // LC_DYSYMTAB + sizeofcmds += @sizeOf(macho.dysymtab_command); + // LC_LOAD_DYLINKER + sizeofcmds += calcInstallNameLen( + @sizeOf(macho.dylinker_command), + mem.sliceTo(default_dyld_path, 0), + false, + ); + // LC_MAIN + if (self.base.options.output_mode == .Exe) { + sizeofcmds += @sizeOf(macho.entry_point_command); + } + // LC_ID_DYLIB + if (self.base.options.output_mode == .Lib) { + sizeofcmds += blk: { + const install_name = self.base.options.install_name orelse self.base.options.emit.?.sub_path; + break :blk calcInstallNameLen( + @sizeOf(macho.dylib_command), + install_name, + assume_max_path_len, + ); + }; + } + // LC_RPATH + { + var it = RpathIterator.init(gpa, self.base.options.rpath_list); + defer it.deinit(); + while (try it.next()) |rpath| { + sizeofcmds += calcInstallNameLen( + @sizeOf(macho.rpath_command), + rpath, + assume_max_path_len, + ); + } + } + // LC_SOURCE_VERSION + sizeofcmds += @sizeOf(macho.source_version_command); + // LC_BUILD_VERSION + sizeofcmds += @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); + // LC_UUID + sizeofcmds += @sizeOf(macho.uuid_command); + // LC_LOAD_DYLIB + for (self.referenced_dylibs.keys()) |id| { + const dylib = self.dylibs.items[id]; + const dylib_id = dylib.id orelse unreachable; + sizeofcmds += calcInstallNameLen( + @sizeOf(macho.dylib_command), + dylib_id.name, + assume_max_path_len, + ); + } + // LC_CODE_SIGNATURE + { + const target = self.base.options.target; + const requires_codesig = blk: { + if (self.base.options.entitlements) |_| break :blk true; + if (target.cpu.arch == .aarch64 and (target.os.tag == .macos or target.abi == .simulator)) + break :blk true; + break :blk false; + }; + if (requires_codesig) { + sizeofcmds += @sizeOf(macho.linkedit_data_command); + } + } + + return @intCast(u32, sizeofcmds); +} + +fn calcMinHeaderPad(self: *MachO) !u64 { + var padding: u32 = (try self.calcLCsSize(false)) + (self.base.options.headerpad_size orelse 0); log.debug("minimum requested headerpad size 0x{x}", .{padding + @sizeOf(macho.mach_header_64)}); if (self.base.options.headerpad_max_install_names) { - var min_headerpad_size: u32 = 0; - for (self.load_commands.items) |lc| switch (lc.cmd()) { - .ID_DYLIB, - .LOAD_WEAK_DYLIB, - .LOAD_DYLIB, - .REEXPORT_DYLIB, - => { - min_headerpad_size += @sizeOf(macho.dylib_command) + std.os.PATH_MAX + 1; - }, - - else => {}, - }; + var min_headerpad_size: u32 = try self.calcLCsSize(true); log.debug("headerpad_max_install_names minimum headerpad size 0x{x}", .{ min_headerpad_size + @sizeOf(macho.mach_header_64), }); @@ -4868,32 +4687,31 @@ fn calcMinHeaderpad(self: *MachO) u64 { fn allocateSegments(self: *MachO) !void { try self.allocateSegment(self.text_segment_cmd_index, &.{ self.pagezero_segment_cmd_index, - }, self.calcMinHeaderpad()); + }, try self.calcMinHeaderPad()); if (self.text_segment_cmd_index) |index| blk: { - const seg = &self.load_commands.items[index].segment; - if (seg.sections.items.len == 0) break :blk; + const seg = &self.segments.items[index]; + if (seg.nsects == 0) break :blk; // Shift all sections to the back to 
minimize jump size between __TEXT and __DATA segments. var min_alignment: u32 = 0; - for (seg.sections.items) |sect| { - const alignment = try math.powi(u32, 2, sect.@"align"); + for (self.sections.items(.header)[0..seg.nsects]) |header| { + const alignment = try math.powi(u32, 2, header.@"align"); min_alignment = math.max(min_alignment, alignment); } assert(min_alignment > 0); - const last_sect_idx = seg.sections.items.len - 1; - const last_sect = seg.sections.items[last_sect_idx]; + const last_header = self.sections.items(.header)[seg.nsects - 1]; const shift: u32 = shift: { - const diff = seg.inner.filesize - last_sect.offset - last_sect.size; + const diff = seg.filesize - last_header.offset - last_header.size; const factor = @divTrunc(diff, min_alignment); break :shift @intCast(u32, factor * min_alignment); }; if (shift > 0) { - for (seg.sections.items) |*sect| { - sect.offset += shift; - sect.addr += shift; + for (self.sections.items(.header)[0..seg.nsects]) |*header| { + header.offset += shift; + header.addr += shift; } } } @@ -4917,42 +4735,42 @@ fn allocateSegments(self: *MachO) !void { }, 0); } -fn allocateSegment(self: *MachO, maybe_index: ?u16, indices: []const ?u16, init_size: u64) !void { +fn allocateSegment(self: *MachO, maybe_index: ?u8, indices: []const ?u8, init_size: u64) !void { const index = maybe_index orelse return; - const seg = &self.load_commands.items[index].segment; + const seg = &self.segments.items[index]; const base = self.getSegmentAllocBase(indices); - seg.inner.vmaddr = base.vmaddr; - seg.inner.fileoff = base.fileoff; - seg.inner.filesize = init_size; - seg.inner.vmsize = init_size; + seg.vmaddr = base.vmaddr; + seg.fileoff = base.fileoff; + seg.filesize = init_size; + seg.vmsize = init_size; // Allocate the sections according to their alignment at the beginning of the segment. 
var start = init_size; - for (seg.sections.items) |*sect| { - const is_zerofill = sect.flags == macho.S_ZEROFILL or sect.flags == macho.S_THREAD_LOCAL_ZEROFILL; - const use_llvm = build_options.have_llvm and self.base.options.use_llvm; - const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1; - const alignment = try math.powi(u32, 2, sect.@"align"); + const slice = self.sections.slice(); + for (slice.items(.header)) |*header, sect_id| { + const segment_index = slice.items(.segment_index)[sect_id]; + if (segment_index != index) continue; + const is_zerofill = header.flags == macho.S_ZEROFILL or header.flags == macho.S_THREAD_LOCAL_ZEROFILL; + const alignment = try math.powi(u32, 2, header.@"align"); const start_aligned = mem.alignForwardGeneric(u64, start, alignment); - // TODO handle zerofill sections in stage2 - sect.offset = if (is_zerofill and (use_stage1 or use_llvm)) + header.offset = if (is_zerofill) 0 else - @intCast(u32, seg.inner.fileoff + start_aligned); - sect.addr = seg.inner.vmaddr + start_aligned; + @intCast(u32, seg.fileoff + start_aligned); + header.addr = seg.vmaddr + start_aligned; - start = start_aligned + sect.size; + start = start_aligned + header.size; - if (!(is_zerofill and (use_stage1 or use_llvm))) { - seg.inner.filesize = start; + if (!is_zerofill) { + seg.filesize = start; } - seg.inner.vmsize = start; + seg.vmsize = start; } - seg.inner.filesize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size); - seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.vmsize, self.page_size); + seg.filesize = mem.alignForwardGeneric(u64, seg.filesize, self.page_size); + seg.vmsize = mem.alignForwardGeneric(u64, seg.vmsize, self.page_size); } const InitSectionOpts = struct { @@ -4963,16 +4781,16 @@ const InitSectionOpts = struct { fn initSection( self: *MachO, - segment_id: u16, + segment_id: u8, sectname: []const u8, size: u64, alignment: u32, opts: InitSectionOpts, -) !u16 { - const seg = &self.load_commands.items[segment_id].segment; - var sect = macho.section_64{ +) !u8 { + const seg = &self.segments.items[segment_id]; + var header = macho.section_64{ .sectname = makeStaticString(sectname), - .segname = seg.inner.segname, + .segname = seg.segname, .size = if (self.mode == .incremental) @intCast(u32, size) else 0, .@"align" = alignment, .flags = opts.flags, @@ -4982,165 +4800,157 @@ fn initSection( if (self.mode == .incremental) { const alignment_pow_2 = try math.powi(u32, 2, alignment); - const padding: ?u32 = if (segment_id == self.text_segment_cmd_index.?) - @maximum(self.base.options.headerpad_size orelse 0, default_headerpad_size) + const padding: ?u64 = if (segment_id == self.text_segment_cmd_index.?) 
+ try self.calcMinHeaderPad() else null; const off = self.findFreeSpace(segment_id, alignment_pow_2, padding); log.debug("allocating {s},{s} section from 0x{x} to 0x{x}", .{ - sect.segName(), - sect.sectName(), + header.segName(), + header.sectName(), off, off + size, }); - sect.addr = seg.inner.vmaddr + off - seg.inner.fileoff; - - const is_zerofill = opts.flags == macho.S_ZEROFILL or opts.flags == macho.S_THREAD_LOCAL_ZEROFILL; - const use_llvm = build_options.have_llvm and self.base.options.use_llvm; - const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1; + header.addr = seg.vmaddr + off - seg.fileoff; // TODO handle zerofill in stage2 - if (!(is_zerofill and (use_stage1 or use_llvm))) { - sect.offset = @intCast(u32, off); - } + // const is_zerofill = opts.flags == macho.S_ZEROFILL or opts.flags == macho.S_THREAD_LOCAL_ZEROFILL; + header.offset = @intCast(u32, off); + + try self.updateSectionOrdinals(); } - const index = @intCast(u16, seg.sections.items.len); - try seg.sections.append(self.base.allocator, sect); - seg.inner.cmdsize += @sizeOf(macho.section_64); - seg.inner.nsects += 1; - - const match = MatchingSection{ - .seg = segment_id, - .sect = index, - }; - _ = try self.section_ordinals.getOrPut(self.base.allocator, match); - try self.atom_free_lists.putNoClobber(self.base.allocator, match, .{}); - - self.load_commands_dirty = true; - self.sections_order_dirty = true; + const index = @intCast(u8, self.sections.slice().len); + try self.sections.append(self.base.allocator, .{ + .segment_index = segment_id, + .header = header, + }); + seg.cmdsize += @sizeOf(macho.section_64); + seg.nsects += 1; return index; } -fn findFreeSpace(self: MachO, segment_id: u16, alignment: u64, start: ?u32) u64 { - const seg = self.load_commands.items[segment_id].segment; - if (seg.sections.items.len == 0) { - return if (start) |v| v else seg.inner.fileoff; +fn findFreeSpace(self: MachO, segment_id: u8, alignment: u64, start: ?u64) u64 { + const seg = self.segments.items[segment_id]; + const indexes = self.getSectionIndexes(segment_id); + if (indexes.end - indexes.start == 0) { + return if (start) |v| v else seg.fileoff; } - const last_sect = seg.sections.items[seg.sections.items.len - 1]; + const last_sect = self.sections.items(.header)[indexes.end - 1]; const final_off = last_sect.offset + padToIdeal(last_sect.size); return mem.alignForwardGeneric(u64, final_off, alignment); } -fn growSegment(self: *MachO, seg_id: u16, new_size: u64) !void { - const seg = &self.load_commands.items[seg_id].segment; - const new_seg_size = mem.alignForwardGeneric(u64, new_size, self.page_size); - assert(new_seg_size > seg.inner.filesize); - const offset_amt = new_seg_size - seg.inner.filesize; +fn growSegment(self: *MachO, segment_index: u8, new_size: u64) !void { + const segment = &self.segments.items[segment_index]; + const new_segment_size = mem.alignForwardGeneric(u64, new_size, self.page_size); + assert(new_segment_size > segment.filesize); + const offset_amt = new_segment_size - segment.filesize; log.debug("growing segment {s} from 0x{x} to 0x{x}", .{ - seg.inner.segname, - seg.inner.filesize, - new_seg_size, + segment.segname, + segment.filesize, + new_segment_size, }); - seg.inner.filesize = new_seg_size; - seg.inner.vmsize = new_seg_size; + segment.filesize = new_segment_size; + segment.vmsize = new_segment_size; log.debug(" (new segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ - seg.inner.fileoff, - seg.inner.fileoff + seg.inner.filesize, - seg.inner.vmaddr, - 
seg.inner.vmaddr + seg.inner.vmsize, + segment.fileoff, + segment.fileoff + segment.filesize, + segment.vmaddr, + segment.vmaddr + segment.vmsize, }); - var next: usize = seg_id + 1; + var next: u8 = segment_index + 1; while (next < self.linkedit_segment_cmd_index.? + 1) : (next += 1) { - const next_seg = &self.load_commands.items[next].segment; + const next_segment = &self.segments.items[next]; try MachO.copyRangeAllOverlappingAlloc( self.base.allocator, self.base.file.?, - next_seg.inner.fileoff, - next_seg.inner.fileoff + offset_amt, - math.cast(usize, next_seg.inner.filesize) orelse return error.Overflow, + next_segment.fileoff, + next_segment.fileoff + offset_amt, + math.cast(usize, next_segment.filesize) orelse return error.Overflow, ); - next_seg.inner.fileoff += offset_amt; - next_seg.inner.vmaddr += offset_amt; + next_segment.fileoff += offset_amt; + next_segment.vmaddr += offset_amt; log.debug(" (new {s} segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ - next_seg.inner.segname, - next_seg.inner.fileoff, - next_seg.inner.fileoff + next_seg.inner.filesize, - next_seg.inner.vmaddr, - next_seg.inner.vmaddr + next_seg.inner.vmsize, + next_segment.segname, + next_segment.fileoff, + next_segment.fileoff + next_segment.filesize, + next_segment.vmaddr, + next_segment.vmaddr + next_segment.vmsize, }); - for (next_seg.sections.items) |*moved_sect, moved_sect_id| { - moved_sect.offset += @intCast(u32, offset_amt); - moved_sect.addr += offset_amt; + const indexes = self.getSectionIndexes(next); + for (self.sections.items(.header)[indexes.start..indexes.end]) |*header, i| { + header.offset += @intCast(u32, offset_amt); + header.addr += offset_amt; log.debug(" (new {s},{s} file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ - moved_sect.segName(), - moved_sect.sectName(), - moved_sect.offset, - moved_sect.offset + moved_sect.size, - moved_sect.addr, - moved_sect.addr + moved_sect.size, + header.segName(), + header.sectName(), + header.offset, + header.offset + header.size, + header.addr, + header.addr + header.size, }); - try self.shiftLocalsByOffset(.{ - .seg = @intCast(u16, next), - .sect = @intCast(u16, moved_sect_id), - }, @intCast(i64, offset_amt)); + try self.shiftLocalsByOffset(@intCast(u8, i + indexes.start), @intCast(i64, offset_amt)); } } } -fn growSection(self: *MachO, match: MatchingSection, new_size: u32) !void { +fn growSection(self: *MachO, sect_id: u8, new_size: u32) !void { const tracy = trace(@src()); defer tracy.end(); - const seg = &self.load_commands.items[match.seg].segment; - const sect = &seg.sections.items[match.sect]; + const section = self.sections.get(sect_id); + const segment_index = section.segment_index; + const header = section.header; + const segment = self.segments.items[segment_index]; - const alignment = try math.powi(u32, 2, sect.@"align"); - const max_size = self.allocatedSize(match.seg, sect.offset); + const alignment = try math.powi(u32, 2, header.@"align"); + const max_size = self.allocatedSize(segment_index, header.offset); const ideal_size = padToIdeal(new_size); const needed_size = mem.alignForwardGeneric(u32, ideal_size, alignment); if (needed_size > max_size) blk: { log.debug(" (need to grow! 
needed 0x{x}, max 0x{x})", .{ needed_size, max_size }); - if (match.sect == seg.sections.items.len - 1) { + const indexes = self.getSectionIndexes(segment_index); + if (sect_id == indexes.end - 1) { // Last section, just grow segments - try self.growSegment(match.seg, seg.inner.filesize + needed_size - max_size); + try self.growSegment(segment_index, segment.filesize + needed_size - max_size); break :blk; } // Need to move all sections below in file and address spaces. const offset_amt = offset: { - const max_alignment = try self.getSectionMaxAlignment(match.seg, match.sect + 1); + const max_alignment = try self.getSectionMaxAlignment(sect_id + 1, indexes.end); break :offset mem.alignForwardGeneric(u64, needed_size - max_size, max_alignment); }; // Before we commit to this, check if the segment needs to grow too. // We assume that each section header is growing linearly with the increasing // file offset / virtual memory address space. - const last_sect = seg.sections.items[seg.sections.items.len - 1]; - const last_sect_off = last_sect.offset + last_sect.size; - const seg_off = seg.inner.fileoff + seg.inner.filesize; + const last_sect_header = self.sections.items(.header)[indexes.end - 1]; + const last_sect_off = last_sect_header.offset + last_sect_header.size; + const seg_off = segment.fileoff + segment.filesize; if (last_sect_off + offset_amt > seg_off) { // Need to grow segment first. const spill_size = (last_sect_off + offset_amt) - seg_off; - try self.growSegment(match.seg, seg.inner.filesize + spill_size); + try self.growSegment(segment_index, segment.filesize + spill_size); } // We have enough space to expand within the segment, so move all sections by // the required amount and update their header offsets. - const next_sect = seg.sections.items[match.sect + 1]; + const next_sect = self.sections.items(.header)[sect_id + 1]; const total_size = last_sect_off - next_sect.offset; try MachO.copyRangeAllOverlappingAlloc( @@ -5151,9 +4961,7 @@ fn growSection(self: *MachO, match: MatchingSection, new_size: u32) !void { math.cast(usize, total_size) orelse return error.Overflow, ); - var next = match.sect + 1; - while (next < seg.sections.items.len) : (next += 1) { - const moved_sect = &seg.sections.items[next]; + for (self.sections.items(.header)[sect_id + 1 .. 
indexes.end]) |*moved_sect, i| { moved_sect.offset += @intCast(u32, offset_amt); moved_sect.addr += offset_amt; @@ -5166,49 +4974,45 @@ fn growSection(self: *MachO, match: MatchingSection, new_size: u32) !void { moved_sect.addr + moved_sect.size, }); - try self.shiftLocalsByOffset(.{ - .seg = match.seg, - .sect = next, - }, @intCast(i64, offset_amt)); + try self.shiftLocalsByOffset(@intCast(u8, sect_id + 1 + i), @intCast(i64, offset_amt)); } } } -fn allocatedSize(self: MachO, segment_id: u16, start: u64) u64 { - const seg = self.load_commands.items[segment_id].segment; - assert(start >= seg.inner.fileoff); - var min_pos: u64 = seg.inner.fileoff + seg.inner.filesize; +fn allocatedSize(self: MachO, segment_id: u8, start: u64) u64 { + const segment = self.segments.items[segment_id]; + const indexes = self.getSectionIndexes(segment_id); + assert(start >= segment.fileoff); + var min_pos: u64 = segment.fileoff + segment.filesize; if (start > min_pos) return 0; - for (seg.sections.items) |section| { - if (section.offset <= start) continue; - if (section.offset < min_pos) min_pos = section.offset; + for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { + if (header.offset <= start) continue; + if (header.offset < min_pos) min_pos = header.offset; } return min_pos - start; } -fn getSectionMaxAlignment(self: *MachO, segment_id: u16, start_sect_id: u16) !u32 { - const seg = self.load_commands.items[segment_id].segment; +fn getSectionMaxAlignment(self: *MachO, start: u8, end: u8) !u32 { var max_alignment: u32 = 1; - var next = start_sect_id; - while (next < seg.sections.items.len) : (next += 1) { - const sect = seg.sections.items[next]; - const alignment = try math.powi(u32, 2, sect.@"align"); + const slice = self.sections.slice(); + for (slice.items(.header)[start..end]) |header| { + const alignment = try math.powi(u32, 2, header.@"align"); max_alignment = math.max(max_alignment, alignment); } return max_alignment; } -fn allocateAtomCommon(self: *MachO, atom: *Atom, match: MatchingSection) !void { +fn allocateAtomCommon(self: *MachO, atom: *Atom, sect_id: u8) !void { const sym = atom.getSymbolPtr(self); if (self.mode == .incremental) { const size = atom.size; const alignment = try math.powi(u32, 2, atom.alignment); - const vaddr = try self.allocateAtom(atom, size, alignment, match); + const vaddr = try self.allocateAtom(atom, size, alignment, sect_id); const sym_name = atom.getName(self); log.debug("allocated {s} atom at 0x{x}", .{ sym_name, vaddr }); sym.n_value = vaddr; - } else try self.addAtomToSection(atom, match); - sym.n_sect = self.getSectionOrdinal(match); + } else try self.addAtomToSection(atom, sect_id); + sym.n_sect = sect_id + 1; } fn allocateAtom( @@ -5216,15 +5020,15 @@ fn allocateAtom( atom: *Atom, new_atom_size: u64, alignment: u64, - match: MatchingSection, + sect_id: u8, ) !u64 { const tracy = trace(@src()); defer tracy.end(); - const sect = self.getSectionPtr(match); - var free_list = self.atom_free_lists.get(match).?; - const needs_padding = match.seg == self.text_segment_cmd_index.? 
and match.sect == self.text_section_index.?; - const new_atom_ideal_capacity = if (needs_padding) padToIdeal(new_atom_size) else new_atom_size; + const header = &self.sections.items(.header)[sect_id]; + const free_list = &self.sections.items(.free_list)[sect_id]; + const maybe_last_atom = &self.sections.items(.last_atom)[sect_id]; + const new_atom_ideal_capacity = if (header.isCode()) padToIdeal(new_atom_size) else new_atom_size; // We use these to indicate our intention to update metadata, placing the new atom, // and possibly removing a free list node. @@ -5244,7 +5048,7 @@ fn allocateAtom( // Is it enough that we could fit this new atom? const sym = big_atom.getSymbol(self); const capacity = big_atom.capacity(self); - const ideal_capacity = if (needs_padding) padToIdeal(capacity) else capacity; + const ideal_capacity = if (header.isCode()) padToIdeal(capacity) else capacity; const ideal_capacity_end_vaddr = math.add(u64, sym.n_value, ideal_capacity) catch ideal_capacity; const capacity_end_vaddr = sym.n_value + capacity; const new_start_vaddr_unaligned = capacity_end_vaddr - new_atom_ideal_capacity; @@ -5272,30 +5076,28 @@ fn allocateAtom( free_list_removal = i; } break :blk new_start_vaddr; - } else if (self.atoms.get(match)) |last| { + } else if (maybe_last_atom.*) |last| { const last_symbol = last.getSymbol(self); - const ideal_capacity = if (needs_padding) padToIdeal(last.size) else last.size; + const ideal_capacity = if (header.isCode()) padToIdeal(last.size) else last.size; const ideal_capacity_end_vaddr = last_symbol.n_value + ideal_capacity; const new_start_vaddr = mem.alignForwardGeneric(u64, ideal_capacity_end_vaddr, alignment); atom_placement = last; break :blk new_start_vaddr; } else { - break :blk mem.alignForwardGeneric(u64, sect.addr, alignment); + break :blk mem.alignForwardGeneric(u64, header.addr, alignment); } }; const expand_section = atom_placement == null or atom_placement.?.next == null; if (expand_section) { - const needed_size = @intCast(u32, (vaddr + new_atom_size) - sect.addr); - try self.growSection(match, needed_size); - _ = try self.atoms.put(self.base.allocator, match, atom); - sect.size = needed_size; - self.load_commands_dirty = true; + const needed_size = @intCast(u32, (vaddr + new_atom_size) - header.addr); + try self.growSection(sect_id, needed_size); + maybe_last_atom.* = atom; + header.size = needed_size; } const align_pow = @intCast(u32, math.log2(alignment)); - if (sect.@"align" < align_pow) { - sect.@"align" = align_pow; - self.load_commands_dirty = true; + if (header.@"align" < align_pow) { + header.@"align" = align_pow; } atom.size = new_atom_size; atom.alignment = align_pow; @@ -5322,20 +5124,19 @@ fn allocateAtom( return vaddr; } -pub fn addAtomToSection(self: *MachO, atom: *Atom, match: MatchingSection) !void { - if (self.atoms.getPtr(match)) |last| { - last.*.next = atom; - atom.prev = last.*; - last.* = atom; - } else { - try self.atoms.putNoClobber(self.base.allocator, match, atom); +pub fn addAtomToSection(self: *MachO, atom: *Atom, sect_id: u8) !void { + var section = self.sections.get(sect_id); + if (section.header.size > 0) { + section.last_atom.?.next = atom; + atom.prev = section.last_atom.?; } - const sect = self.getSectionPtr(match); + section.last_atom = atom; const atom_alignment = try math.powi(u32, 2, atom.alignment); - const aligned_end_addr = mem.alignForwardGeneric(u64, sect.size, atom_alignment); - const padding = aligned_end_addr - sect.size; - sect.size += padding + atom.size; - sect.@"align" = @maximum(sect.@"align", 
atom.alignment); + const aligned_end_addr = mem.alignForwardGeneric(u64, section.header.size, atom_alignment); + const padding = aligned_end_addr - section.header.size; + section.header.size += padding + atom.size; + section.header.@"align" = @maximum(section.header.@"align", atom.alignment); + self.sections.set(sect_id, section); } pub fn getGlobalSymbol(self: *MachO, name: []const u8) !u32 { @@ -5368,74 +5169,27 @@ pub fn getGlobalSymbol(self: *MachO, name: []const u8) !u32 { return sym_index; } -fn getSegmentAllocBase(self: MachO, indices: []const ?u16) struct { vmaddr: u64, fileoff: u64 } { +fn getSegmentAllocBase(self: MachO, indices: []const ?u8) struct { vmaddr: u64, fileoff: u64 } { for (indices) |maybe_prev_id| { const prev_id = maybe_prev_id orelse continue; - const prev = self.load_commands.items[prev_id].segment; + const prev = self.segments.items[prev_id]; return .{ - .vmaddr = prev.inner.vmaddr + prev.inner.vmsize, - .fileoff = prev.inner.fileoff + prev.inner.filesize, + .vmaddr = prev.vmaddr + prev.vmsize, + .fileoff = prev.fileoff + prev.filesize, }; } return .{ .vmaddr = 0, .fileoff = 0 }; } -fn pruneAndSortSectionsInSegment(self: *MachO, maybe_seg_id: *?u16, indices: []*?u16) !void { - const seg_id = maybe_seg_id.* orelse return; - - var mapping = std.AutoArrayHashMap(u16, ?u16).init(self.base.allocator); - defer mapping.deinit(); - - const seg = &self.load_commands.items[seg_id].segment; - var sections = seg.sections.toOwnedSlice(self.base.allocator); - defer self.base.allocator.free(sections); - try seg.sections.ensureTotalCapacity(self.base.allocator, sections.len); - - for (indices) |maybe_index| { - const old_idx = maybe_index.* orelse continue; - const sect = §ions[old_idx]; - if (sect.size == 0) { - log.debug("pruning section {s},{s}", .{ sect.segName(), sect.sectName() }); - maybe_index.* = null; - seg.inner.cmdsize -= @sizeOf(macho.section_64); - seg.inner.nsects -= 1; - } else { - maybe_index.* = @intCast(u16, seg.sections.items.len); - seg.sections.appendAssumeCapacity(sect.*); - } - try mapping.putNoClobber(old_idx, maybe_index.*); - } - - var atoms = std.ArrayList(struct { match: MatchingSection, atom: *Atom }).init(self.base.allocator); - defer atoms.deinit(); - try atoms.ensureTotalCapacity(mapping.count()); - - for (mapping.keys()) |old_sect| { - const new_sect = mapping.get(old_sect).? 
orelse { - _ = self.atoms.remove(.{ .seg = seg_id, .sect = old_sect }); - continue; - }; - const kv = self.atoms.fetchRemove(.{ .seg = seg_id, .sect = old_sect }).?; - atoms.appendAssumeCapacity(.{ - .match = .{ .seg = seg_id, .sect = new_sect }, - .atom = kv.value, - }); - } - - while (atoms.popOrNull()) |next| { - try self.atoms.putNoClobber(self.base.allocator, next.match, next.atom); - } - - if (seg.inner.nsects == 0 and !mem.eql(u8, "__TEXT", seg.inner.segName())) { - // Segment has now become empty, so mark it as such - log.debug("marking segment {s} as dead", .{seg.inner.segName()}); - seg.inner.cmd = @intToEnum(macho.LC, 0); - maybe_seg_id.* = null; - } -} - fn pruneAndSortSections(self: *MachO) !void { - try self.pruneAndSortSectionsInSegment(&self.text_segment_cmd_index, &.{ + const gpa = self.base.allocator; + + var sections = self.sections.toOwnedSlice(); + defer sections.deinit(gpa); + try self.sections.ensureTotalCapacity(gpa, sections.len); + + for (&[_]*?u8{ + // __TEXT &self.text_section_index, &self.stubs_section_index, &self.stub_helper_section_index, @@ -5448,9 +5202,7 @@ fn pruneAndSortSections(self: *MachO) !void { &self.objc_methtype_section_index, &self.objc_classname_section_index, &self.eh_frame_section_index, - }); - - try self.pruneAndSortSectionsInSegment(&self.data_const_segment_cmd_index, &.{ + // __DATA_CONST &self.got_section_index, &self.mod_init_func_section_index, &self.mod_term_func_section_index, @@ -5458,9 +5210,7 @@ fn pruneAndSortSections(self: *MachO) !void { &self.objc_cfstring_section_index, &self.objc_classlist_section_index, &self.objc_imageinfo_section_index, - }); - - try self.pruneAndSortSectionsInSegment(&self.data_segment_cmd_index, &.{ + // __DATA &self.rustc_section_index, &self.la_symbol_ptr_section_index, &self.objc_const_section_index, @@ -5473,103 +5223,129 @@ fn pruneAndSortSections(self: *MachO) !void { &self.tlv_data_section_index, &self.tlv_bss_section_index, &self.bss_section_index, - }); + }) |maybe_index| { + const old_idx = maybe_index.* orelse continue; + const segment_index = sections.items(.segment_index)[old_idx]; + const header = sections.items(.header)[old_idx]; + const last_atom = sections.items(.last_atom)[old_idx]; + if (header.size == 0) { + log.debug("pruning section {s},{s}", .{ header.segName(), header.sectName() }); + maybe_index.* = null; + const seg = &self.segments.items[segment_index]; + seg.cmdsize -= @sizeOf(macho.section_64); + seg.nsects -= 1; + } else { + maybe_index.* = @intCast(u8, self.sections.slice().len); + self.sections.appendAssumeCapacity(.{ + .segment_index = segment_index, + .header = header, + .last_atom = last_atom, + }); + } + } - // Create new section ordinals. 
- self.section_ordinals.clearRetainingCapacity(); - if (self.text_segment_cmd_index) |seg_id| { - const seg = self.load_commands.items[seg_id].segment; - for (seg.sections.items) |_, sect_id| { - const res = self.section_ordinals.getOrPutAssumeCapacity(.{ - .seg = seg_id, - .sect = @intCast(u16, sect_id), - }); - assert(!res.found_existing); + for (self.segments.items) |*seg| { + const segname = seg.segName(); + if (seg.nsects == 0 and + !mem.eql(u8, "__TEXT", segname) and + !mem.eql(u8, "__PAGEZERO", segname) and + !mem.eql(u8, "__LINKEDIT", segname)) + { + // Segment has now become empty, so mark it as such + log.debug("marking segment {s} as dead", .{seg.segName()}); + seg.cmd = @intToEnum(macho.LC, 0); } } - if (self.data_const_segment_cmd_index) |seg_id| { - const seg = self.load_commands.items[seg_id].segment; - for (seg.sections.items) |_, sect_id| { - const res = self.section_ordinals.getOrPutAssumeCapacity(.{ - .seg = seg_id, - .sect = @intCast(u16, sect_id), - }); - assert(!res.found_existing); - } - } - if (self.data_segment_cmd_index) |seg_id| { - const seg = self.load_commands.items[seg_id].segment; - for (seg.sections.items) |_, sect_id| { - const res = self.section_ordinals.getOrPutAssumeCapacity(.{ - .seg = seg_id, - .sect = @intCast(u16, sect_id), - }); - assert(!res.found_existing); - } - } - self.sections_order_dirty = false; } fn updateSectionOrdinals(self: *MachO) !void { - if (!self.sections_order_dirty) return; - + _ = self; const tracy = trace(@src()); defer tracy.end(); - log.debug("updating section ordinals", .{}); + @panic("updating section ordinals"); - const gpa = self.base.allocator; + // const gpa = self.base.allocator; - var ordinal_remap = std.AutoHashMap(u8, u8).init(gpa); - defer ordinal_remap.deinit(); - var ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}; + // var ordinal_remap = std.AutoHashMap(u8, u8).init(gpa); + // defer ordinal_remap.deinit(); + // var ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}; - var new_ordinal: u8 = 0; - for (&[_]?u16{ - self.text_segment_cmd_index, - self.data_const_segment_cmd_index, - self.data_segment_cmd_index, - }) |maybe_index| { - const index = maybe_index orelse continue; - const seg = self.load_commands.items[index].segment; - for (seg.sections.items) |sect, sect_id| { - const match = MatchingSection{ - .seg = @intCast(u16, index), - .sect = @intCast(u16, sect_id), - }; - const old_ordinal = self.getSectionOrdinal(match); - new_ordinal += 1; - log.debug("'{s},{s}': sect({d}, '_,_') => sect({d}, '_,_')", .{ - sect.segName(), - sect.sectName(), - old_ordinal, - new_ordinal, - }); - try ordinal_remap.putNoClobber(old_ordinal, new_ordinal); - try ordinals.putNoClobber(gpa, match, {}); - } - } + // var new_ordinal: u8 = 0; + // for (&[_]?u16{ + // self.text_segment_cmd_index, + // self.data_const_segment_cmd_index, + // self.data_segment_cmd_index, + // }) |maybe_index| { + // const index = maybe_index orelse continue; + // const seg = self.load_commands.items[index].segment; + // for (seg.sections.items) |sect, sect_id| { + // const match = MatchingSection{ + // .seg = @intCast(u16, index), + // .sect = @intCast(u16, sect_id), + // }; + // const old_ordinal = self.getSectionOrdinal(match); + // new_ordinal += 1; + // log.debug("'{s},{s}': sect({d}, '_,_') => sect({d}, '_,_')", .{ + // sect.segName(), + // sect.sectName(), + // old_ordinal, + // new_ordinal, + // }); + // try ordinal_remap.putNoClobber(old_ordinal, new_ordinal); + // try ordinals.putNoClobber(gpa, match, {}); + 
// } + // } - // FIXME Jakub - // TODO no need for duping work here; simply walk the atom graph - for (self.locals.items) |*sym| { - if (sym.undf()) continue; - if (sym.n_sect == 0) continue; - sym.n_sect = ordinal_remap.get(sym.n_sect).?; - } - for (self.objects.items) |*object| { - for (object.symtab.items) |*sym| { - if (sym.undf()) continue; - if (sym.n_sect == 0) continue; - sym.n_sect = ordinal_remap.get(sym.n_sect).?; - } - } + // // FIXME Jakub + // // TODO no need for duping work here; simply walk the atom graph + // for (self.locals.items) |*sym| { + // if (sym.undf()) continue; + // if (sym.n_sect == 0) continue; + // sym.n_sect = ordinal_remap.get(sym.n_sect).?; + // } + // for (self.objects.items) |*object| { + // for (object.symtab.items) |*sym| { + // if (sym.undf()) continue; + // if (sym.n_sect == 0) continue; + // sym.n_sect = ordinal_remap.get(sym.n_sect).?; + // } + // } - self.section_ordinals.deinit(gpa); - self.section_ordinals = ordinals; + // self.section_ordinals.deinit(gpa); + // self.section_ordinals = ordinals; } -fn writeDyldInfoData(self: *MachO) !void { +pub fn writeSegmentHeaders(self: *MachO, start: usize, end: usize, ncmds: *u32, writer: anytype) !void { + var count: usize = 0; + for (self.segments.items[start..end]) |seg| { + if (seg.cmd == .NONE) continue; + try writer.writeStruct(seg); + + // TODO + for (self.sections.items(.header)[count..][0..seg.nsects]) |header| { + try writer.writeStruct(header); + } + + count += seg.nsects; + ncmds.* += 1; + } +} + +fn writeLinkeditSegmentData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + seg.filesize = 0; + seg.vmsize = 0; + + try self.writeDyldInfoData(ncmds, lc_writer); + try self.writeFunctionStarts(ncmds, lc_writer); + try self.writeDataInCode(ncmds, lc_writer); + try self.writeSymtabs(ncmds, lc_writer); + + seg.vmsize = mem.alignForwardGeneric(u64, seg.filesize, self.page_size); +} + +fn writeDyldInfoData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { const tracy = trace(@src()); defer tracy.end(); @@ -5582,89 +5358,86 @@ fn writeDyldInfoData(self: *MachO) !void { var lazy_bind_pointers = std.ArrayList(bind.Pointer).init(gpa); defer lazy_bind_pointers.deinit(); - { - var it = self.atoms.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var atom: *Atom = entry.value_ptr.*; + const slice = self.sections.slice(); + for (slice.items(.last_atom)) |last_atom, sect_id| { + var atom = last_atom orelse continue; + const segment_index = slice.items(.segment_index)[sect_id]; + const header = slice.items(.header)[sect_id]; - if (self.text_segment_cmd_index) |seg| { - if (match.seg == seg) continue; // __TEXT is non-writable + if (mem.eql(u8, header.segName(), "__TEXT")) continue; // __TEXT is non-writable + + log.debug("dyld info for {s},{s}", .{ header.segName(), header.sectName() }); + + const seg = self.segments.items[segment_index]; + + while (true) { + log.debug(" ATOM(%{d}, '{s}')", .{ atom.sym_index, atom.getName(self) }); + const sym = atom.getSymbol(self); + const base_offset = sym.n_value - seg.vmaddr; + + for (atom.rebases.items) |offset| { + log.debug(" | rebase at {x}", .{base_offset + offset}); + try rebase_pointers.append(.{ + .offset = base_offset + offset, + .segment_id = segment_index, + }); } - const seg = self.getSegment(match); - const sect = self.getSection(match); - log.debug("dyld info for {s},{s}", .{ sect.segName(), sect.sectName() }); - - while (true) { - log.debug(" ATOM(%{d}, 
'{s}')", .{ atom.sym_index, atom.getName(self) }); - const sym = atom.getSymbol(self); - const base_offset = sym.n_value - seg.inner.vmaddr; - - for (atom.rebases.items) |offset| { - log.debug(" | rebase at {x}", .{base_offset + offset}); - try rebase_pointers.append(.{ - .offset = base_offset + offset, - .segment_id = match.seg, - }); + for (atom.bindings.items) |binding| { + const bind_sym = self.getSymbol(binding.target); + const bind_sym_name = self.getSymbolName(binding.target); + const dylib_ordinal = @divTrunc( + @bitCast(i16, bind_sym.n_desc), + macho.N_SYMBOL_RESOLVER, + ); + var flags: u4 = 0; + log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ + binding.offset + base_offset, + bind_sym_name, + dylib_ordinal, + }); + if (bind_sym.weakRef()) { + log.debug(" | marking as weak ref ", .{}); + flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); } - - for (atom.bindings.items) |binding| { - const bind_sym = self.getSymbol(binding.target); - const bind_sym_name = self.getSymbolName(binding.target); - const dylib_ordinal = @divTrunc( - @bitCast(i16, bind_sym.n_desc), - macho.N_SYMBOL_RESOLVER, - ); - var flags: u4 = 0; - log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ - binding.offset + base_offset, - bind_sym_name, - dylib_ordinal, - }); - if (bind_sym.weakRef()) { - log.debug(" | marking as weak ref ", .{}); - flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); - } - try bind_pointers.append(.{ - .offset = binding.offset + base_offset, - .segment_id = match.seg, - .dylib_ordinal = dylib_ordinal, - .name = bind_sym_name, - .bind_flags = flags, - }); - } - - for (atom.lazy_bindings.items) |binding| { - const bind_sym = self.getSymbol(binding.target); - const bind_sym_name = self.getSymbolName(binding.target); - const dylib_ordinal = @divTrunc( - @bitCast(i16, bind_sym.n_desc), - macho.N_SYMBOL_RESOLVER, - ); - var flags: u4 = 0; - log.debug(" | lazy bind at {x} import('{s}') ord({d})", .{ - binding.offset + base_offset, - bind_sym_name, - dylib_ordinal, - }); - if (bind_sym.weakRef()) { - log.debug(" | marking as weak ref ", .{}); - flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); - } - try lazy_bind_pointers.append(.{ - .offset = binding.offset + base_offset, - .segment_id = match.seg, - .dylib_ordinal = dylib_ordinal, - .name = bind_sym_name, - .bind_flags = flags, - }); - } - - if (atom.prev) |prev| { - atom = prev; - } else break; + try bind_pointers.append(.{ + .offset = binding.offset + base_offset, + .segment_id = segment_index, + .dylib_ordinal = dylib_ordinal, + .name = bind_sym_name, + .bind_flags = flags, + }); } + + for (atom.lazy_bindings.items) |binding| { + const bind_sym = self.getSymbol(binding.target); + const bind_sym_name = self.getSymbolName(binding.target); + const dylib_ordinal = @divTrunc( + @bitCast(i16, bind_sym.n_desc), + macho.N_SYMBOL_RESOLVER, + ); + var flags: u4 = 0; + log.debug(" | lazy bind at {x} import('{s}') ord({d})", .{ + binding.offset + base_offset, + bind_sym_name, + dylib_ordinal, + }); + if (bind_sym.weakRef()) { + log.debug(" | marking as weak ref ", .{}); + flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); + } + try lazy_bind_pointers.append(.{ + .offset = binding.offset + base_offset, + .segment_id = segment_index, + .dylib_ordinal = dylib_ordinal, + .name = bind_sym_name, + .bind_flags = flags, + }); + } + + if (atom.prev) |prev| { + atom = prev; + } else break; } } @@ -5675,8 +5448,8 @@ fn writeDyldInfoData(self: *MachO) !void { // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT 
and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. log.debug("generating export trie", .{}); - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].segment; - const base_address = text_segment.inner.vmaddr; + const text_segment = self.segments.items[self.text_segment_cmd_index.?]; + const base_address = text_segment.vmaddr; if (self.base.options.output_mode == .Exe) { for (&[_]SymbolWithLoc{ @@ -5714,48 +5487,27 @@ fn writeDyldInfoData(self: *MachO) !void { try trie.finalize(gpa); } - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].dyld_info_only; - - const rebase_off = mem.alignForwardGeneric(u64, seg.inner.fileoff, @alignOf(u64)); + const link_seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const rebase_off = mem.alignForwardGeneric(u64, link_seg.fileoff, @alignOf(u64)); + assert(rebase_off == link_seg.fileoff); const rebase_size = try bind.rebaseInfoSize(rebase_pointers.items); - dyld_info.rebase_off = @intCast(u32, rebase_off); - dyld_info.rebase_size = @intCast(u32, rebase_size); - log.debug("writing rebase info from 0x{x} to 0x{x}", .{ - dyld_info.rebase_off, - dyld_info.rebase_off + dyld_info.rebase_size, - }); + log.debug("writing rebase info from 0x{x} to 0x{x}", .{ rebase_off, rebase_off + rebase_size }); - const bind_off = mem.alignForwardGeneric(u64, dyld_info.rebase_off + dyld_info.rebase_size, @alignOf(u64)); + const bind_off = mem.alignForwardGeneric(u64, rebase_off + rebase_size, @alignOf(u64)); const bind_size = try bind.bindInfoSize(bind_pointers.items); - dyld_info.bind_off = @intCast(u32, bind_off); - dyld_info.bind_size = @intCast(u32, bind_size); - log.debug("writing bind info from 0x{x} to 0x{x}", .{ - dyld_info.bind_off, - dyld_info.bind_off + dyld_info.bind_size, - }); + log.debug("writing bind info from 0x{x} to 0x{x}", .{ bind_off, bind_off + bind_size }); - const lazy_bind_off = mem.alignForwardGeneric(u64, dyld_info.bind_off + dyld_info.bind_size, @alignOf(u64)); + const lazy_bind_off = mem.alignForwardGeneric(u64, bind_off + bind_size, @alignOf(u64)); const lazy_bind_size = try bind.lazyBindInfoSize(lazy_bind_pointers.items); - dyld_info.lazy_bind_off = @intCast(u32, lazy_bind_off); - dyld_info.lazy_bind_size = @intCast(u32, lazy_bind_size); - log.debug("writing lazy bind info from 0x{x} to 0x{x}", .{ - dyld_info.lazy_bind_off, - dyld_info.lazy_bind_off + dyld_info.lazy_bind_size, - }); + log.debug("writing lazy bind info from 0x{x} to 0x{x}", .{ lazy_bind_off, lazy_bind_off + lazy_bind_size }); - const export_off = mem.alignForwardGeneric(u64, dyld_info.lazy_bind_off + dyld_info.lazy_bind_size, @alignOf(u64)); + const export_off = mem.alignForwardGeneric(u64, lazy_bind_off + lazy_bind_size, @alignOf(u64)); const export_size = trie.size; - dyld_info.export_off = @intCast(u32, export_off); - dyld_info.export_size = @intCast(u32, export_size); - log.debug("writing export trie from 0x{x} to 0x{x}", .{ - dyld_info.export_off, - dyld_info.export_off + dyld_info.export_size, - }); + log.debug("writing export trie from 0x{x} to 0x{x}", .{ export_off, export_off + export_size }); - seg.inner.filesize = dyld_info.export_off + dyld_info.export_size - seg.inner.fileoff; + const needed_size = export_off + export_size - rebase_off; + link_seg.filesize = needed_size; - const needed_size = dyld_info.export_off + dyld_info.export_size - dyld_info.rebase_off; var buffer = try gpa.alloc(u8, needed_size); defer gpa.free(buffer); 
mem.set(u8, buffer, 0); @@ -5763,54 +5515,61 @@ fn writeDyldInfoData(self: *MachO) !void { var stream = std.io.fixedBufferStream(buffer); const writer = stream.writer(); - const base_off = dyld_info.rebase_off; try bind.writeRebaseInfo(rebase_pointers.items, writer); - try stream.seekTo(dyld_info.bind_off - base_off); + try stream.seekTo(bind_off - rebase_off); try bind.writeBindInfo(bind_pointers.items, writer); - try stream.seekTo(dyld_info.lazy_bind_off - base_off); + try stream.seekTo(lazy_bind_off - rebase_off); try bind.writeLazyBindInfo(lazy_bind_pointers.items, writer); - try stream.seekTo(dyld_info.export_off - base_off); + try stream.seekTo(export_off - rebase_off); _ = try trie.write(writer); log.debug("writing dyld info from 0x{x} to 0x{x}", .{ - dyld_info.rebase_off, - dyld_info.rebase_off + needed_size, + rebase_off, + rebase_off + needed_size, }); - try self.base.file.?.pwriteAll(buffer, dyld_info.rebase_off); - try self.populateLazyBindOffsetsInStubHelper( - buffer[dyld_info.lazy_bind_off - base_off ..][0..dyld_info.lazy_bind_size], - ); + try self.base.file.?.pwriteAll(buffer, rebase_off); + try self.populateLazyBindOffsetsInStubHelper(buffer[lazy_bind_off - rebase_off ..][0..lazy_bind_size]); - self.load_commands_dirty = true; + try lc_writer.writeStruct(macho.dyld_info_command{ + .cmd = .DYLD_INFO_ONLY, + .cmdsize = @sizeOf(macho.dyld_info_command), + .rebase_off = @intCast(u32, rebase_off), + .rebase_size = @intCast(u32, rebase_size), + .bind_off = @intCast(u32, bind_off), + .bind_size = @intCast(u32, bind_size), + .weak_bind_off = 0, + .weak_bind_size = 0, + .lazy_bind_off = @intCast(u32, lazy_bind_off), + .lazy_bind_size = @intCast(u32, lazy_bind_size), + .export_off = @intCast(u32, export_off), + .export_size = @intCast(u32, export_size), + }); + ncmds.* += 1; } fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { const gpa = self.base.allocator; - const text_segment_cmd_index = self.text_segment_cmd_index orelse return; + const stub_helper_section_index = self.stub_helper_section_index orelse return; - const last_atom = self.atoms.get(.{ - .seg = text_segment_cmd_index, - .sect = stub_helper_section_index, - }) orelse return; if (self.stub_helper_preamble_atom == null) return; - if (last_atom == self.stub_helper_preamble_atom.?) return; + + const section = self.sections.get(stub_helper_section_index); + const last_atom = section.last_atom orelse return; + if (last_atom == self.stub_helper_preamble_atom.?) return; // TODO is this a redundant check? 
var table = std.AutoHashMap(i64, *Atom).init(gpa); defer table.deinit(); { var stub_atom = last_atom; - var laptr_atom = self.atoms.get(.{ - .seg = self.data_segment_cmd_index.?, - .sect = self.la_symbol_ptr_section_index.?, - }).?; + var laptr_atom = self.sections.items(.last_atom)[self.la_symbol_ptr_section_index.?].?; const base_addr = blk: { - const seg = self.load_commands.items[self.data_segment_cmd_index.?].segment; - break :blk seg.inner.vmaddr; + const seg = self.segments.items[self.data_segment_cmd_index.?]; + break :blk seg.vmaddr; }; while (true) { @@ -5871,10 +5630,7 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { } } - const sect = self.getSection(.{ - .seg = text_segment_cmd_index, - .sect = stub_helper_section_index, - }); + const header = self.sections.items(.header)[stub_helper_section_index]; const stub_offset: u4 = switch (self.base.options.target.cpu.arch) { .x86_64 => 1, .aarch64 => 2 * @sizeOf(u32), @@ -5886,7 +5642,7 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { while (offsets.popOrNull()) |bind_offset| { const atom = table.get(bind_offset.sym_offset).?; const sym = atom.getSymbol(self); - const file_offset = sect.offset + sym.n_value - sect.addr + stub_offset; + const file_offset = header.offset + sym.n_value - header.addr + stub_offset; mem.writeIntLittle(u32, &buf, bind_offset.offset); log.debug("writing lazy bind offset in stub helper of 0x{x} for symbol {s} at offset 0x{x}", .{ bind_offset.offset, @@ -5899,14 +5655,14 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { const asc_u64 = std.sort.asc(u64); -fn writeFunctionStarts(self: *MachO) !void { - const text_seg_index = self.text_segment_cmd_index orelse return; - const text_sect_index = self.text_section_index orelse return; - const text_seg = self.load_commands.items[text_seg_index].segment; - +fn writeFunctionStarts(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { const tracy = trace(@src()); defer tracy.end(); + const text_seg_index = self.text_segment_cmd_index orelse return; + const text_sect_index = self.text_section_index orelse return; + const text_seg = self.segments.items[text_seg_index]; + const gpa = self.base.allocator; // We need to sort by address first @@ -5918,8 +5674,8 @@ fn writeFunctionStarts(self: *MachO) !void { const sym = self.getSymbol(global); if (sym.undf()) continue; if (sym.n_desc == N_DESC_GCED) continue; - const match = self.getMatchingSectionFromOrdinal(sym.n_sect); - if (match.seg != text_seg_index or match.sect != text_sect_index) continue; + const sect_id = sym.n_sect - 1; + if (sect_id != text_sect_index) continue; addresses.appendAssumeCapacity(sym.n_value); } @@ -5932,7 +5688,7 @@ fn writeFunctionStarts(self: *MachO) !void { var last_off: u32 = 0; for (addresses.items) |addr| { - const offset = @intCast(u32, addr - text_seg.inner.vmaddr); + const offset = @intCast(u32, addr - text_seg.vmaddr); const diff = offset - last_off; if (diff == 0) continue; @@ -5951,22 +5707,22 @@ fn writeFunctionStarts(self: *MachO) !void { try std.leb.writeULEB128(buffer.writer(), offset); } - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const fn_cmd = &self.load_commands.items[self.function_starts_cmd_index.?].linkedit_data; + const link_seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric(u64, link_seg.fileoff + link_seg.filesize, @alignOf(u64)); + const needed_size = buffer.items.len; + 
link_seg.filesize = offset + needed_size - link_seg.fileoff; - const dataoff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(u64)); - const datasize = buffer.items.len; - fn_cmd.dataoff = @intCast(u32, dataoff); - fn_cmd.datasize = @intCast(u32, datasize); - seg.inner.filesize = fn_cmd.dataoff + fn_cmd.datasize - seg.inner.fileoff; + log.debug("writing function starts info from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); - log.debug("writing function starts info from 0x{x} to 0x{x}", .{ - fn_cmd.dataoff, - fn_cmd.dataoff + fn_cmd.datasize, + try self.base.file.?.pwriteAll(buffer.items, offset); + + try lc_writer.writeStruct(macho.linkedit_data_command{ + .cmd = .FUNCTION_STARTS, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = @intCast(u32, offset), + .datasize = @intCast(u32, needed_size), }); - - try self.base.file.?.pwriteAll(buffer.items, fn_cmd.dataoff); - self.load_commands_dirty = true; + ncmds.* += 1; } fn filterDataInCode( @@ -5988,17 +5744,15 @@ fn filterDataInCode( return dices[start..end]; } -fn writeDataInCode(self: *MachO) !void { +fn writeDataInCode(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { const tracy = trace(@src()); defer tracy.end(); var out_dice = std.ArrayList(macho.data_in_code_entry).init(self.base.allocator); defer out_dice.deinit(); - const text_sect = self.getSection(.{ - .seg = self.text_segment_cmd_index orelse return, - .sect = self.text_section_index orelse return, - }); + const text_sect_id = self.text_section_index orelse return; + const text_sect_header = self.sections.items(.header)[text_sect_id]; for (self.objects.items) |object| { const dice = object.parseDataInCode() orelse continue; @@ -6008,15 +5762,15 @@ fn writeDataInCode(self: *MachO) !void { const sym = atom.getSymbol(self); if (sym.n_desc == N_DESC_GCED) continue; - const match = self.getMatchingSectionFromOrdinal(sym.n_sect); - if (match.seg != self.text_segment_cmd_index.? and match.sect != self.text_section_index.?) { + const sect_id = sym.n_sect - 1; + if (sect_id != self.text_section_index.?) 
{ continue; } const source_sym = object.getSourceSymbol(atom.sym_index) orelse continue; const source_addr = math.cast(u32, source_sym.n_value) orelse return error.Overflow; const filtered_dice = filterDataInCode(dice, source_addr, source_addr + atom.size); - const base = math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset) orelse + const base = math.cast(u32, sym.n_value - text_sect_header.addr + text_sect_header.offset) orelse return error.Overflow; for (filtered_dice) |single| { @@ -6030,33 +5784,63 @@ fn writeDataInCode(self: *MachO) !void { } } - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].linkedit_data; + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64)); + const needed_size = out_dice.items.len * @sizeOf(macho.data_in_code_entry); + seg.filesize = offset + needed_size - seg.fileoff; - const dataoff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(u64)); - const datasize = out_dice.items.len * @sizeOf(macho.data_in_code_entry); - dice_cmd.dataoff = @intCast(u32, dataoff); - dice_cmd.datasize = @intCast(u32, datasize); - seg.inner.filesize = dice_cmd.dataoff + dice_cmd.datasize - seg.inner.fileoff; + log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); - log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ - dice_cmd.dataoff, - dice_cmd.dataoff + dice_cmd.datasize, + try self.base.file.?.pwriteAll(mem.sliceAsBytes(out_dice.items), offset); + try lc_writer.writeStruct(macho.linkedit_data_command{ + .cmd = .DATA_IN_CODE, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = @intCast(u32, offset), + .datasize = @intCast(u32, needed_size), }); - - try self.base.file.?.pwriteAll(mem.sliceAsBytes(out_dice.items), dice_cmd.dataoff); - self.load_commands_dirty = true; + ncmds.* += 1; } -fn writeSymtab(self: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); +fn writeSymtabs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { + var symtab_cmd = macho.symtab_command{ + .cmdsize = @sizeOf(macho.symtab_command), + .symoff = 0, + .nsyms = 0, + .stroff = 0, + .strsize = 0, + }; + var dysymtab_cmd = macho.dysymtab_command{ + .cmdsize = @sizeOf(macho.dysymtab_command), + .ilocalsym = 0, + .nlocalsym = 0, + .iextdefsym = 0, + .nextdefsym = 0, + .iundefsym = 0, + .nundefsym = 0, + .tocoff = 0, + .ntoc = 0, + .modtaboff = 0, + .nmodtab = 0, + .extrefsymoff = 0, + .nextrefsyms = 0, + .indirectsymoff = 0, + .nindirectsyms = 0, + .extreloff = 0, + .nextrel = 0, + .locreloff = 0, + .nlocrel = 0, + }; + var ctx = try self.writeSymtab(&symtab_cmd); + defer ctx.imports_table.deinit(); + try self.writeDysymtab(ctx, &dysymtab_cmd); + try self.writeStrtab(&symtab_cmd); + try lc_writer.writeStruct(symtab_cmd); + try lc_writer.writeStruct(dysymtab_cmd); + ncmds.* += 2; +} +fn writeSymtab(self: *MachO, lc: *macho.symtab_command) !SymtabCtx { const gpa = self.base.allocator; - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].symtab; - const symoff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(macho.nlist_64)); - symtab.symoff = @intCast(u32, symoff); var locals = std.ArrayList(macho.nlist_64).init(gpa); defer locals.deinit(); @@ -6101,8 +5885,8 @@ fn 
writeSymtab(self: *MachO) !void { var imports = std.ArrayList(macho.nlist_64).init(gpa); defer imports.deinit(); + var imports_table = std.AutoHashMap(SymbolWithLoc, u32).init(gpa); - defer imports_table.deinit(); for (self.globals.values()) |global| { const sym = self.getSymbol(global); @@ -6115,56 +5899,84 @@ fn writeSymtab(self: *MachO) !void { try imports_table.putNoClobber(global, new_index); } - const nlocals = locals.items.len; - const nexports = exports.items.len; - const nimports = imports.items.len; - symtab.nsyms = @intCast(u32, nlocals + nexports + nimports); + const nlocals = @intCast(u32, locals.items.len); + const nexports = @intCast(u32, exports.items.len); + const nimports = @intCast(u32, imports.items.len); + const nsyms = nlocals + nexports + nimports; + + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric( + u64, + seg.fileoff + seg.filesize, + @alignOf(macho.nlist_64), + ); + const needed_size = nsyms * @sizeOf(macho.nlist_64); + seg.filesize = offset + needed_size - seg.fileoff; var buffer = std.ArrayList(u8).init(gpa); defer buffer.deinit(); - try buffer.ensureTotalCapacityPrecise(symtab.nsyms * @sizeOf(macho.nlist_64)); + try buffer.ensureTotalCapacityPrecise(needed_size); buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(locals.items)); buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(exports.items)); buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(imports.items)); - log.debug("writing symtab from 0x{x} to 0x{x}", .{ symtab.symoff, symtab.symoff + buffer.items.len }); - try self.base.file.?.pwriteAll(buffer.items, symtab.symoff); + log.debug("writing symtab from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); + try self.base.file.?.pwriteAll(buffer.items, offset); - seg.inner.filesize = symtab.symoff + buffer.items.len - seg.inner.fileoff; + lc.symoff = @intCast(u32, offset); + lc.nsyms = nsyms; - // Update dynamic symbol table. 
- const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].dysymtab; - dysymtab.nlocalsym = @intCast(u32, nlocals); - dysymtab.iextdefsym = dysymtab.nlocalsym; - dysymtab.nextdefsym = @intCast(u32, nexports); - dysymtab.iundefsym = dysymtab.nlocalsym + dysymtab.nextdefsym; - dysymtab.nundefsym = @intCast(u32, nimports); + return SymtabCtx{ + .nlocalsym = nlocals, + .nextdefsym = nexports, + .nundefsym = nimports, + .imports_table = imports_table, + }; +} +fn writeStrtab(self: *MachO, lc: *macho.symtab_command) !void { + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64)); + const needed_size = self.strtab.buffer.items.len; + seg.filesize = offset + needed_size - seg.fileoff; + + log.debug("writing string table from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); + + try self.base.file.?.pwriteAll(self.strtab.buffer.items, offset); + + lc.stroff = @intCast(u32, offset); + lc.strsize = @intCast(u32, needed_size); +} + +const SymtabCtx = struct { + nlocalsym: u32, + nextdefsym: u32, + nundefsym: u32, + imports_table: std.AutoHashMap(SymbolWithLoc, u32), +}; + +fn writeDysymtab(self: *MachO, ctx: SymtabCtx, lc: *macho.dysymtab_command) !void { + const gpa = self.base.allocator; const nstubs = @intCast(u32, self.stubs_table.count()); const ngot_entries = @intCast(u32, self.got_entries_table.count()); + const nindirectsyms = nstubs * 2 + ngot_entries; + const iextdefsym = ctx.nlocalsym; + const iundefsym = iextdefsym + ctx.nextdefsym; - const indirectsymoff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(u64)); - dysymtab.indirectsymoff = @intCast(u32, indirectsymoff); - dysymtab.nindirectsyms = nstubs * 2 + ngot_entries; + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64)); + const needed_size = nindirectsyms * @sizeOf(u32); + seg.filesize = offset + needed_size - seg.fileoff; - seg.inner.filesize = dysymtab.indirectsymoff + dysymtab.nindirectsyms * @sizeOf(u32) - seg.inner.fileoff; - - log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ - dysymtab.indirectsymoff, - dysymtab.indirectsymoff + dysymtab.nindirectsyms * @sizeOf(u32), - }); + log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); var buf = std.ArrayList(u8).init(gpa); defer buf.deinit(); - try buf.ensureTotalCapacity(dysymtab.nindirectsyms * @sizeOf(u32)); + try buf.ensureTotalCapacity(needed_size); const writer = buf.writer(); - if (self.text_segment_cmd_index) |text_segment_cmd_index| blk: { - const stubs_section_index = self.stubs_section_index orelse break :blk; - const stubs = self.getSectionPtr(.{ - .seg = text_segment_cmd_index, - .sect = stubs_section_index, - }); + if (self.stubs_section_index) |sect_id| { + const stubs = &self.sections.items(.header)[sect_id]; stubs.reserved1 = 0; for (self.stubs.items) |entry| { if (entry.sym_index == 0) continue; @@ -6172,16 +5984,12 @@ fn writeSymtab(self: *MachO) !void { if (atom_sym.n_desc == N_DESC_GCED) continue; const target_sym = self.getSymbol(entry.target); assert(target_sym.undf()); - try writer.writeIntLittle(u32, dysymtab.iundefsym + imports_table.get(entry.target).?); + try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); } } - if (self.data_const_segment_cmd_index) |data_const_segment_cmd_index| blk: { - const got_section_index = 
self.got_section_index orelse break :blk; - const got = self.getSectionPtr(.{ - .seg = data_const_segment_cmd_index, - .sect = got_section_index, - }); + if (self.got_section_index) |sect_id| { + const got = &self.sections.items(.header)[sect_id]; got.reserved1 = nstubs; for (self.got_entries.items) |entry| { if (entry.sym_index == 0) continue; @@ -6189,19 +5997,15 @@ fn writeSymtab(self: *MachO) !void { if (atom_sym.n_desc == N_DESC_GCED) continue; const target_sym = self.getSymbol(entry.target); if (target_sym.undf()) { - try writer.writeIntLittle(u32, dysymtab.iundefsym + imports_table.get(entry.target).?); + try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); } else { try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); } } } - if (self.data_segment_cmd_index) |data_segment_cmd_index| blk: { - const la_symbol_ptr_section_index = self.la_symbol_ptr_section_index orelse break :blk; - const la_symbol_ptr = self.getSectionPtr(.{ - .seg = data_segment_cmd_index, - .sect = la_symbol_ptr_section_index, - }); + if (self.la_symbol_ptr_section_index) |sect_id| { + const la_symbol_ptr = &self.sections.items(.header)[sect_id]; la_symbol_ptr.reserved1 = nstubs + ngot_entries; for (self.stubs.items) |entry| { if (entry.sym_index == 0) continue; @@ -6209,131 +6013,76 @@ fn writeSymtab(self: *MachO) !void { if (atom_sym.n_desc == N_DESC_GCED) continue; const target_sym = self.getSymbol(entry.target); assert(target_sym.undf()); - try writer.writeIntLittle(u32, dysymtab.iundefsym + imports_table.get(entry.target).?); + try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); } } - assert(buf.items.len == dysymtab.nindirectsyms * @sizeOf(u32)); + assert(buf.items.len == needed_size); + try self.base.file.?.pwriteAll(buf.items, offset); - try self.base.file.?.pwriteAll(buf.items, dysymtab.indirectsymoff); - self.load_commands_dirty = true; + lc.nlocalsym = ctx.nlocalsym; + lc.iextdefsym = iextdefsym; + lc.nextdefsym = ctx.nextdefsym; + lc.iundefsym = iundefsym; + lc.nundefsym = ctx.nundefsym; + lc.indirectsymoff = @intCast(u32, offset); + lc.nindirectsyms = nindirectsyms; } -fn writeStrtab(self: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].symtab; - const stroff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(u64)); - - const strsize = self.strtab.buffer.items.len; - symtab.stroff = @intCast(u32, stroff); - symtab.strsize = @intCast(u32, strsize); - seg.inner.filesize = symtab.stroff + symtab.strsize - seg.inner.fileoff; - - log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); - - try self.base.file.?.pwriteAll(self.strtab.buffer.items, symtab.stroff); - - self.load_commands_dirty = true; -} - -fn writeLinkeditSegment(self: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - seg.inner.filesize = 0; - - try self.writeDyldInfoData(); - try self.writeFunctionStarts(); - try self.writeDataInCode(); - try self.writeSymtab(); - try self.writeStrtab(); - - seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size); -} - -fn writeCodeSignaturePadding(self: *MachO, code_sig: *CodeSignature) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const seg = 
&self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const cs_cmd = &self.load_commands.items[self.code_signature_cmd_index.?].linkedit_data; +fn writeCodeSignaturePadding( + self: *MachO, + code_sig: *CodeSignature, + ncmds: *u32, + lc_writer: anytype, +) !u32 { + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; // Code signature data has to be 16-bytes aligned for Apple tools to recognize the file // https://github.com/opensource-apple/cctools/blob/fdb4825f303fd5c0751be524babd32958181b3ed/libstuff/checkout.c#L271 - const dataoff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, 16); - const datasize = code_sig.estimateSize(dataoff); - cs_cmd.dataoff = @intCast(u32, dataoff); - cs_cmd.datasize = @intCast(u32, code_sig.estimateSize(dataoff)); - - // Advance size of __LINKEDIT segment - seg.inner.filesize = cs_cmd.dataoff + cs_cmd.datasize - seg.inner.fileoff; - seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size); - log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ dataoff, dataoff + datasize }); + const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, 16); + const needed_size = code_sig.estimateSize(offset); + seg.filesize = offset + needed_size - seg.fileoff; + seg.vmsize = mem.alignForwardGeneric(u64, seg.filesize, self.page_size); + log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); // Pad out the space. We need to do this to calculate valid hashes for everything in the file // except for code signature data. - try self.base.file.?.pwriteAll(&[_]u8{0}, dataoff + datasize - 1); - self.load_commands_dirty = true; + try self.base.file.?.pwriteAll(&[_]u8{0}, offset + needed_size - 1); + + try lc_writer.writeStruct(macho.linkedit_data_command{ + .cmd = .CODE_SIGNATURE, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = @intCast(u32, offset), + .datasize = @intCast(u32, needed_size), + }); + ncmds.* += 1; + + return @intCast(u32, offset); } -fn writeCodeSignature(self: *MachO, code_sig: *CodeSignature) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const code_sig_cmd = self.load_commands.items[self.code_signature_cmd_index.?].linkedit_data; - const seg = self.load_commands.items[self.text_segment_cmd_index.?].segment; +fn writeCodeSignature(self: *MachO, code_sig: *CodeSignature, offset: u32) !void { + const seg = self.segments.items[self.text_segment_cmd_index.?]; var buffer = std.ArrayList(u8).init(self.base.allocator); defer buffer.deinit(); try buffer.ensureTotalCapacityPrecise(code_sig.size()); try code_sig.writeAdhocSignature(self.base.allocator, .{ .file = self.base.file.?, - .exec_seg_base = seg.inner.fileoff, - .exec_seg_limit = seg.inner.filesize, - .code_sig_cmd = code_sig_cmd, + .exec_seg_base = seg.fileoff, + .exec_seg_limit = seg.filesize, + .file_size = offset, .output_mode = self.base.options.output_mode, }, buffer.writer()); assert(buffer.items.len == code_sig.size()); log.debug("writing code signature from 0x{x} to 0x{x}", .{ - code_sig_cmd.dataoff, - code_sig_cmd.dataoff + buffer.items.len, + offset, + offset + buffer.items.len, }); - try self.base.file.?.pwriteAll(buffer.items, code_sig_cmd.dataoff); -} - -/// Writes all load commands and section headers. 
-fn writeLoadCommands(self: *MachO) !void { - if (!self.load_commands_dirty) return; - - var sizeofcmds: u32 = 0; - for (self.load_commands.items) |lc| { - if (lc.cmd() == .NONE) continue; - sizeofcmds += lc.cmdsize(); - } - - var buffer = try self.base.allocator.alloc(u8, sizeofcmds); - defer self.base.allocator.free(buffer); - var fib = std.io.fixedBufferStream(buffer); - const writer = fib.writer(); - for (self.load_commands.items) |lc| { - if (lc.cmd() == .NONE) continue; - try lc.write(writer); - } - - const off = @sizeOf(macho.mach_header_64); - - log.debug("writing load commands from 0x{x} to 0x{x}", .{ off, off + sizeofcmds }); - - try self.base.file.?.pwriteAll(buffer, off); - self.load_commands_dirty = false; + try self.base.file.?.pwriteAll(buffer.items, offset); } /// Writes Mach-O file header. -fn writeHeader(self: *MachO) !void { +fn writeHeader(self: *MachO, ncmds: u32, sizeofcmds: u32) !void { var header: macho.mach_header_64 = .{}; header.flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE | macho.MH_TWOLEVEL; @@ -6365,14 +6114,8 @@ fn writeHeader(self: *MachO) !void { header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; } - header.ncmds = 0; - header.sizeofcmds = 0; - - for (self.load_commands.items) |cmd| { - if (cmd.cmd() == .NONE) continue; - header.sizeofcmds += cmd.cmdsize(); - header.ncmds += 1; - } + header.ncmds = ncmds; + header.sizeofcmds = sizeofcmds; log.debug("writing Mach-O header {}", .{header}); @@ -6392,33 +6135,13 @@ pub fn makeStaticString(bytes: []const u8) [16]u8 { return buf; } -pub fn getSectionOrdinal(self: *MachO, match: MatchingSection) u8 { - return @intCast(u8, self.section_ordinals.getIndex(match).?) + 1; -} - -pub fn getMatchingSectionFromOrdinal(self: *MachO, ord: u8) MatchingSection { - const index = ord - 1; - assert(index < self.section_ordinals.count()); - return self.section_ordinals.keys()[index]; -} - -pub fn getSegmentPtr(self: *MachO, match: MatchingSection) *macho.SegmentCommand { - assert(match.seg < self.load_commands.items.len); - return &self.load_commands.items[match.seg].segment; -} - -pub fn getSegment(self: *MachO, match: MatchingSection) macho.SegmentCommand { - return self.getSegmentPtr(match).*; -} - -pub fn getSectionPtr(self: *MachO, match: MatchingSection) *macho.section_64 { - const seg = self.getSegmentPtr(match); - assert(match.sect < seg.sections.items.len); - return &seg.sections.items[match.sect]; -} - -pub fn getSection(self: *MachO, match: MatchingSection) macho.section_64 { - return self.getSectionPtr(match).*; +fn getSectionIndexes(self: MachO, segment_index: u8) struct { start: u8, end: u8 } { + var start: u8 = 0; + const nsects = for (self.segments.items) |seg, i| { + if (i == segment_index) break @intCast(u8, seg.nsects); + start += @intCast(u8, seg.nsects); + } else 0; + return .{ .start = start, .end = start + nsects }; } pub fn symbolIsTemp(self: *MachO, sym_with_loc: SymbolWithLoc) bool { @@ -6512,72 +6235,6 @@ pub fn findFirst(comptime T: type, haystack: []const T, start: usize, predicate: return i; } -const DebugInfo = struct { - inner: dwarf.DwarfInfo, - debug_info: []const u8, - debug_abbrev: []const u8, - debug_str: []const u8, - debug_line: []const u8, - debug_line_str: []const u8, - debug_ranges: []const u8, - - pub fn parse(allocator: Allocator, object: Object) !?DebugInfo { - var debug_info = blk: { - const index = object.dwarf_debug_info_index orelse return null; - break :blk try object.getSectionContents(index); - }; - var debug_abbrev = blk: { - const index = 
object.dwarf_debug_abbrev_index orelse return null; - break :blk try object.getSectionContents(index); - }; - var debug_str = blk: { - const index = object.dwarf_debug_str_index orelse return null; - break :blk try object.getSectionContents(index); - }; - var debug_line = blk: { - const index = object.dwarf_debug_line_index orelse return null; - break :blk try object.getSectionContents(index); - }; - var debug_line_str = blk: { - if (object.dwarf_debug_line_str_index) |ind| { - break :blk try object.getSectionContents(ind); - } - break :blk &[0]u8{}; - }; - var debug_ranges = blk: { - if (object.dwarf_debug_ranges_index) |ind| { - break :blk try object.getSectionContents(ind); - } - break :blk &[0]u8{}; - }; - - var inner: dwarf.DwarfInfo = .{ - .endian = .Little, - .debug_info = debug_info, - .debug_abbrev = debug_abbrev, - .debug_str = debug_str, - .debug_line = debug_line, - .debug_line_str = debug_line_str, - .debug_ranges = debug_ranges, - }; - try dwarf.openDwarfDebugInfo(&inner, allocator); - - return DebugInfo{ - .inner = inner, - .debug_info = debug_info, - .debug_abbrev = debug_abbrev, - .debug_str = debug_str, - .debug_line = debug_line, - .debug_line_str = debug_line_str, - .debug_ranges = debug_ranges, - }; - } - - pub fn deinit(self: *DebugInfo, allocator: Allocator) void { - self.inner.deinit(allocator); - } -}; - pub fn generateSymbolStabs( self: *MachO, object: Object, @@ -6585,14 +6242,15 @@ pub fn generateSymbolStabs( ) !void { assert(!self.base.options.strip); - const gpa = self.base.allocator; - log.debug("parsing debug info in '{s}'", .{object.name}); - var debug_info = (try DebugInfo.parse(gpa, object)) orelse return; + const gpa = self.base.allocator; + var debug_info = try object.parseDwarfInfo(); + defer debug_info.deinit(gpa); + try dwarf.openDwarfDebugInfo(&debug_info, gpa); // We assume there is only one CU. - const compile_unit = debug_info.inner.findCompileUnit(0x0) catch |err| switch (err) { + const compile_unit = debug_info.findCompileUnit(0x0) catch |err| switch (err) { error.MissingDebugInfo => { // TODO audit cases with missing debug info and audit our dwarf.zig module. 
log.debug("invalid or missing debug info in {s}; skipping", .{object.name}); @@ -6600,8 +6258,8 @@ pub fn generateSymbolStabs( }, else => |e| return e, }; - const tu_name = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT.name); - const tu_comp_dir = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT.comp_dir); + const tu_name = try compile_unit.die.getAttrString(&debug_info, dwarf.AT.name); + const tu_comp_dir = try compile_unit.die.getAttrString(&debug_info, dwarf.AT.comp_dir); // Open scope try locals.ensureUnusedCapacity(3); @@ -6664,7 +6322,7 @@ pub fn generateSymbolStabs( fn generateSymbolStabsForSymbol( self: *MachO, sym_loc: SymbolWithLoc, - debug_info: DebugInfo, + debug_info: dwarf.DwarfInfo, buf: *[4]macho.nlist_64, ) ![]const macho.nlist_64 { const gpa = self.base.allocator; @@ -6679,7 +6337,7 @@ fn generateSymbolStabsForSymbol( const source_sym = object.getSourceSymbol(sym_loc.sym_index) orelse return buf[0..0]; const size: ?u64 = size: { if (source_sym.tentative()) break :size null; - for (debug_info.inner.func_list.items) |func| { + for (debug_info.func_list.items) |func| { if (func.pc_range) |range| { if (source_sym.n_value >= range.start and source_sym.n_value < range.end) { break :size range.end - range.start; @@ -6731,260 +6389,260 @@ fn generateSymbolStabsForSymbol( } } -fn snapshotState(self: *MachO) !void { - const emit = self.base.options.emit orelse { - log.debug("no emit directory found; skipping snapshot...", .{}); - return; - }; +// fn snapshotState(self: *MachO) !void { +// const emit = self.base.options.emit orelse { +// log.debug("no emit directory found; skipping snapshot...", .{}); +// return; +// }; - const Snapshot = struct { - const Node = struct { - const Tag = enum { - section_start, - section_end, - atom_start, - atom_end, - relocation, +// const Snapshot = struct { +// const Node = struct { +// const Tag = enum { +// section_start, +// section_end, +// atom_start, +// atom_end, +// relocation, - pub fn jsonStringify( - tag: Tag, - options: std.json.StringifyOptions, - out_stream: anytype, - ) !void { - _ = options; - switch (tag) { - .section_start => try out_stream.writeAll("\"section_start\""), - .section_end => try out_stream.writeAll("\"section_end\""), - .atom_start => try out_stream.writeAll("\"atom_start\""), - .atom_end => try out_stream.writeAll("\"atom_end\""), - .relocation => try out_stream.writeAll("\"relocation\""), - } - } - }; - const Payload = struct { - name: []const u8 = "", - aliases: [][]const u8 = &[0][]const u8{}, - is_global: bool = false, - target: u64 = 0, - }; - address: u64, - tag: Tag, - payload: Payload, - }; - timestamp: i128, - nodes: []Node, - }; +// pub fn jsonStringify( +// tag: Tag, +// options: std.json.StringifyOptions, +// out_stream: anytype, +// ) !void { +// _ = options; +// switch (tag) { +// .section_start => try out_stream.writeAll("\"section_start\""), +// .section_end => try out_stream.writeAll("\"section_end\""), +// .atom_start => try out_stream.writeAll("\"atom_start\""), +// .atom_end => try out_stream.writeAll("\"atom_end\""), +// .relocation => try out_stream.writeAll("\"relocation\""), +// } +// } +// }; +// const Payload = struct { +// name: []const u8 = "", +// aliases: [][]const u8 = &[0][]const u8{}, +// is_global: bool = false, +// target: u64 = 0, +// }; +// address: u64, +// tag: Tag, +// payload: Payload, +// }; +// timestamp: i128, +// nodes: []Node, +// }; - var arena_allocator = std.heap.ArenaAllocator.init(self.base.allocator); - defer arena_allocator.deinit(); 
- const arena = arena_allocator.allocator(); +// var arena_allocator = std.heap.ArenaAllocator.init(self.base.allocator); +// defer arena_allocator.deinit(); +// const arena = arena_allocator.allocator(); - const out_file = try emit.directory.handle.createFile("snapshots.json", .{ - .truncate = false, - .read = true, - }); - defer out_file.close(); +// const out_file = try emit.directory.handle.createFile("snapshots.json", .{ +// .truncate = false, +// .read = true, +// }); +// defer out_file.close(); - if (out_file.seekFromEnd(-1)) { - try out_file.writer().writeByte(','); - } else |err| switch (err) { - error.Unseekable => try out_file.writer().writeByte('['), - else => |e| return e, - } - const writer = out_file.writer(); +// if (out_file.seekFromEnd(-1)) { +// try out_file.writer().writeByte(','); +// } else |err| switch (err) { +// error.Unseekable => try out_file.writer().writeByte('['), +// else => |e| return e, +// } +// const writer = out_file.writer(); - var snapshot = Snapshot{ - .timestamp = std.time.nanoTimestamp(), - .nodes = undefined, - }; - var nodes = std.ArrayList(Snapshot.Node).init(arena); +// var snapshot = Snapshot{ +// .timestamp = std.time.nanoTimestamp(), +// .nodes = undefined, +// }; +// var nodes = std.ArrayList(Snapshot.Node).init(arena); - for (self.section_ordinals.keys()) |key| { - const sect = self.getSection(key); - const sect_name = try std.fmt.allocPrint(arena, "{s},{s}", .{ sect.segName(), sect.sectName() }); - try nodes.append(.{ - .address = sect.addr, - .tag = .section_start, - .payload = .{ .name = sect_name }, - }); +// for (self.section_ordinals.keys()) |key| { +// const sect = self.getSection(key); +// const sect_name = try std.fmt.allocPrint(arena, "{s},{s}", .{ sect.segName(), sect.sectName() }); +// try nodes.append(.{ +// .address = sect.addr, +// .tag = .section_start, +// .payload = .{ .name = sect_name }, +// }); - const is_tlv = sect.type_() == macho.S_THREAD_LOCAL_VARIABLES; +// const is_tlv = sect.type_() == macho.S_THREAD_LOCAL_VARIABLES; - var atom: *Atom = self.atoms.get(key) orelse { - try nodes.append(.{ - .address = sect.addr + sect.size, - .tag = .section_end, - .payload = .{}, - }); - continue; - }; +// var atom: *Atom = self.atoms.get(key) orelse { +// try nodes.append(.{ +// .address = sect.addr + sect.size, +// .tag = .section_end, +// .payload = .{}, +// }); +// continue; +// }; - while (atom.prev) |prev| { - atom = prev; - } +// while (atom.prev) |prev| { +// atom = prev; +// } - while (true) { - const atom_sym = atom.getSymbol(self); - var node = Snapshot.Node{ - .address = atom_sym.n_value, - .tag = .atom_start, - .payload = .{ - .name = atom.getName(self), - .is_global = self.globals.contains(atom.getName(self)), - }, - }; +// while (true) { +// const atom_sym = atom.getSymbol(self); +// var node = Snapshot.Node{ +// .address = atom_sym.n_value, +// .tag = .atom_start, +// .payload = .{ +// .name = atom.getName(self), +// .is_global = self.globals.contains(atom.getName(self)), +// }, +// }; - var aliases = std.ArrayList([]const u8).init(arena); - for (atom.contained.items) |sym_off| { - if (sym_off.offset == 0) { - try aliases.append(self.getSymbolName(.{ - .sym_index = sym_off.sym_index, - .file = atom.file, - })); - } - } - node.payload.aliases = aliases.toOwnedSlice(); - try nodes.append(node); +// var aliases = std.ArrayList([]const u8).init(arena); +// for (atom.contained.items) |sym_off| { +// if (sym_off.offset == 0) { +// try aliases.append(self.getSymbolName(.{ +// .sym_index = sym_off.sym_index, +// .file = 
atom.file, +// })); +// } +// } +// node.payload.aliases = aliases.toOwnedSlice(); +// try nodes.append(node); - var relocs = try std.ArrayList(Snapshot.Node).initCapacity(arena, atom.relocs.items.len); - for (atom.relocs.items) |rel| { - const source_addr = blk: { - const source_sym = atom.getSymbol(self); - break :blk source_sym.n_value + rel.offset; - }; - const target_addr = blk: { - const target_atom = rel.getTargetAtom(self) orelse { - // If there is no atom for target, we still need to check for special, atom-less - // symbols such as `___dso_handle`. - const target_name = self.getSymbolName(rel.target); - if (self.globals.contains(target_name)) { - const atomless_sym = self.getSymbol(rel.target); - break :blk atomless_sym.n_value; - } - break :blk 0; - }; - const target_sym = if (target_atom.isSymbolContained(rel.target, self)) - self.getSymbol(rel.target) - else - target_atom.getSymbol(self); - const base_address: u64 = if (is_tlv) base_address: { - const sect_id: u16 = sect_id: { - if (self.tlv_data_section_index) |i| { - break :sect_id i; - } else if (self.tlv_bss_section_index) |i| { - break :sect_id i; - } else unreachable; - }; - break :base_address self.getSection(.{ - .seg = self.data_segment_cmd_index.?, - .sect = sect_id, - }).addr; - } else 0; - break :blk target_sym.n_value - base_address; - }; +// var relocs = try std.ArrayList(Snapshot.Node).initCapacity(arena, atom.relocs.items.len); +// for (atom.relocs.items) |rel| { +// const source_addr = blk: { +// const source_sym = atom.getSymbol(self); +// break :blk source_sym.n_value + rel.offset; +// }; +// const target_addr = blk: { +// const target_atom = rel.getTargetAtom(self) orelse { +// // If there is no atom for target, we still need to check for special, atom-less +// // symbols such as `___dso_handle`. +// const target_name = self.getSymbolName(rel.target); +// if (self.globals.contains(target_name)) { +// const atomless_sym = self.getSymbol(rel.target); +// break :blk atomless_sym.n_value; +// } +// break :blk 0; +// }; +// const target_sym = if (target_atom.isSymbolContained(rel.target, self)) +// self.getSymbol(rel.target) +// else +// target_atom.getSymbol(self); +// const base_address: u64 = if (is_tlv) base_address: { +// const sect_id: u16 = sect_id: { +// if (self.tlv_data_section_index) |i| { +// break :sect_id i; +// } else if (self.tlv_bss_section_index) |i| { +// break :sect_id i; +// } else unreachable; +// }; +// break :base_address self.getSection(.{ +// .seg = self.data_segment_cmd_index.?, +// .sect = sect_id, +// }).addr; +// } else 0; +// break :blk target_sym.n_value - base_address; +// }; - relocs.appendAssumeCapacity(.{ - .address = source_addr, - .tag = .relocation, - .payload = .{ .target = target_addr }, - }); - } +// relocs.appendAssumeCapacity(.{ +// .address = source_addr, +// .tag = .relocation, +// .payload = .{ .target = target_addr }, +// }); +// } - if (atom.contained.items.len == 0) { - try nodes.appendSlice(relocs.items); - } else { - // Need to reverse iteration order of relocs since by default for relocatable sources - // they come in reverse. For linking, this doesn't matter in any way, however, for - // arranging the memoryline for displaying it does. - std.mem.reverse(Snapshot.Node, relocs.items); +// if (atom.contained.items.len == 0) { +// try nodes.appendSlice(relocs.items); +// } else { +// // Need to reverse iteration order of relocs since by default for relocatable sources +// // they come in reverse. 
For linking, this doesn't matter in any way, however, for +// // arranging the memoryline for displaying it does. +// std.mem.reverse(Snapshot.Node, relocs.items); - var next_i: usize = 0; - var last_rel: usize = 0; - while (next_i < atom.contained.items.len) : (next_i += 1) { - const loc = SymbolWithLoc{ - .sym_index = atom.contained.items[next_i].sym_index, - .file = atom.file, - }; - const cont_sym = self.getSymbol(loc); - const cont_sym_name = self.getSymbolName(loc); - var contained_node = Snapshot.Node{ - .address = cont_sym.n_value, - .tag = .atom_start, - .payload = .{ - .name = cont_sym_name, - .is_global = self.globals.contains(cont_sym_name), - }, - }; +// var next_i: usize = 0; +// var last_rel: usize = 0; +// while (next_i < atom.contained.items.len) : (next_i += 1) { +// const loc = SymbolWithLoc{ +// .sym_index = atom.contained.items[next_i].sym_index, +// .file = atom.file, +// }; +// const cont_sym = self.getSymbol(loc); +// const cont_sym_name = self.getSymbolName(loc); +// var contained_node = Snapshot.Node{ +// .address = cont_sym.n_value, +// .tag = .atom_start, +// .payload = .{ +// .name = cont_sym_name, +// .is_global = self.globals.contains(cont_sym_name), +// }, +// }; - // Accumulate aliases - var inner_aliases = std.ArrayList([]const u8).init(arena); - while (true) { - if (next_i + 1 >= atom.contained.items.len) break; - const next_sym_loc = SymbolWithLoc{ - .sym_index = atom.contained.items[next_i + 1].sym_index, - .file = atom.file, - }; - const next_sym = self.getSymbol(next_sym_loc); - if (next_sym.n_value != cont_sym.n_value) break; - const next_sym_name = self.getSymbolName(next_sym_loc); - if (self.globals.contains(next_sym_name)) { - try inner_aliases.append(contained_node.payload.name); - contained_node.payload.name = next_sym_name; - contained_node.payload.is_global = true; - } else try inner_aliases.append(next_sym_name); - next_i += 1; - } +// // Accumulate aliases +// var inner_aliases = std.ArrayList([]const u8).init(arena); +// while (true) { +// if (next_i + 1 >= atom.contained.items.len) break; +// const next_sym_loc = SymbolWithLoc{ +// .sym_index = atom.contained.items[next_i + 1].sym_index, +// .file = atom.file, +// }; +// const next_sym = self.getSymbol(next_sym_loc); +// if (next_sym.n_value != cont_sym.n_value) break; +// const next_sym_name = self.getSymbolName(next_sym_loc); +// if (self.globals.contains(next_sym_name)) { +// try inner_aliases.append(contained_node.payload.name); +// contained_node.payload.name = next_sym_name; +// contained_node.payload.is_global = true; +// } else try inner_aliases.append(next_sym_name); +// next_i += 1; +// } - const cont_size = if (next_i + 1 < atom.contained.items.len) - self.getSymbol(.{ - .sym_index = atom.contained.items[next_i + 1].sym_index, - .file = atom.file, - }).n_value - cont_sym.n_value - else - atom_sym.n_value + atom.size - cont_sym.n_value; +// const cont_size = if (next_i + 1 < atom.contained.items.len) +// self.getSymbol(.{ +// .sym_index = atom.contained.items[next_i + 1].sym_index, +// .file = atom.file, +// }).n_value - cont_sym.n_value +// else +// atom_sym.n_value + atom.size - cont_sym.n_value; - contained_node.payload.aliases = inner_aliases.toOwnedSlice(); - try nodes.append(contained_node); +// contained_node.payload.aliases = inner_aliases.toOwnedSlice(); +// try nodes.append(contained_node); - for (relocs.items[last_rel..]) |rel| { - if (rel.address >= cont_sym.n_value + cont_size) { - break; - } - try nodes.append(rel); - last_rel += 1; - } +// for 
(relocs.items[last_rel..]) |rel| { +// if (rel.address >= cont_sym.n_value + cont_size) { +// break; +// } +// try nodes.append(rel); +// last_rel += 1; +// } - try nodes.append(.{ - .address = cont_sym.n_value + cont_size, - .tag = .atom_end, - .payload = .{}, - }); - } - } +// try nodes.append(.{ +// .address = cont_sym.n_value + cont_size, +// .tag = .atom_end, +// .payload = .{}, +// }); +// } +// } - try nodes.append(.{ - .address = atom_sym.n_value + atom.size, - .tag = .atom_end, - .payload = .{}, - }); +// try nodes.append(.{ +// .address = atom_sym.n_value + atom.size, +// .tag = .atom_end, +// .payload = .{}, +// }); - if (atom.next) |next| { - atom = next; - } else break; - } +// if (atom.next) |next| { +// atom = next; +// } else break; +// } - try nodes.append(.{ - .address = sect.addr + sect.size, - .tag = .section_end, - .payload = .{}, - }); - } +// try nodes.append(.{ +// .address = sect.addr + sect.size, +// .tag = .section_end, +// .payload = .{}, +// }); +// } - snapshot.nodes = nodes.toOwnedSlice(); +// snapshot.nodes = nodes.toOwnedSlice(); - try std.json.stringify(snapshot, .{}, writer); - try writer.writeByte(']'); -} +// try std.json.stringify(snapshot, .{}, writer); +// try writer.writeByte(']'); +// } fn logSymAttributes(sym: macho.nlist_64, buf: *[9]u8) []const u8 { mem.set(u8, buf[0..4], '_'); @@ -7104,26 +6762,19 @@ fn logSymtab(self: *MachO) void { } } -fn logSectionOrdinals(self: *MachO) void { - for (self.section_ordinals.keys()) |match, i| { - const sect = self.getSection(match); - log.debug("sect({d}, '{s},{s}')", .{ i + 1, sect.segName(), sect.sectName() }); - } -} - fn logAtoms(self: *MachO) void { log.debug("atoms:", .{}); - var it = self.atoms.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var atom = entry.value_ptr.*; + + const slice = self.sections.slice(); + for (slice.items(.last_atom)) |last, i| { + var atom = last orelse continue; + const header = slice.items(.header)[i]; while (atom.prev) |prev| { atom = prev; } - const sect = self.getSection(match); - log.debug("{s},{s}", .{ sect.segName(), sect.sectName() }); + log.debug("{s},{s}", .{ header.segName(), header.sectName() }); while (true) { self.logAtom(atom); diff --git a/src/link/MachO/Archive.zig b/src/link/MachO/Archive.zig index ee43e5b2a2..054f75fff3 100644 --- a/src/link/MachO/Archive.zig +++ b/src/link/MachO/Archive.zig @@ -6,19 +6,14 @@ const fs = std.fs; const log = std.log.scoped(.link); const macho = std.macho; const mem = std.mem; -const fat = @import("fat.zig"); const Allocator = mem.Allocator; const Object = @import("Object.zig"); file: fs.File, +fat_offset: u64, name: []const u8, - -header: ?ar_hdr = null, - -// The actual contents we care about linking with will be embedded at -// an offset within a file if we are linking against a fat lib -library_offset: u64 = 0, +header: ar_hdr = undefined, /// Parsed table of contents. 
/// Each symbol name points to a list of all definition @@ -103,11 +98,7 @@ pub fn deinit(self: *Archive, allocator: Allocator) void { allocator.free(self.name); } -pub fn parse(self: *Archive, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) !void { - const reader = self.file.reader(); - self.library_offset = try fat.getLibraryOffset(reader, cpu_arch); - try self.file.seekTo(self.library_offset); - +pub fn parse(self: *Archive, allocator: Allocator, reader: anytype) !void { const magic = try reader.readBytesNoEof(SARMAG); if (!mem.eql(u8, &magic, ARMAG)) { log.debug("invalid magic: expected '{s}', found '{s}'", .{ ARMAG, magic }); @@ -115,21 +106,23 @@ pub fn parse(self: *Archive, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch } self.header = try reader.readStruct(ar_hdr); - if (!mem.eql(u8, &self.header.?.ar_fmag, ARFMAG)) { - log.debug("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, self.header.?.ar_fmag }); + if (!mem.eql(u8, &self.header.ar_fmag, ARFMAG)) { + log.debug("invalid header delimiter: expected '{s}', found '{s}'", .{ + ARFMAG, + self.header.ar_fmag, + }); return error.NotArchive; } - var embedded_name = try parseName(allocator, self.header.?, reader); + const name_or_length = try self.header.nameOrLength(); + var embedded_name = try parseName(allocator, name_or_length, reader); log.debug("parsing archive '{s}' at '{s}'", .{ embedded_name, self.name }); defer allocator.free(embedded_name); try self.parseTableOfContents(allocator, reader); - try reader.context.seekTo(0); } -fn parseName(allocator: Allocator, header: ar_hdr, reader: anytype) ![]u8 { - const name_or_length = try header.nameOrLength(); +fn parseName(allocator: Allocator, name_or_length: ar_hdr.NameOrLength, reader: anytype) ![]u8 { var name: []u8 = undefined; switch (name_or_length) { .Name => |n| { @@ -187,9 +180,14 @@ fn parseTableOfContents(self: *Archive, allocator: Allocator, reader: anytype) ! 
} } -pub fn parseObject(self: Archive, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch, offset: u32) !Object { +pub fn parseObject( + self: Archive, + allocator: Allocator, + cpu_arch: std.Target.Cpu.Arch, + offset: u32, +) !Object { const reader = self.file.reader(); - try reader.context.seekTo(offset + self.library_offset); + try reader.context.seekTo(self.fat_offset + offset); const object_header = try reader.readStruct(ar_hdr); @@ -198,7 +196,8 @@ pub fn parseObject(self: Archive, allocator: Allocator, cpu_arch: std.Target.Cpu return error.MalformedArchive; } - const object_name = try parseName(allocator, object_header, reader); + const name_or_length = try object_header.nameOrLength(); + const object_name = try parseName(allocator, name_or_length, reader); defer allocator.free(object_name); log.debug("extracting object '{s}' from archive '{s}'", .{ object_name, self.name }); @@ -209,15 +208,24 @@ pub fn parseObject(self: Archive, allocator: Allocator, cpu_arch: std.Target.Cpu break :name try std.fmt.allocPrint(allocator, "{s}({s})", .{ path, object_name }); }; + const object_name_len = switch (name_or_length) { + .Name => 0, + .Length => |len| len, + }; + const object_size = (try object_header.size()) - object_name_len; + const contents = try allocator.allocWithOptions(u8, object_size, @alignOf(u64), null); + const amt = try reader.readAll(contents); + if (amt != object_size) { + return error.InputOutput; + } + var object = Object{ - .file = try fs.cwd().openFile(self.name, .{}), .name = name, - .file_offset = @intCast(u32, try reader.context.getPos()), - .mtime = try self.header.?.date(), + .mtime = try self.header.date(), + .contents = contents, }; try object.parse(allocator, cpu_arch); - try reader.context.seekTo(0); return object; } diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index ba00764127..90c86e24ed 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -246,7 +246,7 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: else => { log.err("unexpected relocation type after ARM64_RELOC_ADDEND", .{}); log.err(" expected ARM64_RELOC_PAGE21 or ARM64_RELOC_PAGEOFF12", .{}); - log.err(" found {}", .{next}); + log.err(" found {s}", .{@tagName(next)}); return error.UnexpectedRelocationType; }, } @@ -285,7 +285,9 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: else => { log.err("unexpected relocation type after ARM64_RELOC_ADDEND", .{}); log.err(" expected ARM64_RELOC_UNSIGNED", .{}); - log.err(" found {}", .{@intToEnum(macho.reloc_type_arm64, relocs[i + 1].r_type)}); + log.err(" found {s}", .{ + @tagName(@intToEnum(macho.reloc_type_arm64, relocs[i + 1].r_type)), + }); return error.UnexpectedRelocationType; }, }, @@ -294,7 +296,9 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: else => { log.err("unexpected relocation type after X86_64_RELOC_ADDEND", .{}); log.err(" expected X86_64_RELOC_UNSIGNED", .{}); - log.err(" found {}", .{@intToEnum(macho.reloc_type_x86_64, relocs[i + 1].r_type)}); + log.err(" found {s}", .{ + @tagName(@intToEnum(macho.reloc_type_x86_64, relocs[i + 1].r_type)), + }); return error.UnexpectedRelocationType; }, }, @@ -309,13 +313,13 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: const sect_id = @intCast(u16, rel.r_symbolnum - 1); const sym_index = object.sections_as_symbols.get(sect_id) orelse blk: { const sect = object.getSourceSection(sect_id); - const match = (try 
context.macho_file.getMatchingSection(sect)) orelse + const match = (try context.macho_file.getOutputSection(sect)) orelse unreachable; const sym_index = @intCast(u32, object.symtab.items.len); try object.symtab.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, - .n_sect = context.macho_file.getSectionOrdinal(match), + .n_sect = match + 1, .n_desc = 0, .n_value = sect.addr, }); @@ -459,9 +463,10 @@ fn addPtrBindingOrRebase( }); } else { const source_sym = self.getSymbol(context.macho_file); - const match = context.macho_file.getMatchingSectionFromOrdinal(source_sym.n_sect); - const sect = context.macho_file.getSection(match); - const sect_type = sect.type_(); + const section = context.macho_file.sections.get(source_sym.n_sect - 1); + const header = section.header; + const segment_index = section.segment_index; + const sect_type = header.type_(); const should_rebase = rebase: { if (rel.r_length != 3) break :rebase false; @@ -470,12 +475,12 @@ fn addPtrBindingOrRebase( // that the segment is writable should be enough here. const is_right_segment = blk: { if (context.macho_file.data_segment_cmd_index) |idx| { - if (match.seg == idx) { + if (segment_index == idx) { break :blk true; } } if (context.macho_file.data_const_segment_cmd_index) |idx| { - if (match.seg == idx) { + if (segment_index == idx) { break :blk true; } } @@ -565,9 +570,8 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { }; const is_tlv = is_tlv: { const source_sym = self.getSymbol(macho_file); - const match = macho_file.getMatchingSectionFromOrdinal(source_sym.n_sect); - const sect = macho_file.getSection(match); - break :is_tlv sect.type_() == macho.S_THREAD_LOCAL_VARIABLES; + const header = macho_file.sections.items(.header)[source_sym.n_sect - 1]; + break :is_tlv header.type_() == macho.S_THREAD_LOCAL_VARIABLES; }; const target_addr = blk: { const target_atom = rel.getTargetAtom(macho_file) orelse { @@ -608,10 +612,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { return error.FailedToResolveRelocationTarget; } }; - break :base_address macho_file.getSection(.{ - .seg = macho_file.data_segment_cmd_index.?, - .sect = sect_id, - }).addr; + break :base_address macho_file.sections.items(.header)[sect_id].addr; } else 0; break :blk target_sym.n_value - base_address; }; diff --git a/src/link/MachO/CodeSignature.zig b/src/link/MachO/CodeSignature.zig index fbfd487ce2..530a13dc51 100644 --- a/src/link/MachO/CodeSignature.zig +++ b/src/link/MachO/CodeSignature.zig @@ -252,7 +252,7 @@ pub const WriteOpts = struct { file: fs.File, exec_seg_base: u64, exec_seg_limit: u64, - code_sig_cmd: macho.linkedit_data_command, + file_size: u32, output_mode: std.builtin.OutputMode, }; @@ -274,10 +274,9 @@ pub fn writeAdhocSignature( self.code_directory.inner.execSegBase = opts.exec_seg_base; self.code_directory.inner.execSegLimit = opts.exec_seg_limit; self.code_directory.inner.execSegFlags = if (opts.output_mode == .Exe) macho.CS_EXECSEG_MAIN_BINARY else 0; - const file_size = opts.code_sig_cmd.dataoff; - self.code_directory.inner.codeLimit = file_size; + self.code_directory.inner.codeLimit = opts.file_size; - const total_pages = mem.alignForward(file_size, self.page_size) / self.page_size; + const total_pages = mem.alignForward(opts.file_size, self.page_size) / self.page_size; var buffer = try allocator.alloc(u8, self.page_size); defer allocator.free(buffer); @@ -289,7 +288,10 @@ pub fn writeAdhocSignature( var i: usize = 0; while (i < total_pages) : (i += 1) { const fstart = i * self.page_size; - const fsize = 
if (fstart + self.page_size > file_size) file_size - fstart else self.page_size; + const fsize = if (fstart + self.page_size > opts.file_size) + opts.file_size - fstart + else + self.page_size; const len = try opts.file.preadAll(buffer, fstart); assert(fsize <= len); diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index 4da106eca1..f191d43f98 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -25,35 +25,18 @@ base: *MachO, dwarf: Dwarf, file: fs.File, -/// Table of all load commands -load_commands: std.ArrayListUnmanaged(macho.LoadCommand) = .{}, -/// __PAGEZERO segment -pagezero_segment_cmd_index: ?u16 = null, -/// __TEXT segment -text_segment_cmd_index: ?u16 = null, -/// __DATA_CONST segment -data_const_segment_cmd_index: ?u16 = null, -/// __DATA segment -data_segment_cmd_index: ?u16 = null, -/// __LINKEDIT segment -linkedit_segment_cmd_index: ?u16 = null, -/// __DWARF segment -dwarf_segment_cmd_index: ?u16 = null, -/// Symbol table -symtab_cmd_index: ?u16 = null, -/// UUID load command -uuid_cmd_index: ?u16 = null, +segments: std.ArrayListUnmanaged(macho.segment_command_64) = .{}, +sections: std.ArrayListUnmanaged(macho.section_64) = .{}, -/// Index into __TEXT,__text section. -text_section_index: ?u16 = null, +linkedit_segment_cmd_index: ?u8 = null, +dwarf_segment_cmd_index: ?u8 = null, -debug_info_section_index: ?u16 = null, -debug_abbrev_section_index: ?u16 = null, -debug_str_section_index: ?u16 = null, -debug_aranges_section_index: ?u16 = null, -debug_line_section_index: ?u16 = null, +debug_info_section_index: ?u8 = null, +debug_abbrev_section_index: ?u8 = null, +debug_str_section_index: ?u8 = null, +debug_aranges_section_index: ?u8 = null, +debug_line_section_index: ?u8 = null, -load_commands_dirty: bool = false, debug_string_table_dirty: bool = false, debug_abbrev_section_dirty: bool = false, debug_aranges_section_dirty: bool = false, @@ -78,98 +61,40 @@ pub const Reloc = struct { /// You must call this function *after* `MachO.populateMissingMetadata()` /// has been called to get a viable debug symbols output. 
pub fn populateMissingMetadata(self: *DebugSymbols, allocator: Allocator) !void { - if (self.uuid_cmd_index == null) { - const base_cmd = self.base.load_commands.items[self.base.uuid_cmd_index.?]; - self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(allocator, base_cmd); - self.load_commands_dirty = true; - } - - if (self.symtab_cmd_index == null) { - self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.base.allocator, .{ - .symtab = .{ - .cmdsize = @sizeOf(macho.symtab_command), - .symoff = 0, - .nsyms = 0, - .stroff = 0, - .strsize = 0, - }, - }); - try self.strtab.buffer.append(allocator, 0); - self.load_commands_dirty = true; - } - - if (self.pagezero_segment_cmd_index == null) { - self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const base_cmd = self.base.load_commands.items[self.base.pagezero_segment_cmd_index.?].segment; - const cmd = try self.copySegmentCommand(allocator, base_cmd); - try self.load_commands.append(allocator, .{ .segment = cmd }); - self.load_commands_dirty = true; - } - - if (self.text_segment_cmd_index == null) { - self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const base_cmd = self.base.load_commands.items[self.base.text_segment_cmd_index.?].segment; - const cmd = try self.copySegmentCommand(allocator, base_cmd); - try self.load_commands.append(allocator, .{ .segment = cmd }); - self.load_commands_dirty = true; - } - - if (self.data_const_segment_cmd_index == null) outer: { - if (self.base.data_const_segment_cmd_index == null) break :outer; // __DATA_CONST is optional - self.data_const_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const base_cmd = self.base.load_commands.items[self.base.data_const_segment_cmd_index.?].segment; - const cmd = try self.copySegmentCommand(allocator, base_cmd); - try self.load_commands.append(allocator, .{ .segment = cmd }); - self.load_commands_dirty = true; - } - - if (self.data_segment_cmd_index == null) outer: { - if (self.base.data_segment_cmd_index == null) break :outer; // __DATA is optional - self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const base_cmd = self.base.load_commands.items[self.base.data_segment_cmd_index.?].segment; - const cmd = try self.copySegmentCommand(allocator, base_cmd); - try self.load_commands.append(allocator, .{ .segment = cmd }); - self.load_commands_dirty = true; - } - if (self.linkedit_segment_cmd_index == null) { - self.linkedit_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const base_cmd = self.base.load_commands.items[self.base.linkedit_segment_cmd_index.?].segment; - var cmd = try self.copySegmentCommand(allocator, base_cmd); + self.linkedit_segment_cmd_index = @intCast(u8, self.segments.items.len); // TODO this needs reworking - cmd.inner.vmsize = self.base.page_size; - cmd.inner.fileoff = self.base.page_size; - cmd.inner.filesize = self.base.page_size; - try self.load_commands.append(allocator, .{ .segment = cmd }); - self.load_commands_dirty = true; + try self.segments.append(allocator, .{ + .segname = makeStaticString("__LINKEDIT"), + .vmaddr = self.base.page_size, + .vmsize = self.base.page_size, + .fileoff = self.base.page_size, + .filesize = self.base.page_size, + .maxprot = macho.PROT.READ, + .initprot = macho.PROT.READ, + .cmdsize = @sizeOf(macho.segment_command_64), + }); } if (self.dwarf_segment_cmd_index == null) { - self.dwarf_segment_cmd_index = 
@intCast(u16, self.load_commands.items.len); + self.dwarf_segment_cmd_index = @intCast(u8, self.segments.items.len); - const linkedit = self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; + const linkedit = self.segments.items[self.base.linkedit_segment_cmd_index.?]; const ideal_size: u16 = 200 + 128 + 160 + 250; const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.base.page_size); - const fileoff = linkedit.inner.fileoff + linkedit.inner.filesize; - const vmaddr = linkedit.inner.vmaddr + linkedit.inner.vmsize; + const fileoff = linkedit.fileoff + linkedit.filesize; + const vmaddr = linkedit.vmaddr + linkedit.vmsize; log.debug("found __DWARF segment free space 0x{x} to 0x{x}", .{ fileoff, fileoff + needed_size }); - try self.load_commands.append(allocator, .{ - .segment = .{ - .inner = .{ - .segname = makeStaticString("__DWARF"), - .vmaddr = vmaddr, - .vmsize = needed_size, - .fileoff = fileoff, - .filesize = needed_size, - .cmdsize = @sizeOf(macho.segment_command_64), - }, - }, + try self.segments.append(allocator, .{ + .segname = makeStaticString("__DWARF"), + .vmaddr = vmaddr, + .vmsize = needed_size, + .fileoff = fileoff, + .filesize = needed_size, + .cmdsize = @sizeOf(macho.segment_command_64), }); - self.load_commands_dirty = true; } if (self.debug_str_section_index == null) { @@ -203,18 +128,18 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: Allocator) !void } } -fn allocateSection(self: *DebugSymbols, sectname: []const u8, size: u64, alignment: u16) !u16 { - const seg = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; +fn allocateSection(self: *DebugSymbols, sectname: []const u8, size: u64, alignment: u16) !u8 { + const segment = &self.segments.items[self.dwarf_segment_cmd_index.?]; var sect = macho.section_64{ .sectname = makeStaticString(sectname), - .segname = seg.inner.segname, + .segname = segment.segname, .size = @intCast(u32, size), .@"align" = alignment, }; const alignment_pow_2 = try math.powi(u32, 2, alignment); const off = self.findFreeSpace(size, alignment_pow_2); - assert(off + size <= seg.inner.fileoff + seg.inner.filesize); // TODO expand + assert(off + size <= segment.fileoff + segment.filesize); // TODO expand log.debug("found {s},{s} section free space 0x{x} to 0x{x}", .{ sect.segName(), @@ -223,31 +148,20 @@ fn allocateSection(self: *DebugSymbols, sectname: []const u8, size: u64, alignme off + size, }); - sect.addr = seg.inner.vmaddr + off - seg.inner.fileoff; + sect.addr = segment.vmaddr + off - segment.fileoff; sect.offset = @intCast(u32, off); - const index = @intCast(u16, seg.sections.items.len); - try seg.sections.append(self.base.base.allocator, sect); - seg.inner.cmdsize += @sizeOf(macho.section_64); - seg.inner.nsects += 1; - - // TODO - // const match = MatchingSection{ - // .seg = segment_id, - // .sect = index, - // }; - // _ = try self.section_ordinals.getOrPut(self.base.allocator, match); - // try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); - - self.load_commands_dirty = true; + const index = @intCast(u8, self.sections.items.len); + try self.sections.append(self.base.base.allocator, sect); + segment.cmdsize += @sizeOf(macho.section_64); + segment.nsects += 1; return index; } fn detectAllocCollision(self: *DebugSymbols, start: u64, size: u64) ?u64 { - const seg = self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; const end = start + padToIdeal(size); - for (seg.sections.items) |section| { + for (self.sections.items) |section| { const 
increased_size = padToIdeal(section.size); const test_end = section.offset + increased_size; if (end > section.offset and start < test_end) { @@ -258,8 +172,8 @@ fn detectAllocCollision(self: *DebugSymbols, start: u64, size: u64) ?u64 { } pub fn findFreeSpace(self: *DebugSymbols, object_size: u64, min_alignment: u64) u64 { - const seg = self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - var offset: u64 = seg.inner.fileoff; + const segment = self.segments.items[self.dwarf_segment_cmd_index.?]; + var offset: u64 = segment.fileoff; while (self.detectAllocCollision(offset, object_size)) |item_end| { offset = mem.alignForwardGeneric(u64, item_end, min_alignment); } @@ -296,8 +210,7 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti break :blk got_entry.getName(self.base); }, }; - const seg = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - const sect = &seg.sections.items[self.debug_info_section_index.?]; + const sect = &self.sections.items[self.debug_info_section_index.?]; const file_offset = sect.offset + reloc.offset; log.debug("resolving relocation: {d}@{x} ('{s}') at offset {x}", .{ reloc.target, @@ -311,15 +224,13 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti if (self.debug_abbrev_section_dirty) { try self.dwarf.writeDbgAbbrev(&self.base.base); - self.load_commands_dirty = true; self.debug_abbrev_section_dirty = false; } if (self.debug_info_header_dirty) { // Currently only one compilation unit is supported, so the address range is simply // identical to the main program header virtual address and memory size. - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].segment; - const text_section = text_segment.sections.items[self.text_section_index.?]; + const text_section = self.base.sections.items(.header)[self.base.text_section_index.?]; const low_pc = text_section.addr; const high_pc = text_section.addr + text_section.size; try self.dwarf.writeDbgInfoHeader(&self.base.base, module, low_pc, high_pc); @@ -329,10 +240,8 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti if (self.debug_aranges_section_dirty) { // Currently only one compilation unit is supported, so the address range is simply // identical to the main program header virtual address and memory size. 
- const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].segment; - const text_section = text_segment.sections.items[self.text_section_index.?]; + const text_section = self.base.sections.items(.header)[self.base.text_section_index.?]; try self.dwarf.writeDbgAranges(&self.base.base, text_section.addr, text_section.size); - self.load_commands_dirty = true; self.debug_aranges_section_dirty = false; } @@ -342,8 +251,8 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti } { - const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - const debug_strtab_sect = &dwarf_segment.sections.items[self.debug_str_section_index.?]; + const dwarf_segment = &self.segments.items[self.dwarf_segment_cmd_index.?]; + const debug_strtab_sect = &self.sections.items[self.debug_str_section_index.?]; if (self.debug_string_table_dirty or self.dwarf.strtab.items.len != debug_strtab_sect.size) { const allocated_size = self.allocatedSize(debug_strtab_sect.offset); const needed_size = self.dwarf.strtab.items.len; @@ -351,7 +260,7 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti if (needed_size > allocated_size) { debug_strtab_sect.size = 0; // free the space const new_offset = self.findFreeSpace(needed_size, 1); - debug_strtab_sect.addr = dwarf_segment.inner.vmaddr + new_offset - dwarf_segment.inner.fileoff; + debug_strtab_sect.addr = dwarf_segment.vmaddr + new_offset - dwarf_segment.fileoff; debug_strtab_sect.offset = @intCast(u32, new_offset); } debug_strtab_sect.size = @intCast(u32, needed_size); @@ -362,28 +271,53 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti }); try self.file.pwriteAll(self.dwarf.strtab.items, debug_strtab_sect.offset); - self.load_commands_dirty = true; self.debug_string_table_dirty = false; } } - self.updateDwarfSegment(); - try self.writeLinkeditSegment(); - try self.updateVirtualMemoryMapping(); - try self.writeLoadCommands(allocator); - try self.writeHeader(); + var lc_buffer = std.ArrayList(u8).init(allocator); + defer lc_buffer.deinit(); + const lc_writer = lc_buffer.writer(); + var ncmds: u32 = 0; + + try self.writeLinkeditSegmentData(&ncmds, lc_writer); + self.updateDwarfSegment(); + + { + try lc_writer.writeStruct(self.base.uuid); + ncmds += 1; + } + + var headers_buf = std.ArrayList(u8).init(allocator); + defer headers_buf.deinit(); + try self.base.writeSegmentHeaders( + 0, + self.base.linkedit_segment_cmd_index.?, + &ncmds, + headers_buf.writer(), + ); + + for (self.segments.items) |seg| { + try headers_buf.writer().writeStruct(seg); + ncmds += 2; + } + for (self.sections.items) |header| { + try headers_buf.writer().writeStruct(header); + } + + try self.file.pwriteAll(headers_buf.items, @sizeOf(macho.mach_header_64)); + try self.file.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len); + + try self.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len + headers_buf.items.len)); - assert(!self.load_commands_dirty); assert(!self.debug_abbrev_section_dirty); assert(!self.debug_aranges_section_dirty); assert(!self.debug_string_table_dirty); } pub fn deinit(self: *DebugSymbols, allocator: Allocator) void { - for (self.load_commands.items) |*lc| { - lc.deinit(allocator); - } - self.load_commands.deinit(allocator); + self.segments.deinit(allocator); + self.sections.deinit(allocator); self.dwarf.deinit(); self.strtab.deinit(allocator); self.relocs.deinit(allocator); @@ -402,59 +336,19 @@ pub fn 
swapRemoveRelocs(self: *DebugSymbols, target: u32) void { } } -fn copySegmentCommand( - self: *DebugSymbols, - allocator: Allocator, - base_cmd: macho.SegmentCommand, -) !macho.SegmentCommand { - var cmd = macho.SegmentCommand{ - .inner = .{ - .segname = undefined, - .cmdsize = base_cmd.inner.cmdsize, - .vmaddr = base_cmd.inner.vmaddr, - .vmsize = base_cmd.inner.vmsize, - .maxprot = base_cmd.inner.maxprot, - .initprot = base_cmd.inner.initprot, - .nsects = base_cmd.inner.nsects, - .flags = base_cmd.inner.flags, - }, - }; - mem.copy(u8, &cmd.inner.segname, &base_cmd.inner.segname); +fn updateDwarfSegment(self: *DebugSymbols) void { + const linkedit = self.segments.items[self.linkedit_segment_cmd_index.?]; + const dwarf_segment = &self.segments.items[self.dwarf_segment_cmd_index.?]; - try cmd.sections.ensureTotalCapacity(allocator, cmd.inner.nsects); - for (base_cmd.sections.items) |base_sect, i| { - var sect = macho.section_64{ - .sectname = undefined, - .segname = undefined, - .addr = base_sect.addr, - .size = base_sect.size, - .offset = 0, - .@"align" = base_sect.@"align", - .reloff = 0, - .nreloc = 0, - .flags = base_sect.flags, - .reserved1 = base_sect.reserved1, - .reserved2 = base_sect.reserved2, - .reserved3 = base_sect.reserved3, - }; - mem.copy(u8, §.sectname, &base_sect.sectname); - mem.copy(u8, §.segname, &base_sect.segname); - - if (self.base.text_section_index.? == i) { - self.text_section_index = @intCast(u16, i); - } - - cmd.sections.appendAssumeCapacity(sect); + const new_start_aligned = linkedit.vmaddr + linkedit.vmsize; + const old_start_aligned = dwarf_segment.vmaddr; + const diff = new_start_aligned - old_start_aligned; + if (diff > 0) { + dwarf_segment.vmaddr = new_start_aligned; } - return cmd; -} - -fn updateDwarfSegment(self: *DebugSymbols) void { - const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - var max_offset: u64 = 0; - for (dwarf_segment.sections.items) |sect| { + for (self.sections.items) |*sect| { log.debug(" {s},{s} - 0x{x}-0x{x} - 0x{x}-0x{x}", .{ sect.segName(), sect.sectName(), @@ -466,44 +360,19 @@ fn updateDwarfSegment(self: *DebugSymbols) void { if (sect.offset + sect.size > max_offset) { max_offset = sect.offset + sect.size; } + sect.addr += diff; } - const file_size = max_offset - dwarf_segment.inner.fileoff; + const file_size = max_offset - dwarf_segment.fileoff; log.debug("__DWARF size 0x{x}", .{file_size}); - if (file_size != dwarf_segment.inner.filesize) { - dwarf_segment.inner.filesize = file_size; - if (dwarf_segment.inner.vmsize < dwarf_segment.inner.filesize) { - dwarf_segment.inner.vmsize = mem.alignForwardGeneric(u64, dwarf_segment.inner.filesize, self.base.page_size); - } - self.load_commands_dirty = true; + if (file_size != dwarf_segment.filesize) { + dwarf_segment.filesize = file_size; + dwarf_segment.vmsize = mem.alignForwardGeneric(u64, dwarf_segment.filesize, self.base.page_size); } } -/// Writes all load commands and section headers. 
-fn writeLoadCommands(self: *DebugSymbols, allocator: Allocator) !void { - if (!self.load_commands_dirty) return; - - var sizeofcmds: u32 = 0; - for (self.load_commands.items) |lc| { - sizeofcmds += lc.cmdsize(); - } - - var buffer = try allocator.alloc(u8, sizeofcmds); - defer allocator.free(buffer); - var fib = std.io.fixedBufferStream(buffer); - const writer = fib.writer(); - for (self.load_commands.items) |lc| { - try lc.write(writer); - } - - const off = @sizeOf(macho.mach_header_64); - log.debug("writing {} load commands from 0x{x} to 0x{x}", .{ self.load_commands.items.len, off, off + sizeofcmds }); - try self.file.pwriteAll(buffer, off); - self.load_commands_dirty = false; -} - -fn writeHeader(self: *DebugSymbols) !void { +fn writeHeader(self: *DebugSymbols, ncmds: u32, sizeofcmds: u32) !void { var header: macho.mach_header_64 = .{}; header.filetype = macho.MH_DSYM; @@ -519,12 +388,8 @@ fn writeHeader(self: *DebugSymbols) !void { else => return error.UnsupportedCpuArchitecture, } - header.ncmds = @intCast(u32, self.load_commands.items.len); - header.sizeofcmds = 0; - - for (self.load_commands.items) |cmd| { - header.sizeofcmds += cmd.cmdsize(); - } + header.ncmds = ncmds; + header.sizeofcmds = sizeofcmds; log.debug("writing Mach-O header {}", .{header}); @@ -532,79 +397,46 @@ fn writeHeader(self: *DebugSymbols) !void { } pub fn allocatedSize(self: *DebugSymbols, start: u64) u64 { - const seg = self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - assert(start >= seg.inner.fileoff); + const seg = self.segments.items[self.dwarf_segment_cmd_index.?]; + assert(start >= seg.fileoff); var min_pos: u64 = std.math.maxInt(u64); - for (seg.sections.items) |section| { + for (self.sections.items) |section| { if (section.offset <= start) continue; if (section.offset < min_pos) min_pos = section.offset; } return min_pos - start; } -fn updateVirtualMemoryMapping(self: *DebugSymbols) !void { - const macho_file = self.base; - const allocator = macho_file.base.allocator; - - const IndexTuple = std.meta.Tuple(&[_]type{ *?u16, *?u16 }); - const indices = &[_]IndexTuple{ - .{ &macho_file.text_segment_cmd_index, &self.text_segment_cmd_index }, - .{ &macho_file.data_const_segment_cmd_index, &self.data_const_segment_cmd_index }, - .{ &macho_file.data_segment_cmd_index, &self.data_segment_cmd_index }, - }; - - for (indices) |tuple| { - const orig_cmd = macho_file.load_commands.items[tuple[0].*.?].segment; - const cmd = try self.copySegmentCommand(allocator, orig_cmd); - const comp_cmd = &self.load_commands.items[tuple[1].*.?]; - comp_cmd.deinit(allocator); - self.load_commands.items[tuple[1].*.?] = .{ .segment = cmd }; - } - - // TODO should we set the linkedit vmsize to that of the binary? - const orig_cmd = macho_file.load_commands.items[macho_file.linkedit_segment_cmd_index.?].segment; - const orig_vmaddr = orig_cmd.inner.vmaddr; - const linkedit_cmd = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - linkedit_cmd.inner.vmaddr = orig_vmaddr; - - // Update VM address for the DWARF segment and sections including re-running relocations. 
- // TODO re-run relocations - const dwarf_cmd = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - const new_start_aligned = orig_vmaddr + linkedit_cmd.inner.vmsize; - const old_start_aligned = dwarf_cmd.inner.vmaddr; - const diff = new_start_aligned - old_start_aligned; - if (diff > 0) { - dwarf_cmd.inner.vmaddr = new_start_aligned; - - for (dwarf_cmd.sections.items) |*sect| { - sect.addr += (new_start_aligned - old_start_aligned); - } - } - - self.load_commands_dirty = true; -} - -fn writeLinkeditSegment(self: *DebugSymbols) !void { +fn writeLinkeditSegmentData(self: *DebugSymbols, ncmds: *u32, lc_writer: anytype) !void { const tracy = trace(@src()); defer tracy.end(); - try self.writeSymbolTable(); - try self.writeStringTable(); + const source_vmaddr = self.base.segments.items[self.base.linkedit_segment_cmd_index.?].vmaddr; + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + seg.vmaddr = source_vmaddr; - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const aligned_size = mem.alignForwardGeneric(u64, seg.inner.filesize, self.base.page_size); - seg.inner.filesize = aligned_size; - seg.inner.vmsize = aligned_size; + var symtab_cmd = macho.symtab_command{ + .cmdsize = @sizeOf(macho.symtab_command), + .symoff = 0, + .nsyms = 0, + .stroff = 0, + .strsize = 0, + }; + try self.writeSymtab(&symtab_cmd); + try self.writeStrtab(&symtab_cmd); + try lc_writer.writeStruct(symtab_cmd); + ncmds.* += 1; + + const aligned_size = mem.alignForwardGeneric(u64, seg.filesize, self.base.page_size); + seg.filesize = aligned_size; + seg.vmsize = aligned_size; } -fn writeSymbolTable(self: *DebugSymbols) !void { +fn writeSymtab(self: *DebugSymbols, lc: *macho.symtab_command) !void { const tracy = trace(@src()); defer tracy.end(); const gpa = self.base.base.allocator; - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].symtab; - symtab.symoff = @intCast(u32, seg.inner.fileoff); var locals = std.ArrayList(macho.nlist_64).init(gpa); defer locals.deinit(); @@ -634,34 +466,36 @@ fn writeSymbolTable(self: *DebugSymbols) !void { const nlocals = locals.items.len; const nexports = exports.items.len; - const locals_off = symtab.symoff; - const locals_size = nlocals * @sizeOf(macho.nlist_64); - const exports_off = locals_off + locals_size; - const exports_size = nexports * @sizeOf(macho.nlist_64); + const nsyms = nlocals + nexports; - symtab.nsyms = @intCast(u32, nlocals + nexports); - const needed_size = (nlocals + nexports) * @sizeOf(macho.nlist_64); + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric( + u64, + seg.fileoff + seg.filesize, + @alignOf(macho.nlist_64), + ); + const needed_size = nsyms * @sizeOf(macho.nlist_64); - if (needed_size > seg.inner.filesize) { + if (needed_size > seg.filesize) { const aligned_size = mem.alignForwardGeneric(u64, needed_size, self.base.page_size); - const diff = @intCast(u32, aligned_size - seg.inner.filesize); - const dwarf_seg = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - seg.inner.filesize = aligned_size; + const diff = @intCast(u32, aligned_size - seg.filesize); + const dwarf_seg = &self.segments.items[self.dwarf_segment_cmd_index.?]; + seg.filesize = aligned_size; try MachO.copyRangeAllOverlappingAlloc( self.base.base.allocator, self.file, - dwarf_seg.inner.fileoff, - dwarf_seg.inner.fileoff + diff, - math.cast(usize, 
dwarf_seg.inner.filesize) orelse return error.Overflow, + dwarf_seg.fileoff, + dwarf_seg.fileoff + diff, + math.cast(usize, dwarf_seg.filesize) orelse return error.Overflow, ); - const old_seg_fileoff = dwarf_seg.inner.fileoff; - dwarf_seg.inner.fileoff += diff; + const old_seg_fileoff = dwarf_seg.fileoff; + dwarf_seg.fileoff += diff; - log.debug(" (moving __DWARF segment from 0x{x} to 0x{x})", .{ old_seg_fileoff, dwarf_seg.inner.fileoff }); + log.debug(" (moving __DWARF segment from 0x{x} to 0x{x})", .{ old_seg_fileoff, dwarf_seg.fileoff }); - for (dwarf_seg.sections.items) |*sect| { + for (self.sections.items) |*sect| { const old_offset = sect.offset; sect.offset += diff; @@ -674,47 +508,53 @@ fn writeSymbolTable(self: *DebugSymbols) !void { } } + lc.symoff = @intCast(u32, offset); + lc.nsyms = @intCast(u32, nsyms); + + const locals_off = lc.symoff; + const locals_size = nlocals * @sizeOf(macho.nlist_64); + const exports_off = locals_off + locals_size; + const exports_size = nexports * @sizeOf(macho.nlist_64); + log.debug("writing local symbols from 0x{x} to 0x{x}", .{ locals_off, locals_size + locals_off }); try self.file.pwriteAll(mem.sliceAsBytes(locals.items), locals_off); log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off }); try self.file.pwriteAll(mem.sliceAsBytes(exports.items), exports_off); - - self.load_commands_dirty = true; } -fn writeStringTable(self: *DebugSymbols) !void { +fn writeStrtab(self: *DebugSymbols, lc: *macho.symtab_command) !void { const tracy = trace(@src()); defer tracy.end(); - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].symtab; - const symtab_size = @intCast(u32, symtab.nsyms * @sizeOf(macho.nlist_64)); - symtab.stroff = symtab.symoff + symtab_size; + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const symtab_size = @intCast(u32, lc.nsyms * @sizeOf(macho.nlist_64)); + const offset = mem.alignForwardGeneric(u64, lc.symoff + symtab_size, @alignOf(u64)); + lc.stroff = @intCast(u32, offset); const needed_size = mem.alignForwardGeneric(u64, self.strtab.buffer.items.len, @alignOf(u64)); - symtab.strsize = @intCast(u32, needed_size); + lc.strsize = @intCast(u32, needed_size); - if (symtab_size + needed_size > seg.inner.filesize) { - const aligned_size = mem.alignForwardGeneric(u64, symtab_size + needed_size, self.base.page_size); - const diff = @intCast(u32, aligned_size - seg.inner.filesize); - const dwarf_seg = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - seg.inner.filesize = aligned_size; + if (offset + needed_size > seg.filesize) { + const aligned_size = mem.alignForwardGeneric(u64, offset + needed_size, self.base.page_size); + const diff = @intCast(u32, aligned_size - seg.filesize); + const dwarf_seg = &self.segments.items[self.dwarf_segment_cmd_index.?]; + seg.filesize = aligned_size; try MachO.copyRangeAllOverlappingAlloc( self.base.base.allocator, self.file, - dwarf_seg.inner.fileoff, - dwarf_seg.inner.fileoff + diff, - math.cast(usize, dwarf_seg.inner.filesize) orelse return error.Overflow, + dwarf_seg.fileoff, + dwarf_seg.fileoff + diff, + math.cast(usize, dwarf_seg.filesize) orelse return error.Overflow, ); - const old_seg_fileoff = dwarf_seg.inner.fileoff; - dwarf_seg.inner.fileoff += diff; + const old_seg_fileoff = dwarf_seg.fileoff; + dwarf_seg.fileoff += diff; - log.debug(" (moving __DWARF segment from 0x{x} to 0x{x})", .{ old_seg_fileoff, 
dwarf_seg.inner.fileoff }); + log.debug(" (moving __DWARF segment from 0x{x} to 0x{x})", .{ old_seg_fileoff, dwarf_seg.fileoff }); - for (dwarf_seg.sections.items) |*sect| { + for (self.sections.items) |*sect| { const old_offset = sect.offset; sect.offset += diff; @@ -727,9 +567,7 @@ fn writeStringTable(self: *DebugSymbols) !void { } } - log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); + log.debug("writing string table from 0x{x} to 0x{x}", .{ lc.stroff, lc.stroff + lc.strsize }); - try self.file.pwriteAll(self.strtab.buffer.items, symtab.stroff); - - self.load_commands_dirty = true; + try self.file.pwriteAll(self.strtab.buffer.items, lc.stroff); } diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index ffc0b2cca6..0f16eada61 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -13,23 +13,9 @@ const fat = @import("fat.zig"); const Allocator = mem.Allocator; const CrossTarget = std.zig.CrossTarget; const LibStub = @import("../tapi.zig").LibStub; +const LoadCommandIterator = macho.LoadCommandIterator; const MachO = @import("../MachO.zig"); -file: fs.File, -name: []const u8, - -header: ?macho.mach_header_64 = null, - -// The actual dylib contents we care about linking with will be embedded at -// an offset within a file if we are linking against a fat lib -library_offset: u64 = 0, - -load_commands: std.ArrayListUnmanaged(macho.LoadCommand) = .{}, - -symtab_cmd_index: ?u16 = null, -dysymtab_cmd_index: ?u16 = null, -id_cmd_index: ?u16 = null, - id: ?Id = null, weak: bool = false, @@ -53,16 +39,12 @@ pub const Id = struct { }; } - pub fn fromLoadCommand(allocator: Allocator, lc: macho.GenericCommandWithData(macho.dylib_command)) !Id { - const dylib = lc.inner.dylib; - const dylib_name = @ptrCast([*:0]const u8, lc.data[dylib.name - @sizeOf(macho.dylib_command) ..]); - const name = try allocator.dupe(u8, mem.sliceTo(dylib_name, 0)); - + pub fn fromLoadCommand(allocator: Allocator, lc: macho.dylib_command, name: []const u8) !Id { return Id{ - .name = name, - .timestamp = dylib.timestamp, - .current_version = dylib.current_version, - .compatibility_version = dylib.compatibility_version, + .name = try allocator.dupe(u8, name), + .timestamp = lc.dylib.timestamp, + .current_version = lc.dylib.current_version, + .compatibility_version = lc.dylib.compatibility_version, }; } @@ -126,125 +108,89 @@ pub const Id = struct { }; pub fn deinit(self: *Dylib, allocator: Allocator) void { - for (self.load_commands.items) |*lc| { - lc.deinit(allocator); - } - self.load_commands.deinit(allocator); - for (self.symbols.keys()) |key| { allocator.free(key); } self.symbols.deinit(allocator); - - allocator.free(self.name); - if (self.id) |*id| { id.deinit(allocator); } } -pub fn parse( +pub fn parseFromBinary( self: *Dylib, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch, dylib_id: u16, dependent_libs: anytype, + name: []const u8, + data: []align(@alignOf(u64)) const u8, ) !void { - log.debug("parsing shared library '{s}'", .{self.name}); + var stream = std.io.fixedBufferStream(data); + const reader = stream.reader(); - self.library_offset = try fat.getLibraryOffset(self.file.reader(), cpu_arch); + log.debug("parsing shared library '{s}'", .{name}); - try self.file.seekTo(self.library_offset); + const header = try reader.readStruct(macho.mach_header_64); - var reader = self.file.reader(); - self.header = try reader.readStruct(macho.mach_header_64); - - if (self.header.?.filetype != macho.MH_DYLIB) { - log.debug("invalid 
filetype: expected 0x{x}, found 0x{x}", .{ macho.MH_DYLIB, self.header.?.filetype }); + if (header.filetype != macho.MH_DYLIB) { + log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{ macho.MH_DYLIB, header.filetype }); return error.NotDylib; } - const this_arch: std.Target.Cpu.Arch = try fat.decodeArch(self.header.?.cputype, true); + const this_arch: std.Target.Cpu.Arch = try fat.decodeArch(header.cputype, true); if (this_arch != cpu_arch) { - log.err("mismatched cpu architecture: expected {}, found {}", .{ cpu_arch, this_arch }); + log.err("mismatched cpu architecture: expected {s}, found {s}", .{ + @tagName(cpu_arch), + @tagName(this_arch), + }); return error.MismatchedCpuArchitecture; } - try self.readLoadCommands(allocator, reader, dylib_id, dependent_libs); - try self.parseId(allocator); - try self.parseSymbols(allocator); -} - -fn readLoadCommands( - self: *Dylib, - allocator: Allocator, - reader: anytype, - dylib_id: u16, - dependent_libs: anytype, -) !void { - const should_lookup_reexports = self.header.?.flags & macho.MH_NO_REEXPORTED_DYLIBS == 0; - - try self.load_commands.ensureUnusedCapacity(allocator, self.header.?.ncmds); - - var i: u16 = 0; - while (i < self.header.?.ncmds) : (i += 1) { - var cmd = try macho.LoadCommand.read(allocator, reader); + const should_lookup_reexports = header.flags & macho.MH_NO_REEXPORTED_DYLIBS == 0; + var it = LoadCommandIterator{ + .ncmds = header.ncmds, + .buffer = data[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], + }; + while (it.next()) |cmd| { switch (cmd.cmd()) { .SYMTAB => { - self.symtab_cmd_index = i; - }, - .DYSYMTAB => { - self.dysymtab_cmd_index = i; + const symtab_cmd = cmd.cast(macho.symtab_command).?; + const symtab = @ptrCast( + [*]const macho.nlist_64, + @alignCast(@alignOf(macho.nlist_64), &data[symtab_cmd.symoff]), + )[0..symtab_cmd.nsyms]; + const strtab = data[symtab_cmd.stroff..][0..symtab_cmd.strsize]; + + for (symtab) |sym| { + const add_to_symtab = sym.ext() and (sym.sect() or sym.indr()); + if (!add_to_symtab) continue; + + const sym_name = mem.sliceTo(@ptrCast([*:0]const u8, strtab.ptr + sym.n_strx), 0); + try self.symbols.putNoClobber(allocator, try allocator.dupe(u8, sym_name), {}); + } }, .ID_DYLIB => { - self.id_cmd_index = i; + self.id = try Id.fromLoadCommand( + allocator, + cmd.cast(macho.dylib_command).?, + cmd.getDylibPathName(), + ); }, .REEXPORT_DYLIB => { if (should_lookup_reexports) { // Parse install_name to dependent dylib. 
- var id = try Id.fromLoadCommand(allocator, cmd.dylib); + var id = try Id.fromLoadCommand( + allocator, + cmd.cast(macho.dylib_command).?, + cmd.getDylibPathName(), + ); try dependent_libs.writeItem(.{ .id = id, .parent = dylib_id }); } }, - else => { - log.debug("Unknown load command detected: 0x{x}.", .{@enumToInt(cmd.cmd())}); - }, + else => {}, } - self.load_commands.appendAssumeCapacity(cmd); - } -} - -fn parseId(self: *Dylib, allocator: Allocator) !void { - const index = self.id_cmd_index orelse { - log.debug("no LC_ID_DYLIB load command found; using hard-coded defaults...", .{}); - self.id = try Id.default(allocator, self.name); - return; - }; - self.id = try Id.fromLoadCommand(allocator, self.load_commands.items[index].dylib); -} - -fn parseSymbols(self: *Dylib, allocator: Allocator) !void { - const index = self.symtab_cmd_index orelse return; - const symtab_cmd = self.load_commands.items[index].symtab; - - const symtab = try allocator.alloc(u8, @sizeOf(macho.nlist_64) * symtab_cmd.nsyms); - defer allocator.free(symtab); - _ = try self.file.preadAll(symtab, symtab_cmd.symoff + self.library_offset); - const slice = @alignCast(@alignOf(macho.nlist_64), mem.bytesAsSlice(macho.nlist_64, symtab)); - - const strtab = try allocator.alloc(u8, symtab_cmd.strsize); - defer allocator.free(strtab); - _ = try self.file.preadAll(strtab, symtab_cmd.stroff + self.library_offset); - - for (slice) |sym| { - const add_to_symtab = sym.ext() and (sym.sect() or sym.indr()); - - if (!add_to_symtab) continue; - - const sym_name = mem.sliceTo(@ptrCast([*:0]const u8, strtab.ptr + sym.n_strx), 0); - const name = try allocator.dupe(u8, sym_name); - try self.symbols.putNoClobber(allocator, name, {}); } } @@ -356,10 +302,11 @@ pub fn parseFromStub( lib_stub: LibStub, dylib_id: u16, dependent_libs: anytype, + name: []const u8, ) !void { if (lib_stub.inner.len == 0) return error.EmptyStubFile; - log.debug("parsing shared library from stub '{s}'", .{self.name}); + log.debug("parsing shared library from stub '{s}'", .{name}); const umbrella_lib = lib_stub.inner[0]; diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 0d929627cd..2e2f3dad84 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -3,6 +3,7 @@ const Object = @This(); const std = @import("std"); const build_options = @import("build_options"); const assert = std.debug.assert; +const dwarf = std.dwarf; const fs = std.fs; const io = std.io; const log = std.log.scoped(.link); @@ -14,43 +15,20 @@ const trace = @import("../../tracy.zig").trace; const Allocator = mem.Allocator; const Atom = @import("Atom.zig"); +const LoadCommandIterator = macho.LoadCommandIterator; const MachO = @import("../MachO.zig"); -const MatchingSection = MachO.MatchingSection; const SymbolWithLoc = MachO.SymbolWithLoc; -file: fs.File, name: []const u8, mtime: u64, - -/// Data contents of the file. Includes sections, and data of load commands. -/// Excludes the backing memory for the header and load commands. -/// Initialized in `parse`. 
-contents: []const u8 = undefined, - -file_offset: ?u32 = null, +contents: []align(@alignOf(u64)) const u8, header: macho.mach_header_64 = undefined, - -load_commands: std.ArrayListUnmanaged(macho.LoadCommand) = .{}, - -segment_cmd_index: ?u16 = null, -text_section_index: ?u16 = null, -symtab_cmd_index: ?u16 = null, -dysymtab_cmd_index: ?u16 = null, -build_version_cmd_index: ?u16 = null, -data_in_code_cmd_index: ?u16 = null, - -// __DWARF segment sections -dwarf_debug_info_index: ?u16 = null, -dwarf_debug_abbrev_index: ?u16 = null, -dwarf_debug_str_index: ?u16 = null, -dwarf_debug_line_index: ?u16 = null, -dwarf_debug_line_str_index: ?u16 = null, -dwarf_debug_ranges_index: ?u16 = null, +in_symtab: []const macho.nlist_64 = undefined, +in_strtab: []const u8 = undefined, symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -strtab: []const u8 = &.{}, -data_in_code_entries: []const macho.data_in_code_entry = &.{}, +sections: std.ArrayListUnmanaged(macho.section_64) = .{}, sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{}, @@ -61,12 +39,8 @@ managed_atoms: std.ArrayListUnmanaged(*Atom) = .{}, atom_by_index_table: std.AutoHashMapUnmanaged(u32, *Atom) = .{}, pub fn deinit(self: *Object, gpa: Allocator) void { - for (self.load_commands.items) |*lc| { - lc.deinit(gpa); - } - self.load_commands.deinit(gpa); - gpa.free(self.contents); self.symtab.deinit(gpa); + self.sections.deinit(gpa); self.sections_as_symbols.deinit(gpa); self.atom_by_index_table.deinit(gpa); @@ -77,22 +51,15 @@ pub fn deinit(self: *Object, gpa: Allocator) void { self.managed_atoms.deinit(gpa); gpa.free(self.name); + gpa.free(self.contents); } pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) !void { - const file_stat = try self.file.stat(); - const file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; - self.contents = try self.file.readToEndAlloc(allocator, file_size); - var stream = std.io.fixedBufferStream(self.contents); const reader = stream.reader(); - const file_offset = self.file_offset orelse 0; - if (file_offset > 0) { - try reader.context.seekTo(file_offset); - } - self.header = try reader.readStruct(macho.mach_header_64); + if (self.header.filetype != macho.MH_OBJECT) { log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{ macho.MH_OBJECT, @@ -110,92 +77,54 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) }, }; if (this_arch != cpu_arch) { - log.err("mismatched cpu architecture: expected {}, found {}", .{ cpu_arch, this_arch }); + log.err("mismatched cpu architecture: expected {s}, found {s}", .{ + @tagName(cpu_arch), + @tagName(this_arch), + }); return error.MismatchedCpuArchitecture; } - try self.load_commands.ensureUnusedCapacity(allocator, self.header.ncmds); - - var i: u16 = 0; - while (i < self.header.ncmds) : (i += 1) { - var cmd = try macho.LoadCommand.read(allocator, reader); + var it = LoadCommandIterator{ + .ncmds = self.header.ncmds, + .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], + }; + while (it.next()) |cmd| { switch (cmd.cmd()) { .SEGMENT_64 => { - self.segment_cmd_index = i; - var seg = cmd.segment; - for (seg.sections.items) |*sect, j| { - const index = @intCast(u16, j); - const segname = sect.segName(); - const sectname = sect.sectName(); - if (mem.eql(u8, segname, "__DWARF")) { - if (mem.eql(u8, sectname, "__debug_info")) { - self.dwarf_debug_info_index = index; - } else if (mem.eql(u8, sectname, "__debug_abbrev")) { - self.dwarf_debug_abbrev_index = 
index; - } else if (mem.eql(u8, sectname, "__debug_str")) { - self.dwarf_debug_str_index = index; - } else if (mem.eql(u8, sectname, "__debug_line")) { - self.dwarf_debug_line_index = index; - } else if (mem.eql(u8, sectname, "__debug_line_str")) { - self.dwarf_debug_line_str_index = index; - } else if (mem.eql(u8, sectname, "__debug_ranges")) { - self.dwarf_debug_ranges_index = index; - } - } else if (mem.eql(u8, segname, "__TEXT")) { - if (mem.eql(u8, sectname, "__text")) { - self.text_section_index = index; - } - } - - sect.offset += file_offset; - if (sect.reloff > 0) { - sect.reloff += file_offset; - } + const segment = cmd.cast(macho.segment_command_64).?; + try self.sections.ensureUnusedCapacity(allocator, segment.nsects); + for (cmd.getSections()) |sect| { + self.sections.appendAssumeCapacity(sect); } - - seg.inner.fileoff += file_offset; }, .SYMTAB => { - self.symtab_cmd_index = i; - cmd.symtab.symoff += file_offset; - cmd.symtab.stroff += file_offset; - }, - .DYSYMTAB => { - self.dysymtab_cmd_index = i; - }, - .BUILD_VERSION => { - self.build_version_cmd_index = i; - }, - .DATA_IN_CODE => { - self.data_in_code_cmd_index = i; - cmd.linkedit_data.dataoff += file_offset; - }, - else => { - log.debug("Unknown load command detected: 0x{x}.", .{@enumToInt(cmd.cmd())}); + const symtab = cmd.cast(macho.symtab_command).?; + self.in_symtab = @ptrCast( + [*]const macho.nlist_64, + @alignCast(@alignOf(macho.nlist_64), &self.contents[symtab.symoff]), + )[0..symtab.nsyms]; + self.in_strtab = self.contents[symtab.stroff..][0..symtab.strsize]; + try self.symtab.appendSlice(allocator, self.in_symtab); }, + else => {}, } - self.load_commands.appendAssumeCapacity(cmd); } - - try self.parseSymtab(allocator); } const Context = struct { - symtab: []const macho.nlist_64, - strtab: []const u8, + object: *const Object, }; const SymbolAtIndex = struct { index: u32, fn getSymbol(self: SymbolAtIndex, ctx: Context) macho.nlist_64 { - return ctx.symtab[self.index]; + return ctx.object.getSourceSymbol(self.index).?; } fn getSymbolName(self: SymbolAtIndex, ctx: Context) []const u8 { const sym = self.getSymbol(ctx); - assert(sym.n_strx < ctx.strtab.len); - return mem.sliceTo(@ptrCast([*:0]const u8, ctx.strtab.ptr + sym.n_strx), 0); + return ctx.object.getString(sym.n_strx); } /// Returns whether lhs is less than rhs by allocated address in object file. @@ -293,7 +222,6 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) defer tracy.end(); const gpa = macho_file.base.allocator; - const seg = self.load_commands.items[self.segment_cmd_index.?].segment; log.debug("splitting object({d}, {s}) into atoms: one-shot mode", .{ object_id, self.name }); @@ -302,13 +230,12 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) // the GO compiler does not necessarily respect that therefore we sort immediately by type // and address within. 
const context = Context{ - .symtab = self.getSourceSymtab(), - .strtab = self.strtab, + .object = self, }; - var sorted_all_syms = try std.ArrayList(SymbolAtIndex).initCapacity(gpa, context.symtab.len); + var sorted_all_syms = try std.ArrayList(SymbolAtIndex).initCapacity(gpa, self.in_symtab.len); defer sorted_all_syms.deinit(); - for (context.symtab) |_, index| { + for (self.in_symtab) |_, index| { sorted_all_syms.appendAssumeCapacity(.{ .index = @intCast(u32, index) }); } @@ -320,36 +247,36 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) // Well, shit, sometimes compilers skip the dysymtab load command altogether, meaning we // have to infer the start of undef section in the symtab ourselves. - const iundefsym = if (self.dysymtab_cmd_index) |cmd_index| blk: { - const dysymtab = self.load_commands.items[cmd_index].dysymtab; + const iundefsym = blk: { + const dysymtab = self.parseDysymtab() orelse { + var iundefsym: usize = sorted_all_syms.items.len; + while (iundefsym > 0) : (iundefsym -= 1) { + const sym = sorted_all_syms.items[iundefsym - 1].getSymbol(context); + if (sym.sect()) break; + } + break :blk iundefsym; + }; break :blk dysymtab.iundefsym; - } else blk: { - var iundefsym: usize = sorted_all_syms.items.len; - while (iundefsym > 0) : (iundefsym -= 1) { - const sym = sorted_all_syms.items[iundefsym - 1].getSymbol(context); - if (sym.sect()) break; - } - break :blk iundefsym; }; // We only care about defined symbols, so filter every other out. const sorted_syms = sorted_all_syms.items[0..iundefsym]; const subsections_via_symbols = self.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; - for (seg.sections.items) |sect, id| { + for (self.sections.items) |sect, id| { const sect_id = @intCast(u8, id); log.debug("splitting section '{s},{s}' into atoms", .{ sect.segName(), sect.sectName() }); // Get matching segment/section in the final artifact. - const match = (try macho_file.getMatchingSection(sect)) orelse { + const match = (try macho_file.getOutputSection(sect)) orelse { log.debug(" unhandled section", .{}); continue; }; log.debug(" output sect({d}, '{s},{s}')", .{ - macho_file.getSectionOrdinal(match), - macho_file.getSection(match).segName(), - macho_file.getSection(match).sectName(), + match + 1, + macho_file.sections.items(.header)[match].segName(), + macho_file.sections.items(.header)[match].sectName(), }); const cpu_arch = macho_file.base.options.target.cpu.arch; @@ -359,14 +286,13 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) }; // Read section's code - const code: ?[]const u8 = if (!is_zerofill) try self.getSectionContents(sect_id) else null; + const code: ?[]const u8 = if (!is_zerofill) try self.getSectionContents(sect) else null; // Read section's list of relocations - const raw_relocs = self.contents[sect.reloff..][0 .. sect.nreloc * @sizeOf(macho.relocation_info)]; - const relocs = mem.bytesAsSlice( - macho.relocation_info, - @alignCast(@alignOf(macho.relocation_info), raw_relocs), - ); + const relocs = @ptrCast( + [*]const macho.relocation_info, + @alignCast(@alignOf(macho.relocation_info), &self.contents[sect.reloff]), + )[0..sect.nreloc]; // Symbols within this section only. 
const filtered_syms = filterSymbolsByAddress( @@ -387,7 +313,7 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) try self.symtab.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, - .n_sect = macho_file.getSectionOrdinal(match), + .n_sect = match + 1, .n_desc = 0, .n_value = sect.addr, }); @@ -476,7 +402,7 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) try self.symtab.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, - .n_sect = macho_file.getSectionOrdinal(match), + .n_sect = match + 1, .n_desc = 0, .n_value = addr, }); @@ -501,7 +427,7 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) try self.symtab.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, - .n_sect = macho_file.getSectionOrdinal(match), + .n_sect = match + 1, .n_desc = 0, .n_value = sect.addr, }); @@ -535,21 +461,21 @@ fn createAtomFromSubsection( code: ?[]const u8, relocs: []const macho.relocation_info, indexes: []const SymbolAtIndex, - match: MatchingSection, + match: u8, sect: macho.section_64, ) !*Atom { const gpa = macho_file.base.allocator; const sym = self.symtab.items[sym_index]; const atom = try MachO.createEmptyAtom(gpa, sym_index, size, alignment); atom.file = object_id; - self.symtab.items[sym_index].n_sect = macho_file.getSectionOrdinal(match); + self.symtab.items[sym_index].n_sect = match + 1; log.debug("creating ATOM(%{d}, '{s}') in sect({d}, '{s},{s}') in object({d})", .{ sym_index, self.getString(sym.n_strx), - macho_file.getSectionOrdinal(match), - macho_file.getSection(match).segName(), - macho_file.getSection(match).sectName(), + match + 1, + macho_file.sections.items(.header)[match].segName(), + macho_file.sections.items(.header)[match].sectName(), object_id, }); @@ -577,7 +503,7 @@ fn createAtomFromSubsection( try atom.contained.ensureTotalCapacity(gpa, indexes.len); for (indexes) |inner_sym_index| { const inner_sym = &self.symtab.items[inner_sym_index.index]; - inner_sym.n_sect = macho_file.getSectionOrdinal(match); + inner_sym.n_sect = match + 1; atom.contained.appendAssumeCapacity(.{ .sym_index = inner_sym_index.index, .offset = inner_sym.n_value - sym.n_value, @@ -589,48 +515,84 @@ fn createAtomFromSubsection( return atom; } -fn parseSymtab(self: *Object, allocator: Allocator) !void { - const index = self.symtab_cmd_index orelse return; - const symtab = self.load_commands.items[index].symtab; - try self.symtab.appendSlice(allocator, self.getSourceSymtab()); - self.strtab = self.contents[symtab.stroff..][0..symtab.strsize]; -} - -pub fn getSourceSymtab(self: Object) []const macho.nlist_64 { - const index = self.symtab_cmd_index orelse return &[0]macho.nlist_64{}; - const symtab = self.load_commands.items[index].symtab; - const symtab_size = @sizeOf(macho.nlist_64) * symtab.nsyms; - const raw_symtab = self.contents[symtab.symoff..][0..symtab_size]; - return mem.bytesAsSlice( - macho.nlist_64, - @alignCast(@alignOf(macho.nlist_64), raw_symtab), - ); -} - pub fn getSourceSymbol(self: Object, index: u32) ?macho.nlist_64 { - const symtab = self.getSourceSymtab(); - if (index >= symtab.len) return null; - return symtab[index]; + if (index >= self.in_symtab.len) return null; + return self.in_symtab[index]; } pub fn getSourceSection(self: Object, index: u16) macho.section_64 { - const seg = self.load_commands.items[self.segment_cmd_index.?].segment; - assert(index < seg.sections.items.len); - return seg.sections.items[index]; + assert(index < self.sections.items.len); + return self.sections.items[index]; 
} pub fn parseDataInCode(self: Object) ?[]const macho.data_in_code_entry { - const index = self.data_in_code_cmd_index orelse return null; - const data_in_code = self.load_commands.items[index].linkedit_data; - const raw_dice = self.contents[data_in_code.dataoff..][0..data_in_code.datasize]; - return mem.bytesAsSlice( - macho.data_in_code_entry, - @alignCast(@alignOf(macho.data_in_code_entry), raw_dice), - ); + var it = LoadCommandIterator{ + .ncmds = self.header.ncmds, + .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], + }; + while (it.next()) |cmd| { + switch (cmd.cmd()) { + .DATA_IN_CODE => { + const dice = cmd.cast(macho.linkedit_data_command).?; + const ndice = @divExact(dice.datasize, @sizeOf(macho.data_in_code_entry)); + return @ptrCast( + [*]const macho.data_in_code_entry, + @alignCast(@alignOf(macho.data_in_code_entry), &self.contents[dice.dataoff]), + )[0..ndice]; + }, + else => {}, + } + } else return null; } -pub fn getSectionContents(self: Object, index: u16) error{Overflow}![]const u8 { - const sect = self.getSourceSection(index); +fn parseDysymtab(self: Object) ?macho.dysymtab_command { + var it = LoadCommandIterator{ + .ncmds = self.header.ncmds, + .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], + }; + while (it.next()) |cmd| { + switch (cmd.cmd()) { + .DYSYMTAB => { + return cmd.cast(macho.dysymtab_command).?; + }, + else => {}, + } + } else return null; +} + +pub fn parseDwarfInfo(self: Object) error{Overflow}!dwarf.DwarfInfo { + var di = dwarf.DwarfInfo{ + .endian = .Little, + .debug_info = &[0]u8{}, + .debug_abbrev = &[0]u8{}, + .debug_str = &[0]u8{}, + .debug_line = &[0]u8{}, + .debug_line_str = &[0]u8{}, + .debug_ranges = &[0]u8{}, + }; + for (self.sections.items) |sect| { + const segname = sect.segName(); + const sectname = sect.sectName(); + if (mem.eql(u8, segname, "__DWARF")) { + if (mem.eql(u8, sectname, "__debug_info")) { + di.debug_info = try self.getSectionContents(sect); + } else if (mem.eql(u8, sectname, "__debug_abbrev")) { + di.debug_abbrev = try self.getSectionContents(sect); + } else if (mem.eql(u8, sectname, "__debug_str")) { + di.debug_str = try self.getSectionContents(sect); + } else if (mem.eql(u8, sectname, "__debug_line")) { + di.debug_line = try self.getSectionContents(sect); + } else if (mem.eql(u8, sectname, "__debug_line_str")) { + di.debug_line_str = try self.getSectionContents(sect); + } else if (mem.eql(u8, sectname, "__debug_ranges")) { + di.debug_ranges = try self.getSectionContents(sect); + } + } + } + return di; +} + +pub fn getSectionContents(self: Object, sect: macho.section_64) error{Overflow}![]const u8 { const size = math.cast(usize, sect.size) orelse return error.Overflow; log.debug("getting {s},{s} data at 0x{x} - 0x{x}", .{ sect.segName(), @@ -642,8 +604,8 @@ pub fn getSectionContents(self: Object, index: u16) error{Overflow}![]const u8 { } pub fn getString(self: Object, off: u32) []const u8 { - assert(off < self.strtab.len); - return mem.sliceTo(@ptrCast([*:0]const u8, self.strtab.ptr + off), 0); + assert(off < self.in_strtab.len); + return mem.sliceTo(@ptrCast([*:0]const u8, self.in_strtab.ptr + off), 0); } pub fn getAtomForSymbol(self: Object, sym_index: u32) ?*Atom { diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index 909a0450d6..12f46c9f26 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -8,7 +8,6 @@ const mem = std.mem; const Allocator = mem.Allocator; const Atom = @import("Atom.zig"); const 
MachO = @import("../MachO.zig"); -const MatchingSection = MachO.MatchingSection; pub fn gcAtoms(macho_file: *MachO) !void { const gpa = macho_file.base.allocator; @@ -25,12 +24,12 @@ pub fn gcAtoms(macho_file: *MachO) !void { try prune(arena, alive, macho_file); } -fn removeAtomFromSection(atom: *Atom, match: MatchingSection, macho_file: *MachO) void { - const sect = macho_file.getSectionPtr(match); +fn removeAtomFromSection(atom: *Atom, match: u8, macho_file: *MachO) void { + var section = macho_file.sections.get(match); // If we want to enable GC for incremental codepath, we need to take into // account any padding that might have been left here. - sect.size -= atom.size; + section.header.size -= atom.size; if (atom.prev) |prev| { prev.next = atom.next; @@ -38,15 +37,16 @@ fn removeAtomFromSection(atom: *Atom, match: MatchingSection, macho_file: *MachO if (atom.next) |next| { next.prev = atom.prev; } else { - const last = macho_file.atoms.getPtr(match).?; if (atom.prev) |prev| { - last.* = prev; + section.last_atom = prev; } else { // The section will be GCed in the next step. - last.* = undefined; - sect.size = 0; + section.last_atom = null; + section.header.size = 0; } } + + macho_file.sections.set(match, section); } fn collectRoots(roots: *std.AutoHashMap(*Atom, void), macho_file: *MachO) !void { @@ -173,19 +173,19 @@ fn mark( fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *MachO) !void { // Any section that ends up here will be updated, that is, // its size and alignment recalculated. - var gc_sections = std.AutoHashMap(MatchingSection, void).init(arena); + var gc_sections = std.AutoHashMap(u8, void).init(arena); var loop: bool = true; while (loop) { loop = false; for (macho_file.objects.items) |object| { - for (object.getSourceSymtab()) |_, source_index| { + for (object.in_symtab) |_, source_index| { const atom = object.getAtomForSymbol(@intCast(u32, source_index)) orelse continue; if (alive.contains(atom)) continue; const global = atom.getSymbolWithLoc(); const sym = atom.getSymbolPtr(macho_file); - const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect); + const match = sym.n_sect - 1; if (sym.n_desc == MachO.N_DESC_GCED) continue; if (!sym.ext() and !refersDead(atom, macho_file)) continue; @@ -232,7 +232,7 @@ fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *Mac // TODO tombstone const atom = entry.getAtom(macho_file); - const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect); + const match = sym.n_sect - 1; removeAtomFromSection(atom, match, macho_file); _ = try gc_sections.put(match, {}); _ = macho_file.got_entries_table.remove(entry.target); @@ -244,7 +244,7 @@ fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *Mac // TODO tombstone const atom = entry.getAtom(macho_file); - const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect); + const match = sym.n_sect - 1; removeAtomFromSection(atom, match, macho_file); _ = try gc_sections.put(match, {}); _ = macho_file.stubs_table.remove(entry.target); @@ -256,7 +256,7 @@ fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *Mac // TODO tombstone const atom = entry.getAtom(macho_file); - const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect); + const match = sym.n_sect - 1; removeAtomFromSection(atom, match, macho_file); _ = try gc_sections.put(match, {}); _ = macho_file.tlv_ptr_entries_table.remove(entry.target); @@ -265,13 +265,13 @@ fn prune(arena: Allocator, alive: 
std.AutoHashMap(*Atom, void), macho_file: *Mac var gc_sections_it = gc_sections.iterator(); while (gc_sections_it.next()) |entry| { const match = entry.key_ptr.*; - const sect = macho_file.getSectionPtr(match); - if (sect.size == 0) continue; // Pruning happens automatically in next step. + var section = macho_file.sections.get(match); + if (section.header.size == 0) continue; // Pruning happens automatically in next step. - sect.@"align" = 0; - sect.size = 0; + section.header.@"align" = 0; + section.header.size = 0; - var atom = macho_file.atoms.get(match).?; + var atom = section.last_atom.?; while (atom.prev) |prev| { atom = prev; @@ -279,14 +279,16 @@ fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *Mac while (true) { const atom_alignment = try math.powi(u32, 2, atom.alignment); - const aligned_end_addr = mem.alignForwardGeneric(u64, sect.size, atom_alignment); - const padding = aligned_end_addr - sect.size; - sect.size += padding + atom.size; - sect.@"align" = @maximum(sect.@"align", atom.alignment); + const aligned_end_addr = mem.alignForwardGeneric(u64, section.header.size, atom_alignment); + const padding = aligned_end_addr - section.header.size; + section.header.size += padding + atom.size; + section.header.@"align" = @maximum(section.header.@"align", atom.alignment); if (atom.next) |next| { atom = next; } else break; } + + macho_file.sections.set(match, section); } } diff --git a/src/link/MachO/fat.zig b/src/link/MachO/fat.zig index 1511f274a8..7c328c1418 100644 --- a/src/link/MachO/fat.zig +++ b/src/link/MachO/fat.zig @@ -46,7 +46,9 @@ pub fn getLibraryOffset(reader: anytype, cpu_arch: std.Target.Cpu.Arch) !u64 { return fat_arch.offset; } } else { - log.err("Could not find matching cpu architecture in fat library: expected {}", .{cpu_arch}); + log.err("Could not find matching cpu architecture in fat library: expected {s}", .{ + @tagName(cpu_arch), + }); return error.MismatchedCpuArchitecture; } } From bb532584bc569edb563b757c658fd743731837ec Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 1 Aug 2022 12:28:58 +0200 Subject: [PATCH 24/33] macho: update how we insert output sections Instead of generating sections upfront, allow generation by scanning the object files for input -> output sections mapping. Next, always strive to keep output sections in the final container sorted as they appear in the final binary. This makes the linker less messy wrt handling of output sections sort order for dyld/macOS not to complain. There's still more work to be done for incremental context though to make this work but looks promising already. 
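
Not part of the patch itself — a minimal sketch of the keep-sections-sorted idea described above. The precedence() and insertSorted() helpers below are hypothetical stand-ins for the getSectionPrecedence/insertSection pair introduced further down in this diff; the real linker keys precedence off macho.section_64 flags and names, while this toy version only looks at a section name:

const std = @import("std");

// Toy precedence: lower values sort earlier within a segment.
// (Assumed simplification; the patch distinguishes many more cases.)
fn precedence(name: []const u8) u4 {
    if (std.mem.eql(u8, name, "__text")) return 0x0;
    if (std.mem.eql(u8, name, "__bss")) return 0xf;
    return 0x3;
}

// Insert a section name keeping the list sorted by precedence,
// mirroring the insertion strategy this patch adopts for output sections.
fn insertSorted(list: *std.ArrayList([]const u8), name: []const u8) !void {
    const p = precedence(name);
    var index: usize = list.items.len;
    for (list.items) |existing, i| {
        if (precedence(existing) > p) {
            index = i;
            break;
        }
    }
    try list.insert(index, name);
}

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    var sections = std.ArrayList([]const u8).init(gpa.allocator());
    defer sections.deinit();

    // Sections arrive in whatever order the object files mention them...
    try insertSorted(&sections, "__bss");
    try insertSorted(&sections, "__const");
    try insertSorted(&sections, "__text");

    // ...but end up ordered as they must appear in the output binary:
    // __text, __const, __bss.
    for (sections.items) |name| std.debug.print("{s}\n", .{name});
}
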
--- lib/std/macho.zig | 7 +- src/link/MachO.zig | 478 ++++++++++++++-------------------- src/link/MachO/Atom.zig | 4 +- src/link/MachO/Object.zig | 23 +- src/link/MachO/dead_strip.zig | 5 +- 5 files changed, 229 insertions(+), 288 deletions(-) diff --git a/lib/std/macho.zig b/lib/std/macho.zig index 9334f79dc5..aa43229a76 100644 --- a/lib/std/macho.zig +++ b/lib/std/macho.zig @@ -780,7 +780,7 @@ pub const section_64 = extern struct { return parseName(§.segname); } - pub fn type_(sect: section_64) u8 { + pub fn @"type"(sect: section_64) u8 { return @truncate(u8, sect.flags & 0xff); } @@ -793,6 +793,11 @@ pub const section_64 = extern struct { return attr & S_ATTR_PURE_INSTRUCTIONS != 0 or attr & S_ATTR_SOME_INSTRUCTIONS != 0; } + pub fn isZerofill(sect: section_64) bool { + const tt = sect.@"type"(); + return tt == S_ZEROFILL or tt == S_GB_ZEROFILL or tt == S_THREAD_LOCAL_ZEROFILL; + } + pub fn isDebug(sect: section_64) bool { return sect.attrs() & S_ATTR_DEBUG != 0; } diff --git a/src/link/MachO.zig b/src/link/MachO.zig index b912130957..dda5fd48e3 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -559,6 +559,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No if (build_options.enable_logging) { self.logSymtab(); + self.logSections(); self.logAtoms(); } @@ -1140,7 +1141,6 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) try self.resolveSymbolsInArchives(); try self.resolveDyldStubBinder(); try self.createDyldPrivateAtom(); - try self.createStubHelperPreambleAtom(); try self.resolveSymbolsInDylibs(); try self.createMhExecuteHeaderSymbol(); try self.createDsoHandleSymbol(); @@ -1156,6 +1156,11 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) return error.FrameworkNotFound; } + for (self.objects.items) |*object| { + try object.scanInputSections(self); + } + + try self.createStubHelperPreambleAtom(); try self.createTentativeDefAtoms(); for (self.objects.items) |*object, object_id| { @@ -1166,14 +1171,14 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) try dead_strip.gcAtoms(self); } - try self.pruneAndSortSections(); try self.allocateSegments(); try self.allocateSymbols(); try self.allocateSpecialSymbols(); - if (build_options.enable_logging) { + if (build_options.enable_logging or true) { self.logSymtab(); + self.logSections(); self.logAtoms(); } @@ -1691,7 +1696,7 @@ pub fn getOutputSection(self: *MachO, sect: macho.section_64) !?u8 { const segname = sect.segName(); const sectname = sect.sectName(); const res: ?u8 = blk: { - switch (sect.type_()) { + switch (sect.@"type"()) { macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => { if (self.text_const_section_index == null) { self.text_const_section_index = try self.initSection( @@ -2197,27 +2202,6 @@ fn allocateSymbols(self: *MachO) !void { } } -fn shiftLocalsByOffset(self: *MachO, sect_id: u8, offset: i64) !void { - var atom = self.sections.items(.last_atom)[sect_id] orelse return; - - while (true) { - const atom_sym = atom.getSymbolPtr(self); - atom_sym.n_value = @intCast(u64, @intCast(i64, atom_sym.n_value) + offset); - - for (atom.contained.items) |sym_at_off| { - const contained_sym = self.getSymbolPtr(.{ - .sym_index = sym_at_off.sym_index, - .file = atom.file, - }); - contained_sym.n_value = @intCast(u64, @intCast(i64, contained_sym.n_value) + offset); - } - - if (atom.prev) |prev| { - atom = prev; - } else break; - } -} - fn allocateSpecialSymbols(self: *MachO) !void { 
for (&[_][]const u8{ "___dso_handle", @@ -2245,9 +2229,10 @@ fn writeAtomsOneShot(self: *MachO) !void { for (slice.items(.last_atom)) |last_atom, sect_id| { const header = slice.items(.header)[sect_id]; + if (header.size == 0) continue; var atom = last_atom.?; - if (header.flags == macho.S_ZEROFILL or header.flags == macho.S_THREAD_LOCAL_ZEROFILL) continue; + if (header.isZerofill()) continue; var buffer = std.ArrayList(u8).init(gpa); defer buffer.deinit(); @@ -2334,8 +2319,7 @@ fn writeAtomsIncremental(self: *MachO) !void { const sect_i = @intCast(u8, i); const header = slice.items(.header)[sect_i]; - // TODO handle zerofill in stage2 - // if (sect.flags == macho.S_ZEROFILL or sect.flags == macho.S_THREAD_LOCAL_ZEROFILL) continue; + if (header.isZerofill()) continue; log.debug("writing atoms in {s},{s}", .{ header.segName(), header.sectName() }); @@ -3904,7 +3888,12 @@ fn getOutputSectionAtom( // TODO finish and audit this function if (val.isUndefDeep()) { if (mode == .ReleaseFast or mode == .ReleaseSmall) { - break :blk self.bss_section_index.?; + break :blk (try self.getOutputSection(.{ + .segname = makeStaticString("__DATA"), + .sectname = makeStaticString("__bss"), + .size = code.len, + .@"align" = align_log_2, + })).?; } else { break :blk self.data_section_index.?; } @@ -4488,74 +4477,6 @@ fn populateMissingMetadata(self: *MachO) !void { ); } - if (self.tlv_section_index == null) { - const needed_size = if (self.mode == .incremental) - @sizeOf(u64) * self.base.options.symbol_count_hint - else - 0; - const alignment: u16 = 3; // 2^3 = @sizeOf(u64) - self.tlv_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__thread_vars", - needed_size, - alignment, - .{ - .flags = macho.S_THREAD_LOCAL_VARIABLES, - }, - ); - } - - if (self.tlv_data_section_index == null) { - const needed_size = if (self.mode == .incremental) - @sizeOf(u64) * self.base.options.symbol_count_hint - else - 0; - const alignment: u16 = 3; // 2^3 = @sizeOf(u64) - self.tlv_data_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__thread_data", - needed_size, - alignment, - .{ - .flags = macho.S_THREAD_LOCAL_REGULAR, - }, - ); - } - - if (self.tlv_bss_section_index == null) { - const needed_size = if (self.mode == .incremental) - @sizeOf(u64) * self.base.options.symbol_count_hint - else - 0; - const alignment: u16 = 3; // 2^3 = @sizeOf(u64) - self.tlv_bss_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__thread_bss", - needed_size, - alignment, - .{ - .flags = macho.S_THREAD_LOCAL_ZEROFILL, - }, - ); - } - - if (self.bss_section_index == null) { - const needed_size = if (self.mode == .incremental) - @sizeOf(u64) * self.base.options.symbol_count_hint - else - 0; - const alignment: u16 = 3; // 2^3 = @sizeOf(u64) - self.bss_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__bss", - needed_size, - alignment, - .{ - .flags = macho.S_ZEROFILL, - }, - ); - } - if (self.linkedit_segment_cmd_index == null) { self.linkedit_segment_cmd_index = @intCast(u8, self.segments.items.len); var vmaddr: u64 = 0; @@ -4690,18 +4611,19 @@ fn allocateSegments(self: *MachO) !void { }, try self.calcMinHeaderPad()); if (self.text_segment_cmd_index) |index| blk: { - const seg = &self.segments.items[index]; - if (seg.nsects == 0) break :blk; + const indexes = self.getSectionIndexes(index); + if (indexes.start == indexes.end) break :blk; + const seg = self.segments.items[index]; // Shift all sections to the back to minimize jump size between __TEXT and __DATA 
segments. var min_alignment: u32 = 0; - for (self.sections.items(.header)[0..seg.nsects]) |header| { + for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { const alignment = try math.powi(u32, 2, header.@"align"); min_alignment = math.max(min_alignment, alignment); } assert(min_alignment > 0); - const last_header = self.sections.items(.header)[seg.nsects - 1]; + const last_header = self.sections.items(.header)[indexes.end - 1]; const shift: u32 = shift: { const diff = seg.filesize - last_header.offset - last_header.size; const factor = @divTrunc(diff, min_alignment); @@ -4709,7 +4631,7 @@ fn allocateSegments(self: *MachO) !void { }; if (shift > 0) { - for (self.sections.items(.header)[0..seg.nsects]) |*header| { + for (self.sections.items(.header)[indexes.start..indexes.end]) |*header| { header.offset += shift; header.addr += shift; } @@ -4746,16 +4668,14 @@ fn allocateSegment(self: *MachO, maybe_index: ?u8, indices: []const ?u8, init_si seg.vmsize = init_size; // Allocate the sections according to their alignment at the beginning of the segment. + const indexes = self.getSectionIndexes(index); var start = init_size; const slice = self.sections.slice(); - for (slice.items(.header)) |*header, sect_id| { - const segment_index = slice.items(.segment_index)[sect_id]; - if (segment_index != index) continue; - const is_zerofill = header.flags == macho.S_ZEROFILL or header.flags == macho.S_THREAD_LOCAL_ZEROFILL; + for (slice.items(.header)[indexes.start..indexes.end]) |*header| { const alignment = try math.powi(u32, 2, header.@"align"); const start_aligned = mem.alignForwardGeneric(u64, start, alignment); - header.offset = if (is_zerofill) + header.offset = if (header.isZerofill()) 0 else @intCast(u32, seg.fileoff + start_aligned); @@ -4763,7 +4683,7 @@ fn allocateSegment(self: *MachO, maybe_index: ?u8, indices: []const ?u8, init_si start = start_aligned + header.size; - if (!is_zerofill) { + if (!header.isZerofill()) { seg.filesize = start; } seg.vmsize = start; @@ -4788,7 +4708,7 @@ fn initSection( opts: InitSectionOpts, ) !u8 { const seg = &self.segments.items[segment_id]; - var header = macho.section_64{ + const index = try self.insertSection(segment_id, .{ .sectname = makeStaticString(sectname), .segname = seg.segname, .size = if (self.mode == .incremental) @intCast(u32, size) else 0, @@ -4796,42 +4716,164 @@ fn initSection( .flags = opts.flags, .reserved1 = opts.reserved1, .reserved2 = opts.reserved2, - }; - - if (self.mode == .incremental) { - const alignment_pow_2 = try math.powi(u32, 2, alignment); - const padding: ?u64 = if (segment_id == self.text_segment_cmd_index.?) 
- try self.calcMinHeaderPad() - else - null; - const off = self.findFreeSpace(segment_id, alignment_pow_2, padding); - log.debug("allocating {s},{s} section from 0x{x} to 0x{x}", .{ - header.segName(), - header.sectName(), - off, - off + size, - }); - - header.addr = seg.vmaddr + off - seg.fileoff; - - // TODO handle zerofill in stage2 - // const is_zerofill = opts.flags == macho.S_ZEROFILL or opts.flags == macho.S_THREAD_LOCAL_ZEROFILL; - header.offset = @intCast(u32, off); - - try self.updateSectionOrdinals(); - } - - const index = @intCast(u8, self.sections.slice().len); - try self.sections.append(self.base.allocator, .{ - .segment_index = segment_id, - .header = header, }); seg.cmdsize += @sizeOf(macho.section_64); seg.nsects += 1; + if (self.mode == .incremental) { + const header = &self.sections.items(.header)[index]; + const prev_end_off = if (index > 0) blk: { + const prev_section = self.sections.get(index - 1); + if (prev_section.segment_index == segment_id) { + const prev_header = prev_section.header; + break :blk prev_header.offset + padToIdeal(prev_header.size); + } else break :blk seg.fileoff; + } else 0; + const alignment_pow_2 = try math.powi(u32, 2, alignment); + const padding: u64 = if (index == 0) try self.calcMinHeaderPad() else 0; + const off = mem.alignForwardGeneric(u64, padding + prev_end_off, alignment_pow_2); + log.debug("allocating {s},{s} section at 0x{x}", .{ header.segName(), header.sectName(), off }); + + header.addr = seg.vmaddr + off - seg.fileoff; + + if (!header.isZerofill()) { + header.offset = @intCast(u32, off); + } + + self.updateSectionOrdinals(index + 1); + } + return index; } +fn getSectionPrecedence(header: macho.section_64) u4 { + if (header.isCode()) { + if (mem.eql(u8, "__text", header.sectName())) return 0x0; + if (header.@"type"() == macho.S_SYMBOL_STUBS) return 0x1; + return 0x2; + } + switch (header.@"type"()) { + macho.S_NON_LAZY_SYMBOL_POINTERS, + macho.S_LAZY_SYMBOL_POINTERS, + => return 0x0, + macho.S_MOD_INIT_FUNC_POINTERS => return 0x1, + macho.S_MOD_TERM_FUNC_POINTERS => return 0x2, + macho.S_ZEROFILL => return 0xf, + macho.S_THREAD_LOCAL_REGULAR => return 0xd, + macho.S_THREAD_LOCAL_ZEROFILL => return 0xe, + else => if (mem.eql(u8, "__eh_frame", header.sectName())) + return 0xf + else + return 0x3, + } +} + +fn insertSection(self: *MachO, segment_index: u8, header: macho.section_64) !u8 { + const precedence = getSectionPrecedence(header); + const indexes = self.getSectionIndexes(segment_index); + const insertion_index = for (self.sections.items(.header)[indexes.start..indexes.end]) |hdr, i| { + if (getSectionPrecedence(hdr) > precedence) break @intCast(u8, i + indexes.start); + } else indexes.end; + log.debug("inserting section '{s},{s}' at index {d}", .{ + header.segName(), + header.sectName(), + insertion_index, + }); + // TODO slim it down + for (&[_]*?u8{ + // __TEXT + &self.text_section_index, + &self.stubs_section_index, + &self.stub_helper_section_index, + &self.gcc_except_tab_section_index, + &self.cstring_section_index, + &self.ustring_section_index, + &self.text_const_section_index, + &self.objc_methlist_section_index, + &self.objc_methname_section_index, + &self.objc_methtype_section_index, + &self.objc_classname_section_index, + &self.eh_frame_section_index, + // __DATA_CONST + &self.got_section_index, + &self.mod_init_func_section_index, + &self.mod_term_func_section_index, + &self.data_const_section_index, + &self.objc_cfstring_section_index, + &self.objc_classlist_section_index, + &self.objc_imageinfo_section_index, + 
// __DATA + &self.rustc_section_index, + &self.la_symbol_ptr_section_index, + &self.objc_const_section_index, + &self.objc_selrefs_section_index, + &self.objc_classrefs_section_index, + &self.objc_data_section_index, + &self.data_section_index, + &self.tlv_section_index, + &self.tlv_ptrs_section_index, + &self.tlv_data_section_index, + &self.tlv_bss_section_index, + &self.bss_section_index, + }) |maybe_index| { + const index = maybe_index.* orelse continue; + if (insertion_index <= index) maybe_index.* = index + 1; + } + try self.sections.insert(self.base.allocator, insertion_index, .{ + .segment_index = segment_index, + .header = header, + }); + return insertion_index; +} + +fn updateSectionOrdinals(self: *MachO, start: u8) void { + const tracy = trace(@src()); + defer tracy.end(); + + const slice = self.sections.slice(); + for (slice.items(.last_atom)[start..]) |last_atom| { + var atom = last_atom.?; + + while (true) { + const sym = atom.getSymbolPtr(self); + sym.n_sect = start + 1; + + for (atom.contained.items) |sym_at_off| { + const contained_sym = self.getSymbolPtr(.{ + .sym_index = sym_at_off.sym_index, + .file = atom.file, + }); + contained_sym.n_sect = start + 1; + } + + if (atom.prev) |prev| { + atom = prev; + } else break; + } + } +} + +fn shiftLocalsByOffset(self: *MachO, sect_id: u8, offset: i64) !void { + var atom = self.sections.items(.last_atom)[sect_id] orelse return; + + while (true) { + const atom_sym = atom.getSymbolPtr(self); + atom_sym.n_value = @intCast(u64, @intCast(i64, atom_sym.n_value) + offset); + + for (atom.contained.items) |sym_at_off| { + const contained_sym = self.getSymbolPtr(.{ + .sym_index = sym_at_off.sym_index, + .file = atom.file, + }); + contained_sym.n_value = @intCast(u64, @intCast(i64, contained_sym.n_value) + offset); + } + + if (atom.prev) |prev| { + atom = prev; + } else break; + } +} + fn findFreeSpace(self: MachO, segment_id: u8, alignment: u64, start: ?u64) u64 { const seg = self.segments.items[segment_id]; const indexes = self.getSectionIndexes(segment_id); @@ -5181,153 +5223,18 @@ fn getSegmentAllocBase(self: MachO, indices: []const ?u8) struct { vmaddr: u64, return .{ .vmaddr = 0, .fileoff = 0 }; } -fn pruneAndSortSections(self: *MachO) !void { - const gpa = self.base.allocator; - - var sections = self.sections.toOwnedSlice(); - defer sections.deinit(gpa); - try self.sections.ensureTotalCapacity(gpa, sections.len); - - for (&[_]*?u8{ - // __TEXT - &self.text_section_index, - &self.stubs_section_index, - &self.stub_helper_section_index, - &self.gcc_except_tab_section_index, - &self.cstring_section_index, - &self.ustring_section_index, - &self.text_const_section_index, - &self.objc_methlist_section_index, - &self.objc_methname_section_index, - &self.objc_methtype_section_index, - &self.objc_classname_section_index, - &self.eh_frame_section_index, - // __DATA_CONST - &self.got_section_index, - &self.mod_init_func_section_index, - &self.mod_term_func_section_index, - &self.data_const_section_index, - &self.objc_cfstring_section_index, - &self.objc_classlist_section_index, - &self.objc_imageinfo_section_index, - // __DATA - &self.rustc_section_index, - &self.la_symbol_ptr_section_index, - &self.objc_const_section_index, - &self.objc_selrefs_section_index, - &self.objc_classrefs_section_index, - &self.objc_data_section_index, - &self.data_section_index, - &self.tlv_section_index, - &self.tlv_ptrs_section_index, - &self.tlv_data_section_index, - &self.tlv_bss_section_index, - &self.bss_section_index, - }) |maybe_index| { - const old_idx = 
maybe_index.* orelse continue; - const segment_index = sections.items(.segment_index)[old_idx]; - const header = sections.items(.header)[old_idx]; - const last_atom = sections.items(.last_atom)[old_idx]; - if (header.size == 0) { - log.debug("pruning section {s},{s}", .{ header.segName(), header.sectName() }); - maybe_index.* = null; - const seg = &self.segments.items[segment_index]; - seg.cmdsize -= @sizeOf(macho.section_64); - seg.nsects -= 1; - } else { - maybe_index.* = @intCast(u8, self.sections.slice().len); - self.sections.appendAssumeCapacity(.{ - .segment_index = segment_index, - .header = header, - .last_atom = last_atom, - }); - } - } - - for (self.segments.items) |*seg| { - const segname = seg.segName(); - if (seg.nsects == 0 and - !mem.eql(u8, "__TEXT", segname) and - !mem.eql(u8, "__PAGEZERO", segname) and - !mem.eql(u8, "__LINKEDIT", segname)) - { - // Segment has now become empty, so mark it as such - log.debug("marking segment {s} as dead", .{seg.segName()}); - seg.cmd = @intToEnum(macho.LC, 0); - } - } -} - -fn updateSectionOrdinals(self: *MachO) !void { - _ = self; - const tracy = trace(@src()); - defer tracy.end(); - - @panic("updating section ordinals"); - - // const gpa = self.base.allocator; - - // var ordinal_remap = std.AutoHashMap(u8, u8).init(gpa); - // defer ordinal_remap.deinit(); - // var ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}; - - // var new_ordinal: u8 = 0; - // for (&[_]?u16{ - // self.text_segment_cmd_index, - // self.data_const_segment_cmd_index, - // self.data_segment_cmd_index, - // }) |maybe_index| { - // const index = maybe_index orelse continue; - // const seg = self.load_commands.items[index].segment; - // for (seg.sections.items) |sect, sect_id| { - // const match = MatchingSection{ - // .seg = @intCast(u16, index), - // .sect = @intCast(u16, sect_id), - // }; - // const old_ordinal = self.getSectionOrdinal(match); - // new_ordinal += 1; - // log.debug("'{s},{s}': sect({d}, '_,_') => sect({d}, '_,_')", .{ - // sect.segName(), - // sect.sectName(), - // old_ordinal, - // new_ordinal, - // }); - // try ordinal_remap.putNoClobber(old_ordinal, new_ordinal); - // try ordinals.putNoClobber(gpa, match, {}); - // } - // } - - // // FIXME Jakub - // // TODO no need for duping work here; simply walk the atom graph - // for (self.locals.items) |*sym| { - // if (sym.undf()) continue; - // if (sym.n_sect == 0) continue; - // sym.n_sect = ordinal_remap.get(sym.n_sect).?; - // } - // for (self.objects.items) |*object| { - // for (object.symtab.items) |*sym| { - // if (sym.undf()) continue; - // if (sym.n_sect == 0) continue; - // sym.n_sect = ordinal_remap.get(sym.n_sect).?; - // } - // } - - // self.section_ordinals.deinit(gpa); - // self.section_ordinals = ordinals; -} - pub fn writeSegmentHeaders(self: *MachO, start: usize, end: usize, ncmds: *u32, writer: anytype) !void { - var count: usize = 0; - for (self.segments.items[start..end]) |seg| { - if (seg.cmd == .NONE) continue; + for (self.segments.items[start..end]) |seg, i| { + if (seg.nsects == 0 and + (mem.eql(u8, seg.segName(), "__DATA_CONST") or + mem.eql(u8, seg.segName(), "__DATA"))) continue; try writer.writeStruct(seg); - // TODO - for (self.sections.items(.header)[count..][0..seg.nsects]) |header| { + const indexes = self.getSectionIndexes(@intCast(u8, start + i)); + for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { try writer.writeStruct(header); } - count += seg.nsects; ncmds.* += 1; } } @@ -6644,6 +6551,19 @@ fn generateSymbolStabsForSymbol( // 
try writer.writeByte(']'); // } +fn logSections(self: *MachO) void { + log.debug("sections:", .{}); + for (self.sections.items(.header)) |header, i| { + log.debug(" sect({d}): {s},{s} @{x}, sizeof({x})", .{ + i + 1, + header.segName(), + header.sectName(), + header.offset, + header.size, + }); + } +} + fn logSymAttributes(sym: macho.nlist_64, buf: *[9]u8) []const u8 { mem.set(u8, buf[0..4], '_'); mem.set(u8, buf[4..], ' '); diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 90c86e24ed..85b3ca1c2b 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -466,7 +466,7 @@ fn addPtrBindingOrRebase( const section = context.macho_file.sections.get(source_sym.n_sect - 1); const header = section.header; const segment_index = section.segment_index; - const sect_type = header.type_(); + const sect_type = header.@"type"(); const should_rebase = rebase: { if (rel.r_length != 3) break :rebase false; @@ -571,7 +571,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { const is_tlv = is_tlv: { const source_sym = self.getSymbol(macho_file); const header = macho_file.sections.items(.header)[source_sym.n_sect - 1]; - break :is_tlv header.type_() == macho.S_THREAD_LOCAL_VARIABLES; + break :is_tlv header.@"type"() == macho.S_THREAD_LOCAL_VARIABLES; }; const target_addr = blk: { const target_atom = rel.getTargetAtom(macho_file) orelse { diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 2e2f3dad84..996a85ed4b 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -214,6 +214,23 @@ fn filterRelocs( return relocs[start..end]; } +pub fn scanInputSections(self: Object, macho_file: *MachO) !void { + for (self.sections.items) |sect| { + const match = (try macho_file.getOutputSection(sect)) orelse { + log.debug(" unhandled section", .{}); + continue; + }; + const output = macho_file.sections.items(.header)[match]; + log.debug("mapping '{s},{s}' into output sect({d}, '{s},{s}')", .{ + sect.segName(), + sect.sectName(), + match + 1, + output.segName(), + output.sectName(), + }); + } +} + /// Splits object into atoms assuming one-shot linking mode. pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) !void { assert(macho_file.mode == .one_shot); @@ -280,13 +297,9 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) }); const cpu_arch = macho_file.base.options.target.cpu.arch; - const is_zerofill = blk: { - const section_type = sect.type_(); - break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL; - }; // Read section's code - const code: ?[]const u8 = if (!is_zerofill) try self.getSectionContents(sect) else null; + const code: ?[]const u8 = if (!sect.isZerofill()) try self.getSectionContents(sect) else null; // Read section's list of relocations const relocs = @ptrCast( diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index 12f46c9f26..bf65b96049 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -43,6 +43,9 @@ fn removeAtomFromSection(atom: *Atom, match: u8, macho_file: *MachO) void { // The section will be GCed in the next step. 
section.last_atom = null; section.header.size = 0; + const segment = &macho_file.segments.items[section.segment_index]; + segment.cmdsize -= @sizeOf(macho.section_64); + segment.nsects -= 1; } } @@ -93,7 +96,7 @@ fn collectRoots(roots: *std.AutoHashMap(*Atom, void), macho_file: *MachO) !void const is_gc_root = blk: { if (source_sect.isDontDeadStrip()) break :blk true; if (mem.eql(u8, "__StaticInit", source_sect.sectName())) break :blk true; - switch (source_sect.type_()) { + switch (source_sect.@"type"()) { macho.S_MOD_INIT_FUNC_POINTERS, macho.S_MOD_TERM_FUNC_POINTERS, => break :blk true, From 7bba3d330ad90026e8b79cae9940a1e878628119 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 1 Aug 2022 18:42:18 +0200 Subject: [PATCH 25/33] macho: cleanup output section selection logic Cache only section indexes used by the linker for synthetic sections and/or incremental codepath. --- src/link/MachO.zig | 632 ++++++++++------------------------------ src/link/MachO/Atom.zig | 5 +- 2 files changed, 157 insertions(+), 480 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index dda5fd48e3..205fbcd6bf 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -121,48 +121,12 @@ data_const_segment_cmd_index: ?u8 = null, data_segment_cmd_index: ?u8 = null, linkedit_segment_cmd_index: ?u8 = null, -// __TEXT segment sections text_section_index: ?u8 = null, stubs_section_index: ?u8 = null, stub_helper_section_index: ?u8 = null, -text_const_section_index: ?u8 = null, -cstring_section_index: ?u8 = null, -ustring_section_index: ?u8 = null, -gcc_except_tab_section_index: ?u8 = null, -unwind_info_section_index: ?u8 = null, -eh_frame_section_index: ?u8 = null, - -objc_methlist_section_index: ?u8 = null, -objc_methname_section_index: ?u8 = null, -objc_methtype_section_index: ?u8 = null, -objc_classname_section_index: ?u8 = null, - -// __DATA_CONST segment sections got_section_index: ?u8 = null, -mod_init_func_section_index: ?u8 = null, -mod_term_func_section_index: ?u8 = null, -data_const_section_index: ?u8 = null, - -objc_cfstring_section_index: ?u8 = null, -objc_classlist_section_index: ?u8 = null, -objc_imageinfo_section_index: ?u8 = null, - -// __DATA segment sections -tlv_section_index: ?u8 = null, -tlv_data_section_index: ?u8 = null, -tlv_bss_section_index: ?u8 = null, -tlv_ptrs_section_index: ?u8 = null, la_symbol_ptr_section_index: ?u8 = null, data_section_index: ?u8 = null, -bss_section_index: ?u8 = null, - -objc_const_section_index: ?u8 = null, -objc_selrefs_section_index: ?u8 = null, -objc_classrefs_section_index: ?u8 = null, -objc_data_section_index: ?u8 = null, - -rustc_section_index: ?u8 = null, -rustc_section_size: u64 = 0, locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, globals: std.StringArrayHashMapUnmanaged(SymbolWithLoc) = .{}, @@ -547,14 +511,15 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try self.createMhExecuteHeaderSymbol(); try self.resolveDyldStubBinder(); - try self.createDyldPrivateAtom(); - try self.createStubHelperPreambleAtom(); try self.resolveSymbolsInDylibs(); if (self.unresolved.count() > 0) { return error.UndefinedSymbolReference; } + try self.createDyldPrivateAtom(); + try self.createStubHelperPreambleAtom(); + try self.allocateSpecialSymbols(); if (build_options.enable_logging) { @@ -1140,7 +1105,6 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) try self.resolveSymbolsInArchives(); try self.resolveDyldStubBinder(); - try self.createDyldPrivateAtom(); try 
self.resolveSymbolsInDylibs(); try self.createMhExecuteHeaderSymbol(); try self.createDsoHandleSymbol(); @@ -1160,8 +1124,9 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) try object.scanInputSections(self); } - try self.createStubHelperPreambleAtom(); + try self.createDyldPrivateAtom(); try self.createTentativeDefAtoms(); + try self.createStubHelperPreambleAtom(); for (self.objects.items) |*object, object_id| { try object.splitIntoAtomsOneShot(self, @intCast(u32, object_id)); @@ -1184,11 +1149,6 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) try self.writeAtomsOneShot(); - if (self.rustc_section_index) |id| { - const header = &self.sections.items(.header)[id]; - header.size = self.rustc_section_size; - } - var lc_buffer = std.ArrayList(u8).init(arena); const lc_writer = lc_buffer.writer(); var ncmds: u32 = 0; @@ -1696,417 +1656,142 @@ pub fn getOutputSection(self: *MachO, sect: macho.section_64) !?u8 { const segname = sect.segName(); const sectname = sect.sectName(); const res: ?u8 = blk: { + if (mem.eql(u8, "__LLVM", segname)) { + log.debug("TODO LLVM section: type 0x{x}, name '{s},{s}'", .{ + sect.flags, segname, sectname, + }); + break :blk null; + } + + if (sect.isCode()) { + if (self.text_section_index == null) { + self.text_section_index = try self.initSection( + "__TEXT", + "__text", + sect.size, + sect.@"align", + .{ + .flags = macho.S_REGULAR | + macho.S_ATTR_PURE_INSTRUCTIONS | + macho.S_ATTR_SOME_INSTRUCTIONS, + }, + ); + } + break :blk self.text_section_index.?; + } + + if (sect.isDebug()) { + // TODO debug attributes + if (mem.eql(u8, "__LD", segname) and mem.eql(u8, "__compact_unwind", sectname)) { + log.debug("TODO compact unwind section: type 0x{x}, name '{s},{s}'", .{ + sect.flags, segname, sectname, + }); + } + break :blk null; + } + switch (sect.@"type"()) { - macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => { - if (self.text_const_section_index == null) { - self.text_const_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__const", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.text_const_section_index.?; + macho.S_4BYTE_LITERALS, + macho.S_8BYTE_LITERALS, + macho.S_16BYTE_LITERALS, + => { + break :blk self.getSectionByName("__TEXT", "__const") orelse try self.initSection( + "__TEXT", + "__const", + sect.size, + sect.@"align", + .{}, + ); }, macho.S_CSTRING_LITERALS => { - if (mem.eql(u8, sectname, "__objc_methname")) { - // TODO it seems the common values within the sections in objects are deduplicated/merged - // on merging the sections' contents. 
- if (self.objc_methname_section_index == null) { - self.objc_methname_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__objc_methname", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.objc_methname_section_index.?; - } else if (mem.eql(u8, sectname, "__objc_methtype")) { - if (self.objc_methtype_section_index == null) { - self.objc_methtype_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__objc_methtype", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.objc_methtype_section_index.?; - } else if (mem.eql(u8, sectname, "__objc_classname")) { - if (self.objc_classname_section_index == null) { - self.objc_classname_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__objc_classname", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.objc_classname_section_index.?; - } - - if (self.cstring_section_index == null) { - self.cstring_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__cstring", - sect.size, - sect.@"align", - .{ - .flags = macho.S_CSTRING_LITERALS, - }, - ); - } - break :blk self.cstring_section_index.?; - }, - macho.S_LITERAL_POINTERS => { - if (mem.eql(u8, segname, "__DATA") and mem.eql(u8, sectname, "__objc_selrefs")) { - if (self.objc_selrefs_section_index == null) { - self.objc_selrefs_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__objc_selrefs", - sect.size, - sect.@"align", - .{ - .flags = macho.S_LITERAL_POINTERS, - }, - ); - } - break :blk self.objc_selrefs_section_index.?; - } else { - // TODO investigate - break :blk null; - } - }, - macho.S_MOD_INIT_FUNC_POINTERS => { - if (self.mod_init_func_section_index == null) { - self.mod_init_func_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, - "__mod_init_func", - sect.size, - sect.@"align", - .{ - .flags = macho.S_MOD_INIT_FUNC_POINTERS, - }, - ); - } - break :blk self.mod_init_func_section_index.?; - }, - macho.S_MOD_TERM_FUNC_POINTERS => { - if (self.mod_term_func_section_index == null) { - self.mod_term_func_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, - "__mod_term_func", - sect.size, - sect.@"align", - .{ - .flags = macho.S_MOD_TERM_FUNC_POINTERS, - }, - ); - } - break :blk self.mod_term_func_section_index.?; - }, - macho.S_ZEROFILL => { - if (self.bss_section_index == null) { - self.bss_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__bss", - sect.size, - sect.@"align", - .{ - .flags = macho.S_ZEROFILL, - }, - ); - } - break :blk self.bss_section_index.?; - }, - macho.S_THREAD_LOCAL_VARIABLES => { - if (self.tlv_section_index == null) { - self.tlv_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__thread_vars", - sect.size, - sect.@"align", - .{ - .flags = macho.S_THREAD_LOCAL_VARIABLES, - }, - ); - } - break :blk self.tlv_section_index.?; - }, - macho.S_THREAD_LOCAL_VARIABLE_POINTERS => { - if (self.tlv_ptrs_section_index == null) { - self.tlv_ptrs_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__thread_ptrs", - sect.size, - sect.@"align", - .{ - .flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS, - }, - ); - } - break :blk self.tlv_ptrs_section_index.?; - }, - macho.S_THREAD_LOCAL_REGULAR => { - if (self.tlv_data_section_index == null) { - self.tlv_data_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__thread_data", - sect.size, - sect.@"align", - .{ - .flags = 
macho.S_THREAD_LOCAL_REGULAR, - }, - ); - } - break :blk self.tlv_data_section_index.?; - }, - macho.S_THREAD_LOCAL_ZEROFILL => { - if (self.tlv_bss_section_index == null) { - self.tlv_bss_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__thread_bss", - sect.size, - sect.@"align", - .{ - .flags = macho.S_THREAD_LOCAL_ZEROFILL, - }, - ); - } - break :blk self.tlv_bss_section_index.?; - }, - macho.S_COALESCED => { - if (mem.eql(u8, "__TEXT", segname) and mem.eql(u8, "__eh_frame", sectname)) { - // TODO I believe __eh_frame is currently part of __unwind_info section - // in the latest ld64 output. - if (self.eh_frame_section_index == null) { - self.eh_frame_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__eh_frame", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.eh_frame_section_index.?; - } - - // TODO audit this: is this the right mapping? - if (self.data_const_section_index == null) { - self.data_const_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, - "__const", + if (mem.startsWith(u8, sectname, "__objc")) { + break :blk self.getSectionByName(segname, sectname) orelse try self.initSection( + segname, + sectname, sect.size, sect.@"align", .{}, ); } - - break :blk self.data_const_section_index.?; + break :blk self.getSectionByName("__TEXT", "__cstring") orelse try self.initSection( + "__TEXT", + "__cstring", + sect.size, + sect.@"align", + .{ .flags = macho.S_CSTRING_LITERALS }, + ); + }, + macho.S_MOD_INIT_FUNC_POINTERS, + macho.S_MOD_TERM_FUNC_POINTERS, + => { + break :blk self.getSectionByName("__DATA_CONST", sectname) orelse try self.initSection( + "__DATA_CONST", + sectname, + sect.size, + sect.@"align", + .{ .flags = sect.flags }, + ); + }, + macho.S_LITERAL_POINTERS, + macho.S_ZEROFILL, + macho.S_THREAD_LOCAL_VARIABLES, + macho.S_THREAD_LOCAL_VARIABLE_POINTERS, + macho.S_THREAD_LOCAL_REGULAR, + macho.S_THREAD_LOCAL_ZEROFILL, + => { + break :blk self.getSectionByName(segname, sectname) orelse try self.initSection( + segname, + sectname, + sect.size, + sect.@"align", + .{ .flags = sect.flags }, + ); + }, + macho.S_COALESCED => { + break :blk self.getSectionByName(segname, sectname) orelse try self.initSection( + segname, + sectname, + sect.size, + sect.@"align", + .{}, + ); }, macho.S_REGULAR => { - if (sect.isCode()) { - if (self.text_section_index == null) { - self.text_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__text", - sect.size, - sect.@"align", - .{ - .flags = macho.S_REGULAR | - macho.S_ATTR_PURE_INSTRUCTIONS | - macho.S_ATTR_SOME_INSTRUCTIONS, - }, - ); - } - break :blk self.text_section_index.?; - } - if (sect.isDebug()) { - // TODO debug attributes - if (mem.eql(u8, "__LD", segname) and mem.eql(u8, "__compact_unwind", sectname)) { - log.debug("TODO compact unwind section: type 0x{x}, name '{s},{s}'", .{ - sect.flags, segname, sectname, - }); - } - break :blk null; - } - if (mem.eql(u8, segname, "__TEXT")) { - if (mem.eql(u8, sectname, "__ustring")) { - if (self.ustring_section_index == null) { - self.ustring_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__ustring", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.ustring_section_index.?; - } else if (mem.eql(u8, sectname, "__gcc_except_tab")) { - if (self.gcc_except_tab_section_index == null) { - self.gcc_except_tab_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__gcc_except_tab", - sect.size, - sect.@"align", - .{}, - ); - } - 
break :blk self.gcc_except_tab_section_index.?; - } else if (mem.eql(u8, sectname, "__objc_methlist")) { - if (self.objc_methlist_section_index == null) { - self.objc_methlist_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__objc_methlist", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.objc_methlist_section_index.?; - } else if (mem.eql(u8, sectname, "__rodata") or + if (mem.eql(u8, sectname, "__rodata") or mem.eql(u8, sectname, "__typelink") or mem.eql(u8, sectname, "__itablink") or mem.eql(u8, sectname, "__gosymtab") or mem.eql(u8, sectname, "__gopclntab")) { - if (self.data_const_section_index == null) { - self.data_const_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, - "__const", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.data_const_section_index.?; - } else { - if (self.text_const_section_index == null) { - self.text_const_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__const", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.text_const_section_index.?; - } - } - - if (mem.eql(u8, segname, "__DATA_CONST")) { - if (self.data_const_section_index == null) { - self.data_const_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, + break :blk self.getSectionByName("__DATA_CONST", "__const") orelse try self.initSection( + "__DATA_CONST", "__const", sect.size, sect.@"align", .{}, ); } - break :blk self.data_const_section_index.?; } - if (mem.eql(u8, segname, "__DATA")) { - if (mem.eql(u8, sectname, "__const")) { - if (self.data_const_section_index == null) { - self.data_const_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, - "__const", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.data_const_section_index.?; - } else if (mem.eql(u8, sectname, "__cfstring")) { - if (self.objc_cfstring_section_index == null) { - self.objc_cfstring_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, - "__cfstring", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.objc_cfstring_section_index.?; - } else if (mem.eql(u8, sectname, "__objc_classlist")) { - if (self.objc_classlist_section_index == null) { - self.objc_classlist_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, - "__objc_classlist", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.objc_classlist_section_index.?; - } else if (mem.eql(u8, sectname, "__objc_imageinfo")) { - if (self.objc_imageinfo_section_index == null) { - self.objc_imageinfo_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, - "__objc_imageinfo", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.objc_imageinfo_section_index.?; - } else if (mem.eql(u8, sectname, "__objc_const")) { - if (self.objc_const_section_index == null) { - self.objc_const_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__objc_const", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.objc_const_section_index.?; - } else if (mem.eql(u8, sectname, "__objc_classrefs")) { - if (self.objc_classrefs_section_index == null) { - self.objc_classrefs_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__objc_classrefs", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.objc_classrefs_section_index.?; - } else if (mem.eql(u8, sectname, "__objc_data")) { - if (self.objc_data_section_index == null) { - 
self.objc_data_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__objc_data", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.objc_data_section_index.?; - } else if (mem.eql(u8, sectname, ".rustc")) { - if (self.rustc_section_index == null) { - self.rustc_section_index = try self.initSection( - self.data_segment_cmd_index.?, - ".rustc", - sect.size, - sect.@"align", - .{}, - ); - // We need to preserve the section size for rustc to properly - // decompress the metadata. - self.rustc_section_size = sect.size; - } - break :blk self.rustc_section_index.?; - } else { + if (mem.eql(u8, sectname, "__const") or + mem.eql(u8, sectname, "__cfstring") or + mem.eql(u8, sectname, "__objc_classlist") or + mem.eql(u8, sectname, "__objc_imageinfo")) + { + break :blk self.getSectionByName("__DATA_CONST", sectname) orelse + try self.initSection( + "__DATA_CONST", + sectname, + sect.size, + sect.@"align", + .{}, + ); + } else if (mem.eql(u8, sectname, "__data")) { if (self.data_section_index == null) { self.data_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__data", + segname, + sectname, sect.size, sect.@"align", .{}, @@ -2115,14 +1800,13 @@ pub fn getOutputSection(self: *MachO, sect: macho.section_64) !?u8 { break :blk self.data_section_index.?; } } - - if (mem.eql(u8, "__LLVM", segname) and mem.eql(u8, "__asm", sectname)) { - log.debug("TODO LLVM asm section: type 0x{x}, name '{s},{s}'", .{ - sect.flags, segname, sectname, - }); - } - - break :blk null; + break :blk self.getSectionByName(segname, sectname) orelse try self.initSection( + segname, + sectname, + sect.size, + sect.@"align", + .{}, + ); }, else => break :blk null, } @@ -2774,11 +2458,16 @@ fn createTentativeDefAtoms(self: *MachO) !void { // text blocks for each tentative definition. 
const size = sym.n_value; const alignment = (sym.n_desc >> 8) & 0x0f; + const n_sect = (try self.getOutputSection(.{ + .segname = makeStaticString("__DATA"), + .sectname = makeStaticString("__bss"), + .flags = macho.S_ZEROFILL, + })).?; sym.* = .{ .n_strx = sym.n_strx, .n_type = macho.N_SECT | macho.N_EXT, - .n_sect = 0, + .n_sect = n_sect, .n_desc = 0, .n_value = 0, }; @@ -2786,7 +2475,7 @@ fn createTentativeDefAtoms(self: *MachO) !void { const atom = try MachO.createEmptyAtom(gpa, global.sym_index, size, alignment); atom.file = global.file; - try self.allocateAtomCommon(atom, self.bss_section_index.?); + try self.allocateAtomCommon(atom, n_sect); if (global.file) |file| { const object = &self.objects.items[file]; @@ -4174,7 +3863,8 @@ pub fn deleteExport(self: *MachO, exp: Export) void { fn freeUnnamedConsts(self: *MachO, decl_index: Module.Decl.Index) void { const unnamed_consts = self.unnamed_const_atoms.getPtr(decl_index) orelse return; for (unnamed_consts.items) |atom| { - self.freeAtom(atom, self.text_const_section_index.?, true); + const sect_id = atom.getSymbol(self).n_sect; + self.freeAtom(atom, sect_id, true); self.locals_free_list.append(self.base.allocator, atom.sym_index) catch {}; self.locals.items[atom.sym_index].n_type = 0; _ = self.atom_by_index_table.remove(atom.sym_index); @@ -4307,7 +3997,7 @@ fn populateMissingMetadata(self: *MachO) !void { }; const needed_size = if (self.mode == .incremental) self.base.options.program_code_size_hint else 0; self.text_section_index = try self.initSection( - self.text_segment_cmd_index.?, + "__TEXT", "__text", needed_size, alignment, @@ -4330,7 +4020,7 @@ fn populateMissingMetadata(self: *MachO) !void { }; const needed_size = if (self.mode == .incremental) stub_size * self.base.options.symbol_count_hint else 0; self.stubs_section_index = try self.initSection( - self.text_segment_cmd_index.?, + "__TEXT", "__stubs", needed_size, alignment, @@ -4362,7 +4052,7 @@ fn populateMissingMetadata(self: *MachO) !void { else 0; self.stub_helper_section_index = try self.initSection( - self.text_segment_cmd_index.?, + "__TEXT", "__stub_helper", needed_size, alignment, @@ -4407,7 +4097,7 @@ fn populateMissingMetadata(self: *MachO) !void { 0; const alignment: u16 = 3; // 2^3 = @sizeOf(u64) self.got_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, + "__DATA_CONST", "__got", needed_size, alignment, @@ -4452,7 +4142,7 @@ fn populateMissingMetadata(self: *MachO) !void { 0; const alignment: u16 = 3; // 2^3 = @sizeOf(u64) self.la_symbol_ptr_section_index = try self.initSection( - self.data_segment_cmd_index.?, + "__DATA", "__la_symbol_ptr", needed_size, alignment, @@ -4469,7 +4159,7 @@ fn populateMissingMetadata(self: *MachO) !void { 0; const alignment: u16 = 3; // 2^3 = @sizeOf(u64) self.data_section_index = try self.initSection( - self.data_segment_cmd_index.?, + "__DATA", "__data", needed_size, alignment, @@ -4701,12 +4391,13 @@ const InitSectionOpts = struct { fn initSection( self: *MachO, - segment_id: u8, + segname: []const u8, sectname: []const u8, size: u64, alignment: u32, opts: InitSectionOpts, ) !u8 { + const segment_id = self.getSegmentByName(segname).?; const seg = &self.segments.items[segment_id]; const index = try self.insertSection(segment_id, .{ .sectname = makeStaticString(sectname), @@ -4779,42 +4470,13 @@ fn insertSection(self: *MachO, segment_index: u8, header: macho.section_64) !u8 header.sectName(), insertion_index, }); - // TODO slim it down for (&[_]*?u8{ - // __TEXT &self.text_section_index, 
&self.stubs_section_index, &self.stub_helper_section_index, - &self.gcc_except_tab_section_index, - &self.cstring_section_index, - &self.ustring_section_index, - &self.text_const_section_index, - &self.objc_methlist_section_index, - &self.objc_methname_section_index, - &self.objc_methtype_section_index, - &self.objc_classname_section_index, - &self.eh_frame_section_index, - // __DATA_CONST &self.got_section_index, - &self.mod_init_func_section_index, - &self.mod_term_func_section_index, - &self.data_const_section_index, - &self.objc_cfstring_section_index, - &self.objc_classlist_section_index, - &self.objc_imageinfo_section_index, - // __DATA - &self.rustc_section_index, &self.la_symbol_ptr_section_index, - &self.objc_const_section_index, - &self.objc_selrefs_section_index, - &self.objc_classrefs_section_index, - &self.objc_data_section_index, &self.data_section_index, - &self.tlv_section_index, - &self.tlv_ptrs_section_index, - &self.tlv_data_section_index, - &self.tlv_bss_section_index, - &self.bss_section_index, }) |maybe_index| { const index = maybe_index.* orelse continue; if (insertion_index <= index) maybe_index.* = index + 1; @@ -6017,7 +5679,7 @@ fn writeHeader(self: *MachO, ncmds: u32, sizeofcmds: u32) !void { else => unreachable, } - if (self.tlv_section_index) |_| { + if (self.getSectionByName("__DATA", "__thread_vars")) |_| { header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; } @@ -6042,6 +5704,20 @@ pub fn makeStaticString(bytes: []const u8) [16]u8 { return buf; } +fn getSegmentByName(self: MachO, segname: []const u8) ?u8 { + for (self.segments.items) |seg, i| { + if (mem.eql(u8, segname, seg.segName())) return @intCast(u8, i); + } else return null; +} + +pub fn getSectionByName(self: MachO, segname: []const u8, sectname: []const u8) ?u8 { + // TODO investigate caching with a hashmap + for (self.sections.items(.header)) |header, i| { + if (mem.eql(u8, header.segName(), segname) and mem.eql(u8, header.sectName(), sectname)) + return @intCast(u8, i); + } else return null; +} + fn getSectionIndexes(self: MachO, segment_index: u8) struct { start: u8, end: u8 } { var start: u8 = 0; const nsects = for (self.segments.items) |seg, i| { diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 85b3ca1c2b..4871276f3c 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -529,6 +529,7 @@ fn addStub(target: MachO.SymbolWithLoc, context: RelocContext) !void { if (context.macho_file.stubs_table.contains(target)) return; const stub_index = try context.macho_file.allocateStubEntry(target); + const stub_helper_atom = try context.macho_file.createStubHelperAtom(); const laptr_atom = try context.macho_file.createLazyPointerAtom(stub_helper_atom.sym_index, target); const stub_atom = try context.macho_file.createStubAtom(laptr_atom.sym_index); @@ -601,9 +602,9 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { // * wrt to __thread_data if defined, then // * wrt to __thread_bss const sect_id: u16 = sect_id: { - if (macho_file.tlv_data_section_index) |i| { + if (macho_file.getSectionByName("__DATA", "__thread_data")) |i| { break :sect_id i; - } else if (macho_file.tlv_bss_section_index) |i| { + } else if (macho_file.getSectionByName("__DATA", "__thread_bss")) |i| { break :sect_id i; } else { log.err("threadlocal variables present but no initializer sections found", .{}); From 1e710396d4489ce11e70820a95c33ec7463b1c12 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 2 Aug 2022 00:03:31 +0200 Subject: [PATCH 26/33] macho: fix linking in incremental context Fix 
incorrect writing of symtab and strtab in dSYM bundle in incremental context. Fix incorrectly navigating unnamed consts (freeing) in incremental context. This is currently hard-coded to require all consts to land in `__TEXT,__const`, which is wrong and needs a rewrite. --- src/link/MachO.zig | 57 +++++++++++++++--------------- src/link/MachO/DebugSymbols.zig | 62 +++++++++++++++++++++------------ 2 files changed, 69 insertions(+), 50 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 205fbcd6bf..9d3ca34b9e 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -511,15 +511,14 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try self.createMhExecuteHeaderSymbol(); try self.resolveDyldStubBinder(); + try self.createDyldPrivateAtom(); + try self.createStubHelperPreambleAtom(); try self.resolveSymbolsInDylibs(); if (self.unresolved.count() > 0) { return error.UndefinedSymbolReference; } - try self.createDyldPrivateAtom(); - try self.createStubHelperPreambleAtom(); - try self.allocateSpecialSymbols(); if (build_options.enable_logging) { @@ -589,7 +588,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No } else null; var headers_buf = std.ArrayList(u8).init(arena); - try self.writeSegmentHeaders(0, self.segments.items.len, &ncmds, headers_buf.writer()); + try self.writeSegmentHeaders(&ncmds, headers_buf.writer()); try self.base.file.?.pwriteAll(headers_buf.items, @sizeOf(macho.mach_header_64)); try self.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len); @@ -1203,7 +1202,7 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) } else null; var headers_buf = std.ArrayList(u8).init(arena); - try self.writeSegmentHeaders(0, self.segments.items.len, &ncmds, headers_buf.writer()); + try self.writeSegmentHeaders(&ncmds, headers_buf.writer()); try self.base.file.?.pwriteAll(headers_buf.items, @sizeOf(macho.mach_header_64)); try self.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len); @@ -3863,7 +3862,9 @@ pub fn deleteExport(self: *MachO, exp: Export) void { fn freeUnnamedConsts(self: *MachO, decl_index: Module.Decl.Index) void { const unnamed_consts = self.unnamed_const_atoms.getPtr(decl_index) orelse return; for (unnamed_consts.items) |atom| { - const sect_id = atom.getSymbol(self).n_sect; + // TODO + // const sect_id = atom.getSymbol(self).n_sect; + const sect_id = self.getSectionByName("__TEXT", "__const").?; self.freeAtom(atom, sect_id, true); self.locals_free_list.append(self.base.allocator, atom.sym_index) catch {}; self.locals.items[atom.sym_index].n_type = 0; @@ -4402,8 +4403,6 @@ fn initSection( const index = try self.insertSection(segment_id, .{ .sectname = makeStaticString(sectname), .segname = seg.segname, - .size = if (self.mode == .incremental) @intCast(u32, size) else 0, - .@"align" = alignment, .flags = opts.flags, .reserved1 = opts.reserved1, .reserved2 = opts.reserved2, @@ -4413,6 +4412,9 @@ fn initSection( if (self.mode == .incremental) { const header = &self.sections.items(.header)[index]; + header.size = size; + header.@"align" = alignment; + const prev_end_off = if (index > 0) blk: { const prev_section = self.sections.get(index - 1); if (prev_section.segment_index == segment_id) { @@ -4421,15 +4423,25 @@ fn initSection( } else break :blk seg.fileoff; } else 0; const alignment_pow_2 = try math.powi(u32, 2, alignment); - const padding: u64 = if (index == 0) try 
self.calcMinHeaderPad() else 0; + // TODO better prealloc for __text section + // const padding: u64 = if (index == 0) try self.calcMinHeaderPad() else 0; + const padding: u64 = if (index == 0) 0x1000 else 0; const off = mem.alignForwardGeneric(u64, padding + prev_end_off, alignment_pow_2); - log.debug("allocating {s},{s} section at 0x{x}", .{ header.segName(), header.sectName(), off }); - - header.addr = seg.vmaddr + off - seg.fileoff; if (!header.isZerofill()) { header.offset = @intCast(u32, off); } + header.addr = seg.vmaddr + off - seg.fileoff; + + // TODO this will break if we are inserting section that is not the last section + // in a segment. + const max_size = self.allocatedSize(segment_id, off); + + if (size > max_size) { + try self.growSection(index, @intCast(u32, size)); + } + + log.debug("allocating {s},{s} section at 0x{x}", .{ header.segName(), header.sectName(), off }); self.updateSectionOrdinals(index + 1); } @@ -4494,7 +4506,7 @@ fn updateSectionOrdinals(self: *MachO, start: u8) void { const slice = self.sections.slice(); for (slice.items(.last_atom)[start..]) |last_atom| { - var atom = last_atom.?; + var atom = last_atom orelse continue; while (true) { const sym = atom.getSymbolPtr(self); @@ -4536,17 +4548,6 @@ fn shiftLocalsByOffset(self: *MachO, sect_id: u8, offset: i64) !void { } } -fn findFreeSpace(self: MachO, segment_id: u8, alignment: u64, start: ?u64) u64 { - const seg = self.segments.items[segment_id]; - const indexes = self.getSectionIndexes(segment_id); - if (indexes.end - indexes.start == 0) { - return if (start) |v| v else seg.fileoff; - } - const last_sect = self.sections.items(.header)[indexes.end - 1]; - const final_off = last_sect.offset + padToIdeal(last_sect.size); - return mem.alignForwardGeneric(u64, final_off, alignment); -} - fn growSegment(self: *MachO, segment_index: u8, new_size: u64) !void { const segment = &self.segments.items[segment_index]; const new_segment_size = mem.alignForwardGeneric(u64, new_size, self.page_size); @@ -4885,14 +4886,14 @@ fn getSegmentAllocBase(self: MachO, indices: []const ?u8) struct { vmaddr: u64, return .{ .vmaddr = 0, .fileoff = 0 }; } -pub fn writeSegmentHeaders(self: *MachO, start: usize, end: usize, ncmds: *u32, writer: anytype) !void { - for (self.segments.items[start..end]) |seg, i| { +fn writeSegmentHeaders(self: *MachO, ncmds: *u32, writer: anytype) !void { + for (self.segments.items) |seg, i| { if (seg.nsects == 0 and (mem.eql(u8, seg.segName(), "__DATA_CONST") or mem.eql(u8, seg.segName(), "__DATA"))) continue; try writer.writeStruct(seg); - const indexes = self.getSectionIndexes(@intCast(u8, start + i)); + const indexes = self.getSectionIndexes(@intCast(u8, i)); for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { try writer.writeStruct(header); } @@ -5718,7 +5719,7 @@ pub fn getSectionByName(self: MachO, segname: []const u8, sectname: []const u8) } else return null; } -fn getSectionIndexes(self: MachO, segment_index: u8) struct { start: u8, end: u8 } { +pub fn getSectionIndexes(self: MachO, segment_index: u8) struct { start: u8, end: u8 } { var start: u8 = 0; const nsects = for (self.segments.items) |seg, i| { if (i == segment_index) break @intCast(u8, seg.nsects); diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index f191d43f98..65d3319293 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -63,6 +63,10 @@ pub const Reloc = struct { pub fn populateMissingMetadata(self: *DebugSymbols, allocator: Allocator) !void { if 
(self.linkedit_segment_cmd_index == null) { self.linkedit_segment_cmd_index = @intCast(u8, self.segments.items.len); + log.debug("found __LINKEDIT segment free space 0x{x} to 0x{x}", .{ + self.base.page_size, + self.base.page_size * 2, + }); // TODO this needs reworking try self.segments.append(allocator, .{ .segname = makeStaticString("__LINKEDIT"), @@ -79,7 +83,7 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: Allocator) !void if (self.dwarf_segment_cmd_index == null) { self.dwarf_segment_cmd_index = @intCast(u8, self.segments.items.len); - const linkedit = self.segments.items[self.base.linkedit_segment_cmd_index.?]; + const linkedit = self.segments.items[self.linkedit_segment_cmd_index.?]; const ideal_size: u16 = 200 + 128 + 160 + 250; const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.base.page_size); const fileoff = linkedit.fileoff + linkedit.filesize; @@ -290,20 +294,7 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti var headers_buf = std.ArrayList(u8).init(allocator); defer headers_buf.deinit(); - try self.base.writeSegmentHeaders( - 0, - self.base.linkedit_segment_cmd_index.?, - &ncmds, - headers_buf.writer(), - ); - - for (self.segments.items) |seg| { - try headers_buf.writer().writeStruct(seg); - ncmds += 2; - } - for (self.sections.items) |header| { - try headers_buf.writer().writeStruct(header); - } + try self.writeSegmentHeaders(&ncmds, headers_buf.writer()); try self.file.pwriteAll(headers_buf.items, @sizeOf(macho.mach_header_64)); try self.file.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len); @@ -349,6 +340,7 @@ fn updateDwarfSegment(self: *DebugSymbols) void { var max_offset: u64 = 0; for (self.sections.items) |*sect| { + sect.addr += diff; log.debug(" {s},{s} - 0x{x}-0x{x} - 0x{x}-0x{x}", .{ sect.segName(), sect.sectName(), @@ -360,7 +352,6 @@ fn updateDwarfSegment(self: *DebugSymbols) void { if (sect.offset + sect.size > max_offset) { max_offset = sect.offset + sect.size; } - sect.addr += diff; } const file_size = max_offset - dwarf_segment.fileoff; @@ -372,6 +363,37 @@ fn updateDwarfSegment(self: *DebugSymbols) void { } } +fn writeSegmentHeaders(self: *DebugSymbols, ncmds: *u32, writer: anytype) !void { + // Write segment/section headers from the binary file first. + const end = self.base.linkedit_segment_cmd_index.?; + for (self.base.segments.items[0..end]) |seg, i| { + if (seg.nsects == 0 and + (mem.eql(u8, seg.segName(), "__DATA_CONST") or + mem.eql(u8, seg.segName(), "__DATA"))) continue; + var out_seg = seg; + out_seg.fileoff = 0; + out_seg.filesize = 0; + try writer.writeStruct(out_seg); + + const indexes = self.base.getSectionIndexes(@intCast(u8, i)); + for (self.base.sections.items(.header)[indexes.start..indexes.end]) |header| { + var out_header = header; + out_header.offset = 0; + try writer.writeStruct(out_header); + } + + ncmds.* += 1; + } + // Next, commit DSYM's __LINKEDIT and __DWARF segments headers. 
+ for (self.segments.items) |seg| { + try writer.writeStruct(seg); + ncmds.* += 1; + } + for (self.sections.items) |header| { + try writer.writeStruct(header); + } +} + fn writeHeader(self: *DebugSymbols, ncmds: u32, sizeofcmds: u32) !void { var header: macho.mach_header_64 = .{}; header.filetype = macho.MH_DSYM; @@ -469,11 +491,7 @@ fn writeSymtab(self: *DebugSymbols, lc: *macho.symtab_command) !void { const nsyms = nlocals + nexports; const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; - const offset = mem.alignForwardGeneric( - u64, - seg.fileoff + seg.filesize, - @alignOf(macho.nlist_64), - ); + const offset = mem.alignForwardGeneric(u64, seg.fileoff, @alignOf(macho.nlist_64)); const needed_size = nsyms * @sizeOf(macho.nlist_64); if (needed_size > seg.filesize) { @@ -535,7 +553,7 @@ fn writeStrtab(self: *DebugSymbols, lc: *macho.symtab_command) !void { const needed_size = mem.alignForwardGeneric(u64, self.strtab.buffer.items.len, @alignOf(u64)); lc.strsize = @intCast(u32, needed_size); - if (offset + needed_size > seg.filesize) { + if (symtab_size + needed_size > seg.filesize) { const aligned_size = mem.alignForwardGeneric(u64, offset + needed_size, self.base.page_size); const diff = @intCast(u32, aligned_size - seg.filesize); const dwarf_seg = &self.segments.items[self.dwarf_segment_cmd_index.?]; From 421d3e8d2822e979c1a2d5e7aaa5859499bc2146 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 2 Aug 2022 20:38:00 +0200 Subject: [PATCH 27/33] macho: add missing align cast in LoadCommandIterator --- lib/std/macho.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/macho.zig b/lib/std/macho.zig index aa43229a76..1955a00334 100644 --- a/lib/std/macho.zig +++ b/lib/std/macho.zig @@ -1901,7 +1901,7 @@ pub const LoadCommandIterator = struct { .data = it.buffer[0..hdr.cmdsize], }; - it.buffer = it.buffer[hdr.cmdsize..]; + it.buffer = @alignCast(@alignOf(u64), it.buffer[hdr.cmdsize..]); it.index += 1; return cmd; From 90e326827062fc7899d02516cdaffa7da8366077 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 2 Aug 2022 22:15:07 +0200 Subject: [PATCH 28/33] macho: do not preempt segment headers; do it when committing to file This way, tracking segment-to-section mapping becomes a lot easier since it's effectively just a start index plus the number of sections defined within the segment. If a section becomes empty, however, care needs to be taken to remove the header upon committing to the final binary. --- src/link/MachO.zig | 23 ++++++++++++++++++----- src/link/MachO/DebugSymbols.zig | 22 +++++++++++++++++----- src/link/MachO/dead_strip.zig | 3 --- 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 9d3ca34b9e..352e3bedf8 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -4888,13 +4888,26 @@ fn getSegmentAllocBase(self: MachO, indices: []const ?u8) struct { vmaddr: u64, fn writeSegmentHeaders(self: *MachO, ncmds: *u32, writer: anytype) !void { for (self.segments.items) |seg, i| { - if (seg.nsects == 0 and - (mem.eql(u8, seg.segName(), "__DATA_CONST") or - mem.eql(u8, seg.segName(), "__DATA"))) continue; - try writer.writeStruct(seg); - const indexes = self.getSectionIndexes(@intCast(u8, i)); + const indexes = self.getSectionIndexes(@intCast(u8, i)); + var out_seg = seg; + out_seg.cmdsize = @sizeOf(macho.segment_command_64); + out_seg.nsects = 0; + + // Update section headers count; any section with size of 0 is excluded + // since it doesn't have any data in the final binary file.
for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { + if (header.size == 0) continue; + out_seg.cmdsize += @sizeOf(macho.section_64); + out_seg.nsects += 1; + } + + if (out_seg.nsects == 0 and + (mem.eql(u8, out_seg.segName(), "__DATA_CONST") or + mem.eql(u8, out_seg.segName(), "__DATA"))) continue; + + try writer.writeStruct(out_seg); + for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { + if (header.size == 0) continue; try writer.writeStruct(header); } diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index 65d3319293..3bfe334302 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -367,16 +367,28 @@ fn writeSegmentHeaders(self: *DebugSymbols, ncmds: *u32, writer: anytype) !void // Write segment/section headers from the binary file first. const end = self.base.linkedit_segment_cmd_index.?; for (self.base.segments.items[0..end]) |seg, i| { - if (seg.nsects == 0 and - (mem.eql(u8, seg.segName(), "__DATA_CONST") or - mem.eql(u8, seg.segName(), "__DATA"))) continue; + const indexes = self.base.getSectionIndexes(@intCast(u8, i)); var out_seg = seg; out_seg.fileoff = 0; out_seg.filesize = 0; - try writer.writeStruct(out_seg); + out_seg.cmdsize = @sizeOf(macho.segment_command_64); + out_seg.nsects = 0; - const indexes = self.base.getSectionIndexes(@intCast(u8, i)); + // Update section headers count; any section with size of 0 is excluded + // since it doesn't have any data in the final binary file. for (self.base.sections.items(.header)[indexes.start..indexes.end]) |header| { + if (header.size == 0) continue; + out_seg.cmdsize += @sizeOf(macho.section_64); + out_seg.nsects += 1; + } + + if (out_seg.nsects == 0 and + (mem.eql(u8, out_seg.segName(), "__DATA_CONST") or + mem.eql(u8, out_seg.segName(), "__DATA"))) continue; + + try writer.writeStruct(out_seg); + for (self.base.sections.items(.header)[indexes.start..indexes.end]) |header| { + if (header.size == 0) continue; var out_header = header; out_header.offset = 0; try writer.writeStruct(out_header); diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index bf65b96049..eb2be6e5fe 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -43,9 +43,6 @@ fn removeAtomFromSection(atom: *Atom, match: u8, macho_file: *MachO) void { // The section will be GCed in the next step. section.last_atom = null; section.header.size = 0; - const segment = &macho_file.segments.items[section.segment_index]; - segment.cmdsize -= @sizeOf(macho.section_64); - segment.nsects -= 1; } } From 2c8fc3b5979d6ca8befbf5de48a661a6fadbe819 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 2 Aug 2022 22:58:10 +0200 Subject: [PATCH 29/33] macho: add missing u64 to usize casts Fixes 32bit builds. 
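The fix pattern here is to route every 64-bit file offset or size through `std.math.cast` before it is used to index host memory, so that 32-bit hosts (where `usize` is 32 bits wide) report `error.Overflow` instead of failing to compile or silently truncating. A minimal sketch of the idea, not taken from the patch itself (the `sliceAt` helper and its parameter names are made up for illustration):

    const std = @import("std");
    const math = std.math;

    /// Returns the sub-slice of `buffer` described by 64-bit file offsets.
    fn sliceAt(buffer: []const u8, offset: u64, size: u64) error{Overflow}![]const u8 {
        // On a 32-bit host, usize is only 32 bits wide, so a u64 value may not fit.
        const start = math.cast(usize, offset) orelse return error.Overflow;
        const len = math.cast(usize, size) orelse return error.Overflow;
        return buffer[start .. start + len];
    }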
--- src/link/MachO.zig | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 352e3bedf8..a955200a77 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1487,7 +1487,8 @@ pub fn parseDylib( var file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; const reader = file.reader(); - const fat_offset = try fat.getLibraryOffset(reader, cpu_arch); + const fat_offset = math.cast(usize, try fat.getLibraryOffset(reader, cpu_arch)) orelse + return error.Overflow; try file.seekTo(fat_offset); file_size -= fat_offset; @@ -5091,7 +5092,7 @@ fn writeDyldInfoData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { const needed_size = export_off + export_size - rebase_off; link_seg.filesize = needed_size; - var buffer = try gpa.alloc(u8, needed_size); + var buffer = try gpa.alloc(u8, math.cast(usize, needed_size) orelse return error.Overflow); defer gpa.free(buffer); mem.set(u8, buffer, 0); @@ -5115,7 +5116,9 @@ fn writeDyldInfoData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { }); try self.base.file.?.pwriteAll(buffer, rebase_off); - try self.populateLazyBindOffsetsInStubHelper(buffer[lazy_bind_off - rebase_off ..][0..lazy_bind_size]); + const start = math.cast(usize, lazy_bind_off - rebase_off) orelse return error.Overflow; + const end = start + (math.cast(usize, lazy_bind_size) orelse return error.Overflow); + try self.populateLazyBindOffsetsInStubHelper(buffer[start..end]); try lc_writer.writeStruct(macho.dyld_info_command{ .cmd = .DYLD_INFO_ONLY, From 007eb3bd714caee550722cae0e4e98a205ead341 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 3 Aug 2022 21:26:52 +0200 Subject: [PATCH 30/33] macho: fix some TODOs --- src/link/MachO.zig | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index a955200a77..db207af5f5 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -64,7 +64,10 @@ const SystemLib = struct { const Section = struct { header: macho.section_64, segment_index: u8, - last_atom: ?*Atom = null, // TODO temporary hack; we really should shrink section to 0 + + // TODO is null here necessary, or can we do away with tracking via section + // size in incremental context? + last_atom: ?*Atom = null, /// A list of atoms that have surplus capacity. This list can have false /// positives, as functions grow and shrink over time, only sometimes being added @@ -4434,8 +4437,8 @@ fn initSection( } header.addr = seg.vmaddr + off - seg.fileoff; - // TODO this will break if we are inserting section that is not the last section - // in a segment. + // TODO Will this break if we are inserting section that is not the last section + // in a segment? 
const max_size = self.allocatedSize(segment_id, off); if (size > max_size) { try self.growSection(index, @intCast(u32, size)); } From fb0b9f05b3dbf394f92fac7c36189f054fc68b41 Mon Sep 17 00:00:00 2001 From: Loris Cro Date: Thu, 4 Aug 2022 20:08:11 +0200 Subject: [PATCH 31/33] new init-exe template - removed an unnecessary (and confusing) `anyerror` from the function signature of `main` - replaced the call to std.log with two prints: one to stderr and one to stdout - replaced the test code with a better example --- lib/init-exe/src/main.zig | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/lib/init-exe/src/main.zig b/lib/init-exe/src/main.zig index c2f93f4771..433d311599 100644 --- a/lib/init-exe/src/main.zig +++ b/lib/init-exe/src/main.zig @@ -1,11 +1,22 @@ const std = @import("std"); -pub fn main() anyerror!void { - // Note that info level log messages are by default printed only in Debug - // and ReleaseSafe build modes. - std.log.info("All your codebase are belong to us.", .{}); +pub fn main() !void { + // Prints to stderr + std.debug.print("All your {s} are belong to us.\n", .{"codebase"}); + + // Prints to stdout + const unbuffered_out = std.io.getStdOut().writer(); + const out = std.io.bufferedWriter(unbuffered_out).writer(); + try out.print("Run `zig build test` to run the tests.\n", .{}); + + // Stdout is for the actual output of your application, for example if you + // are implementing gzip, then only the compressed bytes should be sent to + // stdout, not any debugging messages! } -test "basic test" { - try std.testing.expectEqual(10, 3 + 7); +test "simple test" { + var list = std.ArrayList(i32).init(std.testing.allocator); + defer list.deinit(); // try commenting this out and see if zig detects the memory leak! + try list.append(42); + try std.testing.expectEqual(list.pop(), 42); } From cbac7a019473055aaf6b1a87b4b57b9872ed54b4 Mon Sep 17 00:00:00 2001 From: Loris Cro Date: Thu, 4 Aug 2022 20:27:46 +0200 Subject: [PATCH 32/33] init-exe template: small improvements --- lib/init-exe/src/main.zig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/init-exe/src/main.zig b/lib/init-exe/src/main.zig index 433d311599..45d075af88 100644 --- a/lib/init-exe/src/main.zig +++ b/lib/init-exe/src/main.zig @@ -1,7 +1,7 @@ const std = @import("std"); pub fn main() !void { - // Prints to stderr + // Prints to stderr (it's a shortcut based on `std.io.getStdErr()`) std.debug.print("All your {s} are belong to us.\n", .{"codebase"}); // Prints to stdout @@ -18,5 +18,5 @@ test "simple test" { var list = std.ArrayList(i32).init(std.testing.allocator); defer list.deinit(); // try commenting this out and see if zig detects the memory leak!
try list.append(42); - try std.testing.expectEqual(list.pop(), 42); + try std.testing.expectEqual(@as(i32, 42), list.pop()); } From 616f65df750f53e6334cc5ed2c8f4b5668d573f2 Mon Sep 17 00:00:00 2001 From: Loris Cro Date: Thu, 4 Aug 2022 21:12:42 +0200 Subject: [PATCH 33/33] init-exe template: add flushing to the buffered writer --- lib/init-exe/src/main.zig | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/lib/init-exe/src/main.zig b/lib/init-exe/src/main.zig index 45d075af88..c8a3f67dd0 100644 --- a/lib/init-exe/src/main.zig +++ b/lib/init-exe/src/main.zig @@ -4,14 +4,16 @@ pub fn main() !void { // Prints to stderr (it's a shortcut based on `std.io.getStdErr()`) std.debug.print("All your {s} are belong to us.\n", .{"codebase"}); - // Prints to stdout - const unbuffered_out = std.io.getStdOut().writer(); - const out = std.io.bufferedWriter(unbuffered_out).writer(); - try out.print("Run `zig build test` to run the tests.\n", .{}); - - // Stdout is for the actual output of your application, for example if you + // stdout is for the actual output of your application, for example if you // are implementing gzip, then only the compressed bytes should be sent to - // stdout, not any debugging messages! + // stdout, not any debugging messages. + const stdout_file = std.io.getStdOut().writer(); + var bw = std.io.bufferedWriter(stdout_file); + const stdout = bw.writer(); + + try stdout.print("Run `zig build test` to run the tests.\n", .{}); + + try bw.flush(); // don't forget to flush! } test "simple test" {