From f26d5ee7ea97c8fd6e5b2655f845be7e4293930e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 31 Jul 2022 18:19:17 +0200 Subject: [PATCH 1/8] macho: sync with zld gitrev a2c32e972f8c5adfcda8ed2d99379ae868f59c24 https://github.com/kubkon/zld/commit/a2c32e972f8c5adfcda8ed2d99379ae868f59c24 --- lib/std/build/CheckObjectStep.zig | 89 +- lib/std/macho.zig | 479 +--- src/link/Dwarf.zig | 44 +- src/link/MachO.zig | 3463 +++++++++++++---------------- src/link/MachO/Archive.zig | 58 +- src/link/MachO/Atom.zig | 35 +- src/link/MachO/CodeSignature.zig | 12 +- src/link/MachO/DebugSymbols.zig | 506 ++--- src/link/MachO/Dylib.zig | 159 +- src/link/MachO/Object.zig | 308 ++- src/link/MachO/dead_strip.zig | 48 +- src/link/MachO/fat.zig | 4 +- 12 files changed, 2128 insertions(+), 3077 deletions(-) diff --git a/lib/std/build/CheckObjectStep.zig b/lib/std/build/CheckObjectStep.zig index 0525bbf034..cc0982ec08 100644 --- a/lib/std/build/CheckObjectStep.zig +++ b/lib/std/build/CheckObjectStep.zig @@ -283,7 +283,14 @@ fn make(step: *Step) !void { const gpa = self.builder.allocator; const src_path = self.source.getPath(self.builder); - const contents = try fs.cwd().readFileAlloc(gpa, src_path, self.max_bytes); + const contents = try fs.cwd().readFileAllocOptions( + gpa, + src_path, + self.max_bytes, + null, + @alignOf(u64), + null, + ); const output = switch (self.obj_format) { .macho => try MachODumper.parseAndDump(contents, .{ @@ -370,9 +377,10 @@ const Opts = struct { }; const MachODumper = struct { + const LoadCommandIterator = macho.LoadCommandIterator; const symtab_label = "symtab"; - fn parseAndDump(bytes: []const u8, opts: Opts) ![]const u8 { + fn parseAndDump(bytes: []align(@alignOf(u64)) const u8, opts: Opts) ![]const u8 { const gpa = opts.gpa orelse unreachable; // MachO dumper requires an allocator var stream = std.io.fixedBufferStream(bytes); const reader = stream.reader(); @@ -385,55 +393,54 @@ const MachODumper = struct { var output = std.ArrayList(u8).init(gpa); 
const writer = output.writer(); - var load_commands = std.ArrayList(macho.LoadCommand).init(gpa); - try load_commands.ensureTotalCapacity(hdr.ncmds); - - var sections = std.ArrayList(struct { seg: u16, sect: u16 }).init(gpa); - var imports = std.ArrayList(u16).init(gpa); - - var symtab_cmd: ?u16 = null; - var i: u16 = 0; - while (i < hdr.ncmds) : (i += 1) { - var cmd = try macho.LoadCommand.read(gpa, reader); - load_commands.appendAssumeCapacity(cmd); + var symtab: []const macho.nlist_64 = undefined; + var strtab: []const u8 = undefined; + var sections = std.ArrayList(macho.section_64).init(gpa); + var imports = std.ArrayList([]const u8).init(gpa); + var it = LoadCommandIterator{ + .ncmds = hdr.ncmds, + .buffer = bytes[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], + }; + var i: usize = 0; + while (it.next()) |cmd| { switch (cmd.cmd()) { .SEGMENT_64 => { - const seg = cmd.segment; - for (seg.sections.items) |_, j| { - try sections.append(.{ .seg = i, .sect = @intCast(u16, j) }); + const seg = cmd.cast(macho.segment_command_64).?; + try sections.ensureUnusedCapacity(seg.nsects); + for (cmd.getSections()) |sect| { + sections.appendAssumeCapacity(sect); } }, - .SYMTAB => { - symtab_cmd = i; + .SYMTAB => if (opts.dump_symtab) { + const lc = cmd.cast(macho.symtab_command).?; + symtab = @ptrCast( + [*]const macho.nlist_64, + @alignCast(@alignOf(macho.nlist_64), &bytes[lc.symoff]), + )[0..lc.nsyms]; + strtab = bytes[lc.stroff..][0..lc.strsize]; }, .LOAD_DYLIB, .LOAD_WEAK_DYLIB, .REEXPORT_DYLIB, => { - try imports.append(i); + try imports.append(cmd.getDylibPathName()); }, else => {}, } try dumpLoadCommand(cmd, i, writer); try writer.writeByte('\n'); + + i += 1; } if (opts.dump_symtab) { - const cmd = load_commands.items[symtab_cmd.?].symtab; - try writer.writeAll(symtab_label ++ "\n"); - const strtab = bytes[cmd.stroff..][0..cmd.strsize]; - const raw_symtab = bytes[cmd.symoff..][0 .. 
cmd.nsyms * @sizeOf(macho.nlist_64)]; - const symtab = mem.bytesAsSlice(macho.nlist_64, raw_symtab); - for (symtab) |sym| { if (sym.stab()) continue; const sym_name = mem.sliceTo(@ptrCast([*:0]const u8, strtab.ptr + sym.n_strx), 0); if (sym.sect()) { - const map = sections.items[sym.n_sect - 1]; - const seg = load_commands.items[map.seg].segment; - const sect = seg.sections.items[map.sect]; + const sect = sections.items[sym.n_sect - 1]; try writer.print("{x} ({s},{s})", .{ sym.n_value, sect.segName(), @@ -455,9 +462,7 @@ const MachODumper = struct { break :blk "flat lookup"; unreachable; } - const import_id = imports.items[@bitCast(u16, ordinal) - 1]; - const import = load_commands.items[import_id].dylib; - const full_path = mem.sliceTo(import.data, 0); + const full_path = imports.items[@bitCast(u16, ordinal) - 1]; const basename = fs.path.basename(full_path); assert(basename.len > 0); const ext = mem.lastIndexOfScalar(u8, basename, '.') orelse basename.len; @@ -481,7 +486,7 @@ const MachODumper = struct { return output.toOwnedSlice(); } - fn dumpLoadCommand(lc: macho.LoadCommand, index: u16, writer: anytype) !void { + fn dumpLoadCommand(lc: macho.LoadCommandIterator.LoadCommand, index: usize, writer: anytype) !void { // print header first try writer.print( \\LC {d} @@ -491,8 +496,7 @@ const MachODumper = struct { switch (lc.cmd()) { .SEGMENT_64 => { - // TODO dump section headers - const seg = lc.segment.inner; + const seg = lc.cast(macho.segment_command_64).?; try writer.writeByte('\n'); try writer.print( \\segname {s} @@ -508,7 +512,7 @@ const MachODumper = struct { seg.filesize, }); - for (lc.segment.sections.items) |sect| { + for (lc.getSections()) |sect| { try writer.writeByte('\n'); try writer.print( \\sectname {s} @@ -531,7 +535,7 @@ const MachODumper = struct { .LOAD_WEAK_DYLIB, .REEXPORT_DYLIB, => { - const dylib = lc.dylib.inner.dylib; + const dylib = lc.cast(macho.dylib_command).?; try writer.writeByte('\n'); try writer.print( \\name {s} @@ -539,19 
+543,20 @@ const MachODumper = struct { \\current version {x} \\compatibility version {x} , .{ - mem.sliceTo(lc.dylib.data, 0), - dylib.timestamp, - dylib.current_version, - dylib.compatibility_version, + lc.getDylibPathName(), + dylib.dylib.timestamp, + dylib.dylib.current_version, + dylib.dylib.compatibility_version, }); }, .MAIN => { + const main = lc.cast(macho.entry_point_command).?; try writer.writeByte('\n'); try writer.print( \\entryoff {x} \\stacksize {x} - , .{ lc.main.entryoff, lc.main.stacksize }); + , .{ main.entryoff, main.stacksize }); }, .RPATH => { @@ -559,7 +564,7 @@ const MachODumper = struct { try writer.print( \\path {s} , .{ - mem.sliceTo(lc.rpath.data, 0), + lc.getRpathPathName(), }); }, diff --git a/lib/std/macho.zig b/lib/std/macho.zig index cd4bfa37fb..9334f79dc5 100644 --- a/lib/std/macho.zig +++ b/lib/std/macho.zig @@ -1835,429 +1835,70 @@ pub const data_in_code_entry = extern struct { kind: u16, }; -/// A Zig wrapper for all known MachO load commands. -/// Provides interface to read and write the load command data to a buffer. 
-pub const LoadCommand = union(enum) { - segment: SegmentCommand, - dyld_info_only: dyld_info_command, - symtab: symtab_command, - dysymtab: dysymtab_command, - dylinker: GenericCommandWithData(dylinker_command), - dylib: GenericCommandWithData(dylib_command), - main: entry_point_command, - version_min: version_min_command, - source_version: source_version_command, - build_version: GenericCommandWithData(build_version_command), - uuid: uuid_command, - linkedit_data: linkedit_data_command, - rpath: GenericCommandWithData(rpath_command), - unknown: GenericCommandWithData(load_command), +pub const LoadCommandIterator = struct { + ncmds: usize, + buffer: []align(@alignOf(u64)) const u8, + index: usize = 0, - pub fn read(allocator: Allocator, reader: anytype) !LoadCommand { - const header = try reader.readStruct(load_command); - var buffer = try allocator.alloc(u8, header.cmdsize); - defer allocator.free(buffer); - mem.copy(u8, buffer, mem.asBytes(&header)); - try reader.readNoEof(buffer[@sizeOf(load_command)..]); - var stream = io.fixedBufferStream(buffer); + pub const LoadCommand = struct { + hdr: load_command, + data: []const u8, - return switch (header.cmd) { - .SEGMENT_64 => LoadCommand{ - .segment = try SegmentCommand.read(allocator, stream.reader()), - }, - .DYLD_INFO, .DYLD_INFO_ONLY => LoadCommand{ - .dyld_info_only = try stream.reader().readStruct(dyld_info_command), - }, - .SYMTAB => LoadCommand{ - .symtab = try stream.reader().readStruct(symtab_command), - }, - .DYSYMTAB => LoadCommand{ - .dysymtab = try stream.reader().readStruct(dysymtab_command), - }, - .ID_DYLINKER, .LOAD_DYLINKER, .DYLD_ENVIRONMENT => LoadCommand{ - .dylinker = try GenericCommandWithData(dylinker_command).read(allocator, stream.reader()), - }, - .ID_DYLIB, .LOAD_WEAK_DYLIB, .LOAD_DYLIB, .REEXPORT_DYLIB => LoadCommand{ - .dylib = try GenericCommandWithData(dylib_command).read(allocator, stream.reader()), - }, - .MAIN => LoadCommand{ - .main = try 
stream.reader().readStruct(entry_point_command), - }, - .VERSION_MIN_MACOSX, .VERSION_MIN_IPHONEOS, .VERSION_MIN_WATCHOS, .VERSION_MIN_TVOS => LoadCommand{ - .version_min = try stream.reader().readStruct(version_min_command), - }, - .SOURCE_VERSION => LoadCommand{ - .source_version = try stream.reader().readStruct(source_version_command), - }, - .BUILD_VERSION => LoadCommand{ - .build_version = try GenericCommandWithData(build_version_command).read(allocator, stream.reader()), - }, - .UUID => LoadCommand{ - .uuid = try stream.reader().readStruct(uuid_command), - }, - .FUNCTION_STARTS, .DATA_IN_CODE, .CODE_SIGNATURE => LoadCommand{ - .linkedit_data = try stream.reader().readStruct(linkedit_data_command), - }, - .RPATH => LoadCommand{ - .rpath = try GenericCommandWithData(rpath_command).read(allocator, stream.reader()), - }, - else => LoadCommand{ - .unknown = try GenericCommandWithData(load_command).read(allocator, stream.reader()), - }, + pub fn cmd(lc: LoadCommand) LC { + return lc.hdr.cmd; + } + + pub fn cmdsize(lc: LoadCommand) u32 { + return lc.hdr.cmdsize; + } + + pub fn cast(lc: LoadCommand, comptime Cmd: type) ?Cmd { + if (lc.data.len < @sizeOf(Cmd)) return null; + return @ptrCast(*const Cmd, @alignCast(@alignOf(Cmd), &lc.data[0])).*; + } + + /// Asserts LoadCommand is of type segment_command_64. + pub fn getSections(lc: LoadCommand) []const section_64 { + const segment_lc = lc.cast(segment_command_64).?; + if (segment_lc.nsects == 0) return &[0]section_64{}; + const data = lc.data[@sizeOf(segment_command_64)..]; + const sections = @ptrCast( + [*]const section_64, + @alignCast(@alignOf(section_64), &data[0]), + )[0..segment_lc.nsects]; + return sections; + } + + /// Asserts LoadCommand is of type dylib_command. + pub fn getDylibPathName(lc: LoadCommand) []const u8 { + const dylib_lc = lc.cast(dylib_command).?; + const data = lc.data[dylib_lc.dylib.name..]; + return mem.sliceTo(data, 0); + } + + /// Asserts LoadCommand is of type rpath_command. 
+ pub fn getRpathPathName(lc: LoadCommand) []const u8 { + const rpath_lc = lc.cast(rpath_command).?; + const data = lc.data[rpath_lc.path..]; + return mem.sliceTo(data, 0); + } + }; + + pub fn next(it: *LoadCommandIterator) ?LoadCommand { + if (it.index >= it.ncmds) return null; + + const hdr = @ptrCast( + *const load_command, + @alignCast(@alignOf(load_command), &it.buffer[0]), + ).*; + const cmd = LoadCommand{ + .hdr = hdr, + .data = it.buffer[0..hdr.cmdsize], }; - } - pub fn write(self: LoadCommand, writer: anytype) !void { - return switch (self) { - .dyld_info_only => |x| writeStruct(x, writer), - .symtab => |x| writeStruct(x, writer), - .dysymtab => |x| writeStruct(x, writer), - .main => |x| writeStruct(x, writer), - .version_min => |x| writeStruct(x, writer), - .source_version => |x| writeStruct(x, writer), - .uuid => |x| writeStruct(x, writer), - .linkedit_data => |x| writeStruct(x, writer), - .segment => |x| x.write(writer), - .dylinker => |x| x.write(writer), - .dylib => |x| x.write(writer), - .rpath => |x| x.write(writer), - .build_version => |x| x.write(writer), - .unknown => |x| x.write(writer), - }; - } + it.buffer = it.buffer[hdr.cmdsize..]; + it.index += 1; - pub fn cmd(self: LoadCommand) LC { - return switch (self) { - .dyld_info_only => |x| x.cmd, - .symtab => |x| x.cmd, - .dysymtab => |x| x.cmd, - .main => |x| x.cmd, - .version_min => |x| x.cmd, - .source_version => |x| x.cmd, - .uuid => |x| x.cmd, - .linkedit_data => |x| x.cmd, - .segment => |x| x.inner.cmd, - .dylinker => |x| x.inner.cmd, - .dylib => |x| x.inner.cmd, - .rpath => |x| x.inner.cmd, - .build_version => |x| x.inner.cmd, - .unknown => |x| x.inner.cmd, - }; - } - - pub fn cmdsize(self: LoadCommand) u32 { - return switch (self) { - .dyld_info_only => |x| x.cmdsize, - .symtab => |x| x.cmdsize, - .dysymtab => |x| x.cmdsize, - .main => |x| x.cmdsize, - .version_min => |x| x.cmdsize, - .source_version => |x| x.cmdsize, - .linkedit_data => |x| x.cmdsize, - .uuid => |x| x.cmdsize, - .segment 
=> |x| x.inner.cmdsize, - .dylinker => |x| x.inner.cmdsize, - .dylib => |x| x.inner.cmdsize, - .rpath => |x| x.inner.cmdsize, - .build_version => |x| x.inner.cmdsize, - .unknown => |x| x.inner.cmdsize, - }; - } - - pub fn deinit(self: *LoadCommand, allocator: Allocator) void { - return switch (self.*) { - .segment => |*x| x.deinit(allocator), - .dylinker => |*x| x.deinit(allocator), - .dylib => |*x| x.deinit(allocator), - .rpath => |*x| x.deinit(allocator), - .build_version => |*x| x.deinit(allocator), - .unknown => |*x| x.deinit(allocator), - else => {}, - }; - } - - fn writeStruct(command: anytype, writer: anytype) !void { - return writer.writeAll(mem.asBytes(&command)); - } - - pub fn eql(self: LoadCommand, other: LoadCommand) bool { - if (@as(meta.Tag(LoadCommand), self) != @as(meta.Tag(LoadCommand), other)) return false; - return switch (self) { - .dyld_info_only => |x| meta.eql(x, other.dyld_info_only), - .symtab => |x| meta.eql(x, other.symtab), - .dysymtab => |x| meta.eql(x, other.dysymtab), - .main => |x| meta.eql(x, other.main), - .version_min => |x| meta.eql(x, other.version_min), - .source_version => |x| meta.eql(x, other.source_version), - .build_version => |x| x.eql(other.build_version), - .uuid => |x| meta.eql(x, other.uuid), - .linkedit_data => |x| meta.eql(x, other.linkedit_data), - .segment => |x| x.eql(other.segment), - .dylinker => |x| x.eql(other.dylinker), - .dylib => |x| x.eql(other.dylib), - .rpath => |x| x.eql(other.rpath), - .unknown => |x| x.eql(other.unknown), - }; + return cmd; } }; - -/// A Zig wrapper for segment_command_64. -/// Encloses the extern struct together with a list of sections for this segment. 
-pub const SegmentCommand = struct { - inner: segment_command_64, - sections: std.ArrayListUnmanaged(section_64) = .{}, - - pub fn read(allocator: Allocator, reader: anytype) !SegmentCommand { - const inner = try reader.readStruct(segment_command_64); - var segment = SegmentCommand{ - .inner = inner, - }; - try segment.sections.ensureTotalCapacityPrecise(allocator, inner.nsects); - - var i: usize = 0; - while (i < inner.nsects) : (i += 1) { - const sect = try reader.readStruct(section_64); - segment.sections.appendAssumeCapacity(sect); - } - - return segment; - } - - pub fn write(self: SegmentCommand, writer: anytype) !void { - try writer.writeAll(mem.asBytes(&self.inner)); - for (self.sections.items) |sect| { - try writer.writeAll(mem.asBytes(&sect)); - } - } - - pub fn deinit(self: *SegmentCommand, allocator: Allocator) void { - self.sections.deinit(allocator); - } - - pub fn eql(self: SegmentCommand, other: SegmentCommand) bool { - if (!meta.eql(self.inner, other.inner)) return false; - const lhs = self.sections.items; - const rhs = other.sections.items; - var i: usize = 0; - while (i < self.inner.nsects) : (i += 1) { - if (!meta.eql(lhs[i], rhs[i])) return false; - } - return true; - } -}; - -pub fn emptyGenericCommandWithData(cmd: anytype) GenericCommandWithData(@TypeOf(cmd)) { - return .{ .inner = cmd }; -} - -/// A Zig wrapper for a generic load command with variable-length data. -pub fn GenericCommandWithData(comptime Cmd: type) type { - return struct { - inner: Cmd, - /// This field remains undefined until `read` is called. 
- data: []u8 = undefined, - - const Self = @This(); - - pub fn read(allocator: Allocator, reader: anytype) !Self { - const inner = try reader.readStruct(Cmd); - var data = try allocator.alloc(u8, inner.cmdsize - @sizeOf(Cmd)); - errdefer allocator.free(data); - try reader.readNoEof(data); - return Self{ - .inner = inner, - .data = data, - }; - } - - pub fn write(self: Self, writer: anytype) !void { - try writer.writeAll(mem.asBytes(&self.inner)); - try writer.writeAll(self.data); - } - - pub fn deinit(self: *Self, allocator: Allocator) void { - allocator.free(self.data); - } - - pub fn eql(self: Self, other: Self) bool { - if (!meta.eql(self.inner, other.inner)) return false; - return mem.eql(u8, self.data, other.data); - } - }; -} - -pub fn createLoadDylibCommand( - allocator: Allocator, - cmd_id: LC, - name: []const u8, - timestamp: u32, - current_version: u32, - compatibility_version: u32, -) !GenericCommandWithData(dylib_command) { - assert(cmd_id == .LOAD_DYLIB or cmd_id == .LOAD_WEAK_DYLIB or cmd_id == .REEXPORT_DYLIB or cmd_id == .ID_DYLIB); - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(dylib_command) + name.len + 1, // +1 for nul - @sizeOf(u64), - )); - - var dylib_cmd = emptyGenericCommandWithData(dylib_command{ - .cmd = cmd_id, - .cmdsize = cmdsize, - .dylib = .{ - .name = @sizeOf(dylib_command), - .timestamp = timestamp, - .current_version = current_version, - .compatibility_version = compatibility_version, - }, - }); - dylib_cmd.data = try allocator.alloc(u8, cmdsize - dylib_cmd.inner.dylib.name); - - mem.set(u8, dylib_cmd.data, 0); - mem.copy(u8, dylib_cmd.data, name); - - return dylib_cmd; -} - -fn testRead(allocator: Allocator, buffer: []const u8, expected: anytype) !void { - var stream = io.fixedBufferStream(buffer); - var given = try LoadCommand.read(allocator, stream.reader()); - defer given.deinit(allocator); - try testing.expect(expected.eql(given)); -} - -fn testWrite(buffer: []u8, cmd: LoadCommand, expected: []const 
u8) !void { - var stream = io.fixedBufferStream(buffer); - try cmd.write(stream.writer()); - try testing.expect(mem.eql(u8, expected, buffer[0..expected.len])); -} - -fn makeStaticString(bytes: []const u8) [16]u8 { - var buf = [_]u8{0} ** 16; - assert(bytes.len <= buf.len); - mem.copy(u8, &buf, bytes); - return buf; -} - -test "read-write segment command" { - // TODO compiling for macOS from big-endian arch - if (builtin.target.cpu.arch.endian() != .Little) return error.SkipZigTest; - - var gpa = testing.allocator; - const in_buffer = &[_]u8{ - 0x19, 0x00, 0x00, 0x00, // cmd - 0x98, 0x00, 0x00, 0x00, // cmdsize - 0x5f, 0x5f, 0x54, 0x45, 0x58, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // segname - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, // vmaddr - 0x00, 0x80, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, // vmsize - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // fileoff - 0x00, 0x80, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, // filesize - 0x07, 0x00, 0x00, 0x00, // maxprot - 0x05, 0x00, 0x00, 0x00, // initprot - 0x01, 0x00, 0x00, 0x00, // nsects - 0x00, 0x00, 0x00, 0x00, // flags - 0x5f, 0x5f, 0x74, 0x65, 0x78, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // sectname - 0x5f, 0x5f, 0x54, 0x45, 0x58, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // segname - 0x00, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, // address - 0xc0, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // size - 0x00, 0x40, 0x00, 0x00, // offset - 0x02, 0x00, 0x00, 0x00, // alignment - 0x00, 0x00, 0x00, 0x00, // reloff - 0x00, 0x00, 0x00, 0x00, // nreloc - 0x00, 0x04, 0x00, 0x80, // flags - 0x00, 0x00, 0x00, 0x00, // reserved1 - 0x00, 0x00, 0x00, 0x00, // reserved2 - 0x00, 0x00, 0x00, 0x00, // reserved3 - }; - var cmd = SegmentCommand{ - .inner = .{ - .cmdsize = 152, - .segname = makeStaticString("__TEXT"), - .vmaddr = 4294967296, - .vmsize = 294912, - .filesize = 294912, - .maxprot = PROT.READ | PROT.WRITE | PROT.EXEC, - .initprot = PROT.EXEC 
| PROT.READ, - .nsects = 1, - }, - }; - try cmd.sections.append(gpa, .{ - .sectname = makeStaticString("__text"), - .segname = makeStaticString("__TEXT"), - .addr = 4294983680, - .size = 448, - .offset = 16384, - .@"align" = 2, - .flags = S_REGULAR | S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS, - }); - defer cmd.deinit(gpa); - try testRead(gpa, in_buffer, LoadCommand{ .segment = cmd }); - - var out_buffer: [in_buffer.len]u8 = undefined; - try testWrite(&out_buffer, LoadCommand{ .segment = cmd }, in_buffer); -} - -test "read-write generic command with data" { - // TODO compiling for macOS from big-endian arch - if (builtin.target.cpu.arch.endian() != .Little) return error.SkipZigTest; - - var gpa = testing.allocator; - const in_buffer = &[_]u8{ - 0x0c, 0x00, 0x00, 0x00, // cmd - 0x20, 0x00, 0x00, 0x00, // cmdsize - 0x18, 0x00, 0x00, 0x00, // name - 0x02, 0x00, 0x00, 0x00, // timestamp - 0x00, 0x00, 0x00, 0x00, // current_version - 0x00, 0x00, 0x00, 0x00, // compatibility_version - 0x2f, 0x75, 0x73, 0x72, 0x00, 0x00, 0x00, 0x00, // data - }; - var cmd = GenericCommandWithData(dylib_command){ - .inner = .{ - .cmd = .LOAD_DYLIB, - .cmdsize = 32, - .dylib = .{ - .name = 24, - .timestamp = 2, - .current_version = 0, - .compatibility_version = 0, - }, - }, - }; - cmd.data = try gpa.alloc(u8, 8); - defer gpa.free(cmd.data); - cmd.data[0] = 0x2f; - cmd.data[1] = 0x75; - cmd.data[2] = 0x73; - cmd.data[3] = 0x72; - cmd.data[4] = 0x0; - cmd.data[5] = 0x0; - cmd.data[6] = 0x0; - cmd.data[7] = 0x0; - try testRead(gpa, in_buffer, LoadCommand{ .dylib = cmd }); - - var out_buffer: [in_buffer.len]u8 = undefined; - try testWrite(&out_buffer, LoadCommand{ .dylib = cmd }, in_buffer); -} - -test "read-write C struct command" { - // TODO compiling for macOS from big-endian arch - if (builtin.target.cpu.arch.endian() != .Little) return error.SkipZigTest; - - var gpa = testing.allocator; - const in_buffer = &[_]u8{ - 0x28, 0x00, 0x00, 0x80, // cmd - 0x18, 0x00, 0x00, 0x00, // 
cmdsize - 0x04, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // entryoff - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // stacksize - }; - const cmd = .{ - .cmd = .MAIN, - .cmdsize = 24, - .entryoff = 16644, - .stacksize = 0, - }; - try testRead(gpa, in_buffer, LoadCommand{ .main = cmd }); - - var out_buffer: [in_buffer.len]u8 = undefined; - try testWrite(&out_buffer, LoadCommand{ .main = cmd }, in_buffer); -} diff --git a/src/link/Dwarf.zig b/src/link/Dwarf.zig index 03ba53801b..627f946e36 100644 --- a/src/link/Dwarf.zig +++ b/src/link/Dwarf.zig @@ -853,8 +853,7 @@ pub fn commitDeclState( .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_segment = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_line_sect = &dwarf_segment.sections.items[d_sym.debug_line_section_index.?]; + const debug_line_sect = &d_sym.sections.items[d_sym.debug_line_section_index.?]; const file_pos = debug_line_sect.offset + src_fn.off; try pwriteDbgLineNops(d_sym.file, file_pos, 0, &[0]u8{}, src_fn.len); }, @@ -933,8 +932,8 @@ pub fn commitDeclState( .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_segment = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_line_sect = &dwarf_segment.sections.items[d_sym.debug_line_section_index.?]; + const dwarf_segment = d_sym.segments.items[d_sym.dwarf_segment_cmd_index.?]; + const debug_line_sect = &d_sym.sections.items[d_sym.debug_line_section_index.?]; if (needed_size != debug_line_sect.size) { if (needed_size > d_sym.allocatedSize(debug_line_sect.offset)) { const new_offset = d_sym.findFreeSpace(needed_size, 1); @@ -955,10 +954,9 @@ pub fn commitDeclState( ); debug_line_sect.offset = @intCast(u32, new_offset); - debug_line_sect.addr = dwarf_segment.inner.vmaddr + new_offset - dwarf_segment.inner.fileoff; + debug_line_sect.addr = dwarf_segment.vmaddr + new_offset - 
dwarf_segment.fileoff; } debug_line_sect.size = needed_size; - d_sym.load_commands_dirty = true; // TODO look into making only the one section dirty d_sym.debug_line_header_dirty = true; } const file_pos = debug_line_sect.offset + src_fn.off; @@ -1137,8 +1135,7 @@ fn updateDeclDebugInfoAllocation(self: *Dwarf, file: *File, atom: *Atom, len: u3 .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_segment = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_info_sect = &dwarf_segment.sections.items[d_sym.debug_info_section_index.?]; + const debug_info_sect = &d_sym.sections.items[d_sym.debug_info_section_index.?]; const file_pos = debug_info_sect.offset + atom.off; try pwriteDbgInfoNops(d_sym.file, file_pos, 0, &[0]u8{}, atom.len, false); }, @@ -1235,8 +1232,8 @@ fn writeDeclDebugInfo(self: *Dwarf, file: *File, atom: *Atom, dbg_info_buf: []co .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_segment = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_info_sect = &dwarf_segment.sections.items[d_sym.debug_info_section_index.?]; + const dwarf_segment = d_sym.segments.items[d_sym.dwarf_segment_cmd_index.?]; + const debug_info_sect = &d_sym.sections.items[d_sym.debug_info_section_index.?]; if (needed_size != debug_info_sect.size) { if (needed_size > d_sym.allocatedSize(debug_info_sect.offset)) { const new_offset = d_sym.findFreeSpace(needed_size, 1); @@ -1257,10 +1254,9 @@ fn writeDeclDebugInfo(self: *Dwarf, file: *File, atom: *Atom, dbg_info_buf: []co ); debug_info_sect.offset = @intCast(u32, new_offset); - debug_info_sect.addr = dwarf_segment.inner.vmaddr + new_offset - dwarf_segment.inner.fileoff; + debug_info_sect.addr = dwarf_segment.vmaddr + new_offset - dwarf_segment.fileoff; } debug_info_sect.size = needed_size; - d_sym.load_commands_dirty = true; // TODO look into making only the one section 
dirty d_sym.debug_line_header_dirty = true; } const file_pos = debug_info_sect.offset + atom.off; @@ -1330,8 +1326,7 @@ pub fn updateDeclLineNumber(self: *Dwarf, file: *File, decl: *const Module.Decl) .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = macho_file.d_sym.?; - const dwarf_seg = d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const sect = dwarf_seg.sections.items[d_sym.debug_line_section_index.?]; + const sect = d_sym.sections.items[d_sym.debug_line_section_index.?]; const file_pos = sect.offset + decl.fn_link.macho.off + self.getRelocDbgLineOff(); try d_sym.file.pwriteAll(&data, file_pos); }, @@ -1557,14 +1552,14 @@ pub fn writeDbgAbbrev(self: *Dwarf, file: *File) !void { .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_segment = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_abbrev_sect = &dwarf_segment.sections.items[d_sym.debug_abbrev_section_index.?]; + const dwarf_segment = d_sym.segments.items[d_sym.dwarf_segment_cmd_index.?]; + const debug_abbrev_sect = &d_sym.sections.items[d_sym.debug_abbrev_section_index.?]; const allocated_size = d_sym.allocatedSize(debug_abbrev_sect.offset); if (needed_size > allocated_size) { debug_abbrev_sect.size = 0; // free the space const offset = d_sym.findFreeSpace(needed_size, 1); debug_abbrev_sect.offset = @intCast(u32, offset); - debug_abbrev_sect.addr = dwarf_segment.inner.vmaddr + offset - dwarf_segment.inner.fileoff; + debug_abbrev_sect.addr = dwarf_segment.vmaddr + offset - dwarf_segment.fileoff; } debug_abbrev_sect.size = needed_size; log.debug("__debug_abbrev start=0x{x} end=0x{x}", .{ @@ -1681,8 +1676,7 @@ pub fn writeDbgInfoHeader(self: *Dwarf, file: *File, module: *Module, low_pc: u6 .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_seg = d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const 
debug_info_sect = dwarf_seg.sections.items[d_sym.debug_info_section_index.?]; + const debug_info_sect = d_sym.sections.items[d_sym.debug_info_section_index.?]; const file_pos = debug_info_sect.offset; try pwriteDbgInfoNops(d_sym.file, file_pos, 0, di_buf.items, jmp_amt, false); }, @@ -1998,13 +1992,13 @@ pub fn writeDbgAranges(self: *Dwarf, file: *File, addr: u64, size: u64) !void { .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_seg = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_aranges_sect = &dwarf_seg.sections.items[d_sym.debug_aranges_section_index.?]; + const dwarf_seg = d_sym.segments.items[d_sym.dwarf_segment_cmd_index.?]; + const debug_aranges_sect = &d_sym.sections.items[d_sym.debug_aranges_section_index.?]; const allocated_size = d_sym.allocatedSize(debug_aranges_sect.offset); if (needed_size > allocated_size) { debug_aranges_sect.size = 0; // free the space const new_offset = d_sym.findFreeSpace(needed_size, 16); - debug_aranges_sect.addr = dwarf_seg.inner.vmaddr + new_offset - dwarf_seg.inner.fileoff; + debug_aranges_sect.addr = dwarf_seg.vmaddr + new_offset - dwarf_seg.fileoff; debug_aranges_sect.offset = @intCast(u32, new_offset); } debug_aranges_sect.size = needed_size; @@ -2134,8 +2128,7 @@ pub fn writeDbgLineHeader(self: *Dwarf, file: *File, module: *Module) !void { .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_seg = d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_line_sect = dwarf_seg.sections.items[d_sym.debug_line_section_index.?]; + const debug_line_sect = d_sym.sections.items[d_sym.debug_line_section_index.?]; const file_pos = debug_line_sect.offset; try pwriteDbgLineNops(d_sym.file, file_pos, 0, di_buf.items, jmp_amt); }, @@ -2264,8 +2257,7 @@ pub fn flushModule(self: *Dwarf, file: *File, module: *Module) !void { .macho => { const macho_file = 
file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_segment = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_info_sect = &dwarf_segment.sections.items[d_sym.debug_info_section_index.?]; + const debug_info_sect = &d_sym.sections.items[d_sym.debug_info_section_index.?]; break :blk debug_info_sect.offset; }, // for wasm, the offset is always 0 as we write to memory first diff --git a/src/link/MachO.zig b/src/link/MachO.zig index a247b3e6c5..b912130957 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -17,6 +17,7 @@ const aarch64 = @import("../arch/aarch64/bits.zig"); const bind = @import("MachO/bind.zig"); const codegen = @import("../codegen.zig"); const dead_strip = @import("MachO/dead_strip.zig"); +const fat = @import("MachO/fat.zig"); const link = @import("../link.zig"); const llvm_backend = @import("../codegen/llvm.zig"); const target_util = @import("../target.zig"); @@ -60,6 +61,29 @@ const SystemLib = struct { weak: bool = false, }; +const Section = struct { + header: macho.section_64, + segment_index: u8, + last_atom: ?*Atom = null, // TODO temporary hack; we really should shrink section to 0 + + /// A list of atoms that have surplus capacity. This list can have false + /// positives, as functions grow and shrink over time, only sometimes being added + /// or removed from the freelist. + /// + /// An atom has surplus capacity when its overcapacity value is greater than + /// padToIdeal(minimum_atom_size). That is, when it has so + /// much extra capacity, that we could fit a small new symbol in it, itself with + /// ideal_capacity or more. + /// + /// Ideal capacity is defined by size + (size / ideal_factor). + /// + /// Overcapacity is measured by actual_capacity - ideal_capacity. Note that + /// overcapacity can be negative. A simple way to have negative overcapacity is to + /// allocate a fresh atom, which will have ideal capacity, and then grow it + /// by 1 byte. 
It will then have -1 overcapacity. + free_list: std.ArrayListUnmanaged(*Atom) = .{}, +}; + base: File, /// If this is not null, an object file is created by LLVM and linked with LLD afterwards. @@ -77,80 +101,67 @@ page_size: u16, /// fashion (default for LLVM backend). mode: enum { incremental, one_shot }, -/// The absolute address of the entry point. -entry_addr: ?u64 = null, - -/// Code signature (if any) -code_signature: ?CodeSignature = null, +uuid: macho.uuid_command = .{ + .cmdsize = @sizeOf(macho.uuid_command), + .uuid = undefined, +}, objects: std.ArrayListUnmanaged(Object) = .{}, archives: std.ArrayListUnmanaged(Archive) = .{}, - dylibs: std.ArrayListUnmanaged(Dylib) = .{}, dylibs_map: std.StringHashMapUnmanaged(u16) = .{}, referenced_dylibs: std.AutoArrayHashMapUnmanaged(u16, void) = .{}, -load_commands: std.ArrayListUnmanaged(macho.LoadCommand) = .{}, +segments: std.ArrayListUnmanaged(macho.segment_command_64) = .{}, +sections: std.MultiArrayList(Section) = .{}, -pagezero_segment_cmd_index: ?u16 = null, -text_segment_cmd_index: ?u16 = null, -data_const_segment_cmd_index: ?u16 = null, -data_segment_cmd_index: ?u16 = null, -linkedit_segment_cmd_index: ?u16 = null, -dyld_info_cmd_index: ?u16 = null, -symtab_cmd_index: ?u16 = null, -dysymtab_cmd_index: ?u16 = null, -dylinker_cmd_index: ?u16 = null, -data_in_code_cmd_index: ?u16 = null, -function_starts_cmd_index: ?u16 = null, -main_cmd_index: ?u16 = null, -dylib_id_cmd_index: ?u16 = null, -source_version_cmd_index: ?u16 = null, -build_version_cmd_index: ?u16 = null, -uuid_cmd_index: ?u16 = null, -code_signature_cmd_index: ?u16 = null, +pagezero_segment_cmd_index: ?u8 = null, +text_segment_cmd_index: ?u8 = null, +data_const_segment_cmd_index: ?u8 = null, +data_segment_cmd_index: ?u8 = null, +linkedit_segment_cmd_index: ?u8 = null, // __TEXT segment sections -text_section_index: ?u16 = null, -stubs_section_index: ?u16 = null, -stub_helper_section_index: ?u16 = null, -text_const_section_index: ?u16 = null, 
-cstring_section_index: ?u16 = null, -ustring_section_index: ?u16 = null, -gcc_except_tab_section_index: ?u16 = null, -unwind_info_section_index: ?u16 = null, -eh_frame_section_index: ?u16 = null, +text_section_index: ?u8 = null, +stubs_section_index: ?u8 = null, +stub_helper_section_index: ?u8 = null, +text_const_section_index: ?u8 = null, +cstring_section_index: ?u8 = null, +ustring_section_index: ?u8 = null, +gcc_except_tab_section_index: ?u8 = null, +unwind_info_section_index: ?u8 = null, +eh_frame_section_index: ?u8 = null, -objc_methlist_section_index: ?u16 = null, -objc_methname_section_index: ?u16 = null, -objc_methtype_section_index: ?u16 = null, -objc_classname_section_index: ?u16 = null, +objc_methlist_section_index: ?u8 = null, +objc_methname_section_index: ?u8 = null, +objc_methtype_section_index: ?u8 = null, +objc_classname_section_index: ?u8 = null, // __DATA_CONST segment sections -got_section_index: ?u16 = null, -mod_init_func_section_index: ?u16 = null, -mod_term_func_section_index: ?u16 = null, -data_const_section_index: ?u16 = null, +got_section_index: ?u8 = null, +mod_init_func_section_index: ?u8 = null, +mod_term_func_section_index: ?u8 = null, +data_const_section_index: ?u8 = null, -objc_cfstring_section_index: ?u16 = null, -objc_classlist_section_index: ?u16 = null, -objc_imageinfo_section_index: ?u16 = null, +objc_cfstring_section_index: ?u8 = null, +objc_classlist_section_index: ?u8 = null, +objc_imageinfo_section_index: ?u8 = null, // __DATA segment sections -tlv_section_index: ?u16 = null, -tlv_data_section_index: ?u16 = null, -tlv_bss_section_index: ?u16 = null, -tlv_ptrs_section_index: ?u16 = null, -la_symbol_ptr_section_index: ?u16 = null, -data_section_index: ?u16 = null, -bss_section_index: ?u16 = null, +tlv_section_index: ?u8 = null, +tlv_data_section_index: ?u8 = null, +tlv_bss_section_index: ?u8 = null, +tlv_ptrs_section_index: ?u8 = null, +la_symbol_ptr_section_index: ?u8 = null, +data_section_index: ?u8 = null, 
+bss_section_index: ?u8 = null, -objc_const_section_index: ?u16 = null, -objc_selrefs_section_index: ?u16 = null, -objc_classrefs_section_index: ?u16 = null, -objc_data_section_index: ?u16 = null, +objc_const_section_index: ?u8 = null, +objc_selrefs_section_index: ?u8 = null, +objc_classrefs_section_index: ?u8 = null, +objc_data_section_index: ?u8 = null, -rustc_section_index: ?u16 = null, +rustc_section_index: ?u8 = null, rustc_section_size: u64 = 0, locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, @@ -188,37 +199,12 @@ stubs_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, error_flags: File.ErrorFlags = File.ErrorFlags{}, -load_commands_dirty: bool = false, -sections_order_dirty: bool = false, - /// A helper var to indicate if we are at the start of the incremental updates, or /// already somewhere further along the update-and-run chain. /// TODO once we add opening a prelinked output binary from file, this will become /// obsolete as we will carry on where we left off. cold_start: bool = true, -section_ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}, - -/// A list of atoms that have surplus capacity. This list can have false -/// positives, as functions grow and shrink over time, only sometimes being added -/// or removed from the freelist. -/// -/// An atom has surplus capacity when its overcapacity value is greater than -/// padToIdeal(minimum_atom_size). That is, when it has so -/// much extra capacity, that we could fit a small new symbol in it, itself with -/// ideal_capacity or more. -/// -/// Ideal capacity is defined by size + (size / ideal_factor). -/// -/// Overcapacity is measured by actual_capacity - ideal_capacity. Note that -/// overcapacity can be negative. A simple way to have negative overcapacity is to -/// allocate a fresh atom, which will have ideal capacity, and then grow it -/// by 1 byte. It will then have -1 overcapacity. 
-atom_free_lists: std.AutoHashMapUnmanaged(MatchingSection, std.ArrayListUnmanaged(*Atom)) = .{}, - -/// Pointer to the last allocated atom -atoms: std.AutoHashMapUnmanaged(MatchingSection, *Atom) = .{}, - /// List of atoms that are either synthetic or map directly to the Zig source program. managed_atoms: std.ArrayListUnmanaged(*Atom) = .{}, @@ -250,7 +236,7 @@ unnamed_const_atoms: UnnamedConstTable = .{}, /// We store them here so that we can properly dispose of any allocated /// memory within the atom in the incremental linker. /// TODO consolidate this. -decls: std.AutoArrayHashMapUnmanaged(Module.Decl.Index, ?MatchingSection) = .{}, +decls: std.AutoArrayHashMapUnmanaged(Module.Decl.Index, ?u8) = .{}, const Entry = struct { target: SymbolWithLoc, @@ -408,12 +394,7 @@ pub fn openPath(allocator: Allocator, options: link.Options) !*MachO { pub fn createEmpty(gpa: Allocator, options: link.Options) !*MachO { const cpu_arch = options.target.cpu.arch; - const os_tag = options.target.os.tag; - const abi = options.target.abi; const page_size: u16 = if (cpu_arch == .aarch64) 0x4000 else 0x1000; - // Adhoc code signature is required when targeting aarch64-macos either directly or indirectly via the simulator - // ABI such as aarch64-ios-simulator, etc. 
- const requires_adhoc_codesig = cpu_arch == .aarch64 and (os_tag == .macos or abi == .simulator); const use_llvm = build_options.have_llvm and options.use_llvm; const use_stage1 = build_options.is_stage1 and options.use_stage1; @@ -428,10 +409,6 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*MachO { .file = null, }, .page_size = page_size, - .code_signature = if (requires_adhoc_codesig) - CodeSignature.init(page_size) - else - null, .mode = if (use_stage1 or use_llvm or options.module == null or options.cache_mode == .whole) .one_shot else @@ -562,8 +539,8 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No var dependent_libs = std.fifo.LinearFifo(struct { id: Dylib.Id, parent: u16, - }, .Dynamic).init(self.base.allocator); - defer dependent_libs.deinit(); + }, .Dynamic).init(arena); + try self.parseLibs(libs.keys(), libs.values(), self.base.options.sysroot, &dependent_libs); try self.parseDependentLibs(self.base.options.sysroot, &dependent_libs); } @@ -573,7 +550,6 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try self.createDyldPrivateAtom(); try self.createStubHelperPreambleAtom(); try self.resolveSymbolsInDylibs(); - try self.addCodeSignatureLC(); if (self.unresolved.count() > 0) { return error.UndefinedSymbolReference; @@ -583,66 +559,90 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No if (build_options.enable_logging) { self.logSymtab(); - self.logSectionOrdinals(); self.logAtoms(); } try self.writeAtomsIncremental(); - try self.setEntryPoint(); - try self.updateSectionOrdinals(); - try self.writeLinkeditSegment(); + var lc_buffer = std.ArrayList(u8).init(arena); + const lc_writer = lc_buffer.writer(); + var ncmds: u32 = 0; + + try self.writeLinkeditSegmentData(&ncmds, lc_writer); + try writeDylinkerLC(&ncmds, lc_writer); + + self.writeMainLC(&ncmds, lc_writer) catch |err| switch (err) { + error.MissingMainEntrypoint => { + 
self.error_flags.no_entry_point_found = true; + }, + else => |e| return e, + }; + + try self.writeDylibIdLC(&ncmds, lc_writer); + try self.writeRpathLCs(&ncmds, lc_writer); + + { + try lc_writer.writeStruct(macho.source_version_command{ + .cmdsize = @sizeOf(macho.source_version_command), + .version = 0x0, + }); + ncmds += 1; + } + + try self.writeBuildVersionLC(&ncmds, lc_writer); + + { + std.crypto.random.bytes(&self.uuid.uuid); + try lc_writer.writeStruct(self.uuid); + ncmds += 1; + } + + try self.writeLoadDylibLCs(&ncmds, lc_writer); + + const target = self.base.options.target; + const requires_codesig = blk: { + if (self.base.options.entitlements) |_| break :blk true; + if (target.cpu.arch == .aarch64 and (target.os.tag == .macos or target.abi == .simulator)) + break :blk true; + break :blk false; + }; + var codesig_offset: ?u32 = null; + var codesig: ?CodeSignature = if (requires_codesig) blk: { + // Preallocate space for the code signature. + // We need to do this at this stage so that we have the load commands with proper values + // written out to the file. + // The most important here is to have the correct vm and filesize of the __LINKEDIT segment + // where the code signature goes into. 
+ var codesig = CodeSignature.init(self.page_size); + codesig.code_directory.ident = self.base.options.emit.?.sub_path; + if (self.base.options.entitlements) |path| { + try codesig.addEntitlements(arena, path); + } + codesig_offset = try self.writeCodeSignaturePadding(&codesig, &ncmds, lc_writer); + break :blk codesig; + } else null; + + var headers_buf = std.ArrayList(u8).init(arena); + try self.writeSegmentHeaders(0, self.segments.items.len, &ncmds, headers_buf.writer()); + + try self.base.file.?.pwriteAll(headers_buf.items, @sizeOf(macho.mach_header_64)); + try self.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len); + + try self.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len + headers_buf.items.len)); + + if (codesig) |*csig| { + try self.writeCodeSignature(csig, codesig_offset.?); // code signing always comes last + } if (self.d_sym) |*d_sym| { // Flush debug symbols bundle. try d_sym.flushModule(self.base.allocator, self.base.options); } - // code signature and entitlements - if (self.base.options.entitlements) |path| { - if (self.code_signature) |*csig| { - try csig.addEntitlements(self.base.allocator, path); - csig.code_directory.ident = self.base.options.emit.?.sub_path; - } else { - var csig = CodeSignature.init(self.page_size); - try csig.addEntitlements(self.base.allocator, path); - csig.code_directory.ident = self.base.options.emit.?.sub_path; - self.code_signature = csig; - } - } - - if (self.code_signature) |*csig| { - csig.clear(self.base.allocator); - csig.code_directory.ident = self.base.options.emit.?.sub_path; - // Preallocate space for the code signature. - // We need to do this at this stage so that we have the load commands with proper values - // written out to the file. - // The most important here is to have the correct vm and filesize of the __LINKEDIT segment - // where the code signature goes into. 
- try self.writeCodeSignaturePadding(csig); - } - - try self.writeLoadCommands(); - try self.writeHeader(); - - if (self.entry_addr == null and self.base.options.output_mode == .Exe) { - log.debug("flushing. no_entry_point_found = true", .{}); - self.error_flags.no_entry_point_found = true; - } else { - log.debug("flushing. no_entry_point_found = false", .{}); - self.error_flags.no_entry_point_found = false; - } - - assert(!self.load_commands_dirty); - - if (self.code_signature) |*csig| { - try self.writeCodeSignature(csig); // code signing always comes last - } - - if (build_options.enable_link_snapshots) { - if (self.base.options.enable_link_snapshots) - try self.snapshotState(); - } + // if (build_options.enable_link_snapshots) { + // if (self.base.options.enable_link_snapshots) + // try self.snapshotState(); + // } if (cache_miss) { // Update the file with the digest. If it fails we can continue; it only @@ -708,6 +708,9 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) sub_prog_node.context.refresh(); defer sub_prog_node.end(); + const cpu_arch = self.base.options.target.cpu.arch; + const os_tag = self.base.options.target.os.tag; + const abi = self.base.options.target.abi; const is_lib = self.base.options.output_mode == .Lib; const is_dyn_lib = self.base.options.link_mode == .Dynamic and is_lib; const is_exe_or_dyn_lib = is_dyn_lib or self.base.options.output_mode == .Exe; @@ -990,40 +993,6 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) } } - // rpaths - var rpath_table = std.StringArrayHashMap(void).init(arena); - for (self.base.options.rpath_list) |rpath| { - if (rpath_table.contains(rpath)) continue; - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.rpath_command) + rpath.len + 1, - @sizeOf(u64), - )); - var rpath_cmd = macho.emptyGenericCommandWithData(macho.rpath_command{ - .cmdsize = cmdsize, - .path = @sizeOf(macho.rpath_command), - }); - rpath_cmd.data = 
try gpa.alloc(u8, cmdsize - rpath_cmd.inner.path); - mem.set(u8, rpath_cmd.data, 0); - mem.copy(u8, rpath_cmd.data, rpath); - try self.load_commands.append(gpa, .{ .rpath = rpath_cmd }); - try rpath_table.putNoClobber(rpath, {}); - self.load_commands_dirty = true; - } - - // code signature and entitlements - if (self.base.options.entitlements) |path| { - if (self.code_signature) |*csig| { - try csig.addEntitlements(gpa, path); - csig.code_directory.ident = self.base.options.emit.?.sub_path; - } else { - var csig = CodeSignature.init(self.page_size); - try csig.addEntitlements(gpa, path); - csig.code_directory.ident = self.base.options.emit.?.sub_path; - self.code_signature = csig; - } - } - if (self.base.options.verbose_link) { var argv = std.ArrayList([]const u8).init(arena); @@ -1048,7 +1017,7 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) try argv.append(syslibroot); } - for (rpath_table.keys()) |rpath| { + for (self.base.options.rpath_list) |rpath| { try argv.append("-rpath"); try argv.append(rpath); } @@ -1157,15 +1126,15 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) var dependent_libs = std.fifo.LinearFifo(struct { id: Dylib.Id, parent: u16, - }, .Dynamic).init(gpa); - defer dependent_libs.deinit(); + }, .Dynamic).init(arena); + try self.parseInputFiles(positionals.items, self.base.options.sysroot, &dependent_libs); try self.parseAndForceLoadStaticArchives(must_link_archives.keys()); try self.parseLibs(libs.keys(), libs.values(), self.base.options.sysroot, &dependent_libs); try self.parseDependentLibs(self.base.options.sysroot, &dependent_libs); - for (self.objects.items) |*object, object_id| { - try self.resolveSymbolsInObject(object, @intCast(u16, object_id)); + for (self.objects.items) |_, object_id| { + try self.resolveSymbolsInObject(@intCast(u16, object_id)); } try self.resolveSymbolsInArchives(); @@ -1175,7 +1144,6 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: 
*std.Progress.Node) try self.resolveSymbolsInDylibs(); try self.createMhExecuteHeaderSymbol(); try self.createDsoHandleSymbol(); - try self.addCodeSignatureLC(); try self.resolveSymbolsAtLoading(); if (self.unresolved.count() > 0) { @@ -1206,41 +1174,79 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) if (build_options.enable_logging) { self.logSymtab(); - self.logSectionOrdinals(); self.logAtoms(); } try self.writeAtomsOneShot(); if (self.rustc_section_index) |id| { - const sect = self.getSectionPtr(.{ - .seg = self.data_segment_cmd_index.?, - .sect = id, - }); - sect.size = self.rustc_section_size; + const header = &self.sections.items(.header)[id]; + header.size = self.rustc_section_size; } - try self.setEntryPoint(); - try self.writeLinkeditSegment(); + var lc_buffer = std.ArrayList(u8).init(arena); + const lc_writer = lc_buffer.writer(); + var ncmds: u32 = 0; - if (self.code_signature) |*csig| { - csig.clear(gpa); - csig.code_directory.ident = self.base.options.emit.?.sub_path; + try self.writeLinkeditSegmentData(&ncmds, lc_writer); + try writeDylinkerLC(&ncmds, lc_writer); + try self.writeMainLC(&ncmds, lc_writer); + try self.writeDylibIdLC(&ncmds, lc_writer); + try self.writeRpathLCs(&ncmds, lc_writer); + + { + try lc_writer.writeStruct(macho.source_version_command{ + .cmdsize = @sizeOf(macho.source_version_command), + .version = 0x0, + }); + ncmds += 1; + } + + try self.writeBuildVersionLC(&ncmds, lc_writer); + + { + var uuid_lc = macho.uuid_command{ + .cmdsize = @sizeOf(macho.uuid_command), + .uuid = undefined, + }; + std.crypto.random.bytes(&uuid_lc.uuid); + try lc_writer.writeStruct(uuid_lc); + ncmds += 1; + } + + try self.writeLoadDylibLCs(&ncmds, lc_writer); + + const requires_codesig = blk: { + if (self.base.options.entitlements) |_| break :blk true; + if (cpu_arch == .aarch64 and (os_tag == .macos or abi == .simulator)) break :blk true; + break :blk false; + }; + var codesig_offset: ?u32 = null; + var codesig: 
?CodeSignature = if (requires_codesig) blk: { // Preallocate space for the code signature. // We need to do this at this stage so that we have the load commands with proper values // written out to the file. // The most important here is to have the correct vm and filesize of the __LINKEDIT segment // where the code signature goes into. - try self.writeCodeSignaturePadding(csig); - } + var codesig = CodeSignature.init(self.page_size); + codesig.code_directory.ident = self.base.options.emit.?.sub_path; + if (self.base.options.entitlements) |path| { + try codesig.addEntitlements(arena, path); + } + codesig_offset = try self.writeCodeSignaturePadding(&codesig, &ncmds, lc_writer); + break :blk codesig; + } else null; - try self.writeLoadCommands(); - try self.writeHeader(); + var headers_buf = std.ArrayList(u8).init(arena); + try self.writeSegmentHeaders(0, self.segments.items.len, &ncmds, headers_buf.writer()); - assert(!self.load_commands_dirty); + try self.base.file.?.pwriteAll(headers_buf.items, @sizeOf(macho.mach_header_64)); + try self.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len); - if (self.code_signature) |*csig| { - try self.writeCodeSignature(csig); // code signing always comes last + try self.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len + headers_buf.items.len)); + + if (codesig) |*csig| { + try self.writeCodeSignature(csig, codesig_offset.?); // code signing always comes last } } @@ -1395,66 +1401,77 @@ fn resolveFramework( } fn parseObject(self: *MachO, path: []const u8) !bool { + const gpa = self.base.allocator; const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { error.FileNotFound => return false, else => |e| return e, }; - errdefer file.close(); - - const name = try self.base.allocator.dupe(u8, path); - errdefer self.base.allocator.free(name); + defer file.close(); + const name = try gpa.dupe(u8, path); + errdefer gpa.free(name); + const cpu_arch = 
self.base.options.target.cpu.arch; const mtime: u64 = mtime: { const stat = file.stat() catch break :mtime 0; break :mtime @intCast(u64, @divFloor(stat.mtime, 1_000_000_000)); }; + const file_stat = try file.stat(); + const file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; + const contents = try file.readToEndAllocOptions(gpa, file_size, file_size, @alignOf(u64), null); var object = Object{ .name = name, - .file = file, .mtime = mtime, + .contents = contents, }; - object.parse(self.base.allocator, self.base.options.target.cpu.arch) catch |err| switch (err) { + object.parse(gpa, cpu_arch) catch |err| switch (err) { error.EndOfStream, error.NotObject => { - object.deinit(self.base.allocator); + object.deinit(gpa); return false; }, else => |e| return e, }; - try self.objects.append(self.base.allocator, object); + try self.objects.append(gpa, object); return true; } fn parseArchive(self: *MachO, path: []const u8, force_load: bool) !bool { + const gpa = self.base.allocator; const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { error.FileNotFound => return false, else => |e| return e, }; errdefer file.close(); - const name = try self.base.allocator.dupe(u8, path); - errdefer self.base.allocator.free(name); + const name = try gpa.dupe(u8, path); + errdefer gpa.free(name); + const cpu_arch = self.base.options.target.cpu.arch; + const reader = file.reader(); + const fat_offset = try fat.getLibraryOffset(reader, cpu_arch); + try reader.context.seekTo(fat_offset); var archive = Archive{ .name = name, + .fat_offset = fat_offset, .file = file, }; - archive.parse(self.base.allocator, self.base.options.target.cpu.arch) catch |err| switch (err) { + archive.parse(gpa, reader) catch |err| switch (err) { error.EndOfStream, error.NotArchive => { - archive.deinit(self.base.allocator); + archive.deinit(gpa); return false; }, else => |e| return e, }; if (force_load) { - defer archive.deinit(self.base.allocator); + defer archive.deinit(gpa); + defer 
file.close(); // Get all offsets from the ToC - var offsets = std.AutoArrayHashMap(u32, void).init(self.base.allocator); + var offsets = std.AutoArrayHashMap(u32, void).init(gpa); defer offsets.deinit(); for (archive.toc.values()) |offs| { for (offs.items) |off| { @@ -1462,15 +1479,11 @@ fn parseArchive(self: *MachO, path: []const u8, force_load: bool) !bool { } } for (offsets.keys()) |off| { - const object = try self.objects.addOne(self.base.allocator); - object.* = try archive.parseObject( - self.base.allocator, - self.base.options.target.cpu.arch, - off, - ); + const object = try archive.parseObject(gpa, cpu_arch, off); + try self.objects.append(gpa, object); } } else { - try self.archives.append(self.base.allocator, archive); + try self.archives.append(gpa, archive); } return true; @@ -1481,6 +1494,7 @@ const ParseDylibError = error{ EmptyStubFile, MismatchedCpuArchitecture, UnsupportedCpuArchitecture, + EndOfStream, } || fs.File.OpenError || std.os.PReadError || Dylib.Id.ParseError; const DylibCreateOpts = struct { @@ -1497,43 +1511,52 @@ pub fn parseDylib( dependent_libs: anytype, opts: DylibCreateOpts, ) ParseDylibError!bool { + const gpa = self.base.allocator; const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { error.FileNotFound => return false, else => |e| return e, }; - errdefer file.close(); + defer file.close(); - const name = try self.base.allocator.dupe(u8, path); - errdefer self.base.allocator.free(name); + const cpu_arch = self.base.options.target.cpu.arch; + const file_stat = try file.stat(); + var file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; + + const reader = file.reader(); + const fat_offset = try fat.getLibraryOffset(reader, cpu_arch); + try file.seekTo(fat_offset); + file_size -= fat_offset; + + const contents = try file.readToEndAllocOptions(gpa, file_size, file_size, @alignOf(u64), null); + defer gpa.free(contents); const dylib_id = @intCast(u16, self.dylibs.items.len); - var dylib = Dylib{ - 
.name = name, - .file = file, - .weak = opts.weak, - }; + var dylib = Dylib{ .weak = opts.weak }; - dylib.parse( - self.base.allocator, - self.base.options.target.cpu.arch, + dylib.parseFromBinary( + gpa, + cpu_arch, dylib_id, dependent_libs, + path, + contents, ) catch |err| switch (err) { error.EndOfStream, error.NotDylib => { try file.seekTo(0); - var lib_stub = LibStub.loadFromFile(self.base.allocator, file) catch { - dylib.deinit(self.base.allocator); + var lib_stub = LibStub.loadFromFile(gpa, file) catch { + dylib.deinit(gpa); return false; }; defer lib_stub.deinit(); try dylib.parseFromStub( - self.base.allocator, + gpa, self.base.options.target, lib_stub, dylib_id, dependent_libs, + path, ); }, else => |e| return e, @@ -1547,13 +1570,13 @@ pub fn parseDylib( log.warn(" dylib version: {}", .{dylib.id.?.current_version}); // TODO maybe this should be an error and facilitate auto-cleanup? - dylib.deinit(self.base.allocator); + dylib.deinit(gpa); return false; } } - try self.dylibs.append(self.base.allocator, dylib); - try self.dylibs_map.putNoClobber(self.base.allocator, dylib.id.?.name, dylib_id); + try self.dylibs.append(gpa, dylib); + try self.dylibs_map.putNoClobber(gpa, dylib.id.?.name, dylib_id); const should_link_dylib_even_if_unreachable = blk: { if (self.base.options.dead_strip_dylibs and !opts.needed) break :blk false; @@ -1561,8 +1584,7 @@ pub fn parseDylib( }; if (should_link_dylib_even_if_unreachable) { - try self.addLoadDylibLC(dylib_id); - try self.referenced_dylibs.putNoClobber(self.base.allocator, dylib_id, {}); + try self.referenced_dylibs.putNoClobber(gpa, dylib_id, {}); } return true; @@ -1572,10 +1594,8 @@ fn parseInputFiles(self: *MachO, files: []const []const u8, syslibroot: ?[]const for (files) |file_name| { const full_path = full_path: { var buffer: [fs.MAX_PATH_BYTES]u8 = undefined; - const path = try fs.realpath(file_name, &buffer); - break :full_path try self.base.allocator.dupe(u8, path); + break :full_path try 
fs.realpath(file_name, &buffer); }; - defer self.base.allocator.free(full_path); log.debug("parsing input file path '{s}'", .{full_path}); if (try self.parseObject(full_path)) continue; @@ -1592,10 +1612,8 @@ fn parseAndForceLoadStaticArchives(self: *MachO, files: []const []const u8) !voi for (files) |file_name| { const full_path = full_path: { var buffer: [fs.MAX_PATH_BYTES]u8 = undefined; - const path = try fs.realpath(file_name, &buffer); - break :full_path try self.base.allocator.dupe(u8, path); + break :full_path try fs.realpath(file_name, &buffer); }; - defer self.base.allocator.free(full_path); log.debug("parsing and force loading static archive '{s}'", .{full_path}); if (try self.parseArchive(full_path, true)) continue; @@ -1669,24 +1687,10 @@ fn parseDependentLibs(self: *MachO, syslibroot: ?[]const u8, dependent_libs: any } } -pub const MatchingSection = struct { - seg: u16, - sect: u16, - - pub fn eql(this: MatchingSection, other: struct { - seg: ?u16, - sect: ?u16, - }) bool { - const seg = other.seg orelse return false; - const sect = other.sect orelse return false; - return this.seg == seg and this.sect == sect; - } -}; - -pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSection { +pub fn getOutputSection(self: *MachO, sect: macho.section_64) !?u8 { const segname = sect.segName(); const sectname = sect.sectName(); - const res: ?MatchingSection = blk: { + const res: ?u8 = blk: { switch (sect.type_()) { macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => { if (self.text_const_section_index == null) { @@ -1698,11 +1702,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_const_section_index.?, - }; + break :blk self.text_const_section_index.?; }, macho.S_CSTRING_LITERALS => { if (mem.eql(u8, sectname, "__objc_methname")) { @@ -1717,11 +1717,7 @@ pub fn getMatchingSection(self: *MachO, 
sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_methname_section_index.?, - }; + break :blk self.objc_methname_section_index.?; } else if (mem.eql(u8, sectname, "__objc_methtype")) { if (self.objc_methtype_section_index == null) { self.objc_methtype_section_index = try self.initSection( @@ -1732,11 +1728,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_methtype_section_index.?, - }; + break :blk self.objc_methtype_section_index.?; } else if (mem.eql(u8, sectname, "__objc_classname")) { if (self.objc_classname_section_index == null) { self.objc_classname_section_index = try self.initSection( @@ -1747,11 +1739,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_classname_section_index.?, - }; + break :blk self.objc_classname_section_index.?; } if (self.cstring_section_index == null) { @@ -1765,11 +1753,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.cstring_section_index.?, - }; + break :blk self.cstring_section_index.?; }, macho.S_LITERAL_POINTERS => { if (mem.eql(u8, segname, "__DATA") and mem.eql(u8, sectname, "__objc_selrefs")) { @@ -1784,11 +1768,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.objc_selrefs_section_index.?, - }; + break :blk self.objc_selrefs_section_index.?; } else { // TODO investigate break :blk null; @@ -1806,11 +1786,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = 
self.mod_init_func_section_index.?, - }; + break :blk self.mod_init_func_section_index.?; }, macho.S_MOD_TERM_FUNC_POINTERS => { if (self.mod_term_func_section_index == null) { @@ -1824,11 +1800,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.mod_term_func_section_index.?, - }; + break :blk self.mod_term_func_section_index.?; }, macho.S_ZEROFILL => { if (self.bss_section_index == null) { @@ -1842,11 +1814,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.bss_section_index.?, - }; + break :blk self.bss_section_index.?; }, macho.S_THREAD_LOCAL_VARIABLES => { if (self.tlv_section_index == null) { @@ -1860,11 +1828,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.tlv_section_index.?, - }; + break :blk self.tlv_section_index.?; }, macho.S_THREAD_LOCAL_VARIABLE_POINTERS => { if (self.tlv_ptrs_section_index == null) { @@ -1878,11 +1842,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.tlv_ptrs_section_index.?, - }; + break :blk self.tlv_ptrs_section_index.?; }, macho.S_THREAD_LOCAL_REGULAR => { if (self.tlv_data_section_index == null) { @@ -1896,11 +1856,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.tlv_data_section_index.?, - }; + break :blk self.tlv_data_section_index.?; }, macho.S_THREAD_LOCAL_ZEROFILL => { if (self.tlv_bss_section_index == null) { @@ -1914,11 +1870,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg 
= self.data_segment_cmd_index.?, - .sect = self.tlv_bss_section_index.?, - }; + break :blk self.tlv_bss_section_index.?; }, macho.S_COALESCED => { if (mem.eql(u8, "__TEXT", segname) and mem.eql(u8, "__eh_frame", sectname)) { @@ -1933,11 +1885,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.eh_frame_section_index.?, - }; + break :blk self.eh_frame_section_index.?; } // TODO audit this: is this the right mapping? @@ -1951,10 +1899,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio ); } - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; + break :blk self.data_const_section_index.?; }, macho.S_REGULAR => { if (sect.isCode()) { @@ -1971,11 +1916,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_section_index.?, - }; + break :blk self.text_section_index.?; } if (sect.isDebug()) { // TODO debug attributes @@ -1998,11 +1939,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.ustring_section_index.?, - }; + break :blk self.ustring_section_index.?; } else if (mem.eql(u8, sectname, "__gcc_except_tab")) { if (self.gcc_except_tab_section_index == null) { self.gcc_except_tab_section_index = try self.initSection( @@ -2013,11 +1950,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.gcc_except_tab_section_index.?, - }; + break :blk self.gcc_except_tab_section_index.?; } else if (mem.eql(u8, sectname, "__objc_methlist")) { if (self.objc_methlist_section_index == null) { self.objc_methlist_section_index = try 
self.initSection( @@ -2028,11 +1961,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_methlist_section_index.?, - }; + break :blk self.objc_methlist_section_index.?; } else if (mem.eql(u8, sectname, "__rodata") or mem.eql(u8, sectname, "__typelink") or mem.eql(u8, sectname, "__itablink") or @@ -2048,11 +1977,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; + break :blk self.data_const_section_index.?; } else { if (self.text_const_section_index == null) { self.text_const_section_index = try self.initSection( @@ -2063,11 +1988,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_const_section_index.?, - }; + break :blk self.text_const_section_index.?; } } @@ -2081,11 +2002,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; + break :blk self.data_const_section_index.?; } if (mem.eql(u8, segname, "__DATA")) { @@ -2099,11 +2016,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; + break :blk self.data_const_section_index.?; } else if (mem.eql(u8, sectname, "__cfstring")) { if (self.objc_cfstring_section_index == null) { self.objc_cfstring_section_index = try self.initSection( @@ -2114,11 +2027,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = 
self.objc_cfstring_section_index.?, - }; + break :blk self.objc_cfstring_section_index.?; } else if (mem.eql(u8, sectname, "__objc_classlist")) { if (self.objc_classlist_section_index == null) { self.objc_classlist_section_index = try self.initSection( @@ -2129,11 +2038,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.objc_classlist_section_index.?, - }; + break :blk self.objc_classlist_section_index.?; } else if (mem.eql(u8, sectname, "__objc_imageinfo")) { if (self.objc_imageinfo_section_index == null) { self.objc_imageinfo_section_index = try self.initSection( @@ -2144,11 +2049,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.objc_imageinfo_section_index.?, - }; + break :blk self.objc_imageinfo_section_index.?; } else if (mem.eql(u8, sectname, "__objc_const")) { if (self.objc_const_section_index == null) { self.objc_const_section_index = try self.initSection( @@ -2159,11 +2060,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.objc_const_section_index.?, - }; + break :blk self.objc_const_section_index.?; } else if (mem.eql(u8, sectname, "__objc_classrefs")) { if (self.objc_classrefs_section_index == null) { self.objc_classrefs_section_index = try self.initSection( @@ -2174,11 +2071,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.objc_classrefs_section_index.?, - }; + break :blk self.objc_classrefs_section_index.?; } else if (mem.eql(u8, sectname, "__objc_data")) { if (self.objc_data_section_index == null) { self.objc_data_section_index = try self.initSection( @@ -2189,11 +2082,7 
@@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.objc_data_section_index.?, - }; + break :blk self.objc_data_section_index.?; } else if (mem.eql(u8, sectname, ".rustc")) { if (self.rustc_section_index == null) { self.rustc_section_index = try self.initSection( @@ -2207,11 +2096,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio // decompress the metadata. self.rustc_section_size = sect.size; } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.rustc_section_index.?, - }; + break :blk self.rustc_section_index.?; } else { if (self.data_section_index == null) { self.data_section_index = try self.initSection( @@ -2222,11 +2107,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.data_section_index.?, - }; + break :blk self.data_section_index.?; } } @@ -2259,30 +2140,33 @@ pub fn createEmptyAtom(gpa: Allocator, sym_index: u32, size: u64, alignment: u32 return atom; } -pub fn writeAtom(self: *MachO, atom: *Atom, match: MatchingSection) !void { - const sect = self.getSection(match); +pub fn writeAtom(self: *MachO, atom: *Atom, sect_id: u8) !void { + const section = self.sections.get(sect_id); const sym = atom.getSymbol(self); - const file_offset = sect.offset + sym.n_value - sect.addr; + const file_offset = section.header.offset + sym.n_value - section.header.addr; try atom.resolveRelocs(self); log.debug("writing atom for symbol {s} at file offset 0x{x}", .{ atom.getName(self), file_offset }); try self.base.file.?.pwriteAll(atom.code.items, file_offset); } fn allocateSymbols(self: *MachO) !void { - var it = self.atoms.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var atom = entry.value_ptr.*; + const slice = self.sections.slice(); + for 
(slice.items(.last_atom)) |last_atom, sect_id| { + const header = slice.items(.header)[sect_id]; + var atom = last_atom orelse continue; while (atom.prev) |prev| { atom = prev; } - const n_sect = self.getSectionOrdinal(match); - const sect = self.getSection(match); - var base_vaddr = sect.addr; + const n_sect = @intCast(u8, sect_id + 1); + var base_vaddr = header.addr; - log.debug("allocating local symbols in sect({d}, '{s},{s}')", .{ n_sect, sect.segName(), sect.sectName() }); + log.debug("allocating local symbols in sect({d}, '{s},{s}')", .{ + n_sect, + header.segName(), + header.sectName(), + }); while (true) { const alignment = try math.powi(u32, 2, atom.alignment); @@ -2296,7 +2180,10 @@ fn allocateSymbols(self: *MachO) !void { // Update each symbol contained within the atom for (atom.contained.items) |sym_at_off| { - const contained_sym = self.getSymbolPtr(.{ .sym_index = sym_at_off.sym_index, .file = atom.file }); + const contained_sym = self.getSymbolPtr(.{ + .sym_index = sym_at_off.sym_index, + .file = atom.file, + }); contained_sym.n_value = base_vaddr + sym_at_off.offset; contained_sym.n_sect = n_sect; } @@ -2310,15 +2197,18 @@ fn allocateSymbols(self: *MachO) !void { } } -fn shiftLocalsByOffset(self: *MachO, match: MatchingSection, offset: i64) !void { - var atom = self.atoms.get(match) orelse return; +fn shiftLocalsByOffset(self: *MachO, sect_id: u8, offset: i64) !void { + var atom = self.sections.items(.last_atom)[sect_id] orelse return; while (true) { const atom_sym = atom.getSymbolPtr(self); atom_sym.n_value = @intCast(u64, @intCast(i64, atom_sym.n_value) + offset); for (atom.contained.items) |sym_at_off| { - const contained_sym = self.getSymbolPtr(.{ .sym_index = sym_at_off.sym_index, .file = atom.file }); + const contained_sym = self.getSymbolPtr(.{ + .sym_index = sym_at_off.sym_index, + .file = atom.file, + }); contained_sym.n_value = @intCast(u64, @intCast(i64, contained_sym.n_value) + offset); } @@ -2336,16 +2226,13 @@ fn 
allocateSpecialSymbols(self: *MachO) !void { const global = self.globals.get(name) orelse continue; if (global.file != null) continue; const sym = self.getSymbolPtr(global); - const seg = self.load_commands.items[self.text_segment_cmd_index.?].segment; - sym.n_sect = self.getSectionOrdinal(.{ - .seg = self.text_segment_cmd_index.?, - .sect = 0, - }); - sym.n_value = seg.inner.vmaddr; + const seg = self.segments.items[self.text_segment_cmd_index.?]; + sym.n_sect = 1; + sym.n_value = seg.vmaddr; log.debug("allocating {s} at the start of {s}", .{ name, - seg.inner.segName(), + seg.segName(), }); } } @@ -2353,18 +2240,20 @@ fn allocateSpecialSymbols(self: *MachO) !void { fn writeAtomsOneShot(self: *MachO) !void { assert(self.mode == .one_shot); - var it = self.atoms.iterator(); - while (it.next()) |entry| { - const sect = self.getSection(entry.key_ptr.*); - var atom: *Atom = entry.value_ptr.*; + const gpa = self.base.allocator; + const slice = self.sections.slice(); - if (sect.flags == macho.S_ZEROFILL or sect.flags == macho.S_THREAD_LOCAL_ZEROFILL) continue; + for (slice.items(.last_atom)) |last_atom, sect_id| { + const header = slice.items(.header)[sect_id]; + var atom = last_atom.?; - var buffer = std.ArrayList(u8).init(self.base.allocator); + if (header.flags == macho.S_ZEROFILL or header.flags == macho.S_THREAD_LOCAL_ZEROFILL) continue; + + var buffer = std.ArrayList(u8).init(gpa); defer buffer.deinit(); - try buffer.ensureTotalCapacity(math.cast(usize, sect.size) orelse return error.Overflow); + try buffer.ensureTotalCapacity(math.cast(usize, header.size) orelse return error.Overflow); - log.debug("writing atoms in {s},{s}", .{ sect.segName(), sect.sectName() }); + log.debug("writing atoms in {s},{s}", .{ header.segName(), header.sectName() }); while (atom.prev) |prev| { atom = prev; @@ -2399,18 +2288,18 @@ fn writeAtomsOneShot(self: *MachO) !void { if (atom.next) |next| { atom = next; } else { - assert(buffer.items.len == sect.size); - log.debug(" (writing at 
file offset 0x{x})", .{sect.offset}); - try self.base.file.?.pwriteAll(buffer.items, sect.offset); + assert(buffer.items.len == header.size); + log.debug(" (writing at file offset 0x{x})", .{header.offset}); + try self.base.file.?.pwriteAll(buffer.items, header.offset); break; } } } } -fn writePadding(self: *MachO, match: MatchingSection, size: usize, writer: anytype) !void { - const is_code = match.seg == self.text_segment_cmd_index.? and match.sect == self.text_section_index.?; - const min_alignment: u3 = if (!is_code) +fn writePadding(self: *MachO, sect_id: u8, size: usize, writer: anytype) !void { + const header = self.sections.items(.header)[sect_id]; + const min_alignment: u3 = if (!header.isCode()) 1 else switch (self.base.options.target.cpu.arch) { .aarch64 => @sizeOf(u32), @@ -2421,7 +2310,7 @@ fn writePadding(self: *MachO, match: MatchingSection, size: usize, writer: anyty const len = @divExact(size, min_alignment); var i: usize = 0; while (i < len) : (i += 1) { - if (!is_code) { + if (!header.isCode()) { try writer.writeByte(0); } else switch (self.base.options.target.cpu.arch) { .aarch64 => { @@ -2439,20 +2328,20 @@ fn writePadding(self: *MachO, match: MatchingSection, size: usize, writer: anyty fn writeAtomsIncremental(self: *MachO) !void { assert(self.mode == .incremental); - var it = self.atoms.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - const sect = self.getSection(match); - var atom: *Atom = entry.value_ptr.*; + const slice = self.sections.slice(); + for (slice.items(.last_atom)) |last, i| { + var atom: *Atom = last orelse continue; + const sect_i = @intCast(u8, i); + const header = slice.items(.header)[sect_i]; // TODO handle zerofill in stage2 // if (sect.flags == macho.S_ZEROFILL or sect.flags == macho.S_THREAD_LOCAL_ZEROFILL) continue; - log.debug("writing atoms in {s},{s}", .{ sect.segName(), sect.sectName() }); + log.debug("writing atoms in {s},{s}", .{ header.segName(), header.sectName() }); while (true) { if 
(atom.dirty) { - try self.writeAtom(atom, match); + try self.writeAtom(atom, sect_i); atom.dirty = false; } @@ -2503,10 +2392,7 @@ pub fn createGotAtom(self: *MachO, target: SymbolWithLoc) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - try self.allocateAtomCommon(atom, .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.got_section_index.?, - }); + try self.allocateAtomCommon(atom, self.got_section_index.?); return atom; } @@ -2535,7 +2421,7 @@ pub fn createTlvPtrAtom(self: *MachO, target: SymbolWithLoc) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - const match = (try self.getMatchingSection(.{ + const match = (try self.getOutputSection(.{ .segname = makeStaticString("__DATA"), .sectname = makeStaticString("__thread_ptrs"), .flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS, @@ -2561,10 +2447,7 @@ fn createDyldPrivateAtom(self: *MachO) !void { const atom = try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3); self.dyld_private_atom = atom; - try self.allocateAtomCommon(atom, .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.data_section_index.?, - }); + try self.allocateAtomCommon(atom, self.data_section_index.?); try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); @@ -2692,10 +2575,7 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { } self.stub_helper_preamble_atom = atom; - try self.allocateAtomCommon(atom, .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stub_helper_section_index.?, - }); + try self.allocateAtomCommon(atom, self.stub_helper_section_index.?); try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); @@ -2771,10 +2651,7 @@ pub fn createStubHelperAtom(self: *MachO) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, 
sym_index, atom); - try self.allocateAtomCommon(atom, .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stub_helper_section_index.?, - }); + try self.allocateAtomCommon(atom, self.stub_helper_section_index.?); return atom; } @@ -2814,10 +2691,7 @@ pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, target: SymbolWi try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - try self.allocateAtomCommon(atom, .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.la_symbol_ptr_section_index.?, - }); + try self.allocateAtomCommon(atom, self.la_symbol_ptr_section_index.?); return atom; } @@ -2896,10 +2770,7 @@ pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - try self.allocateAtomCommon(atom, .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stubs_section_index.?, - }); + try self.allocateAtomCommon(atom, self.stubs_section_index.?); return atom; } @@ -2917,12 +2788,6 @@ fn createTentativeDefAtoms(self: *MachO) !void { // Convert any tentative definition into a regular symbol and allocate // text blocks for each tentative definition. 
- const match = MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.bss_section_index.?, - }; - _ = try self.section_ordinals.getOrPut(gpa, match); - const size = sym.n_value; const alignment = (sym.n_desc >> 8) & 0x0f; @@ -2937,7 +2802,7 @@ fn createTentativeDefAtoms(self: *MachO) !void { const atom = try MachO.createEmptyAtom(gpa, global.sym_index, size, alignment); atom.file = global.file; - try self.allocateAtomCommon(atom, match); + try self.allocateAtomCommon(atom, self.bss_section_index.?); if (global.file) |file| { const object = &self.objects.items[file]; @@ -3060,7 +2925,8 @@ fn resolveGlobalSymbol(self: *MachO, current: SymbolWithLoc) !void { gop.value_ptr.* = current; } -fn resolveSymbolsInObject(self: *MachO, object: *Object, object_id: u16) !void { +fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void { + const object = &self.objects.items[object_id]; log.debug("resolving symbols in '{s}'", .{object.name}); for (object.symtab.items) |sym, index| { @@ -3115,6 +2981,8 @@ fn resolveSymbolsInObject(self: *MachO, object: *Object, object_id: u16) !void { fn resolveSymbolsInArchives(self: *MachO) !void { if (self.archives.items.len == 0) return; + const gpa = self.base.allocator; + const cpu_arch = self.base.options.target.cpu.arch; var next_sym: usize = 0; loop: while (next_sym < self.unresolved.count()) { const global = self.globals.values()[self.unresolved.keys()[next_sym]]; @@ -3129,13 +2997,9 @@ fn resolveSymbolsInArchives(self: *MachO) !void { assert(offsets.items.len > 0); const object_id = @intCast(u16, self.objects.items.len); - const object = try self.objects.addOne(self.base.allocator); - object.* = try archive.parseObject( - self.base.allocator, - self.base.options.target.cpu.arch, - offsets.items[0], - ); - try self.resolveSymbolsInObject(object, object_id); + const object = try archive.parseObject(gpa, cpu_arch, offsets.items[0]); + try self.objects.append(gpa, object); + try self.resolveSymbolsInObject(object_id); 
continue :loop; } @@ -3159,7 +3023,6 @@ fn resolveSymbolsInDylibs(self: *MachO) !void { const dylib_id = @intCast(u16, id); if (!self.referenced_dylibs.contains(dylib_id)) { - try self.addLoadDylibLC(dylib_id); try self.referenced_dylibs.putNoClobber(self.base.allocator, dylib_id, {}); } @@ -3257,7 +3120,6 @@ fn resolveDyldStubBinder(self: *MachO) !void { const dylib_id = @intCast(u16, id); if (!self.referenced_dylibs.contains(dylib_id)) { - try self.addLoadDylibLC(dylib_id); try self.referenced_dylibs.putNoClobber(self.base.allocator, dylib_id, {}); } @@ -3280,47 +3142,192 @@ fn resolveDyldStubBinder(self: *MachO) !void { self.got_entries.items[got_index].sym_index = got_atom.sym_index; } -fn addLoadDylibLC(self: *MachO, id: u16) !void { - const dylib = self.dylibs.items[id]; - const dylib_id = dylib.id orelse unreachable; - var dylib_cmd = try macho.createLoadDylibCommand( - self.base.allocator, - if (dylib.weak) .LOAD_WEAK_DYLIB else .LOAD_DYLIB, - dylib_id.name, - dylib_id.timestamp, - dylib_id.current_version, - dylib_id.compatibility_version, - ); - errdefer dylib_cmd.deinit(self.base.allocator); - try self.load_commands.append(self.base.allocator, .{ .dylib = dylib_cmd }); - self.load_commands_dirty = true; -} - -fn addCodeSignatureLC(self: *MachO) !void { - if (self.code_signature_cmd_index != null or self.code_signature == null) return; - self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .linkedit_data = .{ - .cmd = .CODE_SIGNATURE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, - }, +fn writeDylinkerLC(ncmds: *u32, lc_writer: anytype) !void { + const name_len = mem.sliceTo(default_dyld_path, 0).len; + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.dylinker_command) + name_len, + @sizeOf(u64), + )); + try lc_writer.writeStruct(macho.dylinker_command{ + .cmd = .LOAD_DYLINKER, + .cmdsize = cmdsize, + .name = 
@sizeOf(macho.dylinker_command), }); - self.load_commands_dirty = true; + try lc_writer.writeAll(mem.sliceTo(default_dyld_path, 0)); + const padding = cmdsize - @sizeOf(macho.dylinker_command) - name_len; + if (padding > 0) { + try lc_writer.writeByteNTimes(0, padding); + } + ncmds.* += 1; } -fn setEntryPoint(self: *MachO) !void { +fn writeMainLC(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { if (self.base.options.output_mode != .Exe) return; - - const seg = self.load_commands.items[self.text_segment_cmd_index.?].segment; + const seg = self.segments.items[self.text_segment_cmd_index.?]; const global = try self.getEntryPoint(); const sym = self.getSymbol(global); - const ec = &self.load_commands.items[self.main_cmd_index.?].main; - ec.entryoff = @intCast(u32, sym.n_value - seg.inner.vmaddr); - ec.stacksize = self.base.options.stack_size_override orelse 0; - self.entry_addr = sym.n_value; - self.load_commands_dirty = true; + try lc_writer.writeStruct(macho.entry_point_command{ + .cmd = .MAIN, + .cmdsize = @sizeOf(macho.entry_point_command), + .entryoff = @intCast(u32, sym.n_value - seg.vmaddr), + .stacksize = self.base.options.stack_size_override orelse 0, + }); + ncmds.* += 1; +} + +const WriteDylibLCCtx = struct { + cmd: macho.LC, + name: []const u8, + timestamp: u32 = 2, + current_version: u32 = 0x10000, + compatibility_version: u32 = 0x10000, +}; + +fn writeDylibLC(ctx: WriteDylibLCCtx, ncmds: *u32, lc_writer: anytype) !void { + const name_len = ctx.name.len + 1; + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.dylib_command) + name_len, + @sizeOf(u64), + )); + try lc_writer.writeStruct(macho.dylib_command{ + .cmd = ctx.cmd, + .cmdsize = cmdsize, + .dylib = .{ + .name = @sizeOf(macho.dylib_command), + .timestamp = ctx.timestamp, + .current_version = ctx.current_version, + .compatibility_version = ctx.compatibility_version, + }, + }); + try lc_writer.writeAll(ctx.name); + try lc_writer.writeByte(0); + const padding = cmdsize 
- @sizeOf(macho.dylib_command) - name_len; + if (padding > 0) { + try lc_writer.writeByteNTimes(0, padding); + } + ncmds.* += 1; +} + +fn writeDylibIdLC(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { + if (self.base.options.output_mode != .Lib) return; + const install_name = self.base.options.install_name orelse self.base.options.emit.?.sub_path; + const curr = self.base.options.version orelse std.builtin.Version{ + .major = 1, + .minor = 0, + .patch = 0, + }; + const compat = self.base.options.compatibility_version orelse std.builtin.Version{ + .major = 1, + .minor = 0, + .patch = 0, + }; + try writeDylibLC(.{ + .cmd = .ID_DYLIB, + .name = install_name, + .current_version = curr.major << 16 | curr.minor << 8 | curr.patch, + .compatibility_version = compat.major << 16 | compat.minor << 8 | compat.patch, + }, ncmds, lc_writer); +} + +const RpathIterator = struct { + buffer: []const []const u8, + table: std.StringHashMap(void), + count: usize = 0, + + fn init(gpa: Allocator, rpaths: []const []const u8) RpathIterator { + return .{ .buffer = rpaths, .table = std.StringHashMap(void).init(gpa) }; + } + + fn deinit(it: *RpathIterator) void { + it.table.deinit(); + } + + fn next(it: *RpathIterator) !?[]const u8 { + while (true) { + if (it.count >= it.buffer.len) return null; + const rpath = it.buffer[it.count]; + it.count += 1; + const gop = try it.table.getOrPut(rpath); + if (gop.found_existing) continue; + return rpath; + } + } +}; + +fn writeRpathLCs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { + const gpa = self.base.allocator; + + var it = RpathIterator.init(gpa, self.base.options.rpath_list); + defer it.deinit(); + + while (try it.next()) |rpath| { + const rpath_len = rpath.len + 1; + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.rpath_command) + rpath_len, + @sizeOf(u64), + )); + try lc_writer.writeStruct(macho.rpath_command{ + .cmdsize = cmdsize, + .path = @sizeOf(macho.rpath_command), + }); + try 
lc_writer.writeAll(rpath); + try lc_writer.writeByte(0); + const padding = cmdsize - @sizeOf(macho.rpath_command) - rpath_len; + if (padding > 0) { + try lc_writer.writeByteNTimes(0, padding); + } + ncmds.* += 1; + } +} + +fn writeBuildVersionLC(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { + const cmdsize = @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); + const platform_version = blk: { + const ver = self.base.options.target.os.version_range.semver.min; + const platform_version = ver.major << 16 | ver.minor << 8; + break :blk platform_version; + }; + const sdk_version = if (self.base.options.native_darwin_sdk) |sdk| blk: { + const ver = sdk.version; + const sdk_version = ver.major << 16 | ver.minor << 8; + break :blk sdk_version; + } else platform_version; + const is_simulator_abi = self.base.options.target.abi == .simulator; + try lc_writer.writeStruct(macho.build_version_command{ + .cmdsize = cmdsize, + .platform = switch (self.base.options.target.os.tag) { + .macos => .MACOS, + .ios => if (is_simulator_abi) macho.PLATFORM.IOSSIMULATOR else macho.PLATFORM.IOS, + .watchos => if (is_simulator_abi) macho.PLATFORM.WATCHOSSIMULATOR else macho.PLATFORM.WATCHOS, + .tvos => if (is_simulator_abi) macho.PLATFORM.TVOSSIMULATOR else macho.PLATFORM.TVOS, + else => unreachable, + }, + .minos = platform_version, + .sdk = sdk_version, + .ntools = 1, + }); + try lc_writer.writeAll(mem.asBytes(&macho.build_tool_version{ + .tool = .LD, + .version = 0x0, + })); + ncmds.* += 1; +} + +fn writeLoadDylibLCs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { + for (self.referenced_dylibs.keys()) |id| { + const dylib = self.dylibs.items[id]; + const dylib_id = dylib.id orelse unreachable; + try writeDylibLC(.{ + .cmd = if (dylib.weak) .LOAD_WEAK_DYLIB else .LOAD_DYLIB, + .name = dylib_id.name, + .timestamp = dylib_id.timestamp, + .current_version = dylib_id.current_version, + .compatibility_version = dylib_id.compatibility_version, + }, ncmds, 
lc_writer); + } } pub fn deinit(self: *MachO) void { @@ -3334,7 +3341,6 @@ pub fn deinit(self: *MachO) void { d_sym.deinit(gpa); } - self.section_ordinals.deinit(gpa); self.tlv_ptr_entries.deinit(gpa); self.tlv_ptr_entries_free_list.deinit(gpa); self.tlv_ptr_entries_table.deinit(gpa); @@ -3371,24 +3377,19 @@ pub fn deinit(self: *MachO) void { self.dylibs_map.deinit(gpa); self.referenced_dylibs.deinit(gpa); - for (self.load_commands.items) |*lc| { - lc.deinit(gpa); + self.segments.deinit(gpa); + + for (self.sections.items(.free_list)) |*list| { + list.deinit(gpa); } - self.load_commands.deinit(gpa); + self.sections.deinit(gpa); for (self.managed_atoms.items) |atom| { atom.deinit(gpa); gpa.destroy(atom); } self.managed_atoms.deinit(gpa); - self.atoms.deinit(gpa); - { - var it = self.atom_free_lists.valueIterator(); - while (it.next()) |free_list| { - free_list.deinit(gpa); - } - self.atom_free_lists.deinit(gpa); - } + if (self.base.options.module) |mod| { for (self.decls.keys()) |decl_index| { const decl = mod.declPtr(decl_index); @@ -3408,34 +3409,24 @@ pub fn deinit(self: *MachO) void { } self.atom_by_index_table.deinit(gpa); - - if (self.code_signature) |*csig| { - csig.deinit(gpa); - } } pub fn closeFiles(self: MachO) void { - for (self.objects.items) |object| { - object.file.close(); - } for (self.archives.items) |archive| { archive.file.close(); } - for (self.dylibs.items) |dylib| { - dylib.file.close(); - } if (self.d_sym) |ds| { ds.file.close(); } } -fn freeAtom(self: *MachO, atom: *Atom, match: MatchingSection, owns_atom: bool) void { +fn freeAtom(self: *MachO, atom: *Atom, sect_id: u8, owns_atom: bool) void { log.debug("freeAtom {*}", .{atom}); if (!owns_atom) { atom.deinit(self.base.allocator); } - const free_list = self.atom_free_lists.getPtr(match).?; + const free_list = &self.sections.items(.free_list)[sect_id]; var already_have_free_list_node = false; { var i: usize = 0; @@ -3452,13 +3443,14 @@ fn freeAtom(self: *MachO, atom: *Atom, match: 
MatchingSection, owns_atom: bool) } } - if (self.atoms.getPtr(match)) |last_atom| { - if (last_atom.* == atom) { + const maybe_last_atom = &self.sections.items(.last_atom)[sect_id]; + if (maybe_last_atom.*) |last_atom| { + if (last_atom == atom) { if (atom.prev) |prev| { // TODO shrink the section size here - last_atom.* = prev; + maybe_last_atom.* = prev; } else { - _ = self.atoms.fetchRemove(match); + maybe_last_atom.* = null; } } } @@ -3486,21 +3478,21 @@ fn freeAtom(self: *MachO, atom: *Atom, match: MatchingSection, owns_atom: bool) } } -fn shrinkAtom(self: *MachO, atom: *Atom, new_block_size: u64, match: MatchingSection) void { +fn shrinkAtom(self: *MachO, atom: *Atom, new_block_size: u64, sect_id: u8) void { _ = self; _ = atom; _ = new_block_size; - _ = match; + _ = sect_id; // TODO check the new capacity, and if it crosses the size threshold into a big enough // capacity, insert a free list node for it. } -fn growAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, match: MatchingSection) !u64 { +fn growAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, sect_id: u8) !u64 { const sym = atom.getSymbol(self); const align_ok = mem.alignBackwardGeneric(u64, sym.n_value, alignment) == sym.n_value; const need_realloc = !align_ok or new_atom_size > atom.capacity(self); if (!need_realloc) return sym.n_value; - return self.allocateAtom(atom, new_atom_size, alignment, match); + return self.allocateAtom(atom, new_atom_size, alignment, sect_id); } fn allocateSymbol(self: *MachO) !u32 { @@ -3671,10 +3663,11 @@ pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liv } pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Module.Decl.Index) !u32 { - var code_buffer = std.ArrayList(u8).init(self.base.allocator); + const gpa = self.base.allocator; + + var code_buffer = std.ArrayList(u8).init(gpa); defer code_buffer.deinit(); - const gpa = self.base.allocator; const module = 
self.base.options.module.?; const gop = try self.unnamed_const_atoms.getOrPut(gpa, decl_index); if (!gop.found_existing) { @@ -3725,25 +3718,25 @@ pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Modu atom.code.clearRetainingCapacity(); try atom.code.appendSlice(gpa, code); - const match = try self.getMatchingSectionAtom( + const sect_id = try self.getOutputSectionAtom( atom, decl_name, typed_value.ty, typed_value.val, required_alignment, ); - const addr = try self.allocateAtom(atom, code.len, required_alignment, match); + const addr = try self.allocateAtom(atom, code.len, required_alignment, sect_id); log.debug("allocated atom for {?s} at 0x{x}", .{ name, addr }); log.debug(" (required alignment 0x{x})", .{required_alignment}); - errdefer self.freeAtom(atom, match, true); + errdefer self.freeAtom(atom, sect_id, true); const symbol = atom.getSymbolPtr(self); symbol.* = .{ .n_strx = name_str_index, .n_type = macho.N_SECT, - .n_sect = self.getSectionOrdinal(match), + .n_sect = sect_id + 1, .n_desc = 0, .n_value = addr, }; @@ -3894,44 +3887,35 @@ fn needsPointerRebase(ty: Type, val: Value, mod: *Module) bool { } } -fn getMatchingSectionAtom( +fn getOutputSectionAtom( self: *MachO, atom: *Atom, name: []const u8, ty: Type, val: Value, alignment: u32, -) !MatchingSection { +) !u8 { const code = atom.code.items; const mod = self.base.options.module.?; const align_log_2 = math.log2(alignment); const zig_ty = ty.zigTypeTag(); const mode = self.base.options.optimize_mode; - const match: MatchingSection = blk: { + const sect_id: u8 = blk: { // TODO finish and audit this function if (val.isUndefDeep()) { if (mode == .ReleaseFast or mode == .ReleaseSmall) { - break :blk MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.bss_section_index.?, - }; + break :blk self.bss_section_index.?; } else { - break :blk MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.data_section_index.?, - }; + break :blk 
self.data_section_index.?; } } if (val.castTag(.variable)) |_| { - break :blk MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.data_section_index.?, - }; + break :blk self.data_section_index.?; } if (needsPointerRebase(ty, val, mod)) { - break :blk (try self.getMatchingSection(.{ + break :blk (try self.getOutputSection(.{ .segname = makeStaticString("__DATA_CONST"), .sectname = makeStaticString("__const"), .size = code.len, @@ -3941,10 +3925,7 @@ fn getMatchingSectionAtom( switch (zig_ty) { .Fn => { - break :blk MatchingSection{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_section_index.?, - }; + break :blk self.text_section_index.?; }, .Array => { if (val.tag() == .bytes) { @@ -3953,7 +3934,7 @@ fn getMatchingSectionAtom( .const_slice_u8_sentinel_0, .manyptr_const_u8_sentinel_0, => { - break :blk (try self.getMatchingSection(.{ + break :blk (try self.getOutputSection(.{ .segname = makeStaticString("__TEXT"), .sectname = makeStaticString("__cstring"), .flags = macho.S_CSTRING_LITERALS, @@ -3967,22 +3948,21 @@ fn getMatchingSectionAtom( }, else => {}, } - break :blk (try self.getMatchingSection(.{ + break :blk (try self.getOutputSection(.{ .segname = makeStaticString("__TEXT"), .sectname = makeStaticString("__const"), .size = code.len, .@"align" = align_log_2, })).?; }; - const sect = self.getSection(match); - log.debug(" allocating atom '{s}' in '{s},{s}' ({d},{d})", .{ + const header = self.sections.items(.header)[sect_id]; + log.debug(" allocating atom '{s}' in '{s},{s}', ord({d})", .{ name, - sect.segName(), - sect.sectName(), - match.seg, - match.sect, + header.segName(), + header.sectName(), + sect_id, }); - return match; + return sect_id; } fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !u64 { @@ -3996,7 +3976,7 @@ fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !u64 const decl_ptr = self.decls.getPtr(decl_index).?; if (decl_ptr.* == null) { - decl_ptr.* = try 
self.getMatchingSectionAtom( + decl_ptr.* = try self.getOutputSectionAtom( &decl.link.macho, sym_name, decl.ty, @@ -4045,7 +4025,7 @@ fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !u64 symbol.* = .{ .n_strx = name_str_index, .n_type = macho.N_SECT, - .n_sect = self.getSectionOrdinal(match), + .n_sect = match + 1, .n_desc = 0, .n_value = addr, }; @@ -4134,10 +4114,7 @@ pub fn updateDeclExports( sym.* = .{ .n_strx = try self.strtab.insert(gpa, exp_name), .n_type = macho.N_SECT | macho.N_EXT, - .n_sect = self.getSectionOrdinal(.{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_section_index.?, // TODO what if we export a variable? - }), + .n_sect = self.text_section_index.? + 1, // TODO what if we export a variable? .n_desc = 0, .n_value = decl_sym.n_value, }; @@ -4208,10 +4185,7 @@ pub fn deleteExport(self: *MachO, exp: Export) void { fn freeUnnamedConsts(self: *MachO, decl_index: Module.Decl.Index) void { const unnamed_consts = self.unnamed_const_atoms.getPtr(decl_index) orelse return; for (unnamed_consts.items) |atom| { - self.freeAtom(atom, .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_const_section_index.?, - }, true); + self.freeAtom(atom, self.text_const_section_index.?, true); self.locals_free_list.append(self.base.allocator, atom.sym_index) catch {}; self.locals.items[atom.sym_index].n_type = 0; _ = self.atom_by_index_table.remove(atom.sym_index); @@ -4294,6 +4268,7 @@ pub fn getDeclVAddr(self: *MachO, decl_index: Module.Decl.Index, reloc_info: Fil } fn populateMissingMetadata(self: *MachO) !void { + const gpa = self.base.allocator; const cpu_arch = self.base.options.target.cpu.arch; const pagezero_vmsize = self.base.options.pagezero_size orelse default_pagezero_vmsize; const aligned_pagezero_vmsize = mem.alignBackwardGeneric(u64, pagezero_vmsize, self.page_size); @@ -4305,21 +4280,16 @@ fn populateMissingMetadata(self: *MachO) !void { log.warn("requested __PAGEZERO size (0x{x}) is not page aligned", 
.{pagezero_vmsize}); log.warn(" rounding down to 0x{x}", .{aligned_pagezero_vmsize}); } - self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .segment = .{ - .inner = .{ - .segname = makeStaticString("__PAGEZERO"), - .vmsize = aligned_pagezero_vmsize, - .cmdsize = @sizeOf(macho.segment_command_64), - }, - }, + self.pagezero_segment_cmd_index = @intCast(u8, self.segments.items.len); + try self.segments.append(gpa, .{ + .segname = makeStaticString("__PAGEZERO"), + .vmsize = aligned_pagezero_vmsize, + .cmdsize = @sizeOf(macho.segment_command_64), }); - self.load_commands_dirty = true; } if (self.text_segment_cmd_index == null) { - self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + self.text_segment_cmd_index = @intCast(u8, self.segments.items.len); const needed_size = if (self.mode == .incremental) blk: { const headerpad_size = @maximum(self.base.options.headerpad_size orelse 0, default_headerpad_size); const program_code_size_hint = self.base.options.program_code_size_hint; @@ -4329,20 +4299,15 @@ fn populateMissingMetadata(self: *MachO) !void { log.debug("found __TEXT segment free space 0x{x} to 0x{x}", .{ 0, needed_size }); break :blk needed_size; } else 0; - try self.load_commands.append(self.base.allocator, .{ - .segment = .{ - .inner = .{ - .segname = makeStaticString("__TEXT"), - .vmaddr = aligned_pagezero_vmsize, - .vmsize = needed_size, - .filesize = needed_size, - .maxprot = macho.PROT.READ | macho.PROT.EXEC, - .initprot = macho.PROT.READ | macho.PROT.EXEC, - .cmdsize = @sizeOf(macho.segment_command_64), - }, - }, + try self.segments.append(gpa, .{ + .segname = makeStaticString("__TEXT"), + .vmaddr = aligned_pagezero_vmsize, + .vmsize = needed_size, + .filesize = needed_size, + .maxprot = macho.PROT.READ | macho.PROT.EXEC, + .initprot = macho.PROT.READ | macho.PROT.EXEC, + .cmdsize = @sizeOf(macho.segment_command_64), }); - self.load_commands_dirty 
= true; } if (self.text_section_index == null) { @@ -4419,7 +4384,7 @@ fn populateMissingMetadata(self: *MachO) !void { } if (self.data_const_segment_cmd_index == null) { - self.data_const_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + self.data_const_segment_cmd_index = @intCast(u8, self.segments.items.len); var vmaddr: u64 = 0; var fileoff: u64 = 0; var needed_size: u64 = 0; @@ -4434,21 +4399,16 @@ fn populateMissingMetadata(self: *MachO) !void { fileoff + needed_size, }); } - try self.load_commands.append(self.base.allocator, .{ - .segment = .{ - .inner = .{ - .segname = makeStaticString("__DATA_CONST"), - .vmaddr = vmaddr, - .vmsize = needed_size, - .fileoff = fileoff, - .filesize = needed_size, - .maxprot = macho.PROT.READ | macho.PROT.WRITE, - .initprot = macho.PROT.READ | macho.PROT.WRITE, - .cmdsize = @sizeOf(macho.segment_command_64), - }, - }, + try self.segments.append(gpa, .{ + .segname = makeStaticString("__DATA_CONST"), + .vmaddr = vmaddr, + .vmsize = needed_size, + .fileoff = fileoff, + .filesize = needed_size, + .maxprot = macho.PROT.READ | macho.PROT.WRITE, + .initprot = macho.PROT.READ | macho.PROT.WRITE, + .cmdsize = @sizeOf(macho.segment_command_64), }); - self.load_commands_dirty = true; } if (self.got_section_index == null) { @@ -4469,7 +4429,7 @@ fn populateMissingMetadata(self: *MachO) !void { } if (self.data_segment_cmd_index == null) { - self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + self.data_segment_cmd_index = @intCast(u8, self.segments.items.len); var vmaddr: u64 = 0; var fileoff: u64 = 0; var needed_size: u64 = 0; @@ -4484,21 +4444,16 @@ fn populateMissingMetadata(self: *MachO) !void { fileoff + needed_size, }); } - try self.load_commands.append(self.base.allocator, .{ - .segment = .{ - .inner = .{ - .segname = makeStaticString("__DATA"), - .vmaddr = vmaddr, - .vmsize = needed_size, - .fileoff = fileoff, - .filesize = needed_size, - .maxprot = macho.PROT.READ | macho.PROT.WRITE, - 
.initprot = macho.PROT.READ | macho.PROT.WRITE, - .cmdsize = @sizeOf(macho.segment_command_64), - }, - }, + try self.segments.append(gpa, .{ + .segname = makeStaticString("__DATA"), + .vmaddr = vmaddr, + .vmsize = needed_size, + .fileoff = fileoff, + .filesize = needed_size, + .maxprot = macho.PROT.READ | macho.PROT.WRITE, + .initprot = macho.PROT.READ | macho.PROT.WRITE, + .cmdsize = @sizeOf(macho.segment_command_64), }); - self.load_commands_dirty = true; } if (self.la_symbol_ptr_section_index == null) { @@ -4602,7 +4557,7 @@ fn populateMissingMetadata(self: *MachO) !void { } if (self.linkedit_segment_cmd_index == null) { - self.linkedit_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + self.linkedit_segment_cmd_index = @intCast(u8, self.segments.items.len); var vmaddr: u64 = 0; var fileoff: u64 = 0; if (self.mode == .incremental) { @@ -4611,249 +4566,113 @@ fn populateMissingMetadata(self: *MachO) !void { fileoff = base.fileoff; log.debug("found __LINKEDIT segment free space at 0x{x}", .{fileoff}); } - try self.load_commands.append(self.base.allocator, .{ - .segment = .{ - .inner = .{ - .segname = makeStaticString("__LINKEDIT"), - .vmaddr = vmaddr, - .fileoff = fileoff, - .maxprot = macho.PROT.READ, - .initprot = macho.PROT.READ, - .cmdsize = @sizeOf(macho.segment_command_64), - }, - }, + try self.segments.append(gpa, .{ + .segname = makeStaticString("__LINKEDIT"), + .vmaddr = vmaddr, + .fileoff = fileoff, + .maxprot = macho.PROT.READ, + .initprot = macho.PROT.READ, + .cmdsize = @sizeOf(macho.segment_command_64), }); - self.load_commands_dirty = true; - } - - if (self.dyld_info_cmd_index == null) { - self.dyld_info_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .dyld_info_only = .{ - .cmd = .DYLD_INFO_ONLY, - .cmdsize = @sizeOf(macho.dyld_info_command), - .rebase_off = 0, - .rebase_size = 0, - .bind_off = 0, - .bind_size = 0, - .weak_bind_off = 0, - .weak_bind_size = 0, - 
.lazy_bind_off = 0, - .lazy_bind_size = 0, - .export_off = 0, - .export_size = 0, - }, - }); - self.load_commands_dirty = true; - } - - if (self.symtab_cmd_index == null) { - self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .symtab = .{ - .cmdsize = @sizeOf(macho.symtab_command), - .symoff = 0, - .nsyms = 0, - .stroff = 0, - .strsize = 0, - }, - }); - self.load_commands_dirty = true; - } - - if (self.dysymtab_cmd_index == null) { - self.dysymtab_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .dysymtab = .{ - .cmdsize = @sizeOf(macho.dysymtab_command), - .ilocalsym = 0, - .nlocalsym = 0, - .iextdefsym = 0, - .nextdefsym = 0, - .iundefsym = 0, - .nundefsym = 0, - .tocoff = 0, - .ntoc = 0, - .modtaboff = 0, - .nmodtab = 0, - .extrefsymoff = 0, - .nextrefsyms = 0, - .indirectsymoff = 0, - .nindirectsyms = 0, - .extreloff = 0, - .nextrel = 0, - .locreloff = 0, - .nlocrel = 0, - }, - }); - self.load_commands_dirty = true; - } - - if (self.dylinker_cmd_index == null) { - self.dylinker_cmd_index = @intCast(u16, self.load_commands.items.len); - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.dylinker_command) + mem.sliceTo(default_dyld_path, 0).len, - @sizeOf(u64), - )); - var dylinker_cmd = macho.emptyGenericCommandWithData(macho.dylinker_command{ - .cmd = .LOAD_DYLINKER, - .cmdsize = cmdsize, - .name = @sizeOf(macho.dylinker_command), - }); - dylinker_cmd.data = try self.base.allocator.alloc(u8, cmdsize - dylinker_cmd.inner.name); - mem.set(u8, dylinker_cmd.data, 0); - mem.copy(u8, dylinker_cmd.data, mem.sliceTo(default_dyld_path, 0)); - try self.load_commands.append(self.base.allocator, .{ .dylinker = dylinker_cmd }); - self.load_commands_dirty = true; - } - - if (self.main_cmd_index == null and self.base.options.output_mode == .Exe) { - self.main_cmd_index = @intCast(u16, 
self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .main = .{ - .cmdsize = @sizeOf(macho.entry_point_command), - .entryoff = 0x0, - .stacksize = 0, - }, - }); - self.load_commands_dirty = true; - } - - if (self.dylib_id_cmd_index == null and self.base.options.output_mode == .Lib) { - self.dylib_id_cmd_index = @intCast(u16, self.load_commands.items.len); - const install_name = self.base.options.install_name orelse self.base.options.emit.?.sub_path; - const current_version = self.base.options.version orelse - std.builtin.Version{ .major = 1, .minor = 0, .patch = 0 }; - const compat_version = self.base.options.compatibility_version orelse - std.builtin.Version{ .major = 1, .minor = 0, .patch = 0 }; - var dylib_cmd = try macho.createLoadDylibCommand( - self.base.allocator, - .ID_DYLIB, - install_name, - 2, - current_version.major << 16 | current_version.minor << 8 | current_version.patch, - compat_version.major << 16 | compat_version.minor << 8 | compat_version.patch, - ); - errdefer dylib_cmd.deinit(self.base.allocator); - try self.load_commands.append(self.base.allocator, .{ .dylib = dylib_cmd }); - self.load_commands_dirty = true; - } - - if (self.source_version_cmd_index == null) { - self.source_version_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .source_version = .{ - .cmdsize = @sizeOf(macho.source_version_command), - .version = 0x0, - }, - }); - self.load_commands_dirty = true; - } - - if (self.build_version_cmd_index == null) { - self.build_version_cmd_index = @intCast(u16, self.load_commands.items.len); - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version), - @sizeOf(u64), - )); - const platform_version = blk: { - const ver = self.base.options.target.os.version_range.semver.min; - const platform_version = ver.major << 16 | ver.minor << 8; - break :blk platform_version; 
- }; - const sdk_version = if (self.base.options.native_darwin_sdk) |sdk| blk: { - const ver = sdk.version; - const sdk_version = ver.major << 16 | ver.minor << 8; - break :blk sdk_version; - } else platform_version; - const is_simulator_abi = self.base.options.target.abi == .simulator; - var cmd = macho.emptyGenericCommandWithData(macho.build_version_command{ - .cmdsize = cmdsize, - .platform = switch (self.base.options.target.os.tag) { - .macos => .MACOS, - .ios => if (is_simulator_abi) macho.PLATFORM.IOSSIMULATOR else macho.PLATFORM.IOS, - .watchos => if (is_simulator_abi) macho.PLATFORM.WATCHOSSIMULATOR else macho.PLATFORM.WATCHOS, - .tvos => if (is_simulator_abi) macho.PLATFORM.TVOSSIMULATOR else macho.PLATFORM.TVOS, - else => unreachable, - }, - .minos = platform_version, - .sdk = sdk_version, - .ntools = 1, - }); - const ld_ver = macho.build_tool_version{ - .tool = .LD, - .version = 0x0, - }; - cmd.data = try self.base.allocator.alloc(u8, cmdsize - @sizeOf(macho.build_version_command)); - mem.set(u8, cmd.data, 0); - mem.copy(u8, cmd.data, mem.asBytes(&ld_ver)); - try self.load_commands.append(self.base.allocator, .{ .build_version = cmd }); - self.load_commands_dirty = true; - } - - if (self.uuid_cmd_index == null) { - self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len); - var uuid_cmd: macho.uuid_command = .{ - .cmdsize = @sizeOf(macho.uuid_command), - .uuid = undefined, - }; - std.crypto.random.bytes(&uuid_cmd.uuid); - try self.load_commands.append(self.base.allocator, .{ .uuid = uuid_cmd }); - self.load_commands_dirty = true; - } - - if (self.function_starts_cmd_index == null) { - self.function_starts_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .linkedit_data = .{ - .cmd = .FUNCTION_STARTS, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, - }, - }); - self.load_commands_dirty = true; - } - - if (self.data_in_code_cmd_index == null) { - 
self.data_in_code_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .linkedit_data = .{ - .cmd = .DATA_IN_CODE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, - }, - }); - self.load_commands_dirty = true; } } -fn calcMinHeaderpad(self: *MachO) u64 { - var sizeofcmds: u32 = 0; - for (self.load_commands.items) |lc| { - if (lc.cmd() == .NONE) continue; - sizeofcmds += lc.cmdsize(); +inline fn calcInstallNameLen(cmd_size: u64, name: []const u8, assume_max_path_len: bool) u64 { + const name_len = if (assume_max_path_len) std.os.PATH_MAX else std.mem.len(name) + 1; + return mem.alignForwardGeneric(u64, cmd_size + name_len, @alignOf(u64)); +} + +fn calcLCsSize(self: *MachO, assume_max_path_len: bool) !u32 { + const gpa = self.base.allocator; + var sizeofcmds: u64 = 0; + for (self.segments.items) |seg| { + sizeofcmds += seg.nsects * @sizeOf(macho.section_64) + @sizeOf(macho.segment_command_64); } - var padding: u32 = sizeofcmds + (self.base.options.headerpad_size orelse 0); + // LC_DYLD_INFO_ONLY + sizeofcmds += @sizeOf(macho.dyld_info_command); + // LC_FUNCTION_STARTS + if (self.text_section_index != null) { + sizeofcmds += @sizeOf(macho.linkedit_data_command); + } + // LC_DATA_IN_CODE + sizeofcmds += @sizeOf(macho.linkedit_data_command); + // LC_SYMTAB + sizeofcmds += @sizeOf(macho.symtab_command); + // LC_DYSYMTAB + sizeofcmds += @sizeOf(macho.dysymtab_command); + // LC_LOAD_DYLINKER + sizeofcmds += calcInstallNameLen( + @sizeOf(macho.dylinker_command), + mem.sliceTo(default_dyld_path, 0), + false, + ); + // LC_MAIN + if (self.base.options.output_mode == .Exe) { + sizeofcmds += @sizeOf(macho.entry_point_command); + } + // LC_ID_DYLIB + if (self.base.options.output_mode == .Lib) { + sizeofcmds += blk: { + const install_name = self.base.options.install_name orelse self.base.options.emit.?.sub_path; + break :blk calcInstallNameLen( + @sizeOf(macho.dylib_command), + 
install_name, + assume_max_path_len, + ); + }; + } + // LC_RPATH + { + var it = RpathIterator.init(gpa, self.base.options.rpath_list); + defer it.deinit(); + while (try it.next()) |rpath| { + sizeofcmds += calcInstallNameLen( + @sizeOf(macho.rpath_command), + rpath, + assume_max_path_len, + ); + } + } + // LC_SOURCE_VERSION + sizeofcmds += @sizeOf(macho.source_version_command); + // LC_BUILD_VERSION + sizeofcmds += @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); + // LC_UUID + sizeofcmds += @sizeOf(macho.uuid_command); + // LC_LOAD_DYLIB + for (self.referenced_dylibs.keys()) |id| { + const dylib = self.dylibs.items[id]; + const dylib_id = dylib.id orelse unreachable; + sizeofcmds += calcInstallNameLen( + @sizeOf(macho.dylib_command), + dylib_id.name, + assume_max_path_len, + ); + } + // LC_CODE_SIGNATURE + { + const target = self.base.options.target; + const requires_codesig = blk: { + if (self.base.options.entitlements) |_| break :blk true; + if (target.cpu.arch == .aarch64 and (target.os.tag == .macos or target.abi == .simulator)) + break :blk true; + break :blk false; + }; + if (requires_codesig) { + sizeofcmds += @sizeOf(macho.linkedit_data_command); + } + } + + return @intCast(u32, sizeofcmds); +} + +fn calcMinHeaderPad(self: *MachO) !u64 { + var padding: u32 = (try self.calcLCsSize(false)) + (self.base.options.headerpad_size orelse 0); log.debug("minimum requested headerpad size 0x{x}", .{padding + @sizeOf(macho.mach_header_64)}); if (self.base.options.headerpad_max_install_names) { - var min_headerpad_size: u32 = 0; - for (self.load_commands.items) |lc| switch (lc.cmd()) { - .ID_DYLIB, - .LOAD_WEAK_DYLIB, - .LOAD_DYLIB, - .REEXPORT_DYLIB, - => { - min_headerpad_size += @sizeOf(macho.dylib_command) + std.os.PATH_MAX + 1; - }, - - else => {}, - }; + var min_headerpad_size: u32 = try self.calcLCsSize(true); log.debug("headerpad_max_install_names minimum headerpad size 0x{x}", .{ min_headerpad_size + @sizeOf(macho.mach_header_64), }); 
@@ -4868,32 +4687,31 @@ fn calcMinHeaderpad(self: *MachO) u64 { fn allocateSegments(self: *MachO) !void { try self.allocateSegment(self.text_segment_cmd_index, &.{ self.pagezero_segment_cmd_index, - }, self.calcMinHeaderpad()); + }, try self.calcMinHeaderPad()); if (self.text_segment_cmd_index) |index| blk: { - const seg = &self.load_commands.items[index].segment; - if (seg.sections.items.len == 0) break :blk; + const seg = &self.segments.items[index]; + if (seg.nsects == 0) break :blk; // Shift all sections to the back to minimize jump size between __TEXT and __DATA segments. var min_alignment: u32 = 0; - for (seg.sections.items) |sect| { - const alignment = try math.powi(u32, 2, sect.@"align"); + for (self.sections.items(.header)[0..seg.nsects]) |header| { + const alignment = try math.powi(u32, 2, header.@"align"); min_alignment = math.max(min_alignment, alignment); } assert(min_alignment > 0); - const last_sect_idx = seg.sections.items.len - 1; - const last_sect = seg.sections.items[last_sect_idx]; + const last_header = self.sections.items(.header)[seg.nsects - 1]; const shift: u32 = shift: { - const diff = seg.inner.filesize - last_sect.offset - last_sect.size; + const diff = seg.filesize - last_header.offset - last_header.size; const factor = @divTrunc(diff, min_alignment); break :shift @intCast(u32, factor * min_alignment); }; if (shift > 0) { - for (seg.sections.items) |*sect| { - sect.offset += shift; - sect.addr += shift; + for (self.sections.items(.header)[0..seg.nsects]) |*header| { + header.offset += shift; + header.addr += shift; } } } @@ -4917,42 +4735,42 @@ fn allocateSegments(self: *MachO) !void { }, 0); } -fn allocateSegment(self: *MachO, maybe_index: ?u16, indices: []const ?u16, init_size: u64) !void { +fn allocateSegment(self: *MachO, maybe_index: ?u8, indices: []const ?u8, init_size: u64) !void { const index = maybe_index orelse return; - const seg = &self.load_commands.items[index].segment; + const seg = &self.segments.items[index]; const base 
= self.getSegmentAllocBase(indices); - seg.inner.vmaddr = base.vmaddr; - seg.inner.fileoff = base.fileoff; - seg.inner.filesize = init_size; - seg.inner.vmsize = init_size; + seg.vmaddr = base.vmaddr; + seg.fileoff = base.fileoff; + seg.filesize = init_size; + seg.vmsize = init_size; // Allocate the sections according to their alignment at the beginning of the segment. var start = init_size; - for (seg.sections.items) |*sect| { - const is_zerofill = sect.flags == macho.S_ZEROFILL or sect.flags == macho.S_THREAD_LOCAL_ZEROFILL; - const use_llvm = build_options.have_llvm and self.base.options.use_llvm; - const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1; - const alignment = try math.powi(u32, 2, sect.@"align"); + const slice = self.sections.slice(); + for (slice.items(.header)) |*header, sect_id| { + const segment_index = slice.items(.segment_index)[sect_id]; + if (segment_index != index) continue; + const is_zerofill = header.flags == macho.S_ZEROFILL or header.flags == macho.S_THREAD_LOCAL_ZEROFILL; + const alignment = try math.powi(u32, 2, header.@"align"); const start_aligned = mem.alignForwardGeneric(u64, start, alignment); - // TODO handle zerofill sections in stage2 - sect.offset = if (is_zerofill and (use_stage1 or use_llvm)) + header.offset = if (is_zerofill) 0 else - @intCast(u32, seg.inner.fileoff + start_aligned); - sect.addr = seg.inner.vmaddr + start_aligned; + @intCast(u32, seg.fileoff + start_aligned); + header.addr = seg.vmaddr + start_aligned; - start = start_aligned + sect.size; + start = start_aligned + header.size; - if (!(is_zerofill and (use_stage1 or use_llvm))) { - seg.inner.filesize = start; + if (!is_zerofill) { + seg.filesize = start; } - seg.inner.vmsize = start; + seg.vmsize = start; } - seg.inner.filesize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size); - seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.vmsize, self.page_size); + seg.filesize = mem.alignForwardGeneric(u64, 
seg.filesize, self.page_size); + seg.vmsize = mem.alignForwardGeneric(u64, seg.vmsize, self.page_size); } const InitSectionOpts = struct { @@ -4963,16 +4781,16 @@ const InitSectionOpts = struct { fn initSection( self: *MachO, - segment_id: u16, + segment_id: u8, sectname: []const u8, size: u64, alignment: u32, opts: InitSectionOpts, -) !u16 { - const seg = &self.load_commands.items[segment_id].segment; - var sect = macho.section_64{ +) !u8 { + const seg = &self.segments.items[segment_id]; + var header = macho.section_64{ .sectname = makeStaticString(sectname), - .segname = seg.inner.segname, + .segname = seg.segname, .size = if (self.mode == .incremental) @intCast(u32, size) else 0, .@"align" = alignment, .flags = opts.flags, @@ -4982,165 +4800,157 @@ fn initSection( if (self.mode == .incremental) { const alignment_pow_2 = try math.powi(u32, 2, alignment); - const padding: ?u32 = if (segment_id == self.text_segment_cmd_index.?) - @maximum(self.base.options.headerpad_size orelse 0, default_headerpad_size) + const padding: ?u64 = if (segment_id == self.text_segment_cmd_index.?) 
+ try self.calcMinHeaderPad() else null; const off = self.findFreeSpace(segment_id, alignment_pow_2, padding); log.debug("allocating {s},{s} section from 0x{x} to 0x{x}", .{ - sect.segName(), - sect.sectName(), + header.segName(), + header.sectName(), off, off + size, }); - sect.addr = seg.inner.vmaddr + off - seg.inner.fileoff; - - const is_zerofill = opts.flags == macho.S_ZEROFILL or opts.flags == macho.S_THREAD_LOCAL_ZEROFILL; - const use_llvm = build_options.have_llvm and self.base.options.use_llvm; - const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1; + header.addr = seg.vmaddr + off - seg.fileoff; // TODO handle zerofill in stage2 - if (!(is_zerofill and (use_stage1 or use_llvm))) { - sect.offset = @intCast(u32, off); - } + // const is_zerofill = opts.flags == macho.S_ZEROFILL or opts.flags == macho.S_THREAD_LOCAL_ZEROFILL; + header.offset = @intCast(u32, off); + + try self.updateSectionOrdinals(); } - const index = @intCast(u16, seg.sections.items.len); - try seg.sections.append(self.base.allocator, sect); - seg.inner.cmdsize += @sizeOf(macho.section_64); - seg.inner.nsects += 1; - - const match = MatchingSection{ - .seg = segment_id, - .sect = index, - }; - _ = try self.section_ordinals.getOrPut(self.base.allocator, match); - try self.atom_free_lists.putNoClobber(self.base.allocator, match, .{}); - - self.load_commands_dirty = true; - self.sections_order_dirty = true; + const index = @intCast(u8, self.sections.slice().len); + try self.sections.append(self.base.allocator, .{ + .segment_index = segment_id, + .header = header, + }); + seg.cmdsize += @sizeOf(macho.section_64); + seg.nsects += 1; return index; } -fn findFreeSpace(self: MachO, segment_id: u16, alignment: u64, start: ?u32) u64 { - const seg = self.load_commands.items[segment_id].segment; - if (seg.sections.items.len == 0) { - return if (start) |v| v else seg.inner.fileoff; +fn findFreeSpace(self: MachO, segment_id: u8, alignment: u64, start: ?u64) u64 { + const seg = 
self.segments.items[segment_id]; + const indexes = self.getSectionIndexes(segment_id); + if (indexes.end - indexes.start == 0) { + return if (start) |v| v else seg.fileoff; } - const last_sect = seg.sections.items[seg.sections.items.len - 1]; + const last_sect = self.sections.items(.header)[indexes.end - 1]; const final_off = last_sect.offset + padToIdeal(last_sect.size); return mem.alignForwardGeneric(u64, final_off, alignment); } -fn growSegment(self: *MachO, seg_id: u16, new_size: u64) !void { - const seg = &self.load_commands.items[seg_id].segment; - const new_seg_size = mem.alignForwardGeneric(u64, new_size, self.page_size); - assert(new_seg_size > seg.inner.filesize); - const offset_amt = new_seg_size - seg.inner.filesize; +fn growSegment(self: *MachO, segment_index: u8, new_size: u64) !void { + const segment = &self.segments.items[segment_index]; + const new_segment_size = mem.alignForwardGeneric(u64, new_size, self.page_size); + assert(new_segment_size > segment.filesize); + const offset_amt = new_segment_size - segment.filesize; log.debug("growing segment {s} from 0x{x} to 0x{x}", .{ - seg.inner.segname, - seg.inner.filesize, - new_seg_size, + segment.segname, + segment.filesize, + new_segment_size, }); - seg.inner.filesize = new_seg_size; - seg.inner.vmsize = new_seg_size; + segment.filesize = new_segment_size; + segment.vmsize = new_segment_size; log.debug(" (new segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ - seg.inner.fileoff, - seg.inner.fileoff + seg.inner.filesize, - seg.inner.vmaddr, - seg.inner.vmaddr + seg.inner.vmsize, + segment.fileoff, + segment.fileoff + segment.filesize, + segment.vmaddr, + segment.vmaddr + segment.vmsize, }); - var next: usize = seg_id + 1; + var next: u8 = segment_index + 1; while (next < self.linkedit_segment_cmd_index.? 
+ 1) : (next += 1) { - const next_seg = &self.load_commands.items[next].segment; + const next_segment = &self.segments.items[next]; try MachO.copyRangeAllOverlappingAlloc( self.base.allocator, self.base.file.?, - next_seg.inner.fileoff, - next_seg.inner.fileoff + offset_amt, - math.cast(usize, next_seg.inner.filesize) orelse return error.Overflow, + next_segment.fileoff, + next_segment.fileoff + offset_amt, + math.cast(usize, next_segment.filesize) orelse return error.Overflow, ); - next_seg.inner.fileoff += offset_amt; - next_seg.inner.vmaddr += offset_amt; + next_segment.fileoff += offset_amt; + next_segment.vmaddr += offset_amt; log.debug(" (new {s} segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ - next_seg.inner.segname, - next_seg.inner.fileoff, - next_seg.inner.fileoff + next_seg.inner.filesize, - next_seg.inner.vmaddr, - next_seg.inner.vmaddr + next_seg.inner.vmsize, + next_segment.segname, + next_segment.fileoff, + next_segment.fileoff + next_segment.filesize, + next_segment.vmaddr, + next_segment.vmaddr + next_segment.vmsize, }); - for (next_seg.sections.items) |*moved_sect, moved_sect_id| { - moved_sect.offset += @intCast(u32, offset_amt); - moved_sect.addr += offset_amt; + const indexes = self.getSectionIndexes(next); + for (self.sections.items(.header)[indexes.start..indexes.end]) |*header, i| { + header.offset += @intCast(u32, offset_amt); + header.addr += offset_amt; log.debug(" (new {s},{s} file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ - moved_sect.segName(), - moved_sect.sectName(), - moved_sect.offset, - moved_sect.offset + moved_sect.size, - moved_sect.addr, - moved_sect.addr + moved_sect.size, + header.segName(), + header.sectName(), + header.offset, + header.offset + header.size, + header.addr, + header.addr + header.size, }); - try self.shiftLocalsByOffset(.{ - .seg = @intCast(u16, next), - .sect = @intCast(u16, moved_sect_id), - }, @intCast(i64, offset_amt)); + try 
self.shiftLocalsByOffset(@intCast(u8, i + indexes.start), @intCast(i64, offset_amt)); } } } -fn growSection(self: *MachO, match: MatchingSection, new_size: u32) !void { +fn growSection(self: *MachO, sect_id: u8, new_size: u32) !void { const tracy = trace(@src()); defer tracy.end(); - const seg = &self.load_commands.items[match.seg].segment; - const sect = &seg.sections.items[match.sect]; + const section = self.sections.get(sect_id); + const segment_index = section.segment_index; + const header = section.header; + const segment = self.segments.items[segment_index]; - const alignment = try math.powi(u32, 2, sect.@"align"); - const max_size = self.allocatedSize(match.seg, sect.offset); + const alignment = try math.powi(u32, 2, header.@"align"); + const max_size = self.allocatedSize(segment_index, header.offset); const ideal_size = padToIdeal(new_size); const needed_size = mem.alignForwardGeneric(u32, ideal_size, alignment); if (needed_size > max_size) blk: { log.debug(" (need to grow! needed 0x{x}, max 0x{x})", .{ needed_size, max_size }); - if (match.sect == seg.sections.items.len - 1) { + const indexes = self.getSectionIndexes(segment_index); + if (sect_id == indexes.end - 1) { // Last section, just grow segments - try self.growSegment(match.seg, seg.inner.filesize + needed_size - max_size); + try self.growSegment(segment_index, segment.filesize + needed_size - max_size); break :blk; } // Need to move all sections below in file and address spaces. const offset_amt = offset: { - const max_alignment = try self.getSectionMaxAlignment(match.seg, match.sect + 1); + const max_alignment = try self.getSectionMaxAlignment(sect_id + 1, indexes.end); break :offset mem.alignForwardGeneric(u64, needed_size - max_size, max_alignment); }; // Before we commit to this, check if the segment needs to grow too. // We assume that each section header is growing linearly with the increasing // file offset / virtual memory address space. 
- const last_sect = seg.sections.items[seg.sections.items.len - 1]; - const last_sect_off = last_sect.offset + last_sect.size; - const seg_off = seg.inner.fileoff + seg.inner.filesize; + const last_sect_header = self.sections.items(.header)[indexes.end - 1]; + const last_sect_off = last_sect_header.offset + last_sect_header.size; + const seg_off = segment.fileoff + segment.filesize; if (last_sect_off + offset_amt > seg_off) { // Need to grow segment first. const spill_size = (last_sect_off + offset_amt) - seg_off; - try self.growSegment(match.seg, seg.inner.filesize + spill_size); + try self.growSegment(segment_index, segment.filesize + spill_size); } // We have enough space to expand within the segment, so move all sections by // the required amount and update their header offsets. - const next_sect = seg.sections.items[match.sect + 1]; + const next_sect = self.sections.items(.header)[sect_id + 1]; const total_size = last_sect_off - next_sect.offset; try MachO.copyRangeAllOverlappingAlloc( @@ -5151,9 +4961,7 @@ fn growSection(self: *MachO, match: MatchingSection, new_size: u32) !void { math.cast(usize, total_size) orelse return error.Overflow, ); - var next = match.sect + 1; - while (next < seg.sections.items.len) : (next += 1) { - const moved_sect = &seg.sections.items[next]; + for (self.sections.items(.header)[sect_id + 1 .. 
indexes.end]) |*moved_sect, i| { moved_sect.offset += @intCast(u32, offset_amt); moved_sect.addr += offset_amt; @@ -5166,49 +4974,45 @@ fn growSection(self: *MachO, match: MatchingSection, new_size: u32) !void { moved_sect.addr + moved_sect.size, }); - try self.shiftLocalsByOffset(.{ - .seg = match.seg, - .sect = next, - }, @intCast(i64, offset_amt)); + try self.shiftLocalsByOffset(@intCast(u8, sect_id + 1 + i), @intCast(i64, offset_amt)); } } } -fn allocatedSize(self: MachO, segment_id: u16, start: u64) u64 { - const seg = self.load_commands.items[segment_id].segment; - assert(start >= seg.inner.fileoff); - var min_pos: u64 = seg.inner.fileoff + seg.inner.filesize; +fn allocatedSize(self: MachO, segment_id: u8, start: u64) u64 { + const segment = self.segments.items[segment_id]; + const indexes = self.getSectionIndexes(segment_id); + assert(start >= segment.fileoff); + var min_pos: u64 = segment.fileoff + segment.filesize; if (start > min_pos) return 0; - for (seg.sections.items) |section| { - if (section.offset <= start) continue; - if (section.offset < min_pos) min_pos = section.offset; + for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { + if (header.offset <= start) continue; + if (header.offset < min_pos) min_pos = header.offset; } return min_pos - start; } -fn getSectionMaxAlignment(self: *MachO, segment_id: u16, start_sect_id: u16) !u32 { - const seg = self.load_commands.items[segment_id].segment; +fn getSectionMaxAlignment(self: *MachO, start: u8, end: u8) !u32 { var max_alignment: u32 = 1; - var next = start_sect_id; - while (next < seg.sections.items.len) : (next += 1) { - const sect = seg.sections.items[next]; - const alignment = try math.powi(u32, 2, sect.@"align"); + const slice = self.sections.slice(); + for (slice.items(.header)[start..end]) |header| { + const alignment = try math.powi(u32, 2, header.@"align"); max_alignment = math.max(max_alignment, alignment); } return max_alignment; } -fn allocateAtomCommon(self: *MachO, 
atom: *Atom, match: MatchingSection) !void { +fn allocateAtomCommon(self: *MachO, atom: *Atom, sect_id: u8) !void { const sym = atom.getSymbolPtr(self); if (self.mode == .incremental) { const size = atom.size; const alignment = try math.powi(u32, 2, atom.alignment); - const vaddr = try self.allocateAtom(atom, size, alignment, match); + const vaddr = try self.allocateAtom(atom, size, alignment, sect_id); const sym_name = atom.getName(self); log.debug("allocated {s} atom at 0x{x}", .{ sym_name, vaddr }); sym.n_value = vaddr; - } else try self.addAtomToSection(atom, match); - sym.n_sect = self.getSectionOrdinal(match); + } else try self.addAtomToSection(atom, sect_id); + sym.n_sect = sect_id + 1; } fn allocateAtom( @@ -5216,15 +5020,15 @@ fn allocateAtom( atom: *Atom, new_atom_size: u64, alignment: u64, - match: MatchingSection, + sect_id: u8, ) !u64 { const tracy = trace(@src()); defer tracy.end(); - const sect = self.getSectionPtr(match); - var free_list = self.atom_free_lists.get(match).?; - const needs_padding = match.seg == self.text_segment_cmd_index.? and match.sect == self.text_section_index.?; - const new_atom_ideal_capacity = if (needs_padding) padToIdeal(new_atom_size) else new_atom_size; + const header = &self.sections.items(.header)[sect_id]; + const free_list = &self.sections.items(.free_list)[sect_id]; + const maybe_last_atom = &self.sections.items(.last_atom)[sect_id]; + const new_atom_ideal_capacity = if (header.isCode()) padToIdeal(new_atom_size) else new_atom_size; // We use these to indicate our intention to update metadata, placing the new atom, // and possibly removing a free list node. @@ -5244,7 +5048,7 @@ fn allocateAtom( // Is it enough that we could fit this new atom? 
const sym = big_atom.getSymbol(self); const capacity = big_atom.capacity(self); - const ideal_capacity = if (needs_padding) padToIdeal(capacity) else capacity; + const ideal_capacity = if (header.isCode()) padToIdeal(capacity) else capacity; const ideal_capacity_end_vaddr = math.add(u64, sym.n_value, ideal_capacity) catch ideal_capacity; const capacity_end_vaddr = sym.n_value + capacity; const new_start_vaddr_unaligned = capacity_end_vaddr - new_atom_ideal_capacity; @@ -5272,30 +5076,28 @@ fn allocateAtom( free_list_removal = i; } break :blk new_start_vaddr; - } else if (self.atoms.get(match)) |last| { + } else if (maybe_last_atom.*) |last| { const last_symbol = last.getSymbol(self); - const ideal_capacity = if (needs_padding) padToIdeal(last.size) else last.size; + const ideal_capacity = if (header.isCode()) padToIdeal(last.size) else last.size; const ideal_capacity_end_vaddr = last_symbol.n_value + ideal_capacity; const new_start_vaddr = mem.alignForwardGeneric(u64, ideal_capacity_end_vaddr, alignment); atom_placement = last; break :blk new_start_vaddr; } else { - break :blk mem.alignForwardGeneric(u64, sect.addr, alignment); + break :blk mem.alignForwardGeneric(u64, header.addr, alignment); } }; const expand_section = atom_placement == null or atom_placement.?.next == null; if (expand_section) { - const needed_size = @intCast(u32, (vaddr + new_atom_size) - sect.addr); - try self.growSection(match, needed_size); - _ = try self.atoms.put(self.base.allocator, match, atom); - sect.size = needed_size; - self.load_commands_dirty = true; + const needed_size = @intCast(u32, (vaddr + new_atom_size) - header.addr); + try self.growSection(sect_id, needed_size); + maybe_last_atom.* = atom; + header.size = needed_size; } const align_pow = @intCast(u32, math.log2(alignment)); - if (sect.@"align" < align_pow) { - sect.@"align" = align_pow; - self.load_commands_dirty = true; + if (header.@"align" < align_pow) { + header.@"align" = align_pow; } atom.size = new_atom_size; 
atom.alignment = align_pow; @@ -5322,20 +5124,19 @@ fn allocateAtom( return vaddr; } -pub fn addAtomToSection(self: *MachO, atom: *Atom, match: MatchingSection) !void { - if (self.atoms.getPtr(match)) |last| { - last.*.next = atom; - atom.prev = last.*; - last.* = atom; - } else { - try self.atoms.putNoClobber(self.base.allocator, match, atom); +pub fn addAtomToSection(self: *MachO, atom: *Atom, sect_id: u8) !void { + var section = self.sections.get(sect_id); + if (section.header.size > 0) { + section.last_atom.?.next = atom; + atom.prev = section.last_atom.?; } - const sect = self.getSectionPtr(match); + section.last_atom = atom; const atom_alignment = try math.powi(u32, 2, atom.alignment); - const aligned_end_addr = mem.alignForwardGeneric(u64, sect.size, atom_alignment); - const padding = aligned_end_addr - sect.size; - sect.size += padding + atom.size; - sect.@"align" = @maximum(sect.@"align", atom.alignment); + const aligned_end_addr = mem.alignForwardGeneric(u64, section.header.size, atom_alignment); + const padding = aligned_end_addr - section.header.size; + section.header.size += padding + atom.size; + section.header.@"align" = @maximum(section.header.@"align", atom.alignment); + self.sections.set(sect_id, section); } pub fn getGlobalSymbol(self: *MachO, name: []const u8) !u32 { @@ -5368,74 +5169,27 @@ pub fn getGlobalSymbol(self: *MachO, name: []const u8) !u32 { return sym_index; } -fn getSegmentAllocBase(self: MachO, indices: []const ?u16) struct { vmaddr: u64, fileoff: u64 } { +fn getSegmentAllocBase(self: MachO, indices: []const ?u8) struct { vmaddr: u64, fileoff: u64 } { for (indices) |maybe_prev_id| { const prev_id = maybe_prev_id orelse continue; - const prev = self.load_commands.items[prev_id].segment; + const prev = self.segments.items[prev_id]; return .{ - .vmaddr = prev.inner.vmaddr + prev.inner.vmsize, - .fileoff = prev.inner.fileoff + prev.inner.filesize, + .vmaddr = prev.vmaddr + prev.vmsize, + .fileoff = prev.fileoff + prev.filesize, }; } 
return .{ .vmaddr = 0, .fileoff = 0 }; } -fn pruneAndSortSectionsInSegment(self: *MachO, maybe_seg_id: *?u16, indices: []*?u16) !void { - const seg_id = maybe_seg_id.* orelse return; - - var mapping = std.AutoArrayHashMap(u16, ?u16).init(self.base.allocator); - defer mapping.deinit(); - - const seg = &self.load_commands.items[seg_id].segment; - var sections = seg.sections.toOwnedSlice(self.base.allocator); - defer self.base.allocator.free(sections); - try seg.sections.ensureTotalCapacity(self.base.allocator, sections.len); - - for (indices) |maybe_index| { - const old_idx = maybe_index.* orelse continue; - const sect = §ions[old_idx]; - if (sect.size == 0) { - log.debug("pruning section {s},{s}", .{ sect.segName(), sect.sectName() }); - maybe_index.* = null; - seg.inner.cmdsize -= @sizeOf(macho.section_64); - seg.inner.nsects -= 1; - } else { - maybe_index.* = @intCast(u16, seg.sections.items.len); - seg.sections.appendAssumeCapacity(sect.*); - } - try mapping.putNoClobber(old_idx, maybe_index.*); - } - - var atoms = std.ArrayList(struct { match: MatchingSection, atom: *Atom }).init(self.base.allocator); - defer atoms.deinit(); - try atoms.ensureTotalCapacity(mapping.count()); - - for (mapping.keys()) |old_sect| { - const new_sect = mapping.get(old_sect).? 
orelse { - _ = self.atoms.remove(.{ .seg = seg_id, .sect = old_sect }); - continue; - }; - const kv = self.atoms.fetchRemove(.{ .seg = seg_id, .sect = old_sect }).?; - atoms.appendAssumeCapacity(.{ - .match = .{ .seg = seg_id, .sect = new_sect }, - .atom = kv.value, - }); - } - - while (atoms.popOrNull()) |next| { - try self.atoms.putNoClobber(self.base.allocator, next.match, next.atom); - } - - if (seg.inner.nsects == 0 and !mem.eql(u8, "__TEXT", seg.inner.segName())) { - // Segment has now become empty, so mark it as such - log.debug("marking segment {s} as dead", .{seg.inner.segName()}); - seg.inner.cmd = @intToEnum(macho.LC, 0); - maybe_seg_id.* = null; - } -} - fn pruneAndSortSections(self: *MachO) !void { - try self.pruneAndSortSectionsInSegment(&self.text_segment_cmd_index, &.{ + const gpa = self.base.allocator; + + var sections = self.sections.toOwnedSlice(); + defer sections.deinit(gpa); + try self.sections.ensureTotalCapacity(gpa, sections.len); + + for (&[_]*?u8{ + // __TEXT &self.text_section_index, &self.stubs_section_index, &self.stub_helper_section_index, @@ -5448,9 +5202,7 @@ fn pruneAndSortSections(self: *MachO) !void { &self.objc_methtype_section_index, &self.objc_classname_section_index, &self.eh_frame_section_index, - }); - - try self.pruneAndSortSectionsInSegment(&self.data_const_segment_cmd_index, &.{ + // __DATA_CONST &self.got_section_index, &self.mod_init_func_section_index, &self.mod_term_func_section_index, @@ -5458,9 +5210,7 @@ fn pruneAndSortSections(self: *MachO) !void { &self.objc_cfstring_section_index, &self.objc_classlist_section_index, &self.objc_imageinfo_section_index, - }); - - try self.pruneAndSortSectionsInSegment(&self.data_segment_cmd_index, &.{ + // __DATA &self.rustc_section_index, &self.la_symbol_ptr_section_index, &self.objc_const_section_index, @@ -5473,103 +5223,129 @@ fn pruneAndSortSections(self: *MachO) !void { &self.tlv_data_section_index, &self.tlv_bss_section_index, &self.bss_section_index, - }); + }) 
|maybe_index| { + const old_idx = maybe_index.* orelse continue; + const segment_index = sections.items(.segment_index)[old_idx]; + const header = sections.items(.header)[old_idx]; + const last_atom = sections.items(.last_atom)[old_idx]; + if (header.size == 0) { + log.debug("pruning section {s},{s}", .{ header.segName(), header.sectName() }); + maybe_index.* = null; + const seg = &self.segments.items[segment_index]; + seg.cmdsize -= @sizeOf(macho.section_64); + seg.nsects -= 1; + } else { + maybe_index.* = @intCast(u8, self.sections.slice().len); + self.sections.appendAssumeCapacity(.{ + .segment_index = segment_index, + .header = header, + .last_atom = last_atom, + }); + } + } - // Create new section ordinals. - self.section_ordinals.clearRetainingCapacity(); - if (self.text_segment_cmd_index) |seg_id| { - const seg = self.load_commands.items[seg_id].segment; - for (seg.sections.items) |_, sect_id| { - const res = self.section_ordinals.getOrPutAssumeCapacity(.{ - .seg = seg_id, - .sect = @intCast(u16, sect_id), - }); - assert(!res.found_existing); + for (self.segments.items) |*seg| { + const segname = seg.segName(); + if (seg.nsects == 0 and + !mem.eql(u8, "__TEXT", segname) and + !mem.eql(u8, "__PAGEZERO", segname) and + !mem.eql(u8, "__LINKEDIT", segname)) + { + // Segment has now become empty, so mark it as such + log.debug("marking segment {s} as dead", .{seg.segName()}); + seg.cmd = @intToEnum(macho.LC, 0); } } - if (self.data_const_segment_cmd_index) |seg_id| { - const seg = self.load_commands.items[seg_id].segment; - for (seg.sections.items) |_, sect_id| { - const res = self.section_ordinals.getOrPutAssumeCapacity(.{ - .seg = seg_id, - .sect = @intCast(u16, sect_id), - }); - assert(!res.found_existing); - } - } - if (self.data_segment_cmd_index) |seg_id| { - const seg = self.load_commands.items[seg_id].segment; - for (seg.sections.items) |_, sect_id| { - const res = self.section_ordinals.getOrPutAssumeCapacity(.{ - .seg = seg_id, - .sect = @intCast(u16, 
sect_id), - }); - assert(!res.found_existing); - } - } - self.sections_order_dirty = false; } fn updateSectionOrdinals(self: *MachO) !void { - if (!self.sections_order_dirty) return; - + _ = self; const tracy = trace(@src()); defer tracy.end(); - log.debug("updating section ordinals", .{}); + @panic("updating section ordinals"); - const gpa = self.base.allocator; + // const gpa = self.base.allocator; - var ordinal_remap = std.AutoHashMap(u8, u8).init(gpa); - defer ordinal_remap.deinit(); - var ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}; + // var ordinal_remap = std.AutoHashMap(u8, u8).init(gpa); + // defer ordinal_remap.deinit(); + // var ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}; - var new_ordinal: u8 = 0; - for (&[_]?u16{ - self.text_segment_cmd_index, - self.data_const_segment_cmd_index, - self.data_segment_cmd_index, - }) |maybe_index| { - const index = maybe_index orelse continue; - const seg = self.load_commands.items[index].segment; - for (seg.sections.items) |sect, sect_id| { - const match = MatchingSection{ - .seg = @intCast(u16, index), - .sect = @intCast(u16, sect_id), - }; - const old_ordinal = self.getSectionOrdinal(match); - new_ordinal += 1; - log.debug("'{s},{s}': sect({d}, '_,_') => sect({d}, '_,_')", .{ - sect.segName(), - sect.sectName(), - old_ordinal, - new_ordinal, - }); - try ordinal_remap.putNoClobber(old_ordinal, new_ordinal); - try ordinals.putNoClobber(gpa, match, {}); - } - } + // var new_ordinal: u8 = 0; + // for (&[_]?u16{ + // self.text_segment_cmd_index, + // self.data_const_segment_cmd_index, + // self.data_segment_cmd_index, + // }) |maybe_index| { + // const index = maybe_index orelse continue; + // const seg = self.load_commands.items[index].segment; + // for (seg.sections.items) |sect, sect_id| { + // const match = MatchingSection{ + // .seg = @intCast(u16, index), + // .sect = @intCast(u16, sect_id), + // }; + // const old_ordinal = self.getSectionOrdinal(match); + // 
new_ordinal += 1; + // log.debug("'{s},{s}': sect({d}, '_,_') => sect({d}, '_,_')", .{ + // sect.segName(), + // sect.sectName(), + // old_ordinal, + // new_ordinal, + // }); + // try ordinal_remap.putNoClobber(old_ordinal, new_ordinal); + // try ordinals.putNoClobber(gpa, match, {}); + // } + // } - // FIXME Jakub - // TODO no need for duping work here; simply walk the atom graph - for (self.locals.items) |*sym| { - if (sym.undf()) continue; - if (sym.n_sect == 0) continue; - sym.n_sect = ordinal_remap.get(sym.n_sect).?; - } - for (self.objects.items) |*object| { - for (object.symtab.items) |*sym| { - if (sym.undf()) continue; - if (sym.n_sect == 0) continue; - sym.n_sect = ordinal_remap.get(sym.n_sect).?; - } - } + // // FIXME Jakub + // // TODO no need for duping work here; simply walk the atom graph + // for (self.locals.items) |*sym| { + // if (sym.undf()) continue; + // if (sym.n_sect == 0) continue; + // sym.n_sect = ordinal_remap.get(sym.n_sect).?; + // } + // for (self.objects.items) |*object| { + // for (object.symtab.items) |*sym| { + // if (sym.undf()) continue; + // if (sym.n_sect == 0) continue; + // sym.n_sect = ordinal_remap.get(sym.n_sect).?; + // } + // } - self.section_ordinals.deinit(gpa); - self.section_ordinals = ordinals; + // self.section_ordinals.deinit(gpa); + // self.section_ordinals = ordinals; } -fn writeDyldInfoData(self: *MachO) !void { +pub fn writeSegmentHeaders(self: *MachO, start: usize, end: usize, ncmds: *u32, writer: anytype) !void { + var count: usize = 0; + for (self.segments.items[start..end]) |seg| { + if (seg.cmd == .NONE) continue; + try writer.writeStruct(seg); + + // TODO + for (self.sections.items(.header)[count..][0..seg.nsects]) |header| { + try writer.writeStruct(header); + } + + count += seg.nsects; + ncmds.* += 1; + } +} + +fn writeLinkeditSegmentData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + seg.filesize = 0; + seg.vmsize = 0; + 
+ try self.writeDyldInfoData(ncmds, lc_writer); + try self.writeFunctionStarts(ncmds, lc_writer); + try self.writeDataInCode(ncmds, lc_writer); + try self.writeSymtabs(ncmds, lc_writer); + + seg.vmsize = mem.alignForwardGeneric(u64, seg.filesize, self.page_size); +} + +fn writeDyldInfoData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { const tracy = trace(@src()); defer tracy.end(); @@ -5582,89 +5358,86 @@ fn writeDyldInfoData(self: *MachO) !void { var lazy_bind_pointers = std.ArrayList(bind.Pointer).init(gpa); defer lazy_bind_pointers.deinit(); - { - var it = self.atoms.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var atom: *Atom = entry.value_ptr.*; + const slice = self.sections.slice(); + for (slice.items(.last_atom)) |last_atom, sect_id| { + var atom = last_atom orelse continue; + const segment_index = slice.items(.segment_index)[sect_id]; + const header = slice.items(.header)[sect_id]; - if (self.text_segment_cmd_index) |seg| { - if (match.seg == seg) continue; // __TEXT is non-writable + if (mem.eql(u8, header.segName(), "__TEXT")) continue; // __TEXT is non-writable + + log.debug("dyld info for {s},{s}", .{ header.segName(), header.sectName() }); + + const seg = self.segments.items[segment_index]; + + while (true) { + log.debug(" ATOM(%{d}, '{s}')", .{ atom.sym_index, atom.getName(self) }); + const sym = atom.getSymbol(self); + const base_offset = sym.n_value - seg.vmaddr; + + for (atom.rebases.items) |offset| { + log.debug(" | rebase at {x}", .{base_offset + offset}); + try rebase_pointers.append(.{ + .offset = base_offset + offset, + .segment_id = segment_index, + }); } - const seg = self.getSegment(match); - const sect = self.getSection(match); - log.debug("dyld info for {s},{s}", .{ sect.segName(), sect.sectName() }); - - while (true) { - log.debug(" ATOM(%{d}, '{s}')", .{ atom.sym_index, atom.getName(self) }); - const sym = atom.getSymbol(self); - const base_offset = sym.n_value - seg.inner.vmaddr; - - for 
(atom.rebases.items) |offset| { - log.debug(" | rebase at {x}", .{base_offset + offset}); - try rebase_pointers.append(.{ - .offset = base_offset + offset, - .segment_id = match.seg, - }); + for (atom.bindings.items) |binding| { + const bind_sym = self.getSymbol(binding.target); + const bind_sym_name = self.getSymbolName(binding.target); + const dylib_ordinal = @divTrunc( + @bitCast(i16, bind_sym.n_desc), + macho.N_SYMBOL_RESOLVER, + ); + var flags: u4 = 0; + log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ + binding.offset + base_offset, + bind_sym_name, + dylib_ordinal, + }); + if (bind_sym.weakRef()) { + log.debug(" | marking as weak ref ", .{}); + flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); } - - for (atom.bindings.items) |binding| { - const bind_sym = self.getSymbol(binding.target); - const bind_sym_name = self.getSymbolName(binding.target); - const dylib_ordinal = @divTrunc( - @bitCast(i16, bind_sym.n_desc), - macho.N_SYMBOL_RESOLVER, - ); - var flags: u4 = 0; - log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ - binding.offset + base_offset, - bind_sym_name, - dylib_ordinal, - }); - if (bind_sym.weakRef()) { - log.debug(" | marking as weak ref ", .{}); - flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); - } - try bind_pointers.append(.{ - .offset = binding.offset + base_offset, - .segment_id = match.seg, - .dylib_ordinal = dylib_ordinal, - .name = bind_sym_name, - .bind_flags = flags, - }); - } - - for (atom.lazy_bindings.items) |binding| { - const bind_sym = self.getSymbol(binding.target); - const bind_sym_name = self.getSymbolName(binding.target); - const dylib_ordinal = @divTrunc( - @bitCast(i16, bind_sym.n_desc), - macho.N_SYMBOL_RESOLVER, - ); - var flags: u4 = 0; - log.debug(" | lazy bind at {x} import('{s}') ord({d})", .{ - binding.offset + base_offset, - bind_sym_name, - dylib_ordinal, - }); - if (bind_sym.weakRef()) { - log.debug(" | marking as weak ref ", .{}); - flags |= @truncate(u4, 
macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); - } - try lazy_bind_pointers.append(.{ - .offset = binding.offset + base_offset, - .segment_id = match.seg, - .dylib_ordinal = dylib_ordinal, - .name = bind_sym_name, - .bind_flags = flags, - }); - } - - if (atom.prev) |prev| { - atom = prev; - } else break; + try bind_pointers.append(.{ + .offset = binding.offset + base_offset, + .segment_id = segment_index, + .dylib_ordinal = dylib_ordinal, + .name = bind_sym_name, + .bind_flags = flags, + }); } + + for (atom.lazy_bindings.items) |binding| { + const bind_sym = self.getSymbol(binding.target); + const bind_sym_name = self.getSymbolName(binding.target); + const dylib_ordinal = @divTrunc( + @bitCast(i16, bind_sym.n_desc), + macho.N_SYMBOL_RESOLVER, + ); + var flags: u4 = 0; + log.debug(" | lazy bind at {x} import('{s}') ord({d})", .{ + binding.offset + base_offset, + bind_sym_name, + dylib_ordinal, + }); + if (bind_sym.weakRef()) { + log.debug(" | marking as weak ref ", .{}); + flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); + } + try lazy_bind_pointers.append(.{ + .offset = binding.offset + base_offset, + .segment_id = segment_index, + .dylib_ordinal = dylib_ordinal, + .name = bind_sym_name, + .bind_flags = flags, + }); + } + + if (atom.prev) |prev| { + atom = prev; + } else break; } } @@ -5675,8 +5448,8 @@ fn writeDyldInfoData(self: *MachO) !void { // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. 
log.debug("generating export trie", .{}); - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].segment; - const base_address = text_segment.inner.vmaddr; + const text_segment = self.segments.items[self.text_segment_cmd_index.?]; + const base_address = text_segment.vmaddr; if (self.base.options.output_mode == .Exe) { for (&[_]SymbolWithLoc{ @@ -5714,48 +5487,27 @@ fn writeDyldInfoData(self: *MachO) !void { try trie.finalize(gpa); } - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].dyld_info_only; - - const rebase_off = mem.alignForwardGeneric(u64, seg.inner.fileoff, @alignOf(u64)); + const link_seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const rebase_off = mem.alignForwardGeneric(u64, link_seg.fileoff, @alignOf(u64)); + assert(rebase_off == link_seg.fileoff); const rebase_size = try bind.rebaseInfoSize(rebase_pointers.items); - dyld_info.rebase_off = @intCast(u32, rebase_off); - dyld_info.rebase_size = @intCast(u32, rebase_size); - log.debug("writing rebase info from 0x{x} to 0x{x}", .{ - dyld_info.rebase_off, - dyld_info.rebase_off + dyld_info.rebase_size, - }); + log.debug("writing rebase info from 0x{x} to 0x{x}", .{ rebase_off, rebase_off + rebase_size }); - const bind_off = mem.alignForwardGeneric(u64, dyld_info.rebase_off + dyld_info.rebase_size, @alignOf(u64)); + const bind_off = mem.alignForwardGeneric(u64, rebase_off + rebase_size, @alignOf(u64)); const bind_size = try bind.bindInfoSize(bind_pointers.items); - dyld_info.bind_off = @intCast(u32, bind_off); - dyld_info.bind_size = @intCast(u32, bind_size); - log.debug("writing bind info from 0x{x} to 0x{x}", .{ - dyld_info.bind_off, - dyld_info.bind_off + dyld_info.bind_size, - }); + log.debug("writing bind info from 0x{x} to 0x{x}", .{ bind_off, bind_off + bind_size }); - const lazy_bind_off = mem.alignForwardGeneric(u64, dyld_info.bind_off + 
dyld_info.bind_size, @alignOf(u64)); + const lazy_bind_off = mem.alignForwardGeneric(u64, bind_off + bind_size, @alignOf(u64)); const lazy_bind_size = try bind.lazyBindInfoSize(lazy_bind_pointers.items); - dyld_info.lazy_bind_off = @intCast(u32, lazy_bind_off); - dyld_info.lazy_bind_size = @intCast(u32, lazy_bind_size); - log.debug("writing lazy bind info from 0x{x} to 0x{x}", .{ - dyld_info.lazy_bind_off, - dyld_info.lazy_bind_off + dyld_info.lazy_bind_size, - }); + log.debug("writing lazy bind info from 0x{x} to 0x{x}", .{ lazy_bind_off, lazy_bind_off + lazy_bind_size }); - const export_off = mem.alignForwardGeneric(u64, dyld_info.lazy_bind_off + dyld_info.lazy_bind_size, @alignOf(u64)); + const export_off = mem.alignForwardGeneric(u64, lazy_bind_off + lazy_bind_size, @alignOf(u64)); const export_size = trie.size; - dyld_info.export_off = @intCast(u32, export_off); - dyld_info.export_size = @intCast(u32, export_size); - log.debug("writing export trie from 0x{x} to 0x{x}", .{ - dyld_info.export_off, - dyld_info.export_off + dyld_info.export_size, - }); + log.debug("writing export trie from 0x{x} to 0x{x}", .{ export_off, export_off + export_size }); - seg.inner.filesize = dyld_info.export_off + dyld_info.export_size - seg.inner.fileoff; + const needed_size = export_off + export_size - rebase_off; + link_seg.filesize = needed_size; - const needed_size = dyld_info.export_off + dyld_info.export_size - dyld_info.rebase_off; var buffer = try gpa.alloc(u8, needed_size); defer gpa.free(buffer); mem.set(u8, buffer, 0); @@ -5763,54 +5515,61 @@ fn writeDyldInfoData(self: *MachO) !void { var stream = std.io.fixedBufferStream(buffer); const writer = stream.writer(); - const base_off = dyld_info.rebase_off; try bind.writeRebaseInfo(rebase_pointers.items, writer); - try stream.seekTo(dyld_info.bind_off - base_off); + try stream.seekTo(bind_off - rebase_off); try bind.writeBindInfo(bind_pointers.items, writer); - try stream.seekTo(dyld_info.lazy_bind_off - base_off); + try 
stream.seekTo(lazy_bind_off - rebase_off); try bind.writeLazyBindInfo(lazy_bind_pointers.items, writer); - try stream.seekTo(dyld_info.export_off - base_off); + try stream.seekTo(export_off - rebase_off); _ = try trie.write(writer); log.debug("writing dyld info from 0x{x} to 0x{x}", .{ - dyld_info.rebase_off, - dyld_info.rebase_off + needed_size, + rebase_off, + rebase_off + needed_size, }); - try self.base.file.?.pwriteAll(buffer, dyld_info.rebase_off); - try self.populateLazyBindOffsetsInStubHelper( - buffer[dyld_info.lazy_bind_off - base_off ..][0..dyld_info.lazy_bind_size], - ); + try self.base.file.?.pwriteAll(buffer, rebase_off); + try self.populateLazyBindOffsetsInStubHelper(buffer[lazy_bind_off - rebase_off ..][0..lazy_bind_size]); - self.load_commands_dirty = true; + try lc_writer.writeStruct(macho.dyld_info_command{ + .cmd = .DYLD_INFO_ONLY, + .cmdsize = @sizeOf(macho.dyld_info_command), + .rebase_off = @intCast(u32, rebase_off), + .rebase_size = @intCast(u32, rebase_size), + .bind_off = @intCast(u32, bind_off), + .bind_size = @intCast(u32, bind_size), + .weak_bind_off = 0, + .weak_bind_size = 0, + .lazy_bind_off = @intCast(u32, lazy_bind_off), + .lazy_bind_size = @intCast(u32, lazy_bind_size), + .export_off = @intCast(u32, export_off), + .export_size = @intCast(u32, export_size), + }); + ncmds.* += 1; } fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { const gpa = self.base.allocator; - const text_segment_cmd_index = self.text_segment_cmd_index orelse return; + const stub_helper_section_index = self.stub_helper_section_index orelse return; - const last_atom = self.atoms.get(.{ - .seg = text_segment_cmd_index, - .sect = stub_helper_section_index, - }) orelse return; if (self.stub_helper_preamble_atom == null) return; - if (last_atom == self.stub_helper_preamble_atom.?) 
return; + + const section = self.sections.get(stub_helper_section_index); + const last_atom = section.last_atom orelse return; + if (last_atom == self.stub_helper_preamble_atom.?) return; // TODO is this a redundant check? var table = std.AutoHashMap(i64, *Atom).init(gpa); defer table.deinit(); { var stub_atom = last_atom; - var laptr_atom = self.atoms.get(.{ - .seg = self.data_segment_cmd_index.?, - .sect = self.la_symbol_ptr_section_index.?, - }).?; + var laptr_atom = self.sections.items(.last_atom)[self.la_symbol_ptr_section_index.?].?; const base_addr = blk: { - const seg = self.load_commands.items[self.data_segment_cmd_index.?].segment; - break :blk seg.inner.vmaddr; + const seg = self.segments.items[self.data_segment_cmd_index.?]; + break :blk seg.vmaddr; }; while (true) { @@ -5871,10 +5630,7 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { } } - const sect = self.getSection(.{ - .seg = text_segment_cmd_index, - .sect = stub_helper_section_index, - }); + const header = self.sections.items(.header)[stub_helper_section_index]; const stub_offset: u4 = switch (self.base.options.target.cpu.arch) { .x86_64 => 1, .aarch64 => 2 * @sizeOf(u32), @@ -5886,7 +5642,7 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { while (offsets.popOrNull()) |bind_offset| { const atom = table.get(bind_offset.sym_offset).?; const sym = atom.getSymbol(self); - const file_offset = sect.offset + sym.n_value - sect.addr + stub_offset; + const file_offset = header.offset + sym.n_value - header.addr + stub_offset; mem.writeIntLittle(u32, &buf, bind_offset.offset); log.debug("writing lazy bind offset in stub helper of 0x{x} for symbol {s} at offset 0x{x}", .{ bind_offset.offset, @@ -5899,14 +5655,14 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { const asc_u64 = std.sort.asc(u64); -fn writeFunctionStarts(self: *MachO) !void { - const text_seg_index = self.text_segment_cmd_index orelse 
return; - const text_sect_index = self.text_section_index orelse return; - const text_seg = self.load_commands.items[text_seg_index].segment; - +fn writeFunctionStarts(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { const tracy = trace(@src()); defer tracy.end(); + const text_seg_index = self.text_segment_cmd_index orelse return; + const text_sect_index = self.text_section_index orelse return; + const text_seg = self.segments.items[text_seg_index]; + const gpa = self.base.allocator; // We need to sort by address first @@ -5918,8 +5674,8 @@ fn writeFunctionStarts(self: *MachO) !void { const sym = self.getSymbol(global); if (sym.undf()) continue; if (sym.n_desc == N_DESC_GCED) continue; - const match = self.getMatchingSectionFromOrdinal(sym.n_sect); - if (match.seg != text_seg_index or match.sect != text_sect_index) continue; + const sect_id = sym.n_sect - 1; + if (sect_id != text_sect_index) continue; addresses.appendAssumeCapacity(sym.n_value); } @@ -5932,7 +5688,7 @@ fn writeFunctionStarts(self: *MachO) !void { var last_off: u32 = 0; for (addresses.items) |addr| { - const offset = @intCast(u32, addr - text_seg.inner.vmaddr); + const offset = @intCast(u32, addr - text_seg.vmaddr); const diff = offset - last_off; if (diff == 0) continue; @@ -5951,22 +5707,22 @@ fn writeFunctionStarts(self: *MachO) !void { try std.leb.writeULEB128(buffer.writer(), offset); } - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const fn_cmd = &self.load_commands.items[self.function_starts_cmd_index.?].linkedit_data; + const link_seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric(u64, link_seg.fileoff + link_seg.filesize, @alignOf(u64)); + const needed_size = buffer.items.len; + link_seg.filesize = offset + needed_size - link_seg.fileoff; - const dataoff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(u64)); - const datasize = buffer.items.len; - fn_cmd.dataoff = 
@intCast(u32, dataoff); - fn_cmd.datasize = @intCast(u32, datasize); - seg.inner.filesize = fn_cmd.dataoff + fn_cmd.datasize - seg.inner.fileoff; + log.debug("writing function starts info from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); - log.debug("writing function starts info from 0x{x} to 0x{x}", .{ - fn_cmd.dataoff, - fn_cmd.dataoff + fn_cmd.datasize, + try self.base.file.?.pwriteAll(buffer.items, offset); + + try lc_writer.writeStruct(macho.linkedit_data_command{ + .cmd = .FUNCTION_STARTS, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = @intCast(u32, offset), + .datasize = @intCast(u32, needed_size), }); - - try self.base.file.?.pwriteAll(buffer.items, fn_cmd.dataoff); - self.load_commands_dirty = true; + ncmds.* += 1; } fn filterDataInCode( @@ -5988,17 +5744,15 @@ fn filterDataInCode( return dices[start..end]; } -fn writeDataInCode(self: *MachO) !void { +fn writeDataInCode(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { const tracy = trace(@src()); defer tracy.end(); var out_dice = std.ArrayList(macho.data_in_code_entry).init(self.base.allocator); defer out_dice.deinit(); - const text_sect = self.getSection(.{ - .seg = self.text_segment_cmd_index orelse return, - .sect = self.text_section_index orelse return, - }); + const text_sect_id = self.text_section_index orelse return; + const text_sect_header = self.sections.items(.header)[text_sect_id]; for (self.objects.items) |object| { const dice = object.parseDataInCode() orelse continue; @@ -6008,15 +5762,15 @@ fn writeDataInCode(self: *MachO) !void { const sym = atom.getSymbol(self); if (sym.n_desc == N_DESC_GCED) continue; - const match = self.getMatchingSectionFromOrdinal(sym.n_sect); - if (match.seg != self.text_segment_cmd_index.? and match.sect != self.text_section_index.?) { + const sect_id = sym.n_sect - 1; + if (sect_id != self.text_section_index.?) 
{ continue; } const source_sym = object.getSourceSymbol(atom.sym_index) orelse continue; const source_addr = math.cast(u32, source_sym.n_value) orelse return error.Overflow; const filtered_dice = filterDataInCode(dice, source_addr, source_addr + atom.size); - const base = math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset) orelse + const base = math.cast(u32, sym.n_value - text_sect_header.addr + text_sect_header.offset) orelse return error.Overflow; for (filtered_dice) |single| { @@ -6030,33 +5784,63 @@ fn writeDataInCode(self: *MachO) !void { } } - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].linkedit_data; + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64)); + const needed_size = out_dice.items.len * @sizeOf(macho.data_in_code_entry); + seg.filesize = offset + needed_size - seg.fileoff; - const dataoff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(u64)); - const datasize = out_dice.items.len * @sizeOf(macho.data_in_code_entry); - dice_cmd.dataoff = @intCast(u32, dataoff); - dice_cmd.datasize = @intCast(u32, datasize); - seg.inner.filesize = dice_cmd.dataoff + dice_cmd.datasize - seg.inner.fileoff; + log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); - log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ - dice_cmd.dataoff, - dice_cmd.dataoff + dice_cmd.datasize, + try self.base.file.?.pwriteAll(mem.sliceAsBytes(out_dice.items), offset); + try lc_writer.writeStruct(macho.linkedit_data_command{ + .cmd = .DATA_IN_CODE, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = @intCast(u32, offset), + .datasize = @intCast(u32, needed_size), }); - - try self.base.file.?.pwriteAll(mem.sliceAsBytes(out_dice.items), dice_cmd.dataoff); - 
self.load_commands_dirty = true; + ncmds.* += 1; } -fn writeSymtab(self: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); +fn writeSymtabs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { + var symtab_cmd = macho.symtab_command{ + .cmdsize = @sizeOf(macho.symtab_command), + .symoff = 0, + .nsyms = 0, + .stroff = 0, + .strsize = 0, + }; + var dysymtab_cmd = macho.dysymtab_command{ + .cmdsize = @sizeOf(macho.dysymtab_command), + .ilocalsym = 0, + .nlocalsym = 0, + .iextdefsym = 0, + .nextdefsym = 0, + .iundefsym = 0, + .nundefsym = 0, + .tocoff = 0, + .ntoc = 0, + .modtaboff = 0, + .nmodtab = 0, + .extrefsymoff = 0, + .nextrefsyms = 0, + .indirectsymoff = 0, + .nindirectsyms = 0, + .extreloff = 0, + .nextrel = 0, + .locreloff = 0, + .nlocrel = 0, + }; + var ctx = try self.writeSymtab(&symtab_cmd); + defer ctx.imports_table.deinit(); + try self.writeDysymtab(ctx, &dysymtab_cmd); + try self.writeStrtab(&symtab_cmd); + try lc_writer.writeStruct(symtab_cmd); + try lc_writer.writeStruct(dysymtab_cmd); + ncmds.* += 2; +} +fn writeSymtab(self: *MachO, lc: *macho.symtab_command) !SymtabCtx { const gpa = self.base.allocator; - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].symtab; - const symoff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(macho.nlist_64)); - symtab.symoff = @intCast(u32, symoff); var locals = std.ArrayList(macho.nlist_64).init(gpa); defer locals.deinit(); @@ -6101,8 +5885,8 @@ fn writeSymtab(self: *MachO) !void { var imports = std.ArrayList(macho.nlist_64).init(gpa); defer imports.deinit(); + var imports_table = std.AutoHashMap(SymbolWithLoc, u32).init(gpa); - defer imports_table.deinit(); for (self.globals.values()) |global| { const sym = self.getSymbol(global); @@ -6115,56 +5899,84 @@ fn writeSymtab(self: *MachO) !void { try imports_table.putNoClobber(global, new_index); } - const nlocals = 
locals.items.len; - const nexports = exports.items.len; - const nimports = imports.items.len; - symtab.nsyms = @intCast(u32, nlocals + nexports + nimports); + const nlocals = @intCast(u32, locals.items.len); + const nexports = @intCast(u32, exports.items.len); + const nimports = @intCast(u32, imports.items.len); + const nsyms = nlocals + nexports + nimports; + + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric( + u64, + seg.fileoff + seg.filesize, + @alignOf(macho.nlist_64), + ); + const needed_size = nsyms * @sizeOf(macho.nlist_64); + seg.filesize = offset + needed_size - seg.fileoff; var buffer = std.ArrayList(u8).init(gpa); defer buffer.deinit(); - try buffer.ensureTotalCapacityPrecise(symtab.nsyms * @sizeOf(macho.nlist_64)); + try buffer.ensureTotalCapacityPrecise(needed_size); buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(locals.items)); buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(exports.items)); buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(imports.items)); - log.debug("writing symtab from 0x{x} to 0x{x}", .{ symtab.symoff, symtab.symoff + buffer.items.len }); - try self.base.file.?.pwriteAll(buffer.items, symtab.symoff); + log.debug("writing symtab from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); + try self.base.file.?.pwriteAll(buffer.items, offset); - seg.inner.filesize = symtab.symoff + buffer.items.len - seg.inner.fileoff; + lc.symoff = @intCast(u32, offset); + lc.nsyms = nsyms; - // Update dynamic symbol table. 
- const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].dysymtab; - dysymtab.nlocalsym = @intCast(u32, nlocals); - dysymtab.iextdefsym = dysymtab.nlocalsym; - dysymtab.nextdefsym = @intCast(u32, nexports); - dysymtab.iundefsym = dysymtab.nlocalsym + dysymtab.nextdefsym; - dysymtab.nundefsym = @intCast(u32, nimports); + return SymtabCtx{ + .nlocalsym = nlocals, + .nextdefsym = nexports, + .nundefsym = nimports, + .imports_table = imports_table, + }; +} +fn writeStrtab(self: *MachO, lc: *macho.symtab_command) !void { + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64)); + const needed_size = self.strtab.buffer.items.len; + seg.filesize = offset + needed_size - seg.fileoff; + + log.debug("writing string table from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); + + try self.base.file.?.pwriteAll(self.strtab.buffer.items, offset); + + lc.stroff = @intCast(u32, offset); + lc.strsize = @intCast(u32, needed_size); +} + +const SymtabCtx = struct { + nlocalsym: u32, + nextdefsym: u32, + nundefsym: u32, + imports_table: std.AutoHashMap(SymbolWithLoc, u32), +}; + +fn writeDysymtab(self: *MachO, ctx: SymtabCtx, lc: *macho.dysymtab_command) !void { + const gpa = self.base.allocator; const nstubs = @intCast(u32, self.stubs_table.count()); const ngot_entries = @intCast(u32, self.got_entries_table.count()); + const nindirectsyms = nstubs * 2 + ngot_entries; + const iextdefsym = ctx.nlocalsym; + const iundefsym = iextdefsym + ctx.nextdefsym; - const indirectsymoff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(u64)); - dysymtab.indirectsymoff = @intCast(u32, indirectsymoff); - dysymtab.nindirectsyms = nstubs * 2 + ngot_entries; + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64)); + const needed_size = nindirectsyms * 
@sizeOf(u32); + seg.filesize = offset + needed_size - seg.fileoff; - seg.inner.filesize = dysymtab.indirectsymoff + dysymtab.nindirectsyms * @sizeOf(u32) - seg.inner.fileoff; - - log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ - dysymtab.indirectsymoff, - dysymtab.indirectsymoff + dysymtab.nindirectsyms * @sizeOf(u32), - }); + log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); var buf = std.ArrayList(u8).init(gpa); defer buf.deinit(); - try buf.ensureTotalCapacity(dysymtab.nindirectsyms * @sizeOf(u32)); + try buf.ensureTotalCapacity(needed_size); const writer = buf.writer(); - if (self.text_segment_cmd_index) |text_segment_cmd_index| blk: { - const stubs_section_index = self.stubs_section_index orelse break :blk; - const stubs = self.getSectionPtr(.{ - .seg = text_segment_cmd_index, - .sect = stubs_section_index, - }); + if (self.stubs_section_index) |sect_id| { + const stubs = &self.sections.items(.header)[sect_id]; stubs.reserved1 = 0; for (self.stubs.items) |entry| { if (entry.sym_index == 0) continue; @@ -6172,16 +5984,12 @@ fn writeSymtab(self: *MachO) !void { if (atom_sym.n_desc == N_DESC_GCED) continue; const target_sym = self.getSymbol(entry.target); assert(target_sym.undf()); - try writer.writeIntLittle(u32, dysymtab.iundefsym + imports_table.get(entry.target).?); + try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); } } - if (self.data_const_segment_cmd_index) |data_const_segment_cmd_index| blk: { - const got_section_index = self.got_section_index orelse break :blk; - const got = self.getSectionPtr(.{ - .seg = data_const_segment_cmd_index, - .sect = got_section_index, - }); + if (self.got_section_index) |sect_id| { + const got = &self.sections.items(.header)[sect_id]; got.reserved1 = nstubs; for (self.got_entries.items) |entry| { if (entry.sym_index == 0) continue; @@ -6189,19 +5997,15 @@ fn writeSymtab(self: *MachO) !void { if (atom_sym.n_desc == N_DESC_GCED) 
continue; const target_sym = self.getSymbol(entry.target); if (target_sym.undf()) { - try writer.writeIntLittle(u32, dysymtab.iundefsym + imports_table.get(entry.target).?); + try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); } else { try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); } } } - if (self.data_segment_cmd_index) |data_segment_cmd_index| blk: { - const la_symbol_ptr_section_index = self.la_symbol_ptr_section_index orelse break :blk; - const la_symbol_ptr = self.getSectionPtr(.{ - .seg = data_segment_cmd_index, - .sect = la_symbol_ptr_section_index, - }); + if (self.la_symbol_ptr_section_index) |sect_id| { + const la_symbol_ptr = &self.sections.items(.header)[sect_id]; la_symbol_ptr.reserved1 = nstubs + ngot_entries; for (self.stubs.items) |entry| { if (entry.sym_index == 0) continue; @@ -6209,131 +6013,76 @@ fn writeSymtab(self: *MachO) !void { if (atom_sym.n_desc == N_DESC_GCED) continue; const target_sym = self.getSymbol(entry.target); assert(target_sym.undf()); - try writer.writeIntLittle(u32, dysymtab.iundefsym + imports_table.get(entry.target).?); + try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); } } - assert(buf.items.len == dysymtab.nindirectsyms * @sizeOf(u32)); + assert(buf.items.len == needed_size); + try self.base.file.?.pwriteAll(buf.items, offset); - try self.base.file.?.pwriteAll(buf.items, dysymtab.indirectsymoff); - self.load_commands_dirty = true; + lc.nlocalsym = ctx.nlocalsym; + lc.iextdefsym = iextdefsym; + lc.nextdefsym = ctx.nextdefsym; + lc.iundefsym = iundefsym; + lc.nundefsym = ctx.nundefsym; + lc.indirectsymoff = @intCast(u32, offset); + lc.nindirectsyms = nindirectsyms; } -fn writeStrtab(self: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].symtab; - const stroff = 
mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(u64)); - - const strsize = self.strtab.buffer.items.len; - symtab.stroff = @intCast(u32, stroff); - symtab.strsize = @intCast(u32, strsize); - seg.inner.filesize = symtab.stroff + symtab.strsize - seg.inner.fileoff; - - log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); - - try self.base.file.?.pwriteAll(self.strtab.buffer.items, symtab.stroff); - - self.load_commands_dirty = true; -} - -fn writeLinkeditSegment(self: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - seg.inner.filesize = 0; - - try self.writeDyldInfoData(); - try self.writeFunctionStarts(); - try self.writeDataInCode(); - try self.writeSymtab(); - try self.writeStrtab(); - - seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size); -} - -fn writeCodeSignaturePadding(self: *MachO, code_sig: *CodeSignature) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const cs_cmd = &self.load_commands.items[self.code_signature_cmd_index.?].linkedit_data; +fn writeCodeSignaturePadding( + self: *MachO, + code_sig: *CodeSignature, + ncmds: *u32, + lc_writer: anytype, +) !u32 { + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; // Code signature data has to be 16-bytes aligned for Apple tools to recognize the file // https://github.com/opensource-apple/cctools/blob/fdb4825f303fd5c0751be524babd32958181b3ed/libstuff/checkout.c#L271 - const dataoff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, 16); - const datasize = code_sig.estimateSize(dataoff); - cs_cmd.dataoff = @intCast(u32, dataoff); - cs_cmd.datasize = @intCast(u32, code_sig.estimateSize(dataoff)); - - // Advance size of __LINKEDIT segment - 
seg.inner.filesize = cs_cmd.dataoff + cs_cmd.datasize - seg.inner.fileoff; - seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size); - log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ dataoff, dataoff + datasize }); + const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, 16); + const needed_size = code_sig.estimateSize(offset); + seg.filesize = offset + needed_size - seg.fileoff; + seg.vmsize = mem.alignForwardGeneric(u64, seg.filesize, self.page_size); + log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); // Pad out the space. We need to do this to calculate valid hashes for everything in the file // except for code signature data. - try self.base.file.?.pwriteAll(&[_]u8{0}, dataoff + datasize - 1); - self.load_commands_dirty = true; + try self.base.file.?.pwriteAll(&[_]u8{0}, offset + needed_size - 1); + + try lc_writer.writeStruct(macho.linkedit_data_command{ + .cmd = .CODE_SIGNATURE, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = @intCast(u32, offset), + .datasize = @intCast(u32, needed_size), + }); + ncmds.* += 1; + + return @intCast(u32, offset); } -fn writeCodeSignature(self: *MachO, code_sig: *CodeSignature) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const code_sig_cmd = self.load_commands.items[self.code_signature_cmd_index.?].linkedit_data; - const seg = self.load_commands.items[self.text_segment_cmd_index.?].segment; +fn writeCodeSignature(self: *MachO, code_sig: *CodeSignature, offset: u32) !void { + const seg = self.segments.items[self.text_segment_cmd_index.?]; var buffer = std.ArrayList(u8).init(self.base.allocator); defer buffer.deinit(); try buffer.ensureTotalCapacityPrecise(code_sig.size()); try code_sig.writeAdhocSignature(self.base.allocator, .{ .file = self.base.file.?, - .exec_seg_base = seg.inner.fileoff, - .exec_seg_limit = seg.inner.filesize, - .code_sig_cmd = code_sig_cmd, + .exec_seg_base = 
seg.fileoff, + .exec_seg_limit = seg.filesize, + .file_size = offset, .output_mode = self.base.options.output_mode, }, buffer.writer()); assert(buffer.items.len == code_sig.size()); log.debug("writing code signature from 0x{x} to 0x{x}", .{ - code_sig_cmd.dataoff, - code_sig_cmd.dataoff + buffer.items.len, + offset, + offset + buffer.items.len, }); - try self.base.file.?.pwriteAll(buffer.items, code_sig_cmd.dataoff); -} - -/// Writes all load commands and section headers. -fn writeLoadCommands(self: *MachO) !void { - if (!self.load_commands_dirty) return; - - var sizeofcmds: u32 = 0; - for (self.load_commands.items) |lc| { - if (lc.cmd() == .NONE) continue; - sizeofcmds += lc.cmdsize(); - } - - var buffer = try self.base.allocator.alloc(u8, sizeofcmds); - defer self.base.allocator.free(buffer); - var fib = std.io.fixedBufferStream(buffer); - const writer = fib.writer(); - for (self.load_commands.items) |lc| { - if (lc.cmd() == .NONE) continue; - try lc.write(writer); - } - - const off = @sizeOf(macho.mach_header_64); - - log.debug("writing load commands from 0x{x} to 0x{x}", .{ off, off + sizeofcmds }); - - try self.base.file.?.pwriteAll(buffer, off); - self.load_commands_dirty = false; + try self.base.file.?.pwriteAll(buffer.items, offset); } /// Writes Mach-O file header. 
-fn writeHeader(self: *MachO) !void { +fn writeHeader(self: *MachO, ncmds: u32, sizeofcmds: u32) !void { var header: macho.mach_header_64 = .{}; header.flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE | macho.MH_TWOLEVEL; @@ -6365,14 +6114,8 @@ fn writeHeader(self: *MachO) !void { header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; } - header.ncmds = 0; - header.sizeofcmds = 0; - - for (self.load_commands.items) |cmd| { - if (cmd.cmd() == .NONE) continue; - header.sizeofcmds += cmd.cmdsize(); - header.ncmds += 1; - } + header.ncmds = ncmds; + header.sizeofcmds = sizeofcmds; log.debug("writing Mach-O header {}", .{header}); @@ -6392,33 +6135,13 @@ pub fn makeStaticString(bytes: []const u8) [16]u8 { return buf; } -pub fn getSectionOrdinal(self: *MachO, match: MatchingSection) u8 { - return @intCast(u8, self.section_ordinals.getIndex(match).?) + 1; -} - -pub fn getMatchingSectionFromOrdinal(self: *MachO, ord: u8) MatchingSection { - const index = ord - 1; - assert(index < self.section_ordinals.count()); - return self.section_ordinals.keys()[index]; -} - -pub fn getSegmentPtr(self: *MachO, match: MatchingSection) *macho.SegmentCommand { - assert(match.seg < self.load_commands.items.len); - return &self.load_commands.items[match.seg].segment; -} - -pub fn getSegment(self: *MachO, match: MatchingSection) macho.SegmentCommand { - return self.getSegmentPtr(match).*; -} - -pub fn getSectionPtr(self: *MachO, match: MatchingSection) *macho.section_64 { - const seg = self.getSegmentPtr(match); - assert(match.sect < seg.sections.items.len); - return &seg.sections.items[match.sect]; -} - -pub fn getSection(self: *MachO, match: MatchingSection) macho.section_64 { - return self.getSectionPtr(match).*; +fn getSectionIndexes(self: MachO, segment_index: u8) struct { start: u8, end: u8 } { + var start: u8 = 0; + const nsects = for (self.segments.items) |seg, i| { + if (i == segment_index) break @intCast(u8, seg.nsects); + start += @intCast(u8, seg.nsects); + } else 0; + return 
.{ .start = start, .end = start + nsects }; } pub fn symbolIsTemp(self: *MachO, sym_with_loc: SymbolWithLoc) bool { @@ -6512,72 +6235,6 @@ pub fn findFirst(comptime T: type, haystack: []const T, start: usize, predicate: return i; } -const DebugInfo = struct { - inner: dwarf.DwarfInfo, - debug_info: []const u8, - debug_abbrev: []const u8, - debug_str: []const u8, - debug_line: []const u8, - debug_line_str: []const u8, - debug_ranges: []const u8, - - pub fn parse(allocator: Allocator, object: Object) !?DebugInfo { - var debug_info = blk: { - const index = object.dwarf_debug_info_index orelse return null; - break :blk try object.getSectionContents(index); - }; - var debug_abbrev = blk: { - const index = object.dwarf_debug_abbrev_index orelse return null; - break :blk try object.getSectionContents(index); - }; - var debug_str = blk: { - const index = object.dwarf_debug_str_index orelse return null; - break :blk try object.getSectionContents(index); - }; - var debug_line = blk: { - const index = object.dwarf_debug_line_index orelse return null; - break :blk try object.getSectionContents(index); - }; - var debug_line_str = blk: { - if (object.dwarf_debug_line_str_index) |ind| { - break :blk try object.getSectionContents(ind); - } - break :blk &[0]u8{}; - }; - var debug_ranges = blk: { - if (object.dwarf_debug_ranges_index) |ind| { - break :blk try object.getSectionContents(ind); - } - break :blk &[0]u8{}; - }; - - var inner: dwarf.DwarfInfo = .{ - .endian = .Little, - .debug_info = debug_info, - .debug_abbrev = debug_abbrev, - .debug_str = debug_str, - .debug_line = debug_line, - .debug_line_str = debug_line_str, - .debug_ranges = debug_ranges, - }; - try dwarf.openDwarfDebugInfo(&inner, allocator); - - return DebugInfo{ - .inner = inner, - .debug_info = debug_info, - .debug_abbrev = debug_abbrev, - .debug_str = debug_str, - .debug_line = debug_line, - .debug_line_str = debug_line_str, - .debug_ranges = debug_ranges, - }; - } - - pub fn deinit(self: *DebugInfo, 
allocator: Allocator) void { - self.inner.deinit(allocator); - } -}; - pub fn generateSymbolStabs( self: *MachO, object: Object, @@ -6585,14 +6242,15 @@ pub fn generateSymbolStabs( ) !void { assert(!self.base.options.strip); - const gpa = self.base.allocator; - log.debug("parsing debug info in '{s}'", .{object.name}); - var debug_info = (try DebugInfo.parse(gpa, object)) orelse return; + const gpa = self.base.allocator; + var debug_info = try object.parseDwarfInfo(); + defer debug_info.deinit(gpa); + try dwarf.openDwarfDebugInfo(&debug_info, gpa); // We assume there is only one CU. - const compile_unit = debug_info.inner.findCompileUnit(0x0) catch |err| switch (err) { + const compile_unit = debug_info.findCompileUnit(0x0) catch |err| switch (err) { error.MissingDebugInfo => { // TODO audit cases with missing debug info and audit our dwarf.zig module. log.debug("invalid or missing debug info in {s}; skipping", .{object.name}); @@ -6600,8 +6258,8 @@ pub fn generateSymbolStabs( }, else => |e| return e, }; - const tu_name = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT.name); - const tu_comp_dir = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT.comp_dir); + const tu_name = try compile_unit.die.getAttrString(&debug_info, dwarf.AT.name); + const tu_comp_dir = try compile_unit.die.getAttrString(&debug_info, dwarf.AT.comp_dir); // Open scope try locals.ensureUnusedCapacity(3); @@ -6664,7 +6322,7 @@ pub fn generateSymbolStabs( fn generateSymbolStabsForSymbol( self: *MachO, sym_loc: SymbolWithLoc, - debug_info: DebugInfo, + debug_info: dwarf.DwarfInfo, buf: *[4]macho.nlist_64, ) ![]const macho.nlist_64 { const gpa = self.base.allocator; @@ -6679,7 +6337,7 @@ fn generateSymbolStabsForSymbol( const source_sym = object.getSourceSymbol(sym_loc.sym_index) orelse return buf[0..0]; const size: ?u64 = size: { if (source_sym.tentative()) break :size null; - for (debug_info.inner.func_list.items) |func| { + for (debug_info.func_list.items) |func| { if 
(func.pc_range) |range| { if (source_sym.n_value >= range.start and source_sym.n_value < range.end) { break :size range.end - range.start; @@ -6731,260 +6389,260 @@ fn generateSymbolStabsForSymbol( } } -fn snapshotState(self: *MachO) !void { - const emit = self.base.options.emit orelse { - log.debug("no emit directory found; skipping snapshot...", .{}); - return; - }; +// fn snapshotState(self: *MachO) !void { +// const emit = self.base.options.emit orelse { +// log.debug("no emit directory found; skipping snapshot...", .{}); +// return; +// }; - const Snapshot = struct { - const Node = struct { - const Tag = enum { - section_start, - section_end, - atom_start, - atom_end, - relocation, +// const Snapshot = struct { +// const Node = struct { +// const Tag = enum { +// section_start, +// section_end, +// atom_start, +// atom_end, +// relocation, - pub fn jsonStringify( - tag: Tag, - options: std.json.StringifyOptions, - out_stream: anytype, - ) !void { - _ = options; - switch (tag) { - .section_start => try out_stream.writeAll("\"section_start\""), - .section_end => try out_stream.writeAll("\"section_end\""), - .atom_start => try out_stream.writeAll("\"atom_start\""), - .atom_end => try out_stream.writeAll("\"atom_end\""), - .relocation => try out_stream.writeAll("\"relocation\""), - } - } - }; - const Payload = struct { - name: []const u8 = "", - aliases: [][]const u8 = &[0][]const u8{}, - is_global: bool = false, - target: u64 = 0, - }; - address: u64, - tag: Tag, - payload: Payload, - }; - timestamp: i128, - nodes: []Node, - }; +// pub fn jsonStringify( +// tag: Tag, +// options: std.json.StringifyOptions, +// out_stream: anytype, +// ) !void { +// _ = options; +// switch (tag) { +// .section_start => try out_stream.writeAll("\"section_start\""), +// .section_end => try out_stream.writeAll("\"section_end\""), +// .atom_start => try out_stream.writeAll("\"atom_start\""), +// .atom_end => try out_stream.writeAll("\"atom_end\""), +// .relocation => try 
out_stream.writeAll("\"relocation\""), +// } +// } +// }; +// const Payload = struct { +// name: []const u8 = "", +// aliases: [][]const u8 = &[0][]const u8{}, +// is_global: bool = false, +// target: u64 = 0, +// }; +// address: u64, +// tag: Tag, +// payload: Payload, +// }; +// timestamp: i128, +// nodes: []Node, +// }; - var arena_allocator = std.heap.ArenaAllocator.init(self.base.allocator); - defer arena_allocator.deinit(); - const arena = arena_allocator.allocator(); +// var arena_allocator = std.heap.ArenaAllocator.init(self.base.allocator); +// defer arena_allocator.deinit(); +// const arena = arena_allocator.allocator(); - const out_file = try emit.directory.handle.createFile("snapshots.json", .{ - .truncate = false, - .read = true, - }); - defer out_file.close(); +// const out_file = try emit.directory.handle.createFile("snapshots.json", .{ +// .truncate = false, +// .read = true, +// }); +// defer out_file.close(); - if (out_file.seekFromEnd(-1)) { - try out_file.writer().writeByte(','); - } else |err| switch (err) { - error.Unseekable => try out_file.writer().writeByte('['), - else => |e| return e, - } - const writer = out_file.writer(); +// if (out_file.seekFromEnd(-1)) { +// try out_file.writer().writeByte(','); +// } else |err| switch (err) { +// error.Unseekable => try out_file.writer().writeByte('['), +// else => |e| return e, +// } +// const writer = out_file.writer(); - var snapshot = Snapshot{ - .timestamp = std.time.nanoTimestamp(), - .nodes = undefined, - }; - var nodes = std.ArrayList(Snapshot.Node).init(arena); +// var snapshot = Snapshot{ +// .timestamp = std.time.nanoTimestamp(), +// .nodes = undefined, +// }; +// var nodes = std.ArrayList(Snapshot.Node).init(arena); - for (self.section_ordinals.keys()) |key| { - const sect = self.getSection(key); - const sect_name = try std.fmt.allocPrint(arena, "{s},{s}", .{ sect.segName(), sect.sectName() }); - try nodes.append(.{ - .address = sect.addr, - .tag = .section_start, - .payload = .{ .name = 
sect_name }, - }); +// for (self.section_ordinals.keys()) |key| { +// const sect = self.getSection(key); +// const sect_name = try std.fmt.allocPrint(arena, "{s},{s}", .{ sect.segName(), sect.sectName() }); +// try nodes.append(.{ +// .address = sect.addr, +// .tag = .section_start, +// .payload = .{ .name = sect_name }, +// }); - const is_tlv = sect.type_() == macho.S_THREAD_LOCAL_VARIABLES; +// const is_tlv = sect.type_() == macho.S_THREAD_LOCAL_VARIABLES; - var atom: *Atom = self.atoms.get(key) orelse { - try nodes.append(.{ - .address = sect.addr + sect.size, - .tag = .section_end, - .payload = .{}, - }); - continue; - }; +// var atom: *Atom = self.atoms.get(key) orelse { +// try nodes.append(.{ +// .address = sect.addr + sect.size, +// .tag = .section_end, +// .payload = .{}, +// }); +// continue; +// }; - while (atom.prev) |prev| { - atom = prev; - } +// while (atom.prev) |prev| { +// atom = prev; +// } - while (true) { - const atom_sym = atom.getSymbol(self); - var node = Snapshot.Node{ - .address = atom_sym.n_value, - .tag = .atom_start, - .payload = .{ - .name = atom.getName(self), - .is_global = self.globals.contains(atom.getName(self)), - }, - }; +// while (true) { +// const atom_sym = atom.getSymbol(self); +// var node = Snapshot.Node{ +// .address = atom_sym.n_value, +// .tag = .atom_start, +// .payload = .{ +// .name = atom.getName(self), +// .is_global = self.globals.contains(atom.getName(self)), +// }, +// }; - var aliases = std.ArrayList([]const u8).init(arena); - for (atom.contained.items) |sym_off| { - if (sym_off.offset == 0) { - try aliases.append(self.getSymbolName(.{ - .sym_index = sym_off.sym_index, - .file = atom.file, - })); - } - } - node.payload.aliases = aliases.toOwnedSlice(); - try nodes.append(node); +// var aliases = std.ArrayList([]const u8).init(arena); +// for (atom.contained.items) |sym_off| { +// if (sym_off.offset == 0) { +// try aliases.append(self.getSymbolName(.{ +// .sym_index = sym_off.sym_index, +// .file = atom.file, 
+// })); +// } +// } +// node.payload.aliases = aliases.toOwnedSlice(); +// try nodes.append(node); - var relocs = try std.ArrayList(Snapshot.Node).initCapacity(arena, atom.relocs.items.len); - for (atom.relocs.items) |rel| { - const source_addr = blk: { - const source_sym = atom.getSymbol(self); - break :blk source_sym.n_value + rel.offset; - }; - const target_addr = blk: { - const target_atom = rel.getTargetAtom(self) orelse { - // If there is no atom for target, we still need to check for special, atom-less - // symbols such as `___dso_handle`. - const target_name = self.getSymbolName(rel.target); - if (self.globals.contains(target_name)) { - const atomless_sym = self.getSymbol(rel.target); - break :blk atomless_sym.n_value; - } - break :blk 0; - }; - const target_sym = if (target_atom.isSymbolContained(rel.target, self)) - self.getSymbol(rel.target) - else - target_atom.getSymbol(self); - const base_address: u64 = if (is_tlv) base_address: { - const sect_id: u16 = sect_id: { - if (self.tlv_data_section_index) |i| { - break :sect_id i; - } else if (self.tlv_bss_section_index) |i| { - break :sect_id i; - } else unreachable; - }; - break :base_address self.getSection(.{ - .seg = self.data_segment_cmd_index.?, - .sect = sect_id, - }).addr; - } else 0; - break :blk target_sym.n_value - base_address; - }; +// var relocs = try std.ArrayList(Snapshot.Node).initCapacity(arena, atom.relocs.items.len); +// for (atom.relocs.items) |rel| { +// const source_addr = blk: { +// const source_sym = atom.getSymbol(self); +// break :blk source_sym.n_value + rel.offset; +// }; +// const target_addr = blk: { +// const target_atom = rel.getTargetAtom(self) orelse { +// // If there is no atom for target, we still need to check for special, atom-less +// // symbols such as `___dso_handle`. 
+// const target_name = self.getSymbolName(rel.target); +// if (self.globals.contains(target_name)) { +// const atomless_sym = self.getSymbol(rel.target); +// break :blk atomless_sym.n_value; +// } +// break :blk 0; +// }; +// const target_sym = if (target_atom.isSymbolContained(rel.target, self)) +// self.getSymbol(rel.target) +// else +// target_atom.getSymbol(self); +// const base_address: u64 = if (is_tlv) base_address: { +// const sect_id: u16 = sect_id: { +// if (self.tlv_data_section_index) |i| { +// break :sect_id i; +// } else if (self.tlv_bss_section_index) |i| { +// break :sect_id i; +// } else unreachable; +// }; +// break :base_address self.getSection(.{ +// .seg = self.data_segment_cmd_index.?, +// .sect = sect_id, +// }).addr; +// } else 0; +// break :blk target_sym.n_value - base_address; +// }; - relocs.appendAssumeCapacity(.{ - .address = source_addr, - .tag = .relocation, - .payload = .{ .target = target_addr }, - }); - } +// relocs.appendAssumeCapacity(.{ +// .address = source_addr, +// .tag = .relocation, +// .payload = .{ .target = target_addr }, +// }); +// } - if (atom.contained.items.len == 0) { - try nodes.appendSlice(relocs.items); - } else { - // Need to reverse iteration order of relocs since by default for relocatable sources - // they come in reverse. For linking, this doesn't matter in any way, however, for - // arranging the memoryline for displaying it does. - std.mem.reverse(Snapshot.Node, relocs.items); +// if (atom.contained.items.len == 0) { +// try nodes.appendSlice(relocs.items); +// } else { +// // Need to reverse iteration order of relocs since by default for relocatable sources +// // they come in reverse. For linking, this doesn't matter in any way, however, for +// // arranging the memoryline for displaying it does. 
+// std.mem.reverse(Snapshot.Node, relocs.items); - var next_i: usize = 0; - var last_rel: usize = 0; - while (next_i < atom.contained.items.len) : (next_i += 1) { - const loc = SymbolWithLoc{ - .sym_index = atom.contained.items[next_i].sym_index, - .file = atom.file, - }; - const cont_sym = self.getSymbol(loc); - const cont_sym_name = self.getSymbolName(loc); - var contained_node = Snapshot.Node{ - .address = cont_sym.n_value, - .tag = .atom_start, - .payload = .{ - .name = cont_sym_name, - .is_global = self.globals.contains(cont_sym_name), - }, - }; +// var next_i: usize = 0; +// var last_rel: usize = 0; +// while (next_i < atom.contained.items.len) : (next_i += 1) { +// const loc = SymbolWithLoc{ +// .sym_index = atom.contained.items[next_i].sym_index, +// .file = atom.file, +// }; +// const cont_sym = self.getSymbol(loc); +// const cont_sym_name = self.getSymbolName(loc); +// var contained_node = Snapshot.Node{ +// .address = cont_sym.n_value, +// .tag = .atom_start, +// .payload = .{ +// .name = cont_sym_name, +// .is_global = self.globals.contains(cont_sym_name), +// }, +// }; - // Accumulate aliases - var inner_aliases = std.ArrayList([]const u8).init(arena); - while (true) { - if (next_i + 1 >= atom.contained.items.len) break; - const next_sym_loc = SymbolWithLoc{ - .sym_index = atom.contained.items[next_i + 1].sym_index, - .file = atom.file, - }; - const next_sym = self.getSymbol(next_sym_loc); - if (next_sym.n_value != cont_sym.n_value) break; - const next_sym_name = self.getSymbolName(next_sym_loc); - if (self.globals.contains(next_sym_name)) { - try inner_aliases.append(contained_node.payload.name); - contained_node.payload.name = next_sym_name; - contained_node.payload.is_global = true; - } else try inner_aliases.append(next_sym_name); - next_i += 1; - } +// // Accumulate aliases +// var inner_aliases = std.ArrayList([]const u8).init(arena); +// while (true) { +// if (next_i + 1 >= atom.contained.items.len) break; +// const next_sym_loc = 
SymbolWithLoc{ +// .sym_index = atom.contained.items[next_i + 1].sym_index, +// .file = atom.file, +// }; +// const next_sym = self.getSymbol(next_sym_loc); +// if (next_sym.n_value != cont_sym.n_value) break; +// const next_sym_name = self.getSymbolName(next_sym_loc); +// if (self.globals.contains(next_sym_name)) { +// try inner_aliases.append(contained_node.payload.name); +// contained_node.payload.name = next_sym_name; +// contained_node.payload.is_global = true; +// } else try inner_aliases.append(next_sym_name); +// next_i += 1; +// } - const cont_size = if (next_i + 1 < atom.contained.items.len) - self.getSymbol(.{ - .sym_index = atom.contained.items[next_i + 1].sym_index, - .file = atom.file, - }).n_value - cont_sym.n_value - else - atom_sym.n_value + atom.size - cont_sym.n_value; +// const cont_size = if (next_i + 1 < atom.contained.items.len) +// self.getSymbol(.{ +// .sym_index = atom.contained.items[next_i + 1].sym_index, +// .file = atom.file, +// }).n_value - cont_sym.n_value +// else +// atom_sym.n_value + atom.size - cont_sym.n_value; - contained_node.payload.aliases = inner_aliases.toOwnedSlice(); - try nodes.append(contained_node); +// contained_node.payload.aliases = inner_aliases.toOwnedSlice(); +// try nodes.append(contained_node); - for (relocs.items[last_rel..]) |rel| { - if (rel.address >= cont_sym.n_value + cont_size) { - break; - } - try nodes.append(rel); - last_rel += 1; - } +// for (relocs.items[last_rel..]) |rel| { +// if (rel.address >= cont_sym.n_value + cont_size) { +// break; +// } +// try nodes.append(rel); +// last_rel += 1; +// } - try nodes.append(.{ - .address = cont_sym.n_value + cont_size, - .tag = .atom_end, - .payload = .{}, - }); - } - } +// try nodes.append(.{ +// .address = cont_sym.n_value + cont_size, +// .tag = .atom_end, +// .payload = .{}, +// }); +// } +// } - try nodes.append(.{ - .address = atom_sym.n_value + atom.size, - .tag = .atom_end, - .payload = .{}, - }); +// try nodes.append(.{ +// .address = 
atom_sym.n_value + atom.size, +// .tag = .atom_end, +// .payload = .{}, +// }); - if (atom.next) |next| { - atom = next; - } else break; - } +// if (atom.next) |next| { +// atom = next; +// } else break; +// } - try nodes.append(.{ - .address = sect.addr + sect.size, - .tag = .section_end, - .payload = .{}, - }); - } +// try nodes.append(.{ +// .address = sect.addr + sect.size, +// .tag = .section_end, +// .payload = .{}, +// }); +// } - snapshot.nodes = nodes.toOwnedSlice(); +// snapshot.nodes = nodes.toOwnedSlice(); - try std.json.stringify(snapshot, .{}, writer); - try writer.writeByte(']'); -} +// try std.json.stringify(snapshot, .{}, writer); +// try writer.writeByte(']'); +// } fn logSymAttributes(sym: macho.nlist_64, buf: *[9]u8) []const u8 { mem.set(u8, buf[0..4], '_'); @@ -7104,26 +6762,19 @@ fn logSymtab(self: *MachO) void { } } -fn logSectionOrdinals(self: *MachO) void { - for (self.section_ordinals.keys()) |match, i| { - const sect = self.getSection(match); - log.debug("sect({d}, '{s},{s}')", .{ i + 1, sect.segName(), sect.sectName() }); - } -} - fn logAtoms(self: *MachO) void { log.debug("atoms:", .{}); - var it = self.atoms.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var atom = entry.value_ptr.*; + + const slice = self.sections.slice(); + for (slice.items(.last_atom)) |last, i| { + var atom = last orelse continue; + const header = slice.items(.header)[i]; while (atom.prev) |prev| { atom = prev; } - const sect = self.getSection(match); - log.debug("{s},{s}", .{ sect.segName(), sect.sectName() }); + log.debug("{s},{s}", .{ header.segName(), header.sectName() }); while (true) { self.logAtom(atom); diff --git a/src/link/MachO/Archive.zig b/src/link/MachO/Archive.zig index ee43e5b2a2..054f75fff3 100644 --- a/src/link/MachO/Archive.zig +++ b/src/link/MachO/Archive.zig @@ -6,19 +6,14 @@ const fs = std.fs; const log = std.log.scoped(.link); const macho = std.macho; const mem = std.mem; -const fat = @import("fat.zig"); const 
Allocator = mem.Allocator; const Object = @import("Object.zig"); file: fs.File, +fat_offset: u64, name: []const u8, - -header: ?ar_hdr = null, - -// The actual contents we care about linking with will be embedded at -// an offset within a file if we are linking against a fat lib -library_offset: u64 = 0, +header: ar_hdr = undefined, /// Parsed table of contents. /// Each symbol name points to a list of all definition @@ -103,11 +98,7 @@ pub fn deinit(self: *Archive, allocator: Allocator) void { allocator.free(self.name); } -pub fn parse(self: *Archive, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) !void { - const reader = self.file.reader(); - self.library_offset = try fat.getLibraryOffset(reader, cpu_arch); - try self.file.seekTo(self.library_offset); - +pub fn parse(self: *Archive, allocator: Allocator, reader: anytype) !void { const magic = try reader.readBytesNoEof(SARMAG); if (!mem.eql(u8, &magic, ARMAG)) { log.debug("invalid magic: expected '{s}', found '{s}'", .{ ARMAG, magic }); @@ -115,21 +106,23 @@ pub fn parse(self: *Archive, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch } self.header = try reader.readStruct(ar_hdr); - if (!mem.eql(u8, &self.header.?.ar_fmag, ARFMAG)) { - log.debug("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, self.header.?.ar_fmag }); + if (!mem.eql(u8, &self.header.ar_fmag, ARFMAG)) { + log.debug("invalid header delimiter: expected '{s}', found '{s}'", .{ + ARFMAG, + self.header.ar_fmag, + }); return error.NotArchive; } - var embedded_name = try parseName(allocator, self.header.?, reader); + const name_or_length = try self.header.nameOrLength(); + var embedded_name = try parseName(allocator, name_or_length, reader); log.debug("parsing archive '{s}' at '{s}'", .{ embedded_name, self.name }); defer allocator.free(embedded_name); try self.parseTableOfContents(allocator, reader); - try reader.context.seekTo(0); } -fn parseName(allocator: Allocator, header: ar_hdr, reader: anytype) ![]u8 { - const 
name_or_length = try header.nameOrLength(); +fn parseName(allocator: Allocator, name_or_length: ar_hdr.NameOrLength, reader: anytype) ![]u8 { var name: []u8 = undefined; switch (name_or_length) { .Name => |n| { @@ -187,9 +180,14 @@ fn parseTableOfContents(self: *Archive, allocator: Allocator, reader: anytype) ! } } -pub fn parseObject(self: Archive, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch, offset: u32) !Object { +pub fn parseObject( + self: Archive, + allocator: Allocator, + cpu_arch: std.Target.Cpu.Arch, + offset: u32, +) !Object { const reader = self.file.reader(); - try reader.context.seekTo(offset + self.library_offset); + try reader.context.seekTo(self.fat_offset + offset); const object_header = try reader.readStruct(ar_hdr); @@ -198,7 +196,8 @@ pub fn parseObject(self: Archive, allocator: Allocator, cpu_arch: std.Target.Cpu return error.MalformedArchive; } - const object_name = try parseName(allocator, object_header, reader); + const name_or_length = try object_header.nameOrLength(); + const object_name = try parseName(allocator, name_or_length, reader); defer allocator.free(object_name); log.debug("extracting object '{s}' from archive '{s}'", .{ object_name, self.name }); @@ -209,15 +208,24 @@ pub fn parseObject(self: Archive, allocator: Allocator, cpu_arch: std.Target.Cpu break :name try std.fmt.allocPrint(allocator, "{s}({s})", .{ path, object_name }); }; + const object_name_len = switch (name_or_length) { + .Name => 0, + .Length => |len| len, + }; + const object_size = (try object_header.size()) - object_name_len; + const contents = try allocator.allocWithOptions(u8, object_size, @alignOf(u64), null); + const amt = try reader.readAll(contents); + if (amt != object_size) { + return error.InputOutput; + } + var object = Object{ - .file = try fs.cwd().openFile(self.name, .{}), .name = name, - .file_offset = @intCast(u32, try reader.context.getPos()), - .mtime = try self.header.?.date(), + .mtime = try self.header.date(), + .contents = contents, 
}; try object.parse(allocator, cpu_arch); - try reader.context.seekTo(0); return object; } diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index ba00764127..90c86e24ed 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -246,7 +246,7 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: else => { log.err("unexpected relocation type after ARM64_RELOC_ADDEND", .{}); log.err(" expected ARM64_RELOC_PAGE21 or ARM64_RELOC_PAGEOFF12", .{}); - log.err(" found {}", .{next}); + log.err(" found {s}", .{@tagName(next)}); return error.UnexpectedRelocationType; }, } @@ -285,7 +285,9 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: else => { log.err("unexpected relocation type after ARM64_RELOC_ADDEND", .{}); log.err(" expected ARM64_RELOC_UNSIGNED", .{}); - log.err(" found {}", .{@intToEnum(macho.reloc_type_arm64, relocs[i + 1].r_type)}); + log.err(" found {s}", .{ + @tagName(@intToEnum(macho.reloc_type_arm64, relocs[i + 1].r_type)), + }); return error.UnexpectedRelocationType; }, }, @@ -294,7 +296,9 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: else => { log.err("unexpected relocation type after X86_64_RELOC_ADDEND", .{}); log.err(" expected X86_64_RELOC_UNSIGNED", .{}); - log.err(" found {}", .{@intToEnum(macho.reloc_type_x86_64, relocs[i + 1].r_type)}); + log.err(" found {s}", .{ + @tagName(@intToEnum(macho.reloc_type_x86_64, relocs[i + 1].r_type)), + }); return error.UnexpectedRelocationType; }, }, @@ -309,13 +313,13 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: const sect_id = @intCast(u16, rel.r_symbolnum - 1); const sym_index = object.sections_as_symbols.get(sect_id) orelse blk: { const sect = object.getSourceSection(sect_id); - const match = (try context.macho_file.getMatchingSection(sect)) orelse + const match = (try context.macho_file.getOutputSection(sect)) orelse unreachable; const sym_index = 
@intCast(u32, object.symtab.items.len); try object.symtab.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, - .n_sect = context.macho_file.getSectionOrdinal(match), + .n_sect = match + 1, .n_desc = 0, .n_value = sect.addr, }); @@ -459,9 +463,10 @@ fn addPtrBindingOrRebase( }); } else { const source_sym = self.getSymbol(context.macho_file); - const match = context.macho_file.getMatchingSectionFromOrdinal(source_sym.n_sect); - const sect = context.macho_file.getSection(match); - const sect_type = sect.type_(); + const section = context.macho_file.sections.get(source_sym.n_sect - 1); + const header = section.header; + const segment_index = section.segment_index; + const sect_type = header.type_(); const should_rebase = rebase: { if (rel.r_length != 3) break :rebase false; @@ -470,12 +475,12 @@ fn addPtrBindingOrRebase( // that the segment is writable should be enough here. const is_right_segment = blk: { if (context.macho_file.data_segment_cmd_index) |idx| { - if (match.seg == idx) { + if (segment_index == idx) { break :blk true; } } if (context.macho_file.data_const_segment_cmd_index) |idx| { - if (match.seg == idx) { + if (segment_index == idx) { break :blk true; } } @@ -565,9 +570,8 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { }; const is_tlv = is_tlv: { const source_sym = self.getSymbol(macho_file); - const match = macho_file.getMatchingSectionFromOrdinal(source_sym.n_sect); - const sect = macho_file.getSection(match); - break :is_tlv sect.type_() == macho.S_THREAD_LOCAL_VARIABLES; + const header = macho_file.sections.items(.header)[source_sym.n_sect - 1]; + break :is_tlv header.type_() == macho.S_THREAD_LOCAL_VARIABLES; }; const target_addr = blk: { const target_atom = rel.getTargetAtom(macho_file) orelse { @@ -608,10 +612,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { return error.FailedToResolveRelocationTarget; } }; - break :base_address macho_file.getSection(.{ - .seg = macho_file.data_segment_cmd_index.?, - .sect = 
sect_id, - }).addr; + break :base_address macho_file.sections.items(.header)[sect_id].addr; } else 0; break :blk target_sym.n_value - base_address; }; diff --git a/src/link/MachO/CodeSignature.zig b/src/link/MachO/CodeSignature.zig index fbfd487ce2..530a13dc51 100644 --- a/src/link/MachO/CodeSignature.zig +++ b/src/link/MachO/CodeSignature.zig @@ -252,7 +252,7 @@ pub const WriteOpts = struct { file: fs.File, exec_seg_base: u64, exec_seg_limit: u64, - code_sig_cmd: macho.linkedit_data_command, + file_size: u32, output_mode: std.builtin.OutputMode, }; @@ -274,10 +274,9 @@ pub fn writeAdhocSignature( self.code_directory.inner.execSegBase = opts.exec_seg_base; self.code_directory.inner.execSegLimit = opts.exec_seg_limit; self.code_directory.inner.execSegFlags = if (opts.output_mode == .Exe) macho.CS_EXECSEG_MAIN_BINARY else 0; - const file_size = opts.code_sig_cmd.dataoff; - self.code_directory.inner.codeLimit = file_size; + self.code_directory.inner.codeLimit = opts.file_size; - const total_pages = mem.alignForward(file_size, self.page_size) / self.page_size; + const total_pages = mem.alignForward(opts.file_size, self.page_size) / self.page_size; var buffer = try allocator.alloc(u8, self.page_size); defer allocator.free(buffer); @@ -289,7 +288,10 @@ pub fn writeAdhocSignature( var i: usize = 0; while (i < total_pages) : (i += 1) { const fstart = i * self.page_size; - const fsize = if (fstart + self.page_size > file_size) file_size - fstart else self.page_size; + const fsize = if (fstart + self.page_size > opts.file_size) + opts.file_size - fstart + else + self.page_size; const len = try opts.file.preadAll(buffer, fstart); assert(fsize <= len); diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index 4da106eca1..f191d43f98 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -25,35 +25,18 @@ base: *MachO, dwarf: Dwarf, file: fs.File, -/// Table of all load commands -load_commands: 
std.ArrayListUnmanaged(macho.LoadCommand) = .{}, -/// __PAGEZERO segment -pagezero_segment_cmd_index: ?u16 = null, -/// __TEXT segment -text_segment_cmd_index: ?u16 = null, -/// __DATA_CONST segment -data_const_segment_cmd_index: ?u16 = null, -/// __DATA segment -data_segment_cmd_index: ?u16 = null, -/// __LINKEDIT segment -linkedit_segment_cmd_index: ?u16 = null, -/// __DWARF segment -dwarf_segment_cmd_index: ?u16 = null, -/// Symbol table -symtab_cmd_index: ?u16 = null, -/// UUID load command -uuid_cmd_index: ?u16 = null, +segments: std.ArrayListUnmanaged(macho.segment_command_64) = .{}, +sections: std.ArrayListUnmanaged(macho.section_64) = .{}, -/// Index into __TEXT,__text section. -text_section_index: ?u16 = null, +linkedit_segment_cmd_index: ?u8 = null, +dwarf_segment_cmd_index: ?u8 = null, -debug_info_section_index: ?u16 = null, -debug_abbrev_section_index: ?u16 = null, -debug_str_section_index: ?u16 = null, -debug_aranges_section_index: ?u16 = null, -debug_line_section_index: ?u16 = null, +debug_info_section_index: ?u8 = null, +debug_abbrev_section_index: ?u8 = null, +debug_str_section_index: ?u8 = null, +debug_aranges_section_index: ?u8 = null, +debug_line_section_index: ?u8 = null, -load_commands_dirty: bool = false, debug_string_table_dirty: bool = false, debug_abbrev_section_dirty: bool = false, debug_aranges_section_dirty: bool = false, @@ -78,98 +61,40 @@ pub const Reloc = struct { /// You must call this function *after* `MachO.populateMissingMetadata()` /// has been called to get a viable debug symbols output. 
pub fn populateMissingMetadata(self: *DebugSymbols, allocator: Allocator) !void { - if (self.uuid_cmd_index == null) { - const base_cmd = self.base.load_commands.items[self.base.uuid_cmd_index.?]; - self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(allocator, base_cmd); - self.load_commands_dirty = true; - } - - if (self.symtab_cmd_index == null) { - self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.base.allocator, .{ - .symtab = .{ - .cmdsize = @sizeOf(macho.symtab_command), - .symoff = 0, - .nsyms = 0, - .stroff = 0, - .strsize = 0, - }, - }); - try self.strtab.buffer.append(allocator, 0); - self.load_commands_dirty = true; - } - - if (self.pagezero_segment_cmd_index == null) { - self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const base_cmd = self.base.load_commands.items[self.base.pagezero_segment_cmd_index.?].segment; - const cmd = try self.copySegmentCommand(allocator, base_cmd); - try self.load_commands.append(allocator, .{ .segment = cmd }); - self.load_commands_dirty = true; - } - - if (self.text_segment_cmd_index == null) { - self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const base_cmd = self.base.load_commands.items[self.base.text_segment_cmd_index.?].segment; - const cmd = try self.copySegmentCommand(allocator, base_cmd); - try self.load_commands.append(allocator, .{ .segment = cmd }); - self.load_commands_dirty = true; - } - - if (self.data_const_segment_cmd_index == null) outer: { - if (self.base.data_const_segment_cmd_index == null) break :outer; // __DATA_CONST is optional - self.data_const_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const base_cmd = self.base.load_commands.items[self.base.data_const_segment_cmd_index.?].segment; - const cmd = try self.copySegmentCommand(allocator, base_cmd); - try self.load_commands.append(allocator, .{ .segment = cmd }); 
- self.load_commands_dirty = true; - } - - if (self.data_segment_cmd_index == null) outer: { - if (self.base.data_segment_cmd_index == null) break :outer; // __DATA is optional - self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const base_cmd = self.base.load_commands.items[self.base.data_segment_cmd_index.?].segment; - const cmd = try self.copySegmentCommand(allocator, base_cmd); - try self.load_commands.append(allocator, .{ .segment = cmd }); - self.load_commands_dirty = true; - } - if (self.linkedit_segment_cmd_index == null) { - self.linkedit_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const base_cmd = self.base.load_commands.items[self.base.linkedit_segment_cmd_index.?].segment; - var cmd = try self.copySegmentCommand(allocator, base_cmd); + self.linkedit_segment_cmd_index = @intCast(u8, self.segments.items.len); // TODO this needs reworking - cmd.inner.vmsize = self.base.page_size; - cmd.inner.fileoff = self.base.page_size; - cmd.inner.filesize = self.base.page_size; - try self.load_commands.append(allocator, .{ .segment = cmd }); - self.load_commands_dirty = true; + try self.segments.append(allocator, .{ + .segname = makeStaticString("__LINKEDIT"), + .vmaddr = self.base.page_size, + .vmsize = self.base.page_size, + .fileoff = self.base.page_size, + .filesize = self.base.page_size, + .maxprot = macho.PROT.READ, + .initprot = macho.PROT.READ, + .cmdsize = @sizeOf(macho.segment_command_64), + }); } if (self.dwarf_segment_cmd_index == null) { - self.dwarf_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + self.dwarf_segment_cmd_index = @intCast(u8, self.segments.items.len); - const linkedit = self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; + const linkedit = self.segments.items[self.base.linkedit_segment_cmd_index.?]; const ideal_size: u16 = 200 + 128 + 160 + 250; const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.base.page_size); - const fileoff = 
linkedit.inner.fileoff + linkedit.inner.filesize; - const vmaddr = linkedit.inner.vmaddr + linkedit.inner.vmsize; + const fileoff = linkedit.fileoff + linkedit.filesize; + const vmaddr = linkedit.vmaddr + linkedit.vmsize; log.debug("found __DWARF segment free space 0x{x} to 0x{x}", .{ fileoff, fileoff + needed_size }); - try self.load_commands.append(allocator, .{ - .segment = .{ - .inner = .{ - .segname = makeStaticString("__DWARF"), - .vmaddr = vmaddr, - .vmsize = needed_size, - .fileoff = fileoff, - .filesize = needed_size, - .cmdsize = @sizeOf(macho.segment_command_64), - }, - }, + try self.segments.append(allocator, .{ + .segname = makeStaticString("__DWARF"), + .vmaddr = vmaddr, + .vmsize = needed_size, + .fileoff = fileoff, + .filesize = needed_size, + .cmdsize = @sizeOf(macho.segment_command_64), }); - self.load_commands_dirty = true; } if (self.debug_str_section_index == null) { @@ -203,18 +128,18 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: Allocator) !void } } -fn allocateSection(self: *DebugSymbols, sectname: []const u8, size: u64, alignment: u16) !u16 { - const seg = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; +fn allocateSection(self: *DebugSymbols, sectname: []const u8, size: u64, alignment: u16) !u8 { + const segment = &self.segments.items[self.dwarf_segment_cmd_index.?]; var sect = macho.section_64{ .sectname = makeStaticString(sectname), - .segname = seg.inner.segname, + .segname = segment.segname, .size = @intCast(u32, size), .@"align" = alignment, }; const alignment_pow_2 = try math.powi(u32, 2, alignment); const off = self.findFreeSpace(size, alignment_pow_2); - assert(off + size <= seg.inner.fileoff + seg.inner.filesize); // TODO expand + assert(off + size <= segment.fileoff + segment.filesize); // TODO expand log.debug("found {s},{s} section free space 0x{x} to 0x{x}", .{ sect.segName(), @@ -223,31 +148,20 @@ fn allocateSection(self: *DebugSymbols, sectname: []const u8, size: u64, alignme off + 
size, }); - sect.addr = seg.inner.vmaddr + off - seg.inner.fileoff; + sect.addr = segment.vmaddr + off - segment.fileoff; sect.offset = @intCast(u32, off); - const index = @intCast(u16, seg.sections.items.len); - try seg.sections.append(self.base.base.allocator, sect); - seg.inner.cmdsize += @sizeOf(macho.section_64); - seg.inner.nsects += 1; - - // TODO - // const match = MatchingSection{ - // .seg = segment_id, - // .sect = index, - // }; - // _ = try self.section_ordinals.getOrPut(self.base.allocator, match); - // try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); - - self.load_commands_dirty = true; + const index = @intCast(u8, self.sections.items.len); + try self.sections.append(self.base.base.allocator, sect); + segment.cmdsize += @sizeOf(macho.section_64); + segment.nsects += 1; return index; } fn detectAllocCollision(self: *DebugSymbols, start: u64, size: u64) ?u64 { - const seg = self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; const end = start + padToIdeal(size); - for (seg.sections.items) |section| { + for (self.sections.items) |section| { const increased_size = padToIdeal(section.size); const test_end = section.offset + increased_size; if (end > section.offset and start < test_end) { @@ -258,8 +172,8 @@ fn detectAllocCollision(self: *DebugSymbols, start: u64, size: u64) ?u64 { } pub fn findFreeSpace(self: *DebugSymbols, object_size: u64, min_alignment: u64) u64 { - const seg = self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - var offset: u64 = seg.inner.fileoff; + const segment = self.segments.items[self.dwarf_segment_cmd_index.?]; + var offset: u64 = segment.fileoff; while (self.detectAllocCollision(offset, object_size)) |item_end| { offset = mem.alignForwardGeneric(u64, item_end, min_alignment); } @@ -296,8 +210,7 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti break :blk got_entry.getName(self.base); }, }; - const seg = 
&self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - const sect = &seg.sections.items[self.debug_info_section_index.?]; + const sect = &self.sections.items[self.debug_info_section_index.?]; const file_offset = sect.offset + reloc.offset; log.debug("resolving relocation: {d}@{x} ('{s}') at offset {x}", .{ reloc.target, @@ -311,15 +224,13 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti if (self.debug_abbrev_section_dirty) { try self.dwarf.writeDbgAbbrev(&self.base.base); - self.load_commands_dirty = true; self.debug_abbrev_section_dirty = false; } if (self.debug_info_header_dirty) { // Currently only one compilation unit is supported, so the address range is simply // identical to the main program header virtual address and memory size. - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].segment; - const text_section = text_segment.sections.items[self.text_section_index.?]; + const text_section = self.base.sections.items(.header)[self.base.text_section_index.?]; const low_pc = text_section.addr; const high_pc = text_section.addr + text_section.size; try self.dwarf.writeDbgInfoHeader(&self.base.base, module, low_pc, high_pc); @@ -329,10 +240,8 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti if (self.debug_aranges_section_dirty) { // Currently only one compilation unit is supported, so the address range is simply // identical to the main program header virtual address and memory size. 
- const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].segment; - const text_section = text_segment.sections.items[self.text_section_index.?]; + const text_section = self.base.sections.items(.header)[self.base.text_section_index.?]; try self.dwarf.writeDbgAranges(&self.base.base, text_section.addr, text_section.size); - self.load_commands_dirty = true; self.debug_aranges_section_dirty = false; } @@ -342,8 +251,8 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti } { - const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - const debug_strtab_sect = &dwarf_segment.sections.items[self.debug_str_section_index.?]; + const dwarf_segment = &self.segments.items[self.dwarf_segment_cmd_index.?]; + const debug_strtab_sect = &self.sections.items[self.debug_str_section_index.?]; if (self.debug_string_table_dirty or self.dwarf.strtab.items.len != debug_strtab_sect.size) { const allocated_size = self.allocatedSize(debug_strtab_sect.offset); const needed_size = self.dwarf.strtab.items.len; @@ -351,7 +260,7 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti if (needed_size > allocated_size) { debug_strtab_sect.size = 0; // free the space const new_offset = self.findFreeSpace(needed_size, 1); - debug_strtab_sect.addr = dwarf_segment.inner.vmaddr + new_offset - dwarf_segment.inner.fileoff; + debug_strtab_sect.addr = dwarf_segment.vmaddr + new_offset - dwarf_segment.fileoff; debug_strtab_sect.offset = @intCast(u32, new_offset); } debug_strtab_sect.size = @intCast(u32, needed_size); @@ -362,28 +271,53 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti }); try self.file.pwriteAll(self.dwarf.strtab.items, debug_strtab_sect.offset); - self.load_commands_dirty = true; self.debug_string_table_dirty = false; } } - self.updateDwarfSegment(); - try self.writeLinkeditSegment(); - try self.updateVirtualMemoryMapping(); - try 
self.writeLoadCommands(allocator); - try self.writeHeader(); + var lc_buffer = std.ArrayList(u8).init(allocator); + defer lc_buffer.deinit(); + const lc_writer = lc_buffer.writer(); + var ncmds: u32 = 0; + + try self.writeLinkeditSegmentData(&ncmds, lc_writer); + self.updateDwarfSegment(); + + { + try lc_writer.writeStruct(self.base.uuid); + ncmds += 1; + } + + var headers_buf = std.ArrayList(u8).init(allocator); + defer headers_buf.deinit(); + try self.base.writeSegmentHeaders( + 0, + self.base.linkedit_segment_cmd_index.?, + &ncmds, + headers_buf.writer(), + ); + + for (self.segments.items) |seg| { + try headers_buf.writer().writeStruct(seg); + ncmds += 2; + } + for (self.sections.items) |header| { + try headers_buf.writer().writeStruct(header); + } + + try self.file.pwriteAll(headers_buf.items, @sizeOf(macho.mach_header_64)); + try self.file.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len); + + try self.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len + headers_buf.items.len)); - assert(!self.load_commands_dirty); assert(!self.debug_abbrev_section_dirty); assert(!self.debug_aranges_section_dirty); assert(!self.debug_string_table_dirty); } pub fn deinit(self: *DebugSymbols, allocator: Allocator) void { - for (self.load_commands.items) |*lc| { - lc.deinit(allocator); - } - self.load_commands.deinit(allocator); + self.segments.deinit(allocator); + self.sections.deinit(allocator); self.dwarf.deinit(); self.strtab.deinit(allocator); self.relocs.deinit(allocator); @@ -402,59 +336,19 @@ pub fn swapRemoveRelocs(self: *DebugSymbols, target: u32) void { } } -fn copySegmentCommand( - self: *DebugSymbols, - allocator: Allocator, - base_cmd: macho.SegmentCommand, -) !macho.SegmentCommand { - var cmd = macho.SegmentCommand{ - .inner = .{ - .segname = undefined, - .cmdsize = base_cmd.inner.cmdsize, - .vmaddr = base_cmd.inner.vmaddr, - .vmsize = base_cmd.inner.vmsize, - .maxprot = base_cmd.inner.maxprot, - .initprot = 
base_cmd.inner.initprot, - .nsects = base_cmd.inner.nsects, - .flags = base_cmd.inner.flags, - }, - }; - mem.copy(u8, &cmd.inner.segname, &base_cmd.inner.segname); +fn updateDwarfSegment(self: *DebugSymbols) void { + const linkedit = self.segments.items[self.linkedit_segment_cmd_index.?]; + const dwarf_segment = &self.segments.items[self.dwarf_segment_cmd_index.?]; - try cmd.sections.ensureTotalCapacity(allocator, cmd.inner.nsects); - for (base_cmd.sections.items) |base_sect, i| { - var sect = macho.section_64{ - .sectname = undefined, - .segname = undefined, - .addr = base_sect.addr, - .size = base_sect.size, - .offset = 0, - .@"align" = base_sect.@"align", - .reloff = 0, - .nreloc = 0, - .flags = base_sect.flags, - .reserved1 = base_sect.reserved1, - .reserved2 = base_sect.reserved2, - .reserved3 = base_sect.reserved3, - }; - mem.copy(u8, §.sectname, &base_sect.sectname); - mem.copy(u8, §.segname, &base_sect.segname); - - if (self.base.text_section_index.? == i) { - self.text_section_index = @intCast(u16, i); - } - - cmd.sections.appendAssumeCapacity(sect); + const new_start_aligned = linkedit.vmaddr + linkedit.vmsize; + const old_start_aligned = dwarf_segment.vmaddr; + const diff = new_start_aligned - old_start_aligned; + if (diff > 0) { + dwarf_segment.vmaddr = new_start_aligned; } - return cmd; -} - -fn updateDwarfSegment(self: *DebugSymbols) void { - const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - var max_offset: u64 = 0; - for (dwarf_segment.sections.items) |sect| { + for (self.sections.items) |*sect| { log.debug(" {s},{s} - 0x{x}-0x{x} - 0x{x}-0x{x}", .{ sect.segName(), sect.sectName(), @@ -466,44 +360,19 @@ fn updateDwarfSegment(self: *DebugSymbols) void { if (sect.offset + sect.size > max_offset) { max_offset = sect.offset + sect.size; } + sect.addr += diff; } - const file_size = max_offset - dwarf_segment.inner.fileoff; + const file_size = max_offset - dwarf_segment.fileoff; log.debug("__DWARF size 0x{x}", 
.{file_size}); - if (file_size != dwarf_segment.inner.filesize) { - dwarf_segment.inner.filesize = file_size; - if (dwarf_segment.inner.vmsize < dwarf_segment.inner.filesize) { - dwarf_segment.inner.vmsize = mem.alignForwardGeneric(u64, dwarf_segment.inner.filesize, self.base.page_size); - } - self.load_commands_dirty = true; + if (file_size != dwarf_segment.filesize) { + dwarf_segment.filesize = file_size; + dwarf_segment.vmsize = mem.alignForwardGeneric(u64, dwarf_segment.filesize, self.base.page_size); } } -/// Writes all load commands and section headers. -fn writeLoadCommands(self: *DebugSymbols, allocator: Allocator) !void { - if (!self.load_commands_dirty) return; - - var sizeofcmds: u32 = 0; - for (self.load_commands.items) |lc| { - sizeofcmds += lc.cmdsize(); - } - - var buffer = try allocator.alloc(u8, sizeofcmds); - defer allocator.free(buffer); - var fib = std.io.fixedBufferStream(buffer); - const writer = fib.writer(); - for (self.load_commands.items) |lc| { - try lc.write(writer); - } - - const off = @sizeOf(macho.mach_header_64); - log.debug("writing {} load commands from 0x{x} to 0x{x}", .{ self.load_commands.items.len, off, off + sizeofcmds }); - try self.file.pwriteAll(buffer, off); - self.load_commands_dirty = false; -} - -fn writeHeader(self: *DebugSymbols) !void { +fn writeHeader(self: *DebugSymbols, ncmds: u32, sizeofcmds: u32) !void { var header: macho.mach_header_64 = .{}; header.filetype = macho.MH_DSYM; @@ -519,12 +388,8 @@ fn writeHeader(self: *DebugSymbols) !void { else => return error.UnsupportedCpuArchitecture, } - header.ncmds = @intCast(u32, self.load_commands.items.len); - header.sizeofcmds = 0; - - for (self.load_commands.items) |cmd| { - header.sizeofcmds += cmd.cmdsize(); - } + header.ncmds = ncmds; + header.sizeofcmds = sizeofcmds; log.debug("writing Mach-O header {}", .{header}); @@ -532,79 +397,46 @@ fn writeHeader(self: *DebugSymbols) !void { } pub fn allocatedSize(self: *DebugSymbols, start: u64) u64 { - const seg = 
self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - assert(start >= seg.inner.fileoff); + const seg = self.segments.items[self.dwarf_segment_cmd_index.?]; + assert(start >= seg.fileoff); var min_pos: u64 = std.math.maxInt(u64); - for (seg.sections.items) |section| { + for (self.sections.items) |section| { if (section.offset <= start) continue; if (section.offset < min_pos) min_pos = section.offset; } return min_pos - start; } -fn updateVirtualMemoryMapping(self: *DebugSymbols) !void { - const macho_file = self.base; - const allocator = macho_file.base.allocator; - - const IndexTuple = std.meta.Tuple(&[_]type{ *?u16, *?u16 }); - const indices = &[_]IndexTuple{ - .{ &macho_file.text_segment_cmd_index, &self.text_segment_cmd_index }, - .{ &macho_file.data_const_segment_cmd_index, &self.data_const_segment_cmd_index }, - .{ &macho_file.data_segment_cmd_index, &self.data_segment_cmd_index }, - }; - - for (indices) |tuple| { - const orig_cmd = macho_file.load_commands.items[tuple[0].*.?].segment; - const cmd = try self.copySegmentCommand(allocator, orig_cmd); - const comp_cmd = &self.load_commands.items[tuple[1].*.?]; - comp_cmd.deinit(allocator); - self.load_commands.items[tuple[1].*.?] = .{ .segment = cmd }; - } - - // TODO should we set the linkedit vmsize to that of the binary? - const orig_cmd = macho_file.load_commands.items[macho_file.linkedit_segment_cmd_index.?].segment; - const orig_vmaddr = orig_cmd.inner.vmaddr; - const linkedit_cmd = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - linkedit_cmd.inner.vmaddr = orig_vmaddr; - - // Update VM address for the DWARF segment and sections including re-running relocations. 
- // TODO re-run relocations - const dwarf_cmd = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - const new_start_aligned = orig_vmaddr + linkedit_cmd.inner.vmsize; - const old_start_aligned = dwarf_cmd.inner.vmaddr; - const diff = new_start_aligned - old_start_aligned; - if (diff > 0) { - dwarf_cmd.inner.vmaddr = new_start_aligned; - - for (dwarf_cmd.sections.items) |*sect| { - sect.addr += (new_start_aligned - old_start_aligned); - } - } - - self.load_commands_dirty = true; -} - -fn writeLinkeditSegment(self: *DebugSymbols) !void { +fn writeLinkeditSegmentData(self: *DebugSymbols, ncmds: *u32, lc_writer: anytype) !void { const tracy = trace(@src()); defer tracy.end(); - try self.writeSymbolTable(); - try self.writeStringTable(); + const source_vmaddr = self.base.segments.items[self.base.linkedit_segment_cmd_index.?].vmaddr; + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + seg.vmaddr = source_vmaddr; - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const aligned_size = mem.alignForwardGeneric(u64, seg.inner.filesize, self.base.page_size); - seg.inner.filesize = aligned_size; - seg.inner.vmsize = aligned_size; + var symtab_cmd = macho.symtab_command{ + .cmdsize = @sizeOf(macho.symtab_command), + .symoff = 0, + .nsyms = 0, + .stroff = 0, + .strsize = 0, + }; + try self.writeSymtab(&symtab_cmd); + try self.writeStrtab(&symtab_cmd); + try lc_writer.writeStruct(symtab_cmd); + ncmds.* += 1; + + const aligned_size = mem.alignForwardGeneric(u64, seg.filesize, self.base.page_size); + seg.filesize = aligned_size; + seg.vmsize = aligned_size; } -fn writeSymbolTable(self: *DebugSymbols) !void { +fn writeSymtab(self: *DebugSymbols, lc: *macho.symtab_command) !void { const tracy = trace(@src()); defer tracy.end(); const gpa = self.base.base.allocator; - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const symtab = 
&self.load_commands.items[self.symtab_cmd_index.?].symtab; - symtab.symoff = @intCast(u32, seg.inner.fileoff); var locals = std.ArrayList(macho.nlist_64).init(gpa); defer locals.deinit(); @@ -634,34 +466,36 @@ fn writeSymbolTable(self: *DebugSymbols) !void { const nlocals = locals.items.len; const nexports = exports.items.len; - const locals_off = symtab.symoff; - const locals_size = nlocals * @sizeOf(macho.nlist_64); - const exports_off = locals_off + locals_size; - const exports_size = nexports * @sizeOf(macho.nlist_64); + const nsyms = nlocals + nexports; - symtab.nsyms = @intCast(u32, nlocals + nexports); - const needed_size = (nlocals + nexports) * @sizeOf(macho.nlist_64); + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric( + u64, + seg.fileoff + seg.filesize, + @alignOf(macho.nlist_64), + ); + const needed_size = nsyms * @sizeOf(macho.nlist_64); - if (needed_size > seg.inner.filesize) { + if (needed_size > seg.filesize) { const aligned_size = mem.alignForwardGeneric(u64, needed_size, self.base.page_size); - const diff = @intCast(u32, aligned_size - seg.inner.filesize); - const dwarf_seg = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - seg.inner.filesize = aligned_size; + const diff = @intCast(u32, aligned_size - seg.filesize); + const dwarf_seg = &self.segments.items[self.dwarf_segment_cmd_index.?]; + seg.filesize = aligned_size; try MachO.copyRangeAllOverlappingAlloc( self.base.base.allocator, self.file, - dwarf_seg.inner.fileoff, - dwarf_seg.inner.fileoff + diff, - math.cast(usize, dwarf_seg.inner.filesize) orelse return error.Overflow, + dwarf_seg.fileoff, + dwarf_seg.fileoff + diff, + math.cast(usize, dwarf_seg.filesize) orelse return error.Overflow, ); - const old_seg_fileoff = dwarf_seg.inner.fileoff; - dwarf_seg.inner.fileoff += diff; + const old_seg_fileoff = dwarf_seg.fileoff; + dwarf_seg.fileoff += diff; - log.debug(" (moving __DWARF segment from 0x{x} to 0x{x})", .{ 
old_seg_fileoff, dwarf_seg.inner.fileoff }); + log.debug(" (moving __DWARF segment from 0x{x} to 0x{x})", .{ old_seg_fileoff, dwarf_seg.fileoff }); - for (dwarf_seg.sections.items) |*sect| { + for (self.sections.items) |*sect| { const old_offset = sect.offset; sect.offset += diff; @@ -674,47 +508,53 @@ fn writeSymbolTable(self: *DebugSymbols) !void { } } + lc.symoff = @intCast(u32, offset); + lc.nsyms = @intCast(u32, nsyms); + + const locals_off = lc.symoff; + const locals_size = nlocals * @sizeOf(macho.nlist_64); + const exports_off = locals_off + locals_size; + const exports_size = nexports * @sizeOf(macho.nlist_64); + log.debug("writing local symbols from 0x{x} to 0x{x}", .{ locals_off, locals_size + locals_off }); try self.file.pwriteAll(mem.sliceAsBytes(locals.items), locals_off); log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off }); try self.file.pwriteAll(mem.sliceAsBytes(exports.items), exports_off); - - self.load_commands_dirty = true; } -fn writeStringTable(self: *DebugSymbols) !void { +fn writeStrtab(self: *DebugSymbols, lc: *macho.symtab_command) !void { const tracy = trace(@src()); defer tracy.end(); - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].symtab; - const symtab_size = @intCast(u32, symtab.nsyms * @sizeOf(macho.nlist_64)); - symtab.stroff = symtab.symoff + symtab_size; + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const symtab_size = @intCast(u32, lc.nsyms * @sizeOf(macho.nlist_64)); + const offset = mem.alignForwardGeneric(u64, lc.symoff + symtab_size, @alignOf(u64)); + lc.stroff = @intCast(u32, offset); const needed_size = mem.alignForwardGeneric(u64, self.strtab.buffer.items.len, @alignOf(u64)); - symtab.strsize = @intCast(u32, needed_size); + lc.strsize = @intCast(u32, needed_size); - if (symtab_size + needed_size > seg.inner.filesize) { - const aligned_size = 
mem.alignForwardGeneric(u64, symtab_size + needed_size, self.base.page_size); - const diff = @intCast(u32, aligned_size - seg.inner.filesize); - const dwarf_seg = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - seg.inner.filesize = aligned_size; + if (offset + needed_size > seg.filesize) { + const aligned_size = mem.alignForwardGeneric(u64, offset + needed_size, self.base.page_size); + const diff = @intCast(u32, aligned_size - seg.filesize); + const dwarf_seg = &self.segments.items[self.dwarf_segment_cmd_index.?]; + seg.filesize = aligned_size; try MachO.copyRangeAllOverlappingAlloc( self.base.base.allocator, self.file, - dwarf_seg.inner.fileoff, - dwarf_seg.inner.fileoff + diff, - math.cast(usize, dwarf_seg.inner.filesize) orelse return error.Overflow, + dwarf_seg.fileoff, + dwarf_seg.fileoff + diff, + math.cast(usize, dwarf_seg.filesize) orelse return error.Overflow, ); - const old_seg_fileoff = dwarf_seg.inner.fileoff; - dwarf_seg.inner.fileoff += diff; + const old_seg_fileoff = dwarf_seg.fileoff; + dwarf_seg.fileoff += diff; - log.debug(" (moving __DWARF segment from 0x{x} to 0x{x})", .{ old_seg_fileoff, dwarf_seg.inner.fileoff }); + log.debug(" (moving __DWARF segment from 0x{x} to 0x{x})", .{ old_seg_fileoff, dwarf_seg.fileoff }); - for (dwarf_seg.sections.items) |*sect| { + for (self.sections.items) |*sect| { const old_offset = sect.offset; sect.offset += diff; @@ -727,9 +567,7 @@ fn writeStringTable(self: *DebugSymbols) !void { } } - log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); + log.debug("writing string table from 0x{x} to 0x{x}", .{ lc.stroff, lc.stroff + lc.strsize }); - try self.file.pwriteAll(self.strtab.buffer.items, symtab.stroff); - - self.load_commands_dirty = true; + try self.file.pwriteAll(self.strtab.buffer.items, lc.stroff); } diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index ffc0b2cca6..0f16eada61 100644 --- a/src/link/MachO/Dylib.zig +++ 
b/src/link/MachO/Dylib.zig @@ -13,23 +13,9 @@ const fat = @import("fat.zig"); const Allocator = mem.Allocator; const CrossTarget = std.zig.CrossTarget; const LibStub = @import("../tapi.zig").LibStub; +const LoadCommandIterator = macho.LoadCommandIterator; const MachO = @import("../MachO.zig"); -file: fs.File, -name: []const u8, - -header: ?macho.mach_header_64 = null, - -// The actual dylib contents we care about linking with will be embedded at -// an offset within a file if we are linking against a fat lib -library_offset: u64 = 0, - -load_commands: std.ArrayListUnmanaged(macho.LoadCommand) = .{}, - -symtab_cmd_index: ?u16 = null, -dysymtab_cmd_index: ?u16 = null, -id_cmd_index: ?u16 = null, - id: ?Id = null, weak: bool = false, @@ -53,16 +39,12 @@ pub const Id = struct { }; } - pub fn fromLoadCommand(allocator: Allocator, lc: macho.GenericCommandWithData(macho.dylib_command)) !Id { - const dylib = lc.inner.dylib; - const dylib_name = @ptrCast([*:0]const u8, lc.data[dylib.name - @sizeOf(macho.dylib_command) ..]); - const name = try allocator.dupe(u8, mem.sliceTo(dylib_name, 0)); - + pub fn fromLoadCommand(allocator: Allocator, lc: macho.dylib_command, name: []const u8) !Id { return Id{ - .name = name, - .timestamp = dylib.timestamp, - .current_version = dylib.current_version, - .compatibility_version = dylib.compatibility_version, + .name = try allocator.dupe(u8, name), + .timestamp = lc.dylib.timestamp, + .current_version = lc.dylib.current_version, + .compatibility_version = lc.dylib.compatibility_version, }; } @@ -126,125 +108,89 @@ pub const Id = struct { }; pub fn deinit(self: *Dylib, allocator: Allocator) void { - for (self.load_commands.items) |*lc| { - lc.deinit(allocator); - } - self.load_commands.deinit(allocator); - for (self.symbols.keys()) |key| { allocator.free(key); } self.symbols.deinit(allocator); - - allocator.free(self.name); - if (self.id) |*id| { id.deinit(allocator); } } -pub fn parse( +pub fn parseFromBinary( self: *Dylib, allocator: 
Allocator, cpu_arch: std.Target.Cpu.Arch, dylib_id: u16, dependent_libs: anytype, + name: []const u8, + data: []align(@alignOf(u64)) const u8, ) !void { - log.debug("parsing shared library '{s}'", .{self.name}); + var stream = std.io.fixedBufferStream(data); + const reader = stream.reader(); - self.library_offset = try fat.getLibraryOffset(self.file.reader(), cpu_arch); + log.debug("parsing shared library '{s}'", .{name}); - try self.file.seekTo(self.library_offset); + const header = try reader.readStruct(macho.mach_header_64); - var reader = self.file.reader(); - self.header = try reader.readStruct(macho.mach_header_64); - - if (self.header.?.filetype != macho.MH_DYLIB) { - log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{ macho.MH_DYLIB, self.header.?.filetype }); + if (header.filetype != macho.MH_DYLIB) { + log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{ macho.MH_DYLIB, header.filetype }); return error.NotDylib; } - const this_arch: std.Target.Cpu.Arch = try fat.decodeArch(self.header.?.cputype, true); + const this_arch: std.Target.Cpu.Arch = try fat.decodeArch(header.cputype, true); if (this_arch != cpu_arch) { - log.err("mismatched cpu architecture: expected {}, found {}", .{ cpu_arch, this_arch }); + log.err("mismatched cpu architecture: expected {s}, found {s}", .{ + @tagName(cpu_arch), + @tagName(this_arch), + }); return error.MismatchedCpuArchitecture; } - try self.readLoadCommands(allocator, reader, dylib_id, dependent_libs); - try self.parseId(allocator); - try self.parseSymbols(allocator); -} - -fn readLoadCommands( - self: *Dylib, - allocator: Allocator, - reader: anytype, - dylib_id: u16, - dependent_libs: anytype, -) !void { - const should_lookup_reexports = self.header.?.flags & macho.MH_NO_REEXPORTED_DYLIBS == 0; - - try self.load_commands.ensureUnusedCapacity(allocator, self.header.?.ncmds); - - var i: u16 = 0; - while (i < self.header.?.ncmds) : (i += 1) { - var cmd = try macho.LoadCommand.read(allocator, reader); + 
const should_lookup_reexports = header.flags & macho.MH_NO_REEXPORTED_DYLIBS == 0; + var it = LoadCommandIterator{ + .ncmds = header.ncmds, + .buffer = data[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], + }; + while (it.next()) |cmd| { switch (cmd.cmd()) { .SYMTAB => { - self.symtab_cmd_index = i; - }, - .DYSYMTAB => { - self.dysymtab_cmd_index = i; + const symtab_cmd = cmd.cast(macho.symtab_command).?; + const symtab = @ptrCast( + [*]const macho.nlist_64, + @alignCast(@alignOf(macho.nlist_64), &data[symtab_cmd.symoff]), + )[0..symtab_cmd.nsyms]; + const strtab = data[symtab_cmd.stroff..][0..symtab_cmd.strsize]; + + for (symtab) |sym| { + const add_to_symtab = sym.ext() and (sym.sect() or sym.indr()); + if (!add_to_symtab) continue; + + const sym_name = mem.sliceTo(@ptrCast([*:0]const u8, strtab.ptr + sym.n_strx), 0); + try self.symbols.putNoClobber(allocator, try allocator.dupe(u8, sym_name), {}); + } }, .ID_DYLIB => { - self.id_cmd_index = i; + self.id = try Id.fromLoadCommand( + allocator, + cmd.cast(macho.dylib_command).?, + cmd.getDylibPathName(), + ); }, .REEXPORT_DYLIB => { if (should_lookup_reexports) { // Parse install_name to dependent dylib. 
- var id = try Id.fromLoadCommand(allocator, cmd.dylib); + var id = try Id.fromLoadCommand( + allocator, + cmd.cast(macho.dylib_command).?, + cmd.getDylibPathName(), + ); try dependent_libs.writeItem(.{ .id = id, .parent = dylib_id }); } }, - else => { - log.debug("Unknown load command detected: 0x{x}.", .{@enumToInt(cmd.cmd())}); - }, + else => {}, } - self.load_commands.appendAssumeCapacity(cmd); - } -} - -fn parseId(self: *Dylib, allocator: Allocator) !void { - const index = self.id_cmd_index orelse { - log.debug("no LC_ID_DYLIB load command found; using hard-coded defaults...", .{}); - self.id = try Id.default(allocator, self.name); - return; - }; - self.id = try Id.fromLoadCommand(allocator, self.load_commands.items[index].dylib); -} - -fn parseSymbols(self: *Dylib, allocator: Allocator) !void { - const index = self.symtab_cmd_index orelse return; - const symtab_cmd = self.load_commands.items[index].symtab; - - const symtab = try allocator.alloc(u8, @sizeOf(macho.nlist_64) * symtab_cmd.nsyms); - defer allocator.free(symtab); - _ = try self.file.preadAll(symtab, symtab_cmd.symoff + self.library_offset); - const slice = @alignCast(@alignOf(macho.nlist_64), mem.bytesAsSlice(macho.nlist_64, symtab)); - - const strtab = try allocator.alloc(u8, symtab_cmd.strsize); - defer allocator.free(strtab); - _ = try self.file.preadAll(strtab, symtab_cmd.stroff + self.library_offset); - - for (slice) |sym| { - const add_to_symtab = sym.ext() and (sym.sect() or sym.indr()); - - if (!add_to_symtab) continue; - - const sym_name = mem.sliceTo(@ptrCast([*:0]const u8, strtab.ptr + sym.n_strx), 0); - const name = try allocator.dupe(u8, sym_name); - try self.symbols.putNoClobber(allocator, name, {}); } } @@ -356,10 +302,11 @@ pub fn parseFromStub( lib_stub: LibStub, dylib_id: u16, dependent_libs: anytype, + name: []const u8, ) !void { if (lib_stub.inner.len == 0) return error.EmptyStubFile; - log.debug("parsing shared library from stub '{s}'", .{self.name}); + log.debug("parsing 
shared library from stub '{s}'", .{name}); const umbrella_lib = lib_stub.inner[0]; diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 0d929627cd..2e2f3dad84 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -3,6 +3,7 @@ const Object = @This(); const std = @import("std"); const build_options = @import("build_options"); const assert = std.debug.assert; +const dwarf = std.dwarf; const fs = std.fs; const io = std.io; const log = std.log.scoped(.link); @@ -14,43 +15,20 @@ const trace = @import("../../tracy.zig").trace; const Allocator = mem.Allocator; const Atom = @import("Atom.zig"); +const LoadCommandIterator = macho.LoadCommandIterator; const MachO = @import("../MachO.zig"); -const MatchingSection = MachO.MatchingSection; const SymbolWithLoc = MachO.SymbolWithLoc; -file: fs.File, name: []const u8, mtime: u64, - -/// Data contents of the file. Includes sections, and data of load commands. -/// Excludes the backing memory for the header and load commands. -/// Initialized in `parse`. 
-contents: []const u8 = undefined, - -file_offset: ?u32 = null, +contents: []align(@alignOf(u64)) const u8, header: macho.mach_header_64 = undefined, - -load_commands: std.ArrayListUnmanaged(macho.LoadCommand) = .{}, - -segment_cmd_index: ?u16 = null, -text_section_index: ?u16 = null, -symtab_cmd_index: ?u16 = null, -dysymtab_cmd_index: ?u16 = null, -build_version_cmd_index: ?u16 = null, -data_in_code_cmd_index: ?u16 = null, - -// __DWARF segment sections -dwarf_debug_info_index: ?u16 = null, -dwarf_debug_abbrev_index: ?u16 = null, -dwarf_debug_str_index: ?u16 = null, -dwarf_debug_line_index: ?u16 = null, -dwarf_debug_line_str_index: ?u16 = null, -dwarf_debug_ranges_index: ?u16 = null, +in_symtab: []const macho.nlist_64 = undefined, +in_strtab: []const u8 = undefined, symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -strtab: []const u8 = &.{}, -data_in_code_entries: []const macho.data_in_code_entry = &.{}, +sections: std.ArrayListUnmanaged(macho.section_64) = .{}, sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{}, @@ -61,12 +39,8 @@ managed_atoms: std.ArrayListUnmanaged(*Atom) = .{}, atom_by_index_table: std.AutoHashMapUnmanaged(u32, *Atom) = .{}, pub fn deinit(self: *Object, gpa: Allocator) void { - for (self.load_commands.items) |*lc| { - lc.deinit(gpa); - } - self.load_commands.deinit(gpa); - gpa.free(self.contents); self.symtab.deinit(gpa); + self.sections.deinit(gpa); self.sections_as_symbols.deinit(gpa); self.atom_by_index_table.deinit(gpa); @@ -77,22 +51,15 @@ pub fn deinit(self: *Object, gpa: Allocator) void { self.managed_atoms.deinit(gpa); gpa.free(self.name); + gpa.free(self.contents); } pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) !void { - const file_stat = try self.file.stat(); - const file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; - self.contents = try self.file.readToEndAlloc(allocator, file_size); - var stream = std.io.fixedBufferStream(self.contents); const reader = 
stream.reader(); - const file_offset = self.file_offset orelse 0; - if (file_offset > 0) { - try reader.context.seekTo(file_offset); - } - self.header = try reader.readStruct(macho.mach_header_64); + if (self.header.filetype != macho.MH_OBJECT) { log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{ macho.MH_OBJECT, @@ -110,92 +77,54 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) }, }; if (this_arch != cpu_arch) { - log.err("mismatched cpu architecture: expected {}, found {}", .{ cpu_arch, this_arch }); + log.err("mismatched cpu architecture: expected {s}, found {s}", .{ + @tagName(cpu_arch), + @tagName(this_arch), + }); return error.MismatchedCpuArchitecture; } - try self.load_commands.ensureUnusedCapacity(allocator, self.header.ncmds); - - var i: u16 = 0; - while (i < self.header.ncmds) : (i += 1) { - var cmd = try macho.LoadCommand.read(allocator, reader); + var it = LoadCommandIterator{ + .ncmds = self.header.ncmds, + .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], + }; + while (it.next()) |cmd| { switch (cmd.cmd()) { .SEGMENT_64 => { - self.segment_cmd_index = i; - var seg = cmd.segment; - for (seg.sections.items) |*sect, j| { - const index = @intCast(u16, j); - const segname = sect.segName(); - const sectname = sect.sectName(); - if (mem.eql(u8, segname, "__DWARF")) { - if (mem.eql(u8, sectname, "__debug_info")) { - self.dwarf_debug_info_index = index; - } else if (mem.eql(u8, sectname, "__debug_abbrev")) { - self.dwarf_debug_abbrev_index = index; - } else if (mem.eql(u8, sectname, "__debug_str")) { - self.dwarf_debug_str_index = index; - } else if (mem.eql(u8, sectname, "__debug_line")) { - self.dwarf_debug_line_index = index; - } else if (mem.eql(u8, sectname, "__debug_line_str")) { - self.dwarf_debug_line_str_index = index; - } else if (mem.eql(u8, sectname, "__debug_ranges")) { - self.dwarf_debug_ranges_index = index; - } - } else if (mem.eql(u8, segname, "__TEXT")) { - if 
(mem.eql(u8, sectname, "__text")) { - self.text_section_index = index; - } - } - - sect.offset += file_offset; - if (sect.reloff > 0) { - sect.reloff += file_offset; - } + const segment = cmd.cast(macho.segment_command_64).?; + try self.sections.ensureUnusedCapacity(allocator, segment.nsects); + for (cmd.getSections()) |sect| { + self.sections.appendAssumeCapacity(sect); } - - seg.inner.fileoff += file_offset; }, .SYMTAB => { - self.symtab_cmd_index = i; - cmd.symtab.symoff += file_offset; - cmd.symtab.stroff += file_offset; - }, - .DYSYMTAB => { - self.dysymtab_cmd_index = i; - }, - .BUILD_VERSION => { - self.build_version_cmd_index = i; - }, - .DATA_IN_CODE => { - self.data_in_code_cmd_index = i; - cmd.linkedit_data.dataoff += file_offset; - }, - else => { - log.debug("Unknown load command detected: 0x{x}.", .{@enumToInt(cmd.cmd())}); + const symtab = cmd.cast(macho.symtab_command).?; + self.in_symtab = @ptrCast( + [*]const macho.nlist_64, + @alignCast(@alignOf(macho.nlist_64), &self.contents[symtab.symoff]), + )[0..symtab.nsyms]; + self.in_strtab = self.contents[symtab.stroff..][0..symtab.strsize]; + try self.symtab.appendSlice(allocator, self.in_symtab); }, + else => {}, } - self.load_commands.appendAssumeCapacity(cmd); } - - try self.parseSymtab(allocator); } const Context = struct { - symtab: []const macho.nlist_64, - strtab: []const u8, + object: *const Object, }; const SymbolAtIndex = struct { index: u32, fn getSymbol(self: SymbolAtIndex, ctx: Context) macho.nlist_64 { - return ctx.symtab[self.index]; + return ctx.object.getSourceSymbol(self.index).?; } fn getSymbolName(self: SymbolAtIndex, ctx: Context) []const u8 { const sym = self.getSymbol(ctx); - assert(sym.n_strx < ctx.strtab.len); - return mem.sliceTo(@ptrCast([*:0]const u8, ctx.strtab.ptr + sym.n_strx), 0); + return ctx.object.getString(sym.n_strx); } /// Returns whether lhs is less than rhs by allocated address in object file. 
@@ -293,7 +222,6 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) defer tracy.end(); const gpa = macho_file.base.allocator; - const seg = self.load_commands.items[self.segment_cmd_index.?].segment; log.debug("splitting object({d}, {s}) into atoms: one-shot mode", .{ object_id, self.name }); @@ -302,13 +230,12 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) // the GO compiler does not necessarily respect that therefore we sort immediately by type // and address within. const context = Context{ - .symtab = self.getSourceSymtab(), - .strtab = self.strtab, + .object = self, }; - var sorted_all_syms = try std.ArrayList(SymbolAtIndex).initCapacity(gpa, context.symtab.len); + var sorted_all_syms = try std.ArrayList(SymbolAtIndex).initCapacity(gpa, self.in_symtab.len); defer sorted_all_syms.deinit(); - for (context.symtab) |_, index| { + for (self.in_symtab) |_, index| { sorted_all_syms.appendAssumeCapacity(.{ .index = @intCast(u32, index) }); } @@ -320,36 +247,36 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) // Well, shit, sometimes compilers skip the dysymtab load command altogether, meaning we // have to infer the start of undef section in the symtab ourselves. 
- const iundefsym = if (self.dysymtab_cmd_index) |cmd_index| blk: { - const dysymtab = self.load_commands.items[cmd_index].dysymtab; + const iundefsym = blk: { + const dysymtab = self.parseDysymtab() orelse { + var iundefsym: usize = sorted_all_syms.items.len; + while (iundefsym > 0) : (iundefsym -= 1) { + const sym = sorted_all_syms.items[iundefsym - 1].getSymbol(context); + if (sym.sect()) break; + } + break :blk iundefsym; + }; break :blk dysymtab.iundefsym; - } else blk: { - var iundefsym: usize = sorted_all_syms.items.len; - while (iundefsym > 0) : (iundefsym -= 1) { - const sym = sorted_all_syms.items[iundefsym - 1].getSymbol(context); - if (sym.sect()) break; - } - break :blk iundefsym; }; // We only care about defined symbols, so filter every other out. const sorted_syms = sorted_all_syms.items[0..iundefsym]; const subsections_via_symbols = self.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; - for (seg.sections.items) |sect, id| { + for (self.sections.items) |sect, id| { const sect_id = @intCast(u8, id); log.debug("splitting section '{s},{s}' into atoms", .{ sect.segName(), sect.sectName() }); // Get matching segment/section in the final artifact. 
- const match = (try macho_file.getMatchingSection(sect)) orelse { + const match = (try macho_file.getOutputSection(sect)) orelse { log.debug(" unhandled section", .{}); continue; }; log.debug(" output sect({d}, '{s},{s}')", .{ - macho_file.getSectionOrdinal(match), - macho_file.getSection(match).segName(), - macho_file.getSection(match).sectName(), + match + 1, + macho_file.sections.items(.header)[match].segName(), + macho_file.sections.items(.header)[match].sectName(), }); const cpu_arch = macho_file.base.options.target.cpu.arch; @@ -359,14 +286,13 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) }; // Read section's code - const code: ?[]const u8 = if (!is_zerofill) try self.getSectionContents(sect_id) else null; + const code: ?[]const u8 = if (!is_zerofill) try self.getSectionContents(sect) else null; // Read section's list of relocations - const raw_relocs = self.contents[sect.reloff..][0 .. sect.nreloc * @sizeOf(macho.relocation_info)]; - const relocs = mem.bytesAsSlice( - macho.relocation_info, - @alignCast(@alignOf(macho.relocation_info), raw_relocs), - ); + const relocs = @ptrCast( + [*]const macho.relocation_info, + @alignCast(@alignOf(macho.relocation_info), &self.contents[sect.reloff]), + )[0..sect.nreloc]; // Symbols within this section only. 
const filtered_syms = filterSymbolsByAddress( @@ -387,7 +313,7 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) try self.symtab.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, - .n_sect = macho_file.getSectionOrdinal(match), + .n_sect = match + 1, .n_desc = 0, .n_value = sect.addr, }); @@ -476,7 +402,7 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) try self.symtab.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, - .n_sect = macho_file.getSectionOrdinal(match), + .n_sect = match + 1, .n_desc = 0, .n_value = addr, }); @@ -501,7 +427,7 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) try self.symtab.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, - .n_sect = macho_file.getSectionOrdinal(match), + .n_sect = match + 1, .n_desc = 0, .n_value = sect.addr, }); @@ -535,21 +461,21 @@ fn createAtomFromSubsection( code: ?[]const u8, relocs: []const macho.relocation_info, indexes: []const SymbolAtIndex, - match: MatchingSection, + match: u8, sect: macho.section_64, ) !*Atom { const gpa = macho_file.base.allocator; const sym = self.symtab.items[sym_index]; const atom = try MachO.createEmptyAtom(gpa, sym_index, size, alignment); atom.file = object_id; - self.symtab.items[sym_index].n_sect = macho_file.getSectionOrdinal(match); + self.symtab.items[sym_index].n_sect = match + 1; log.debug("creating ATOM(%{d}, '{s}') in sect({d}, '{s},{s}') in object({d})", .{ sym_index, self.getString(sym.n_strx), - macho_file.getSectionOrdinal(match), - macho_file.getSection(match).segName(), - macho_file.getSection(match).sectName(), + match + 1, + macho_file.sections.items(.header)[match].segName(), + macho_file.sections.items(.header)[match].sectName(), object_id, }); @@ -577,7 +503,7 @@ fn createAtomFromSubsection( try atom.contained.ensureTotalCapacity(gpa, indexes.len); for (indexes) |inner_sym_index| { const inner_sym = &self.symtab.items[inner_sym_index.index]; - 
inner_sym.n_sect = macho_file.getSectionOrdinal(match); + inner_sym.n_sect = match + 1; atom.contained.appendAssumeCapacity(.{ .sym_index = inner_sym_index.index, .offset = inner_sym.n_value - sym.n_value, @@ -589,48 +515,84 @@ fn createAtomFromSubsection( return atom; } -fn parseSymtab(self: *Object, allocator: Allocator) !void { - const index = self.symtab_cmd_index orelse return; - const symtab = self.load_commands.items[index].symtab; - try self.symtab.appendSlice(allocator, self.getSourceSymtab()); - self.strtab = self.contents[symtab.stroff..][0..symtab.strsize]; -} - -pub fn getSourceSymtab(self: Object) []const macho.nlist_64 { - const index = self.symtab_cmd_index orelse return &[0]macho.nlist_64{}; - const symtab = self.load_commands.items[index].symtab; - const symtab_size = @sizeOf(macho.nlist_64) * symtab.nsyms; - const raw_symtab = self.contents[symtab.symoff..][0..symtab_size]; - return mem.bytesAsSlice( - macho.nlist_64, - @alignCast(@alignOf(macho.nlist_64), raw_symtab), - ); -} - pub fn getSourceSymbol(self: Object, index: u32) ?macho.nlist_64 { - const symtab = self.getSourceSymtab(); - if (index >= symtab.len) return null; - return symtab[index]; + if (index >= self.in_symtab.len) return null; + return self.in_symtab[index]; } pub fn getSourceSection(self: Object, index: u16) macho.section_64 { - const seg = self.load_commands.items[self.segment_cmd_index.?].segment; - assert(index < seg.sections.items.len); - return seg.sections.items[index]; + assert(index < self.sections.items.len); + return self.sections.items[index]; } pub fn parseDataInCode(self: Object) ?[]const macho.data_in_code_entry { - const index = self.data_in_code_cmd_index orelse return null; - const data_in_code = self.load_commands.items[index].linkedit_data; - const raw_dice = self.contents[data_in_code.dataoff..][0..data_in_code.datasize]; - return mem.bytesAsSlice( - macho.data_in_code_entry, - @alignCast(@alignOf(macho.data_in_code_entry), raw_dice), - ); + var it = 
LoadCommandIterator{ + .ncmds = self.header.ncmds, + .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], + }; + while (it.next()) |cmd| { + switch (cmd.cmd()) { + .DATA_IN_CODE => { + const dice = cmd.cast(macho.linkedit_data_command).?; + const ndice = @divExact(dice.datasize, @sizeOf(macho.data_in_code_entry)); + return @ptrCast( + [*]const macho.data_in_code_entry, + @alignCast(@alignOf(macho.data_in_code_entry), &self.contents[dice.dataoff]), + )[0..ndice]; + }, + else => {}, + } + } else return null; } -pub fn getSectionContents(self: Object, index: u16) error{Overflow}![]const u8 { - const sect = self.getSourceSection(index); +fn parseDysymtab(self: Object) ?macho.dysymtab_command { + var it = LoadCommandIterator{ + .ncmds = self.header.ncmds, + .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], + }; + while (it.next()) |cmd| { + switch (cmd.cmd()) { + .DYSYMTAB => { + return cmd.cast(macho.dysymtab_command).?; + }, + else => {}, + } + } else return null; +} + +pub fn parseDwarfInfo(self: Object) error{Overflow}!dwarf.DwarfInfo { + var di = dwarf.DwarfInfo{ + .endian = .Little, + .debug_info = &[0]u8{}, + .debug_abbrev = &[0]u8{}, + .debug_str = &[0]u8{}, + .debug_line = &[0]u8{}, + .debug_line_str = &[0]u8{}, + .debug_ranges = &[0]u8{}, + }; + for (self.sections.items) |sect| { + const segname = sect.segName(); + const sectname = sect.sectName(); + if (mem.eql(u8, segname, "__DWARF")) { + if (mem.eql(u8, sectname, "__debug_info")) { + di.debug_info = try self.getSectionContents(sect); + } else if (mem.eql(u8, sectname, "__debug_abbrev")) { + di.debug_abbrev = try self.getSectionContents(sect); + } else if (mem.eql(u8, sectname, "__debug_str")) { + di.debug_str = try self.getSectionContents(sect); + } else if (mem.eql(u8, sectname, "__debug_line")) { + di.debug_line = try self.getSectionContents(sect); + } else if (mem.eql(u8, sectname, "__debug_line_str")) { + di.debug_line_str = try 
self.getSectionContents(sect); + } else if (mem.eql(u8, sectname, "__debug_ranges")) { + di.debug_ranges = try self.getSectionContents(sect); + } + } + } + return di; +} + +pub fn getSectionContents(self: Object, sect: macho.section_64) error{Overflow}![]const u8 { const size = math.cast(usize, sect.size) orelse return error.Overflow; log.debug("getting {s},{s} data at 0x{x} - 0x{x}", .{ sect.segName(), @@ -642,8 +604,8 @@ pub fn getSectionContents(self: Object, index: u16) error{Overflow}![]const u8 { } pub fn getString(self: Object, off: u32) []const u8 { - assert(off < self.strtab.len); - return mem.sliceTo(@ptrCast([*:0]const u8, self.strtab.ptr + off), 0); + assert(off < self.in_strtab.len); + return mem.sliceTo(@ptrCast([*:0]const u8, self.in_strtab.ptr + off), 0); } pub fn getAtomForSymbol(self: Object, sym_index: u32) ?*Atom { diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index 909a0450d6..12f46c9f26 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -8,7 +8,6 @@ const mem = std.mem; const Allocator = mem.Allocator; const Atom = @import("Atom.zig"); const MachO = @import("../MachO.zig"); -const MatchingSection = MachO.MatchingSection; pub fn gcAtoms(macho_file: *MachO) !void { const gpa = macho_file.base.allocator; @@ -25,12 +24,12 @@ pub fn gcAtoms(macho_file: *MachO) !void { try prune(arena, alive, macho_file); } -fn removeAtomFromSection(atom: *Atom, match: MatchingSection, macho_file: *MachO) void { - const sect = macho_file.getSectionPtr(match); +fn removeAtomFromSection(atom: *Atom, match: u8, macho_file: *MachO) void { + var section = macho_file.sections.get(match); // If we want to enable GC for incremental codepath, we need to take into // account any padding that might have been left here. 
- sect.size -= atom.size; + section.header.size -= atom.size; if (atom.prev) |prev| { prev.next = atom.next; @@ -38,15 +37,16 @@ fn removeAtomFromSection(atom: *Atom, match: MatchingSection, macho_file: *MachO if (atom.next) |next| { next.prev = atom.prev; } else { - const last = macho_file.atoms.getPtr(match).?; if (atom.prev) |prev| { - last.* = prev; + section.last_atom = prev; } else { // The section will be GCed in the next step. - last.* = undefined; - sect.size = 0; + section.last_atom = null; + section.header.size = 0; } } + + macho_file.sections.set(match, section); } fn collectRoots(roots: *std.AutoHashMap(*Atom, void), macho_file: *MachO) !void { @@ -173,19 +173,19 @@ fn mark( fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *MachO) !void { // Any section that ends up here will be updated, that is, // its size and alignment recalculated. - var gc_sections = std.AutoHashMap(MatchingSection, void).init(arena); + var gc_sections = std.AutoHashMap(u8, void).init(arena); var loop: bool = true; while (loop) { loop = false; for (macho_file.objects.items) |object| { - for (object.getSourceSymtab()) |_, source_index| { + for (object.in_symtab) |_, source_index| { const atom = object.getAtomForSymbol(@intCast(u32, source_index)) orelse continue; if (alive.contains(atom)) continue; const global = atom.getSymbolWithLoc(); const sym = atom.getSymbolPtr(macho_file); - const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect); + const match = sym.n_sect - 1; if (sym.n_desc == MachO.N_DESC_GCED) continue; if (!sym.ext() and !refersDead(atom, macho_file)) continue; @@ -232,7 +232,7 @@ fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *Mac // TODO tombstone const atom = entry.getAtom(macho_file); - const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect); + const match = sym.n_sect - 1; removeAtomFromSection(atom, match, macho_file); _ = try gc_sections.put(match, {}); _ = 
macho_file.got_entries_table.remove(entry.target); @@ -244,7 +244,7 @@ fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *Mac // TODO tombstone const atom = entry.getAtom(macho_file); - const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect); + const match = sym.n_sect - 1; removeAtomFromSection(atom, match, macho_file); _ = try gc_sections.put(match, {}); _ = macho_file.stubs_table.remove(entry.target); @@ -256,7 +256,7 @@ fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *Mac // TODO tombstone const atom = entry.getAtom(macho_file); - const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect); + const match = sym.n_sect - 1; removeAtomFromSection(atom, match, macho_file); _ = try gc_sections.put(match, {}); _ = macho_file.tlv_ptr_entries_table.remove(entry.target); @@ -265,13 +265,13 @@ fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *Mac var gc_sections_it = gc_sections.iterator(); while (gc_sections_it.next()) |entry| { const match = entry.key_ptr.*; - const sect = macho_file.getSectionPtr(match); - if (sect.size == 0) continue; // Pruning happens automatically in next step. + var section = macho_file.sections.get(match); + if (section.header.size == 0) continue; // Pruning happens automatically in next step. 
- sect.@"align" = 0; - sect.size = 0; + section.header.@"align" = 0; + section.header.size = 0; - var atom = macho_file.atoms.get(match).?; + var atom = section.last_atom.?; while (atom.prev) |prev| { atom = prev; @@ -279,14 +279,16 @@ fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *Mac while (true) { const atom_alignment = try math.powi(u32, 2, atom.alignment); - const aligned_end_addr = mem.alignForwardGeneric(u64, sect.size, atom_alignment); - const padding = aligned_end_addr - sect.size; - sect.size += padding + atom.size; - sect.@"align" = @maximum(sect.@"align", atom.alignment); + const aligned_end_addr = mem.alignForwardGeneric(u64, section.header.size, atom_alignment); + const padding = aligned_end_addr - section.header.size; + section.header.size += padding + atom.size; + section.header.@"align" = @maximum(section.header.@"align", atom.alignment); if (atom.next) |next| { atom = next; } else break; } + + macho_file.sections.set(match, section); } } diff --git a/src/link/MachO/fat.zig b/src/link/MachO/fat.zig index 1511f274a8..7c328c1418 100644 --- a/src/link/MachO/fat.zig +++ b/src/link/MachO/fat.zig @@ -46,7 +46,9 @@ pub fn getLibraryOffset(reader: anytype, cpu_arch: std.Target.Cpu.Arch) !u64 { return fat_arch.offset; } } else { - log.err("Could not find matching cpu architecture in fat library: expected {}", .{cpu_arch}); + log.err("Could not find matching cpu architecture in fat library: expected {s}", .{ + @tagName(cpu_arch), + }); return error.MismatchedCpuArchitecture; } } From bb532584bc569edb563b757c658fd743731837ec Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 1 Aug 2022 12:28:58 +0200 Subject: [PATCH 2/8] macho: update how we insert output sections Instead of generating sections upfront, allow generation by scanning the object files for input -> output sections mapping. Next, always strive to keep output sections in the final container sorted as they appear in the final binary. 
This makes the linker less messy wrt handling of output sections sort order for dyld/macOS not to complain. There's still more work to be done for incremental context though to make this work but looks promising already. --- lib/std/macho.zig | 7 +- src/link/MachO.zig | 478 ++++++++++++++-------------------- src/link/MachO/Atom.zig | 4 +- src/link/MachO/Object.zig | 23 +- src/link/MachO/dead_strip.zig | 5 +- 5 files changed, 229 insertions(+), 288 deletions(-) diff --git a/lib/std/macho.zig b/lib/std/macho.zig index 9334f79dc5..aa43229a76 100644 --- a/lib/std/macho.zig +++ b/lib/std/macho.zig @@ -780,7 +780,7 @@ pub const section_64 = extern struct { return parseName(§.segname); } - pub fn type_(sect: section_64) u8 { + pub fn @"type"(sect: section_64) u8 { return @truncate(u8, sect.flags & 0xff); } @@ -793,6 +793,11 @@ pub const section_64 = extern struct { return attr & S_ATTR_PURE_INSTRUCTIONS != 0 or attr & S_ATTR_SOME_INSTRUCTIONS != 0; } + pub fn isZerofill(sect: section_64) bool { + const tt = sect.@"type"(); + return tt == S_ZEROFILL or tt == S_GB_ZEROFILL or tt == S_THREAD_LOCAL_ZEROFILL; + } + pub fn isDebug(sect: section_64) bool { return sect.attrs() & S_ATTR_DEBUG != 0; } diff --git a/src/link/MachO.zig b/src/link/MachO.zig index b912130957..dda5fd48e3 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -559,6 +559,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No if (build_options.enable_logging) { self.logSymtab(); + self.logSections(); self.logAtoms(); } @@ -1140,7 +1141,6 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) try self.resolveSymbolsInArchives(); try self.resolveDyldStubBinder(); try self.createDyldPrivateAtom(); - try self.createStubHelperPreambleAtom(); try self.resolveSymbolsInDylibs(); try self.createMhExecuteHeaderSymbol(); try self.createDsoHandleSymbol(); @@ -1156,6 +1156,11 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: 
*std.Progress.Node) return error.FrameworkNotFound; } + for (self.objects.items) |*object| { + try object.scanInputSections(self); + } + + try self.createStubHelperPreambleAtom(); try self.createTentativeDefAtoms(); for (self.objects.items) |*object, object_id| { @@ -1166,14 +1171,14 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) try dead_strip.gcAtoms(self); } - try self.pruneAndSortSections(); try self.allocateSegments(); try self.allocateSymbols(); try self.allocateSpecialSymbols(); - if (build_options.enable_logging) { + if (build_options.enable_logging or true) { self.logSymtab(); + self.logSections(); self.logAtoms(); } @@ -1691,7 +1696,7 @@ pub fn getOutputSection(self: *MachO, sect: macho.section_64) !?u8 { const segname = sect.segName(); const sectname = sect.sectName(); const res: ?u8 = blk: { - switch (sect.type_()) { + switch (sect.@"type"()) { macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => { if (self.text_const_section_index == null) { self.text_const_section_index = try self.initSection( @@ -2197,27 +2202,6 @@ fn allocateSymbols(self: *MachO) !void { } } -fn shiftLocalsByOffset(self: *MachO, sect_id: u8, offset: i64) !void { - var atom = self.sections.items(.last_atom)[sect_id] orelse return; - - while (true) { - const atom_sym = atom.getSymbolPtr(self); - atom_sym.n_value = @intCast(u64, @intCast(i64, atom_sym.n_value) + offset); - - for (atom.contained.items) |sym_at_off| { - const contained_sym = self.getSymbolPtr(.{ - .sym_index = sym_at_off.sym_index, - .file = atom.file, - }); - contained_sym.n_value = @intCast(u64, @intCast(i64, contained_sym.n_value) + offset); - } - - if (atom.prev) |prev| { - atom = prev; - } else break; - } -} - fn allocateSpecialSymbols(self: *MachO) !void { for (&[_][]const u8{ "___dso_handle", @@ -2245,9 +2229,10 @@ fn writeAtomsOneShot(self: *MachO) !void { for (slice.items(.last_atom)) |last_atom, sect_id| { const header = slice.items(.header)[sect_id]; + 
if (header.size == 0) continue; var atom = last_atom.?; - if (header.flags == macho.S_ZEROFILL or header.flags == macho.S_THREAD_LOCAL_ZEROFILL) continue; + if (header.isZerofill()) continue; var buffer = std.ArrayList(u8).init(gpa); defer buffer.deinit(); @@ -2334,8 +2319,7 @@ fn writeAtomsIncremental(self: *MachO) !void { const sect_i = @intCast(u8, i); const header = slice.items(.header)[sect_i]; - // TODO handle zerofill in stage2 - // if (sect.flags == macho.S_ZEROFILL or sect.flags == macho.S_THREAD_LOCAL_ZEROFILL) continue; + if (header.isZerofill()) continue; log.debug("writing atoms in {s},{s}", .{ header.segName(), header.sectName() }); @@ -3904,7 +3888,12 @@ fn getOutputSectionAtom( // TODO finish and audit this function if (val.isUndefDeep()) { if (mode == .ReleaseFast or mode == .ReleaseSmall) { - break :blk self.bss_section_index.?; + break :blk (try self.getOutputSection(.{ + .segname = makeStaticString("__DATA"), + .sectname = makeStaticString("__bss"), + .size = code.len, + .@"align" = align_log_2, + })).?; } else { break :blk self.data_section_index.?; } @@ -4488,74 +4477,6 @@ fn populateMissingMetadata(self: *MachO) !void { ); } - if (self.tlv_section_index == null) { - const needed_size = if (self.mode == .incremental) - @sizeOf(u64) * self.base.options.symbol_count_hint - else - 0; - const alignment: u16 = 3; // 2^3 = @sizeOf(u64) - self.tlv_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__thread_vars", - needed_size, - alignment, - .{ - .flags = macho.S_THREAD_LOCAL_VARIABLES, - }, - ); - } - - if (self.tlv_data_section_index == null) { - const needed_size = if (self.mode == .incremental) - @sizeOf(u64) * self.base.options.symbol_count_hint - else - 0; - const alignment: u16 = 3; // 2^3 = @sizeOf(u64) - self.tlv_data_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__thread_data", - needed_size, - alignment, - .{ - .flags = macho.S_THREAD_LOCAL_REGULAR, - }, - ); - } - - if 
(self.tlv_bss_section_index == null) { - const needed_size = if (self.mode == .incremental) - @sizeOf(u64) * self.base.options.symbol_count_hint - else - 0; - const alignment: u16 = 3; // 2^3 = @sizeOf(u64) - self.tlv_bss_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__thread_bss", - needed_size, - alignment, - .{ - .flags = macho.S_THREAD_LOCAL_ZEROFILL, - }, - ); - } - - if (self.bss_section_index == null) { - const needed_size = if (self.mode == .incremental) - @sizeOf(u64) * self.base.options.symbol_count_hint - else - 0; - const alignment: u16 = 3; // 2^3 = @sizeOf(u64) - self.bss_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__bss", - needed_size, - alignment, - .{ - .flags = macho.S_ZEROFILL, - }, - ); - } - if (self.linkedit_segment_cmd_index == null) { self.linkedit_segment_cmd_index = @intCast(u8, self.segments.items.len); var vmaddr: u64 = 0; @@ -4690,18 +4611,19 @@ fn allocateSegments(self: *MachO) !void { }, try self.calcMinHeaderPad()); if (self.text_segment_cmd_index) |index| blk: { - const seg = &self.segments.items[index]; - if (seg.nsects == 0) break :blk; + const indexes = self.getSectionIndexes(index); + if (indexes.start == indexes.end) break :blk; + const seg = self.segments.items[index]; // Shift all sections to the back to minimize jump size between __TEXT and __DATA segments. 
var min_alignment: u32 = 0; - for (self.sections.items(.header)[0..seg.nsects]) |header| { + for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { const alignment = try math.powi(u32, 2, header.@"align"); min_alignment = math.max(min_alignment, alignment); } assert(min_alignment > 0); - const last_header = self.sections.items(.header)[seg.nsects - 1]; + const last_header = self.sections.items(.header)[indexes.end - 1]; const shift: u32 = shift: { const diff = seg.filesize - last_header.offset - last_header.size; const factor = @divTrunc(diff, min_alignment); @@ -4709,7 +4631,7 @@ fn allocateSegments(self: *MachO) !void { }; if (shift > 0) { - for (self.sections.items(.header)[0..seg.nsects]) |*header| { + for (self.sections.items(.header)[indexes.start..indexes.end]) |*header| { header.offset += shift; header.addr += shift; } @@ -4746,16 +4668,14 @@ fn allocateSegment(self: *MachO, maybe_index: ?u8, indices: []const ?u8, init_si seg.vmsize = init_size; // Allocate the sections according to their alignment at the beginning of the segment. 
+ const indexes = self.getSectionIndexes(index); var start = init_size; const slice = self.sections.slice(); - for (slice.items(.header)) |*header, sect_id| { - const segment_index = slice.items(.segment_index)[sect_id]; - if (segment_index != index) continue; - const is_zerofill = header.flags == macho.S_ZEROFILL or header.flags == macho.S_THREAD_LOCAL_ZEROFILL; + for (slice.items(.header)[indexes.start..indexes.end]) |*header| { const alignment = try math.powi(u32, 2, header.@"align"); const start_aligned = mem.alignForwardGeneric(u64, start, alignment); - header.offset = if (is_zerofill) + header.offset = if (header.isZerofill()) 0 else @intCast(u32, seg.fileoff + start_aligned); @@ -4763,7 +4683,7 @@ fn allocateSegment(self: *MachO, maybe_index: ?u8, indices: []const ?u8, init_si start = start_aligned + header.size; - if (!is_zerofill) { + if (!header.isZerofill()) { seg.filesize = start; } seg.vmsize = start; @@ -4788,7 +4708,7 @@ fn initSection( opts: InitSectionOpts, ) !u8 { const seg = &self.segments.items[segment_id]; - var header = macho.section_64{ + const index = try self.insertSection(segment_id, .{ .sectname = makeStaticString(sectname), .segname = seg.segname, .size = if (self.mode == .incremental) @intCast(u32, size) else 0, @@ -4796,42 +4716,164 @@ fn initSection( .flags = opts.flags, .reserved1 = opts.reserved1, .reserved2 = opts.reserved2, - }; - - if (self.mode == .incremental) { - const alignment_pow_2 = try math.powi(u32, 2, alignment); - const padding: ?u64 = if (segment_id == self.text_segment_cmd_index.?) 
- try self.calcMinHeaderPad() - else - null; - const off = self.findFreeSpace(segment_id, alignment_pow_2, padding); - log.debug("allocating {s},{s} section from 0x{x} to 0x{x}", .{ - header.segName(), - header.sectName(), - off, - off + size, - }); - - header.addr = seg.vmaddr + off - seg.fileoff; - - // TODO handle zerofill in stage2 - // const is_zerofill = opts.flags == macho.S_ZEROFILL or opts.flags == macho.S_THREAD_LOCAL_ZEROFILL; - header.offset = @intCast(u32, off); - - try self.updateSectionOrdinals(); - } - - const index = @intCast(u8, self.sections.slice().len); - try self.sections.append(self.base.allocator, .{ - .segment_index = segment_id, - .header = header, }); seg.cmdsize += @sizeOf(macho.section_64); seg.nsects += 1; + if (self.mode == .incremental) { + const header = &self.sections.items(.header)[index]; + const prev_end_off = if (index > 0) blk: { + const prev_section = self.sections.get(index - 1); + if (prev_section.segment_index == segment_id) { + const prev_header = prev_section.header; + break :blk prev_header.offset + padToIdeal(prev_header.size); + } else break :blk seg.fileoff; + } else 0; + const alignment_pow_2 = try math.powi(u32, 2, alignment); + const padding: u64 = if (index == 0) try self.calcMinHeaderPad() else 0; + const off = mem.alignForwardGeneric(u64, padding + prev_end_off, alignment_pow_2); + log.debug("allocating {s},{s} section at 0x{x}", .{ header.segName(), header.sectName(), off }); + + header.addr = seg.vmaddr + off - seg.fileoff; + + if (!header.isZerofill()) { + header.offset = @intCast(u32, off); + } + + self.updateSectionOrdinals(index + 1); + } + return index; } +fn getSectionPrecedence(header: macho.section_64) u4 { + if (header.isCode()) { + if (mem.eql(u8, "__text", header.sectName())) return 0x0; + if (header.@"type"() == macho.S_SYMBOL_STUBS) return 0x1; + return 0x2; + } + switch (header.@"type"()) { + macho.S_NON_LAZY_SYMBOL_POINTERS, + macho.S_LAZY_SYMBOL_POINTERS, + => return 0x0, + 
macho.S_MOD_INIT_FUNC_POINTERS => return 0x1, + macho.S_MOD_TERM_FUNC_POINTERS => return 0x2, + macho.S_ZEROFILL => return 0xf, + macho.S_THREAD_LOCAL_REGULAR => return 0xd, + macho.S_THREAD_LOCAL_ZEROFILL => return 0xe, + else => if (mem.eql(u8, "__eh_frame", header.sectName())) + return 0xf + else + return 0x3, + } +} + +fn insertSection(self: *MachO, segment_index: u8, header: macho.section_64) !u8 { + const precedence = getSectionPrecedence(header); + const indexes = self.getSectionIndexes(segment_index); + const insertion_index = for (self.sections.items(.header)[indexes.start..indexes.end]) |hdr, i| { + if (getSectionPrecedence(hdr) > precedence) break @intCast(u8, i + indexes.start); + } else indexes.end; + log.debug("inserting section '{s},{s}' at index {d}", .{ + header.segName(), + header.sectName(), + insertion_index, + }); + // TODO slim it down + for (&[_]*?u8{ + // __TEXT + &self.text_section_index, + &self.stubs_section_index, + &self.stub_helper_section_index, + &self.gcc_except_tab_section_index, + &self.cstring_section_index, + &self.ustring_section_index, + &self.text_const_section_index, + &self.objc_methlist_section_index, + &self.objc_methname_section_index, + &self.objc_methtype_section_index, + &self.objc_classname_section_index, + &self.eh_frame_section_index, + // __DATA_CONST + &self.got_section_index, + &self.mod_init_func_section_index, + &self.mod_term_func_section_index, + &self.data_const_section_index, + &self.objc_cfstring_section_index, + &self.objc_classlist_section_index, + &self.objc_imageinfo_section_index, + // __DATA + &self.rustc_section_index, + &self.la_symbol_ptr_section_index, + &self.objc_const_section_index, + &self.objc_selrefs_section_index, + &self.objc_classrefs_section_index, + &self.objc_data_section_index, + &self.data_section_index, + &self.tlv_section_index, + &self.tlv_ptrs_section_index, + &self.tlv_data_section_index, + &self.tlv_bss_section_index, + &self.bss_section_index, + }) |maybe_index| { + const 
index = maybe_index.* orelse continue; + if (insertion_index <= index) maybe_index.* = index + 1; + } + try self.sections.insert(self.base.allocator, insertion_index, .{ + .segment_index = segment_index, + .header = header, + }); + return insertion_index; +} + +fn updateSectionOrdinals(self: *MachO, start: u8) void { + const tracy = trace(@src()); + defer tracy.end(); + + const slice = self.sections.slice(); + for (slice.items(.last_atom)[start..]) |last_atom| { + var atom = last_atom.?; + + while (true) { + const sym = atom.getSymbolPtr(self); + sym.n_sect = start + 1; + + for (atom.contained.items) |sym_at_off| { + const contained_sym = self.getSymbolPtr(.{ + .sym_index = sym_at_off.sym_index, + .file = atom.file, + }); + contained_sym.n_sect = start + 1; + } + + if (atom.prev) |prev| { + atom = prev; + } else break; + } + } +} + +fn shiftLocalsByOffset(self: *MachO, sect_id: u8, offset: i64) !void { + var atom = self.sections.items(.last_atom)[sect_id] orelse return; + + while (true) { + const atom_sym = atom.getSymbolPtr(self); + atom_sym.n_value = @intCast(u64, @intCast(i64, atom_sym.n_value) + offset); + + for (atom.contained.items) |sym_at_off| { + const contained_sym = self.getSymbolPtr(.{ + .sym_index = sym_at_off.sym_index, + .file = atom.file, + }); + contained_sym.n_value = @intCast(u64, @intCast(i64, contained_sym.n_value) + offset); + } + + if (atom.prev) |prev| { + atom = prev; + } else break; + } +} + fn findFreeSpace(self: MachO, segment_id: u8, alignment: u64, start: ?u64) u64 { const seg = self.segments.items[segment_id]; const indexes = self.getSectionIndexes(segment_id); @@ -5181,153 +5223,18 @@ fn getSegmentAllocBase(self: MachO, indices: []const ?u8) struct { vmaddr: u64, return .{ .vmaddr = 0, .fileoff = 0 }; } -fn pruneAndSortSections(self: *MachO) !void { - const gpa = self.base.allocator; - - var sections = self.sections.toOwnedSlice(); - defer sections.deinit(gpa); - try self.sections.ensureTotalCapacity(gpa, sections.len); - - for 
(&[_]*?u8{ - // __TEXT - &self.text_section_index, - &self.stubs_section_index, - &self.stub_helper_section_index, - &self.gcc_except_tab_section_index, - &self.cstring_section_index, - &self.ustring_section_index, - &self.text_const_section_index, - &self.objc_methlist_section_index, - &self.objc_methname_section_index, - &self.objc_methtype_section_index, - &self.objc_classname_section_index, - &self.eh_frame_section_index, - // __DATA_CONST - &self.got_section_index, - &self.mod_init_func_section_index, - &self.mod_term_func_section_index, - &self.data_const_section_index, - &self.objc_cfstring_section_index, - &self.objc_classlist_section_index, - &self.objc_imageinfo_section_index, - // __DATA - &self.rustc_section_index, - &self.la_symbol_ptr_section_index, - &self.objc_const_section_index, - &self.objc_selrefs_section_index, - &self.objc_classrefs_section_index, - &self.objc_data_section_index, - &self.data_section_index, - &self.tlv_section_index, - &self.tlv_ptrs_section_index, - &self.tlv_data_section_index, - &self.tlv_bss_section_index, - &self.bss_section_index, - }) |maybe_index| { - const old_idx = maybe_index.* orelse continue; - const segment_index = sections.items(.segment_index)[old_idx]; - const header = sections.items(.header)[old_idx]; - const last_atom = sections.items(.last_atom)[old_idx]; - if (header.size == 0) { - log.debug("pruning section {s},{s}", .{ header.segName(), header.sectName() }); - maybe_index.* = null; - const seg = &self.segments.items[segment_index]; - seg.cmdsize -= @sizeOf(macho.section_64); - seg.nsects -= 1; - } else { - maybe_index.* = @intCast(u8, self.sections.slice().len); - self.sections.appendAssumeCapacity(.{ - .segment_index = segment_index, - .header = header, - .last_atom = last_atom, - }); - } - } - - for (self.segments.items) |*seg| { - const segname = seg.segName(); - if (seg.nsects == 0 and - !mem.eql(u8, "__TEXT", segname) and - !mem.eql(u8, "__PAGEZERO", segname) and - !mem.eql(u8, "__LINKEDIT", 
segname)) - { - // Segment has now become empty, so mark it as such - log.debug("marking segment {s} as dead", .{seg.segName()}); - seg.cmd = @intToEnum(macho.LC, 0); - } - } -} - -fn updateSectionOrdinals(self: *MachO) !void { - _ = self; - const tracy = trace(@src()); - defer tracy.end(); - - @panic("updating section ordinals"); - - // const gpa = self.base.allocator; - - // var ordinal_remap = std.AutoHashMap(u8, u8).init(gpa); - // defer ordinal_remap.deinit(); - // var ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}; - - // var new_ordinal: u8 = 0; - // for (&[_]?u16{ - // self.text_segment_cmd_index, - // self.data_const_segment_cmd_index, - // self.data_segment_cmd_index, - // }) |maybe_index| { - // const index = maybe_index orelse continue; - // const seg = self.load_commands.items[index].segment; - // for (seg.sections.items) |sect, sect_id| { - // const match = MatchingSection{ - // .seg = @intCast(u16, index), - // .sect = @intCast(u16, sect_id), - // }; - // const old_ordinal = self.getSectionOrdinal(match); - // new_ordinal += 1; - // log.debug("'{s},{s}': sect({d}, '_,_') => sect({d}, '_,_')", .{ - // sect.segName(), - // sect.sectName(), - // old_ordinal, - // new_ordinal, - // }); - // try ordinal_remap.putNoClobber(old_ordinal, new_ordinal); - // try ordinals.putNoClobber(gpa, match, {}); - // } - // } - - // // FIXME Jakub - // // TODO no need for duping work here; simply walk the atom graph - // for (self.locals.items) |*sym| { - // if (sym.undf()) continue; - // if (sym.n_sect == 0) continue; - // sym.n_sect = ordinal_remap.get(sym.n_sect).?; - // } - // for (self.objects.items) |*object| { - // for (object.symtab.items) |*sym| { - // if (sym.undf()) continue; - // if (sym.n_sect == 0) continue; - // sym.n_sect = ordinal_remap.get(sym.n_sect).?; - // } - // } - - // self.section_ordinals.deinit(gpa); - // self.section_ordinals = ordinals; -} - pub fn writeSegmentHeaders(self: *MachO, start: usize, end: usize, ncmds: *u32, 
writer: anytype) !void { - var count: usize = 0; - for (self.segments.items[start..end]) |seg| { - if (seg.cmd == .NONE) continue; + for (self.segments.items[start..end]) |seg, i| { + if (seg.nsects == 0 and + (mem.eql(u8, seg.segName(), "__DATA_CONST") or + mem.eql(u8, seg.segName(), "__DATA"))) continue; try writer.writeStruct(seg); - // TODO - for (self.sections.items(.header)[count..][0..seg.nsects]) |header| { + const indexes = self.getSectionIndexes(@intCast(u8, start + i)); + for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { try writer.writeStruct(header); } - count += seg.nsects; ncmds.* += 1; } } @@ -6644,6 +6551,19 @@ fn generateSymbolStabsForSymbol( // try writer.writeByte(']'); // } +fn logSections(self: *MachO) void { + log.debug("sections:", .{}); + for (self.sections.items(.header)) |header, i| { + log.debug(" sect({d}): {s},{s} @{x}, sizeof({x})", .{ + i + 1, + header.segName(), + header.sectName(), + header.offset, + header.size, + }); + } +} + fn logSymAttributes(sym: macho.nlist_64, buf: *[9]u8) []const u8 { mem.set(u8, buf[0..4], '_'); mem.set(u8, buf[4..], ' '); diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 90c86e24ed..85b3ca1c2b 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -466,7 +466,7 @@ fn addPtrBindingOrRebase( const section = context.macho_file.sections.get(source_sym.n_sect - 1); const header = section.header; const segment_index = section.segment_index; - const sect_type = header.type_(); + const sect_type = header.@"type"(); const should_rebase = rebase: { if (rel.r_length != 3) break :rebase false; @@ -571,7 +571,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { const is_tlv = is_tlv: { const source_sym = self.getSymbol(macho_file); const header = macho_file.sections.items(.header)[source_sym.n_sect - 1]; - break :is_tlv header.type_() == macho.S_THREAD_LOCAL_VARIABLES; + break :is_tlv header.@"type"() == macho.S_THREAD_LOCAL_VARIABLES; }; const 
target_addr = blk: { const target_atom = rel.getTargetAtom(macho_file) orelse { diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 2e2f3dad84..996a85ed4b 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -214,6 +214,23 @@ fn filterRelocs( return relocs[start..end]; } +pub fn scanInputSections(self: Object, macho_file: *MachO) !void { + for (self.sections.items) |sect| { + const match = (try macho_file.getOutputSection(sect)) orelse { + log.debug(" unhandled section", .{}); + continue; + }; + const output = macho_file.sections.items(.header)[match]; + log.debug("mapping '{s},{s}' into output sect({d}, '{s},{s}')", .{ + sect.segName(), + sect.sectName(), + match + 1, + output.segName(), + output.sectName(), + }); + } +} + /// Splits object into atoms assuming one-shot linking mode. pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) !void { assert(macho_file.mode == .one_shot); @@ -280,13 +297,9 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) }); const cpu_arch = macho_file.base.options.target.cpu.arch; - const is_zerofill = blk: { - const section_type = sect.type_(); - break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL; - }; // Read section's code - const code: ?[]const u8 = if (!is_zerofill) try self.getSectionContents(sect) else null; + const code: ?[]const u8 = if (!sect.isZerofill()) try self.getSectionContents(sect) else null; // Read section's list of relocations const relocs = @ptrCast( diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index 12f46c9f26..bf65b96049 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -43,6 +43,9 @@ fn removeAtomFromSection(atom: *Atom, match: u8, macho_file: *MachO) void { // The section will be GCed in the next step. 
section.last_atom = null; section.header.size = 0; + const segment = &macho_file.segments.items[section.segment_index]; + segment.cmdsize -= @sizeOf(macho.section_64); + segment.nsects -= 1; } } @@ -93,7 +96,7 @@ fn collectRoots(roots: *std.AutoHashMap(*Atom, void), macho_file: *MachO) !void const is_gc_root = blk: { if (source_sect.isDontDeadStrip()) break :blk true; if (mem.eql(u8, "__StaticInit", source_sect.sectName())) break :blk true; - switch (source_sect.type_()) { + switch (source_sect.@"type"()) { macho.S_MOD_INIT_FUNC_POINTERS, macho.S_MOD_TERM_FUNC_POINTERS, => break :blk true, From 7bba3d330ad90026e8b79cae9940a1e878628119 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 1 Aug 2022 18:42:18 +0200 Subject: [PATCH 3/8] macho: cleanup output section selection logic Cache only section indexes used by the linker for synthetic sections and/or incremental codepath. --- src/link/MachO.zig | 632 ++++++++++------------------------------ src/link/MachO/Atom.zig | 5 +- 2 files changed, 157 insertions(+), 480 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index dda5fd48e3..205fbcd6bf 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -121,48 +121,12 @@ data_const_segment_cmd_index: ?u8 = null, data_segment_cmd_index: ?u8 = null, linkedit_segment_cmd_index: ?u8 = null, -// __TEXT segment sections text_section_index: ?u8 = null, stubs_section_index: ?u8 = null, stub_helper_section_index: ?u8 = null, -text_const_section_index: ?u8 = null, -cstring_section_index: ?u8 = null, -ustring_section_index: ?u8 = null, -gcc_except_tab_section_index: ?u8 = null, -unwind_info_section_index: ?u8 = null, -eh_frame_section_index: ?u8 = null, - -objc_methlist_section_index: ?u8 = null, -objc_methname_section_index: ?u8 = null, -objc_methtype_section_index: ?u8 = null, -objc_classname_section_index: ?u8 = null, - -// __DATA_CONST segment sections got_section_index: ?u8 = null, -mod_init_func_section_index: ?u8 = null, -mod_term_func_section_index: ?u8 = 
null, -data_const_section_index: ?u8 = null, - -objc_cfstring_section_index: ?u8 = null, -objc_classlist_section_index: ?u8 = null, -objc_imageinfo_section_index: ?u8 = null, - -// __DATA segment sections -tlv_section_index: ?u8 = null, -tlv_data_section_index: ?u8 = null, -tlv_bss_section_index: ?u8 = null, -tlv_ptrs_section_index: ?u8 = null, la_symbol_ptr_section_index: ?u8 = null, data_section_index: ?u8 = null, -bss_section_index: ?u8 = null, - -objc_const_section_index: ?u8 = null, -objc_selrefs_section_index: ?u8 = null, -objc_classrefs_section_index: ?u8 = null, -objc_data_section_index: ?u8 = null, - -rustc_section_index: ?u8 = null, -rustc_section_size: u64 = 0, locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, globals: std.StringArrayHashMapUnmanaged(SymbolWithLoc) = .{}, @@ -547,14 +511,15 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try self.createMhExecuteHeaderSymbol(); try self.resolveDyldStubBinder(); - try self.createDyldPrivateAtom(); - try self.createStubHelperPreambleAtom(); try self.resolveSymbolsInDylibs(); if (self.unresolved.count() > 0) { return error.UndefinedSymbolReference; } + try self.createDyldPrivateAtom(); + try self.createStubHelperPreambleAtom(); + try self.allocateSpecialSymbols(); if (build_options.enable_logging) { @@ -1140,7 +1105,6 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) try self.resolveSymbolsInArchives(); try self.resolveDyldStubBinder(); - try self.createDyldPrivateAtom(); try self.resolveSymbolsInDylibs(); try self.createMhExecuteHeaderSymbol(); try self.createDsoHandleSymbol(); @@ -1160,8 +1124,9 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) try object.scanInputSections(self); } - try self.createStubHelperPreambleAtom(); + try self.createDyldPrivateAtom(); try self.createTentativeDefAtoms(); + try self.createStubHelperPreambleAtom(); for (self.objects.items) |*object, object_id| { try 
object.splitIntoAtomsOneShot(self, @intCast(u32, object_id)); @@ -1184,11 +1149,6 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) try self.writeAtomsOneShot(); - if (self.rustc_section_index) |id| { - const header = &self.sections.items(.header)[id]; - header.size = self.rustc_section_size; - } - var lc_buffer = std.ArrayList(u8).init(arena); const lc_writer = lc_buffer.writer(); var ncmds: u32 = 0; @@ -1696,417 +1656,142 @@ pub fn getOutputSection(self: *MachO, sect: macho.section_64) !?u8 { const segname = sect.segName(); const sectname = sect.sectName(); const res: ?u8 = blk: { + if (mem.eql(u8, "__LLVM", segname)) { + log.debug("TODO LLVM section: type 0x{x}, name '{s},{s}'", .{ + sect.flags, segname, sectname, + }); + break :blk null; + } + + if (sect.isCode()) { + if (self.text_section_index == null) { + self.text_section_index = try self.initSection( + "__TEXT", + "__text", + sect.size, + sect.@"align", + .{ + .flags = macho.S_REGULAR | + macho.S_ATTR_PURE_INSTRUCTIONS | + macho.S_ATTR_SOME_INSTRUCTIONS, + }, + ); + } + break :blk self.text_section_index.?; + } + + if (sect.isDebug()) { + // TODO debug attributes + if (mem.eql(u8, "__LD", segname) and mem.eql(u8, "__compact_unwind", sectname)) { + log.debug("TODO compact unwind section: type 0x{x}, name '{s},{s}'", .{ + sect.flags, segname, sectname, + }); + } + break :blk null; + } + switch (sect.@"type"()) { - macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => { - if (self.text_const_section_index == null) { - self.text_const_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__const", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.text_const_section_index.?; + macho.S_4BYTE_LITERALS, + macho.S_8BYTE_LITERALS, + macho.S_16BYTE_LITERALS, + => { + break :blk self.getSectionByName("__TEXT", "__const") orelse try self.initSection( + "__TEXT", + "__const", + sect.size, + sect.@"align", + .{}, + ); }, 
macho.S_CSTRING_LITERALS => { - if (mem.eql(u8, sectname, "__objc_methname")) { - // TODO it seems the common values within the sections in objects are deduplicated/merged - // on merging the sections' contents. - if (self.objc_methname_section_index == null) { - self.objc_methname_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__objc_methname", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.objc_methname_section_index.?; - } else if (mem.eql(u8, sectname, "__objc_methtype")) { - if (self.objc_methtype_section_index == null) { - self.objc_methtype_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__objc_methtype", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.objc_methtype_section_index.?; - } else if (mem.eql(u8, sectname, "__objc_classname")) { - if (self.objc_classname_section_index == null) { - self.objc_classname_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__objc_classname", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.objc_classname_section_index.?; - } - - if (self.cstring_section_index == null) { - self.cstring_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__cstring", - sect.size, - sect.@"align", - .{ - .flags = macho.S_CSTRING_LITERALS, - }, - ); - } - break :blk self.cstring_section_index.?; - }, - macho.S_LITERAL_POINTERS => { - if (mem.eql(u8, segname, "__DATA") and mem.eql(u8, sectname, "__objc_selrefs")) { - if (self.objc_selrefs_section_index == null) { - self.objc_selrefs_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__objc_selrefs", - sect.size, - sect.@"align", - .{ - .flags = macho.S_LITERAL_POINTERS, - }, - ); - } - break :blk self.objc_selrefs_section_index.?; - } else { - // TODO investigate - break :blk null; - } - }, - macho.S_MOD_INIT_FUNC_POINTERS => { - if (self.mod_init_func_section_index == null) { - self.mod_init_func_section_index = try 
self.initSection( - self.data_const_segment_cmd_index.?, - "__mod_init_func", - sect.size, - sect.@"align", - .{ - .flags = macho.S_MOD_INIT_FUNC_POINTERS, - }, - ); - } - break :blk self.mod_init_func_section_index.?; - }, - macho.S_MOD_TERM_FUNC_POINTERS => { - if (self.mod_term_func_section_index == null) { - self.mod_term_func_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, - "__mod_term_func", - sect.size, - sect.@"align", - .{ - .flags = macho.S_MOD_TERM_FUNC_POINTERS, - }, - ); - } - break :blk self.mod_term_func_section_index.?; - }, - macho.S_ZEROFILL => { - if (self.bss_section_index == null) { - self.bss_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__bss", - sect.size, - sect.@"align", - .{ - .flags = macho.S_ZEROFILL, - }, - ); - } - break :blk self.bss_section_index.?; - }, - macho.S_THREAD_LOCAL_VARIABLES => { - if (self.tlv_section_index == null) { - self.tlv_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__thread_vars", - sect.size, - sect.@"align", - .{ - .flags = macho.S_THREAD_LOCAL_VARIABLES, - }, - ); - } - break :blk self.tlv_section_index.?; - }, - macho.S_THREAD_LOCAL_VARIABLE_POINTERS => { - if (self.tlv_ptrs_section_index == null) { - self.tlv_ptrs_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__thread_ptrs", - sect.size, - sect.@"align", - .{ - .flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS, - }, - ); - } - break :blk self.tlv_ptrs_section_index.?; - }, - macho.S_THREAD_LOCAL_REGULAR => { - if (self.tlv_data_section_index == null) { - self.tlv_data_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__thread_data", - sect.size, - sect.@"align", - .{ - .flags = macho.S_THREAD_LOCAL_REGULAR, - }, - ); - } - break :blk self.tlv_data_section_index.?; - }, - macho.S_THREAD_LOCAL_ZEROFILL => { - if (self.tlv_bss_section_index == null) { - self.tlv_bss_section_index = try self.initSection( - 
self.data_segment_cmd_index.?, - "__thread_bss", - sect.size, - sect.@"align", - .{ - .flags = macho.S_THREAD_LOCAL_ZEROFILL, - }, - ); - } - break :blk self.tlv_bss_section_index.?; - }, - macho.S_COALESCED => { - if (mem.eql(u8, "__TEXT", segname) and mem.eql(u8, "__eh_frame", sectname)) { - // TODO I believe __eh_frame is currently part of __unwind_info section - // in the latest ld64 output. - if (self.eh_frame_section_index == null) { - self.eh_frame_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__eh_frame", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.eh_frame_section_index.?; - } - - // TODO audit this: is this the right mapping? - if (self.data_const_section_index == null) { - self.data_const_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, - "__const", + if (mem.startsWith(u8, sectname, "__objc")) { + break :blk self.getSectionByName(segname, sectname) orelse try self.initSection( + segname, + sectname, sect.size, sect.@"align", .{}, ); } - - break :blk self.data_const_section_index.?; + break :blk self.getSectionByName("__TEXT", "__cstring") orelse try self.initSection( + "__TEXT", + "__cstring", + sect.size, + sect.@"align", + .{ .flags = macho.S_CSTRING_LITERALS }, + ); + }, + macho.S_MOD_INIT_FUNC_POINTERS, + macho.S_MOD_TERM_FUNC_POINTERS, + => { + break :blk self.getSectionByName("__DATA_CONST", sectname) orelse try self.initSection( + "__DATA_CONST", + sectname, + sect.size, + sect.@"align", + .{ .flags = sect.flags }, + ); + }, + macho.S_LITERAL_POINTERS, + macho.S_ZEROFILL, + macho.S_THREAD_LOCAL_VARIABLES, + macho.S_THREAD_LOCAL_VARIABLE_POINTERS, + macho.S_THREAD_LOCAL_REGULAR, + macho.S_THREAD_LOCAL_ZEROFILL, + => { + break :blk self.getSectionByName(segname, sectname) orelse try self.initSection( + segname, + sectname, + sect.size, + sect.@"align", + .{ .flags = sect.flags }, + ); + }, + macho.S_COALESCED => { + break :blk self.getSectionByName(segname, sectname) orelse 
try self.initSection( + segname, + sectname, + sect.size, + sect.@"align", + .{}, + ); }, macho.S_REGULAR => { - if (sect.isCode()) { - if (self.text_section_index == null) { - self.text_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__text", - sect.size, - sect.@"align", - .{ - .flags = macho.S_REGULAR | - macho.S_ATTR_PURE_INSTRUCTIONS | - macho.S_ATTR_SOME_INSTRUCTIONS, - }, - ); - } - break :blk self.text_section_index.?; - } - if (sect.isDebug()) { - // TODO debug attributes - if (mem.eql(u8, "__LD", segname) and mem.eql(u8, "__compact_unwind", sectname)) { - log.debug("TODO compact unwind section: type 0x{x}, name '{s},{s}'", .{ - sect.flags, segname, sectname, - }); - } - break :blk null; - } - if (mem.eql(u8, segname, "__TEXT")) { - if (mem.eql(u8, sectname, "__ustring")) { - if (self.ustring_section_index == null) { - self.ustring_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__ustring", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.ustring_section_index.?; - } else if (mem.eql(u8, sectname, "__gcc_except_tab")) { - if (self.gcc_except_tab_section_index == null) { - self.gcc_except_tab_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__gcc_except_tab", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.gcc_except_tab_section_index.?; - } else if (mem.eql(u8, sectname, "__objc_methlist")) { - if (self.objc_methlist_section_index == null) { - self.objc_methlist_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__objc_methlist", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.objc_methlist_section_index.?; - } else if (mem.eql(u8, sectname, "__rodata") or + if (mem.eql(u8, sectname, "__rodata") or mem.eql(u8, sectname, "__typelink") or mem.eql(u8, sectname, "__itablink") or mem.eql(u8, sectname, "__gosymtab") or mem.eql(u8, sectname, "__gopclntab")) { - if (self.data_const_section_index == null) { - 
self.data_const_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, - "__const", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.data_const_section_index.?; - } else { - if (self.text_const_section_index == null) { - self.text_const_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__const", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.text_const_section_index.?; - } - } - - if (mem.eql(u8, segname, "__DATA_CONST")) { - if (self.data_const_section_index == null) { - self.data_const_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, + break :blk self.getSectionByName("__DATA_CONST", "__const") orelse try self.initSection( + "__DATA_CONST", "__const", sect.size, sect.@"align", .{}, ); } - break :blk self.data_const_section_index.?; } - if (mem.eql(u8, segname, "__DATA")) { - if (mem.eql(u8, sectname, "__const")) { - if (self.data_const_section_index == null) { - self.data_const_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, - "__const", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.data_const_section_index.?; - } else if (mem.eql(u8, sectname, "__cfstring")) { - if (self.objc_cfstring_section_index == null) { - self.objc_cfstring_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, - "__cfstring", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.objc_cfstring_section_index.?; - } else if (mem.eql(u8, sectname, "__objc_classlist")) { - if (self.objc_classlist_section_index == null) { - self.objc_classlist_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, - "__objc_classlist", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.objc_classlist_section_index.?; - } else if (mem.eql(u8, sectname, "__objc_imageinfo")) { - if (self.objc_imageinfo_section_index == null) { - self.objc_imageinfo_section_index = try self.initSection( - 
self.data_const_segment_cmd_index.?, - "__objc_imageinfo", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.objc_imageinfo_section_index.?; - } else if (mem.eql(u8, sectname, "__objc_const")) { - if (self.objc_const_section_index == null) { - self.objc_const_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__objc_const", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.objc_const_section_index.?; - } else if (mem.eql(u8, sectname, "__objc_classrefs")) { - if (self.objc_classrefs_section_index == null) { - self.objc_classrefs_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__objc_classrefs", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.objc_classrefs_section_index.?; - } else if (mem.eql(u8, sectname, "__objc_data")) { - if (self.objc_data_section_index == null) { - self.objc_data_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__objc_data", - sect.size, - sect.@"align", - .{}, - ); - } - break :blk self.objc_data_section_index.?; - } else if (mem.eql(u8, sectname, ".rustc")) { - if (self.rustc_section_index == null) { - self.rustc_section_index = try self.initSection( - self.data_segment_cmd_index.?, - ".rustc", - sect.size, - sect.@"align", - .{}, - ); - // We need to preserve the section size for rustc to properly - // decompress the metadata. 
- self.rustc_section_size = sect.size; - } - break :blk self.rustc_section_index.?; - } else { + if (mem.eql(u8, sectname, "__const") or + mem.eql(u8, sectname, "__cfstring") or + mem.eql(u8, sectname, "__objc_classlist") or + mem.eql(u8, sectname, "__objc_imageinfo")) + { + break :blk self.getSectionByName("__DATA_CONST", sectname) orelse + try self.initSection( + "__DATA_CONST", + sectname, + sect.size, + sect.@"align", + .{}, + ); + } else if (mem.eql(u8, sectname, "__data")) { if (self.data_section_index == null) { self.data_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__data", + segname, + sectname, sect.size, sect.@"align", .{}, @@ -2115,14 +1800,13 @@ pub fn getOutputSection(self: *MachO, sect: macho.section_64) !?u8 { break :blk self.data_section_index.?; } } - - if (mem.eql(u8, "__LLVM", segname) and mem.eql(u8, "__asm", sectname)) { - log.debug("TODO LLVM asm section: type 0x{x}, name '{s},{s}'", .{ - sect.flags, segname, sectname, - }); - } - - break :blk null; + break :blk self.getSectionByName(segname, sectname) orelse try self.initSection( + segname, + sectname, + sect.size, + sect.@"align", + .{}, + ); }, else => break :blk null, } @@ -2774,11 +2458,16 @@ fn createTentativeDefAtoms(self: *MachO) !void { // text blocks for each tentative definition. 
const size = sym.n_value; const alignment = (sym.n_desc >> 8) & 0x0f; + const n_sect = (try self.getOutputSection(.{ + .segname = makeStaticString("__DATA"), + .sectname = makeStaticString("__bss"), + .flags = macho.S_ZEROFILL, + })).?; sym.* = .{ .n_strx = sym.n_strx, .n_type = macho.N_SECT | macho.N_EXT, - .n_sect = 0, + .n_sect = n_sect, .n_desc = 0, .n_value = 0, }; @@ -2786,7 +2475,7 @@ fn createTentativeDefAtoms(self: *MachO) !void { const atom = try MachO.createEmptyAtom(gpa, global.sym_index, size, alignment); atom.file = global.file; - try self.allocateAtomCommon(atom, self.bss_section_index.?); + try self.allocateAtomCommon(atom, n_sect); if (global.file) |file| { const object = &self.objects.items[file]; @@ -4174,7 +3863,8 @@ pub fn deleteExport(self: *MachO, exp: Export) void { fn freeUnnamedConsts(self: *MachO, decl_index: Module.Decl.Index) void { const unnamed_consts = self.unnamed_const_atoms.getPtr(decl_index) orelse return; for (unnamed_consts.items) |atom| { - self.freeAtom(atom, self.text_const_section_index.?, true); + const sect_id = atom.getSymbol(self).n_sect; + self.freeAtom(atom, sect_id, true); self.locals_free_list.append(self.base.allocator, atom.sym_index) catch {}; self.locals.items[atom.sym_index].n_type = 0; _ = self.atom_by_index_table.remove(atom.sym_index); @@ -4307,7 +3997,7 @@ fn populateMissingMetadata(self: *MachO) !void { }; const needed_size = if (self.mode == .incremental) self.base.options.program_code_size_hint else 0; self.text_section_index = try self.initSection( - self.text_segment_cmd_index.?, + "__TEXT", "__text", needed_size, alignment, @@ -4330,7 +4020,7 @@ fn populateMissingMetadata(self: *MachO) !void { }; const needed_size = if (self.mode == .incremental) stub_size * self.base.options.symbol_count_hint else 0; self.stubs_section_index = try self.initSection( - self.text_segment_cmd_index.?, + "__TEXT", "__stubs", needed_size, alignment, @@ -4362,7 +4052,7 @@ fn populateMissingMetadata(self: *MachO) !void { 
else 0; self.stub_helper_section_index = try self.initSection( - self.text_segment_cmd_index.?, + "__TEXT", "__stub_helper", needed_size, alignment, @@ -4407,7 +4097,7 @@ fn populateMissingMetadata(self: *MachO) !void { 0; const alignment: u16 = 3; // 2^3 = @sizeOf(u64) self.got_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, + "__DATA_CONST", "__got", needed_size, alignment, @@ -4452,7 +4142,7 @@ fn populateMissingMetadata(self: *MachO) !void { 0; const alignment: u16 = 3; // 2^3 = @sizeOf(u64) self.la_symbol_ptr_section_index = try self.initSection( - self.data_segment_cmd_index.?, + "__DATA", "__la_symbol_ptr", needed_size, alignment, @@ -4469,7 +4159,7 @@ fn populateMissingMetadata(self: *MachO) !void { 0; const alignment: u16 = 3; // 2^3 = @sizeOf(u64) self.data_section_index = try self.initSection( - self.data_segment_cmd_index.?, + "__DATA", "__data", needed_size, alignment, @@ -4701,12 +4391,13 @@ const InitSectionOpts = struct { fn initSection( self: *MachO, - segment_id: u8, + segname: []const u8, sectname: []const u8, size: u64, alignment: u32, opts: InitSectionOpts, ) !u8 { + const segment_id = self.getSegmentByName(segname).?; const seg = &self.segments.items[segment_id]; const index = try self.insertSection(segment_id, .{ .sectname = makeStaticString(sectname), @@ -4779,42 +4470,13 @@ fn insertSection(self: *MachO, segment_index: u8, header: macho.section_64) !u8 header.sectName(), insertion_index, }); - // TODO slim it down for (&[_]*?u8{ - // __TEXT &self.text_section_index, &self.stubs_section_index, &self.stub_helper_section_index, - &self.gcc_except_tab_section_index, - &self.cstring_section_index, - &self.ustring_section_index, - &self.text_const_section_index, - &self.objc_methlist_section_index, - &self.objc_methname_section_index, - &self.objc_methtype_section_index, - &self.objc_classname_section_index, - &self.eh_frame_section_index, - // __DATA_CONST &self.got_section_index, - &self.mod_init_func_section_index, 
- &self.mod_term_func_section_index, - &self.data_const_section_index, - &self.objc_cfstring_section_index, - &self.objc_classlist_section_index, - &self.objc_imageinfo_section_index, - // __DATA - &self.rustc_section_index, &self.la_symbol_ptr_section_index, - &self.objc_const_section_index, - &self.objc_selrefs_section_index, - &self.objc_classrefs_section_index, - &self.objc_data_section_index, &self.data_section_index, - &self.tlv_section_index, - &self.tlv_ptrs_section_index, - &self.tlv_data_section_index, - &self.tlv_bss_section_index, - &self.bss_section_index, }) |maybe_index| { const index = maybe_index.* orelse continue; if (insertion_index <= index) maybe_index.* = index + 1; @@ -6017,7 +5679,7 @@ fn writeHeader(self: *MachO, ncmds: u32, sizeofcmds: u32) !void { else => unreachable, } - if (self.tlv_section_index) |_| { + if (self.getSectionByName("__DATA", "__thread_vars")) |_| { header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; } @@ -6042,6 +5704,20 @@ pub fn makeStaticString(bytes: []const u8) [16]u8 { return buf; } +fn getSegmentByName(self: MachO, segname: []const u8) ?u8 { + for (self.segments.items) |seg, i| { + if (mem.eql(u8, segname, seg.segName())) return @intCast(u8, i); + } else return null; +} + +pub fn getSectionByName(self: MachO, segname: []const u8, sectname: []const u8) ?u8 { + // TODO investigate caching with a hashmap + for (self.sections.items(.header)) |header, i| { + if (mem.eql(u8, header.segName(), segname) and mem.eql(u8, header.sectName(), sectname)) + return @intCast(u8, i); + } else return null; +} + fn getSectionIndexes(self: MachO, segment_index: u8) struct { start: u8, end: u8 } { var start: u8 = 0; const nsects = for (self.segments.items) |seg, i| { diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 85b3ca1c2b..4871276f3c 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -529,6 +529,7 @@ fn addStub(target: MachO.SymbolWithLoc, context: RelocContext) !void { if 
(context.macho_file.stubs_table.contains(target)) return; const stub_index = try context.macho_file.allocateStubEntry(target); + const stub_helper_atom = try context.macho_file.createStubHelperAtom(); const laptr_atom = try context.macho_file.createLazyPointerAtom(stub_helper_atom.sym_index, target); const stub_atom = try context.macho_file.createStubAtom(laptr_atom.sym_index); @@ -601,9 +602,9 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { // * wrt to __thread_data if defined, then // * wrt to __thread_bss const sect_id: u16 = sect_id: { - if (macho_file.tlv_data_section_index) |i| { + if (macho_file.getSectionByName("__DATA", "__thread_data")) |i| { break :sect_id i; - } else if (macho_file.tlv_bss_section_index) |i| { + } else if (macho_file.getSectionByName("__DATA", "__thread_bss")) |i| { break :sect_id i; } else { log.err("threadlocal variables present but no initializer sections found", .{}); From 1e710396d4489ce11e70820a95c33ec7463b1c12 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 2 Aug 2022 00:03:31 +0200 Subject: [PATCH 4/8] macho: fix linking in incremental context Fix incorrect writing of symtab and strtab in dSYM bundle in incremental context. Fix incorrectly navigating unnamed consts (freeing) in incremental context. This is currently hard-coded to require all consts to land in `__TEXT,__const`, which is wrong and needs a rewrite. 
--- src/link/MachO.zig | 57 +++++++++++++++--------------- src/link/MachO/DebugSymbols.zig | 62 +++++++++++++++++++++------------ 2 files changed, 69 insertions(+), 50 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 205fbcd6bf..9d3ca34b9e 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -511,15 +511,14 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try self.createMhExecuteHeaderSymbol(); try self.resolveDyldStubBinder(); + try self.createDyldPrivateAtom(); + try self.createStubHelperPreambleAtom(); try self.resolveSymbolsInDylibs(); if (self.unresolved.count() > 0) { return error.UndefinedSymbolReference; } - try self.createDyldPrivateAtom(); - try self.createStubHelperPreambleAtom(); - try self.allocateSpecialSymbols(); if (build_options.enable_logging) { @@ -589,7 +588,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No } else null; var headers_buf = std.ArrayList(u8).init(arena); - try self.writeSegmentHeaders(0, self.segments.items.len, &ncmds, headers_buf.writer()); + try self.writeSegmentHeaders(&ncmds, headers_buf.writer()); try self.base.file.?.pwriteAll(headers_buf.items, @sizeOf(macho.mach_header_64)); try self.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len); @@ -1203,7 +1202,7 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) } else null; var headers_buf = std.ArrayList(u8).init(arena); - try self.writeSegmentHeaders(0, self.segments.items.len, &ncmds, headers_buf.writer()); + try self.writeSegmentHeaders(&ncmds, headers_buf.writer()); try self.base.file.?.pwriteAll(headers_buf.items, @sizeOf(macho.mach_header_64)); try self.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len); @@ -3863,7 +3862,9 @@ pub fn deleteExport(self: *MachO, exp: Export) void { fn freeUnnamedConsts(self: *MachO, decl_index: Module.Decl.Index) void { const 
unnamed_consts = self.unnamed_const_atoms.getPtr(decl_index) orelse return; for (unnamed_consts.items) |atom| { - const sect_id = atom.getSymbol(self).n_sect; + // TODO + // const sect_id = atom.getSymbol(self).n_sect; + const sect_id = self.getSectionByName("__TEXT", "__const").?; self.freeAtom(atom, sect_id, true); self.locals_free_list.append(self.base.allocator, atom.sym_index) catch {}; self.locals.items[atom.sym_index].n_type = 0; @@ -4402,8 +4403,6 @@ fn initSection( const index = try self.insertSection(segment_id, .{ .sectname = makeStaticString(sectname), .segname = seg.segname, - .size = if (self.mode == .incremental) @intCast(u32, size) else 0, - .@"align" = alignment, .flags = opts.flags, .reserved1 = opts.reserved1, .reserved2 = opts.reserved2, @@ -4413,6 +4412,9 @@ fn initSection( if (self.mode == .incremental) { const header = &self.sections.items(.header)[index]; + header.size = size; + header.@"align" = alignment; + const prev_end_off = if (index > 0) blk: { const prev_section = self.sections.get(index - 1); if (prev_section.segment_index == segment_id) { @@ -4421,15 +4423,25 @@ fn initSection( } else break :blk seg.fileoff; } else 0; const alignment_pow_2 = try math.powi(u32, 2, alignment); - const padding: u64 = if (index == 0) try self.calcMinHeaderPad() else 0; + // TODO better prealloc for __text section + // const padding: u64 = if (index == 0) try self.calcMinHeaderPad() else 0; + const padding: u64 = if (index == 0) 0x1000 else 0; const off = mem.alignForwardGeneric(u64, padding + prev_end_off, alignment_pow_2); - log.debug("allocating {s},{s} section at 0x{x}", .{ header.segName(), header.sectName(), off }); - - header.addr = seg.vmaddr + off - seg.fileoff; if (!header.isZerofill()) { header.offset = @intCast(u32, off); } + header.addr = seg.vmaddr + off - seg.fileoff; + + // TODO this will break if we are inserting section that is not the last section + // in a segment. 
+ const max_size = self.allocatedSize(segment_id, off); + + if (size > max_size) { + try self.growSection(index, @intCast(u32, size)); + } + + log.debug("allocating {s},{s} section at 0x{x}", .{ header.segName(), header.sectName(), off }); self.updateSectionOrdinals(index + 1); } @@ -4494,7 +4506,7 @@ fn updateSectionOrdinals(self: *MachO, start: u8) void { const slice = self.sections.slice(); for (slice.items(.last_atom)[start..]) |last_atom| { - var atom = last_atom.?; + var atom = last_atom orelse continue; while (true) { const sym = atom.getSymbolPtr(self); @@ -4536,17 +4548,6 @@ fn shiftLocalsByOffset(self: *MachO, sect_id: u8, offset: i64) !void { } } -fn findFreeSpace(self: MachO, segment_id: u8, alignment: u64, start: ?u64) u64 { - const seg = self.segments.items[segment_id]; - const indexes = self.getSectionIndexes(segment_id); - if (indexes.end - indexes.start == 0) { - return if (start) |v| v else seg.fileoff; - } - const last_sect = self.sections.items(.header)[indexes.end - 1]; - const final_off = last_sect.offset + padToIdeal(last_sect.size); - return mem.alignForwardGeneric(u64, final_off, alignment); -} - fn growSegment(self: *MachO, segment_index: u8, new_size: u64) !void { const segment = &self.segments.items[segment_index]; const new_segment_size = mem.alignForwardGeneric(u64, new_size, self.page_size); @@ -4885,14 +4886,14 @@ fn getSegmentAllocBase(self: MachO, indices: []const ?u8) struct { vmaddr: u64, return .{ .vmaddr = 0, .fileoff = 0 }; } -pub fn writeSegmentHeaders(self: *MachO, start: usize, end: usize, ncmds: *u32, writer: anytype) !void { - for (self.segments.items[start..end]) |seg, i| { +fn writeSegmentHeaders(self: *MachO, ncmds: *u32, writer: anytype) !void { + for (self.segments.items) |seg, i| { if (seg.nsects == 0 and (mem.eql(u8, seg.segName(), "__DATA_CONST") or mem.eql(u8, seg.segName(), "__DATA"))) continue; try writer.writeStruct(seg); - const indexes = self.getSectionIndexes(@intCast(u8, start + i)); + const indexes = 
self.getSectionIndexes(@intCast(u8, i)); for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { try writer.writeStruct(header); } @@ -5718,7 +5719,7 @@ pub fn getSectionByName(self: MachO, segname: []const u8, sectname: []const u8) } else return null; } -fn getSectionIndexes(self: MachO, segment_index: u8) struct { start: u8, end: u8 } { +pub fn getSectionIndexes(self: MachO, segment_index: u8) struct { start: u8, end: u8 } { var start: u8 = 0; const nsects = for (self.segments.items) |seg, i| { if (i == segment_index) break @intCast(u8, seg.nsects); diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index f191d43f98..65d3319293 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -63,6 +63,10 @@ pub const Reloc = struct { pub fn populateMissingMetadata(self: *DebugSymbols, allocator: Allocator) !void { if (self.linkedit_segment_cmd_index == null) { self.linkedit_segment_cmd_index = @intCast(u8, self.segments.items.len); + log.debug("found __LINKEDIT segment free space 0x{x} to 0x{x}", .{ + self.base.page_size, + self.base.page_size * 2, + }); // TODO this needs reworking try self.segments.append(allocator, .{ .segname = makeStaticString("__LINKEDIT"), @@ -79,7 +83,7 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: Allocator) !void if (self.dwarf_segment_cmd_index == null) { self.dwarf_segment_cmd_index = @intCast(u8, self.segments.items.len); - const linkedit = self.segments.items[self.base.linkedit_segment_cmd_index.?]; + const linkedit = self.segments.items[self.linkedit_segment_cmd_index.?]; const ideal_size: u16 = 200 + 128 + 160 + 250; const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.base.page_size); const fileoff = linkedit.fileoff + linkedit.filesize; @@ -290,20 +294,7 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti var headers_buf = std.ArrayList(u8).init(allocator); defer headers_buf.deinit(); - 
try self.base.writeSegmentHeaders( - 0, - self.base.linkedit_segment_cmd_index.?, - &ncmds, - headers_buf.writer(), - ); - - for (self.segments.items) |seg| { - try headers_buf.writer().writeStruct(seg); - ncmds += 2; - } - for (self.sections.items) |header| { - try headers_buf.writer().writeStruct(header); - } + try self.writeSegmentHeaders(&ncmds, headers_buf.writer()); try self.file.pwriteAll(headers_buf.items, @sizeOf(macho.mach_header_64)); try self.file.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len); @@ -349,6 +340,7 @@ fn updateDwarfSegment(self: *DebugSymbols) void { var max_offset: u64 = 0; for (self.sections.items) |*sect| { + sect.addr += diff; log.debug(" {s},{s} - 0x{x}-0x{x} - 0x{x}-0x{x}", .{ sect.segName(), sect.sectName(), @@ -360,7 +352,6 @@ fn updateDwarfSegment(self: *DebugSymbols) void { if (sect.offset + sect.size > max_offset) { max_offset = sect.offset + sect.size; } - sect.addr += diff; } const file_size = max_offset - dwarf_segment.fileoff; @@ -372,6 +363,37 @@ fn updateDwarfSegment(self: *DebugSymbols) void { } } +fn writeSegmentHeaders(self: *DebugSymbols, ncmds: *u32, writer: anytype) !void { + // Write segment/section headers from the binary file first. + const end = self.base.linkedit_segment_cmd_index.?; + for (self.base.segments.items[0..end]) |seg, i| { + if (seg.nsects == 0 and + (mem.eql(u8, seg.segName(), "__DATA_CONST") or + mem.eql(u8, seg.segName(), "__DATA"))) continue; + var out_seg = seg; + out_seg.fileoff = 0; + out_seg.filesize = 0; + try writer.writeStruct(out_seg); + + const indexes = self.base.getSectionIndexes(@intCast(u8, i)); + for (self.base.sections.items(.header)[indexes.start..indexes.end]) |header| { + var out_header = header; + out_header.offset = 0; + try writer.writeStruct(out_header); + } + + ncmds.* += 1; + } + // Next, commit DSYM's __LINKEDIT and __DWARF segments headers. 
+ for (self.segments.items) |seg| { + try writer.writeStruct(seg); + ncmds.* += 1; + } + for (self.sections.items) |header| { + try writer.writeStruct(header); + } +} + fn writeHeader(self: *DebugSymbols, ncmds: u32, sizeofcmds: u32) !void { var header: macho.mach_header_64 = .{}; header.filetype = macho.MH_DSYM; @@ -469,11 +491,7 @@ fn writeSymtab(self: *DebugSymbols, lc: *macho.symtab_command) !void { const nsyms = nlocals + nexports; const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; - const offset = mem.alignForwardGeneric( - u64, - seg.fileoff + seg.filesize, - @alignOf(macho.nlist_64), - ); + const offset = mem.alignForwardGeneric(u64, seg.fileoff, @alignOf(macho.nlist_64)); const needed_size = nsyms * @sizeOf(macho.nlist_64); if (needed_size > seg.filesize) { @@ -535,7 +553,7 @@ fn writeStrtab(self: *DebugSymbols, lc: *macho.symtab_command) !void { const needed_size = mem.alignForwardGeneric(u64, self.strtab.buffer.items.len, @alignOf(u64)); lc.strsize = @intCast(u32, needed_size); - if (offset + needed_size > seg.filesize) { + if (symtab_size + needed_size > seg.filesize) { const aligned_size = mem.alignForwardGeneric(u64, offset + needed_size, self.base.page_size); const diff = @intCast(u32, aligned_size - seg.filesize); const dwarf_seg = &self.segments.items[self.dwarf_segment_cmd_index.?]; From 421d3e8d2822e979c1a2d5e7aaa5859499bc2146 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 2 Aug 2022 20:38:00 +0200 Subject: [PATCH 5/8] macho: add missing align cast in LoadCommandIterator --- lib/std/macho.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/macho.zig b/lib/std/macho.zig index aa43229a76..1955a00334 100644 --- a/lib/std/macho.zig +++ b/lib/std/macho.zig @@ -1901,7 +1901,7 @@ pub const LoadCommandIterator = struct { .data = it.buffer[0..hdr.cmdsize], }; - it.buffer = it.buffer[hdr.cmdsize..]; + it.buffer = @alignCast(@alignOf(u64), it.buffer[hdr.cmdsize..]); it.index += 1; return cmd; From 
90e326827062fc7899d02516cdaffa7da8366077 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 2 Aug 2022 22:15:07 +0200 Subject: [PATCH 6/8] macho: do not preempt segment headers; do it when committing to file This way, tracking segment-to-section mapping becomes a lot easier since it's effectively just start index plus number of sections defined within the segment. If a section becomes empty however care needs to be taken to remove the header upon committing to the final binary. --- src/link/MachO.zig | 23 ++++++++++++++++++----- src/link/MachO/DebugSymbols.zig | 22 +++++++++++++++++----- src/link/MachO/dead_strip.zig | 3 --- 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 9d3ca34b9e..352e3bedf8 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -4888,13 +4888,26 @@ fn getSegmentAllocBase(self: MachO, indices: []const ?u8) struct { vmaddr: u64, fn writeSegmentHeaders(self: *MachO, ncmds: *u32, writer: anytype) !void { for (self.segments.items) |seg, i| { - if (seg.nsects == 0 and - (mem.eql(u8, seg.segName(), "__DATA_CONST") or - mem.eql(u8, seg.segName(), "__DATA"))) continue; - try writer.writeStruct(seg); - const indexes = self.getSectionIndexes(@intCast(u8, i)); + var out_seg = seg; + out_seg.cmdsize = @sizeOf(macho.segment_command_64); + out_seg.nsects = 0; + + // Update section headers count; any section with size of 0 is excluded + // since it doesn't have any data in the final binary file. 
for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { + if (header.size == 0) continue; + out_seg.cmdsize += @sizeOf(macho.section_64); + out_seg.nsects += 1; + } + + if (out_seg.nsects == 0 and + (mem.eql(u8, out_seg.segName(), "__DATA_CONST") or + mem.eql(u8, out_seg.segName(), "__DATA"))) continue; + + try writer.writeStruct(out_seg); + for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { + if (header.size == 0) continue; try writer.writeStruct(header); } diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index 65d3319293..3bfe334302 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -367,16 +367,28 @@ fn writeSegmentHeaders(self: *DebugSymbols, ncmds: *u32, writer: anytype) !void // Write segment/section headers from the binary file first. const end = self.base.linkedit_segment_cmd_index.?; for (self.base.segments.items[0..end]) |seg, i| { - if (seg.nsects == 0 and - (mem.eql(u8, seg.segName(), "__DATA_CONST") or - mem.eql(u8, seg.segName(), "__DATA"))) continue; + const indexes = self.base.getSectionIndexes(@intCast(u8, i)); var out_seg = seg; out_seg.fileoff = 0; out_seg.filesize = 0; - try writer.writeStruct(out_seg); + out_seg.cmdsize = @sizeOf(macho.segment_command_64); + out_seg.nsects = 0; - const indexes = self.base.getSectionIndexes(@intCast(u8, i)); + // Update section headers count; any section with size of 0 is excluded + // since it doesn't have any data in the final binary file. 
for (self.base.sections.items(.header)[indexes.start..indexes.end]) |header| { + if (header.size == 0) continue; + out_seg.cmdsize += @sizeOf(macho.section_64); + out_seg.nsects += 1; + } + + if (out_seg.nsects == 0 and + (mem.eql(u8, out_seg.segName(), "__DATA_CONST") or + mem.eql(u8, out_seg.segName(), "__DATA"))) continue; + + try writer.writeStruct(out_seg); + for (self.base.sections.items(.header)[indexes.start..indexes.end]) |header| { + if (header.size == 0) continue; var out_header = header; out_header.offset = 0; try writer.writeStruct(out_header); diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index bf65b96049..eb2be6e5fe 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -43,9 +43,6 @@ fn removeAtomFromSection(atom: *Atom, match: u8, macho_file: *MachO) void { // The section will be GCed in the next step. section.last_atom = null; section.header.size = 0; - const segment = &macho_file.segments.items[section.segment_index]; - segment.cmdsize -= @sizeOf(macho.section_64); - segment.nsects -= 1; } } From 2c8fc3b5979d6ca8befbf5de48a661a6fadbe819 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 2 Aug 2022 22:58:10 +0200 Subject: [PATCH 7/8] macho: add missing u64 to usize casts Fixes 32bit builds. 
--- src/link/MachO.zig | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 352e3bedf8..a955200a77 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1487,7 +1487,8 @@ pub fn parseDylib( var file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; const reader = file.reader(); - const fat_offset = try fat.getLibraryOffset(reader, cpu_arch); + const fat_offset = math.cast(usize, try fat.getLibraryOffset(reader, cpu_arch)) orelse + return error.Overflow; try file.seekTo(fat_offset); file_size -= fat_offset; @@ -5091,7 +5092,7 @@ fn writeDyldInfoData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { const needed_size = export_off + export_size - rebase_off; link_seg.filesize = needed_size; - var buffer = try gpa.alloc(u8, needed_size); + var buffer = try gpa.alloc(u8, math.cast(usize, needed_size) orelse return error.Overflow); defer gpa.free(buffer); mem.set(u8, buffer, 0); @@ -5115,7 +5116,9 @@ fn writeDyldInfoData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { }); try self.base.file.?.pwriteAll(buffer, rebase_off); - try self.populateLazyBindOffsetsInStubHelper(buffer[lazy_bind_off - rebase_off ..][0..lazy_bind_size]); + const start = math.cast(usize, lazy_bind_off - rebase_off) orelse return error.Overflow; + const end = start + (math.cast(usize, lazy_bind_size) orelse return error.Overflow); + try self.populateLazyBindOffsetsInStubHelper(buffer[start..end]); try lc_writer.writeStruct(macho.dyld_info_command{ .cmd = .DYLD_INFO_ONLY, From 007eb3bd714caee550722cae0e4e98a205ead341 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 3 Aug 2022 21:26:52 +0200 Subject: [PATCH 8/8] macho: fix some TODOs --- src/link/MachO.zig | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index a955200a77..db207af5f5 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -64,7 +64,10 @@ const SystemLib = 
struct { const Section = struct { header: macho.section_64, segment_index: u8, - last_atom: ?*Atom = null, // TODO temporary hack; we really should shrink section to 0 + + // TODO is null here necessary, or can we do away with tracking via section + // size in incremental context? + last_atom: ?*Atom = null, /// A list of atoms that have surplus capacity. This list can have false /// positives, as functions grow and shrink over time, only sometimes being added @@ -4434,8 +4437,8 @@ fn initSection( } header.addr = seg.vmaddr + off - seg.fileoff; - // TODO this will break if we are inserting section that is not the last section - // in a segment. + // TODO Will this break if we are inserting section that is not the last section + // in a segment? const max_size = self.allocatedSize(segment_id, off); if (size > max_size) {