From e08f7ba8896ad64a696424cff9b9c0963a7a4a0b Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 30 Jun 2021 16:59:26 +0200 Subject: [PATCH 01/81] zld: remove redundant codepaths --- src/link/MachO.zig | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index df2e0134e4..20c3b32512 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -964,18 +964,6 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { } } -fn darwinArchString(arch: std.Target.Cpu.Arch) []const u8 { - return switch (arch) { - .aarch64, .aarch64_be, .aarch64_32 => "arm64", - .thumb, .arm => "arm", - .thumbeb, .armeb => "armeb", - .powerpc => "ppc", - .powerpc64 => "ppc64", - .powerpc64le => "ppc64le", - else => @tagName(arch), - }; -} - pub fn deinit(self: *MachO) void { if (self.d_sym) |*ds| { ds.deinit(self.base.allocator); From 9c3ebe0216306b5e346ec52959de41d1b4d504d9 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 1 Jul 2021 10:34:46 +0200 Subject: [PATCH 02/81] zld: clean up logic for creating mach header --- src/link/MachO.zig | 128 ++++++++++++-------------------- src/link/MachO/DebugSymbols.zig | 90 +++++++++------------- src/link/MachO/Zld.zig | 40 ++++------ src/link/MachO/commands.zig | 22 ++++++ 4 files changed, 119 insertions(+), 161 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 20c3b32512..f6ff03e92b 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -41,8 +41,6 @@ d_sym: ?DebugSymbols = null, /// For x86_64 that's 4KB, whereas for aarch64, that's 16KB. page_size: u16, -/// Mach-O header -header: ?macho.mach_header_64 = null, /// We commit 0x1000 = 4096 bytes of space to the header and /// the table of load commands. This should be plenty for any /// potential future extensions. 
@@ -128,7 +126,6 @@ offset_table: std.ArrayListUnmanaged(GOTEntry) = .{}, error_flags: File.ErrorFlags = File.ErrorFlags{}, offset_table_count_dirty: bool = false, -header_dirty: bool = false, load_commands_dirty: bool = false, rebase_info_dirty: bool = false, binding_info_dirty: bool = false, @@ -497,7 +494,6 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { } assert(!self.offset_table_count_dirty); - assert(!self.header_dirty); assert(!self.load_commands_dirty); assert(!self.rebase_info_dirty); assert(!self.binding_info_dirty); @@ -1488,54 +1484,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { .Lib => return error.TODOImplementWritingLibFiles, } - if (self.header == null) { - var header: macho.mach_header_64 = undefined; - header.magic = macho.MH_MAGIC_64; - - const CpuInfo = struct { - cpu_type: macho.cpu_type_t, - cpu_subtype: macho.cpu_subtype_t, - }; - - const cpu_info: CpuInfo = switch (self.base.options.target.cpu.arch) { - .aarch64 => .{ - .cpu_type = macho.CPU_TYPE_ARM64, - .cpu_subtype = macho.CPU_SUBTYPE_ARM_ALL, - }, - .x86_64 => .{ - .cpu_type = macho.CPU_TYPE_X86_64, - .cpu_subtype = macho.CPU_SUBTYPE_X86_64_ALL, - }, - else => return error.UnsupportedMachOArchitecture, - }; - header.cputype = cpu_info.cpu_type; - header.cpusubtype = cpu_info.cpu_subtype; - - const filetype: u32 = switch (self.base.options.output_mode) { - .Exe => macho.MH_EXECUTE, - .Obj => macho.MH_OBJECT, - .Lib => switch (self.base.options.link_mode) { - .Static => return error.TODOStaticLibMachOType, - .Dynamic => macho.MH_DYLIB, - }, - }; - header.filetype = filetype; - // These will get populated at the end of flushing the results to file. 
- header.ncmds = 0; - header.sizeofcmds = 0; - - switch (self.base.options.output_mode) { - .Exe => { - header.flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE; - }, - else => { - header.flags = 0; - }, - } - header.reserved = 0; - self.header = header; - self.header_dirty = true; - } if (self.pagezero_segment_cmd_index == null) { self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); try self.load_commands.append(self.base.allocator, .{ @@ -1543,7 +1491,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { .vmsize = 0x100000000, // size always set to 4GB }), }); - self.header_dirty = true; self.load_commands_dirty = true; } if (self.text_segment_cmd_index == null) { @@ -1567,7 +1514,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { .initprot = initprot, }), }); - self.header_dirty = true; self.load_commands_dirty = true; } if (self.text_section_index == null) { @@ -1592,7 +1538,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { .@"align" = alignment, .flags = flags, }); - self.header_dirty = true; self.load_commands_dirty = true; } if (self.stubs_section_index == null) { @@ -1624,7 +1569,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { .flags = flags, .reserved2 = stub_size, }); - self.header_dirty = true; self.load_commands_dirty = true; } if (self.stub_helper_section_index == null) { @@ -1650,7 +1594,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { .@"align" = alignment, .flags = flags, }); - self.header_dirty = true; self.load_commands_dirty = true; } if (self.data_const_segment_cmd_index == null) { @@ -1674,7 +1617,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { .initprot = initprot, }), }); - self.header_dirty = true; self.load_commands_dirty = true; } if (self.got_section_index == null) { @@ -1695,7 +1637,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { .@"align" = 3, // 2^3 = @sizeOf(u64) .flags = flags, }); - self.header_dirty = true; self.load_commands_dirty = 
true; } if (self.data_segment_cmd_index == null) { @@ -1719,7 +1660,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { .initprot = initprot, }), }); - self.header_dirty = true; self.load_commands_dirty = true; } if (self.la_symbol_ptr_section_index == null) { @@ -1740,7 +1680,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { .@"align" = 3, // 2^3 = @sizeOf(u64) .flags = flags, }); - self.header_dirty = true; self.load_commands_dirty = true; } if (self.data_section_index == null) { @@ -1759,7 +1698,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { .offset = @intCast(u32, off), .@"align" = 3, // 2^3 = @sizeOf(u64) }); - self.header_dirty = true; self.load_commands_dirty = true; } if (self.linkedit_segment_cmd_index == null) { @@ -1779,7 +1717,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { .initprot = initprot, }), }); - self.header_dirty = true; self.load_commands_dirty = true; } if (self.dyld_info_cmd_index == null) { @@ -1826,7 +1763,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { dyld.export_off = @intCast(u32, export_off); dyld.export_size = expected_size; - self.header_dirty = true; self.load_commands_dirty = true; } if (self.symtab_cmd_index == null) { @@ -1858,7 +1794,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { symtab.stroff = @intCast(u32, strtab_off); symtab.strsize = @intCast(u32, strtab_size); - self.header_dirty = true; self.load_commands_dirty = true; self.string_table_dirty = true; } @@ -1895,7 +1830,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { .nlocrel = 0, }, }); - self.header_dirty = true; self.load_commands_dirty = true; } if (self.dylinker_cmd_index == null) { @@ -1914,7 +1848,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { mem.set(u8, dylinker_cmd.data, 0); mem.copy(u8, dylinker_cmd.data, mem.spanZ(DEFAULT_DYLD_PATH)); try self.load_commands.append(self.base.allocator, .{ .Dylinker = dylinker_cmd }); - self.header_dirty = true; self.load_commands_dirty = true; } if 
(self.libsystem_cmd_index == null) { @@ -1925,7 +1858,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd }); - self.header_dirty = true; self.load_commands_dirty = true; } if (self.main_cmd_index == null) { @@ -1938,7 +1870,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { .stacksize = 0, }, }); - self.header_dirty = true; self.load_commands_dirty = true; } if (self.version_min_cmd_index == null) { @@ -1960,7 +1891,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { .sdk = version, }, }); - self.header_dirty = true; self.load_commands_dirty = true; } if (self.source_version_cmd_index == null) { @@ -1972,7 +1902,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { .version = 0x0, }, }); - self.header_dirty = true; self.load_commands_dirty = true; } if (self.uuid_cmd_index == null) { @@ -1984,7 +1913,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { }; std.crypto.random.bytes(&uuid_cmd.uuid); try self.load_commands.append(self.base.allocator, .{ .Uuid = uuid_cmd }); - self.header_dirty = true; self.load_commands_dirty = true; } if (self.code_signature_cmd_index == null) { @@ -1997,7 +1925,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { .datasize = 0, }, }); - self.header_dirty = true; self.load_commands_dirty = true; } if (!self.nonlazy_imports.contains("dyld_stub_binder")) { @@ -3224,24 +3151,57 @@ fn writeLoadCommands(self: *MachO) !void { } const off = @sizeOf(macho.mach_header_64); + log.debug("writing {} load commands from 0x{x} to 0x{x}", .{ self.load_commands.items.len, off, off + sizeofcmds }); + try self.base.file.?.pwriteAll(buffer, off); self.load_commands_dirty = false; } /// Writes Mach-O file header. 
fn writeHeader(self: *MachO) !void { - if (!self.header_dirty) return; + var header = emptyHeader(.{ + .flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE | macho.MH_TWOLEVEL, + }); - self.header.?.ncmds = @intCast(u32, self.load_commands.items.len); - var sizeofcmds: u32 = 0; - for (self.load_commands.items) |cmd| { - sizeofcmds += cmd.cmdsize(); + switch (self.base.options.target.cpu.arch) { + .aarch64 => { + header.cputype = macho.CPU_TYPE_ARM64; + header.cpusubtype = macho.CPU_SUBTYPE_ARM_ALL; + }, + .x86_64 => { + header.cputype = macho.CPU_TYPE_X86_64; + header.cpusubtype = macho.CPU_SUBTYPE_X86_64_ALL; + }, + else => return error.UnsupportedCpuArchitecture, } - self.header.?.sizeofcmds = sizeofcmds; - log.debug("writing Mach-O header {}", .{self.header.?}); - try self.base.file.?.pwriteAll(mem.asBytes(&self.header.?), 0); - self.header_dirty = false; + + switch (self.base.options.output_mode) { + .Exe => { + header.filetype = macho.MH_EXECUTE; + }, + .Lib => { + // By this point, it can only be a dylib. 
+ header.filetype = macho.MH_DYLIB; + header.flags |= macho.MH_NO_REEXPORTED_DYLIBS; + }, + else => unreachable, + } + + if (self.hasTlvDescriptors()) { + header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; + } + + header.ncmds = @intCast(u32, self.load_commands.items.len); + header.sizeofcmds = 0; + + for (self.load_commands.items) |cmd| { + header.sizeofcmds += cmd.cmdsize(); + } + + log.debug("writing Mach-O header {}", .{header}); + + try self.base.file.?.pwriteAll(mem.asBytes(&header), 0); } pub fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) { @@ -3249,3 +3209,7 @@ pub fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) { return std.math.add(@TypeOf(actual_size), actual_size, actual_size / ideal_factor) catch std.math.maxInt(@TypeOf(actual_size)); } + +fn hasTlvDescriptors(_: *MachO) bool { + return false; +} diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index 684861ebf5..62c2faad4b 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -3,7 +3,7 @@ const DebugSymbols = @This(); const std = @import("std"); const assert = std.debug.assert; const fs = std.fs; -const log = std.log.scoped(.link); +const log = std.log.scoped(.dsym); const macho = std.macho; const mem = std.mem; const DW = std.dwarf; @@ -27,9 +27,6 @@ const page_size: u16 = 0x1000; base: *MachO, file: fs.File, -/// Mach header -header: ?macho.mach_header_64 = null, - /// Table of all load commands load_commands: std.ArrayListUnmanaged(LoadCommand) = .{}, /// __PAGEZERO segment @@ -78,7 +75,6 @@ dbg_info_decl_last: ?*TextBlock = null, /// Table of debug symbol names aka the debug string table. 
debug_string_table: std.ArrayListUnmanaged(u8) = .{}, -header_dirty: bool = false, load_commands_dirty: bool = false, string_table_dirty: bool = false, debug_string_table_dirty: bool = false, @@ -106,26 +102,10 @@ const min_nop_size = 2; /// You must call this function *after* `MachO.populateMissingMetadata()` /// has been called to get a viable debug symbols output. pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void { - if (self.header == null) { - const base_header = self.base.header.?; - var header: macho.mach_header_64 = undefined; - header.magic = macho.MH_MAGIC_64; - header.cputype = base_header.cputype; - header.cpusubtype = base_header.cpusubtype; - header.filetype = macho.MH_DSYM; - // These will get populated at the end of flushing the results to file. - header.ncmds = 0; - header.sizeofcmds = 0; - header.flags = 0; - header.reserved = 0; - self.header = header; - self.header_dirty = true; - } if (self.uuid_cmd_index == null) { const base_cmd = self.base.load_commands.items[self.base.uuid_cmd_index.?]; self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len); try self.load_commands.append(allocator, base_cmd); - self.header_dirty = true; self.load_commands_dirty = true; } if (self.symtab_cmd_index == null) { @@ -134,11 +114,11 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void const symtab_size = base_cmd.nsyms * @sizeOf(macho.nlist_64); const symtab_off = self.findFreeSpaceLinkedit(symtab_size, @sizeOf(macho.nlist_64)); - log.debug("found dSym symbol table free space 0x{x} to 0x{x}", .{ symtab_off, symtab_off + symtab_size }); + log.debug("found symbol table free space 0x{x} to 0x{x}", .{ symtab_off, symtab_off + symtab_size }); const strtab_off = self.findFreeSpaceLinkedit(base_cmd.strsize, 1); - log.debug("found dSym string table free space 0x{x} to 0x{x}", .{ strtab_off, strtab_off + base_cmd.strsize }); + log.debug("found string table free space 0x{x} to 0x{x}", .{ strtab_off, 
strtab_off + base_cmd.strsize }); try self.load_commands.append(allocator, .{ .Symtab = .{ @@ -150,7 +130,6 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void .strsize = base_cmd.strsize, }, }); - self.header_dirty = true; self.load_commands_dirty = true; self.string_table_dirty = true; } @@ -159,7 +138,6 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void const base_cmd = self.base.load_commands.items[self.base.pagezero_segment_cmd_index.?].Segment; const cmd = try self.copySegmentCommand(allocator, base_cmd); try self.load_commands.append(allocator, .{ .Segment = cmd }); - self.header_dirty = true; self.load_commands_dirty = true; } if (self.text_segment_cmd_index == null) { @@ -167,7 +145,6 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void const base_cmd = self.base.load_commands.items[self.base.text_segment_cmd_index.?].Segment; const cmd = try self.copySegmentCommand(allocator, base_cmd); try self.load_commands.append(allocator, .{ .Segment = cmd }); - self.header_dirty = true; self.load_commands_dirty = true; } if (self.data_const_segment_cmd_index == null) outer: { @@ -176,7 +153,6 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void const base_cmd = self.base.load_commands.items[self.base.data_const_segment_cmd_index.?].Segment; const cmd = try self.copySegmentCommand(allocator, base_cmd); try self.load_commands.append(allocator, .{ .Segment = cmd }); - self.header_dirty = true; self.load_commands_dirty = true; } if (self.data_segment_cmd_index == null) outer: { @@ -185,7 +161,6 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void const base_cmd = self.base.load_commands.items[self.base.data_segment_cmd_index.?].Segment; const cmd = try self.copySegmentCommand(allocator, base_cmd); try self.load_commands.append(allocator, .{ .Segment = cmd }); - self.header_dirty = true; self.load_commands_dirty = 
true; } if (self.linkedit_segment_cmd_index == null) { @@ -196,7 +171,6 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void cmd.inner.fileoff = self.linkedit_off; cmd.inner.filesize = self.linkedit_size; try self.load_commands.append(allocator, .{ .Segment = cmd }); - self.header_dirty = true; self.load_commands_dirty = true; } if (self.dwarf_segment_cmd_index == null) { @@ -208,7 +182,7 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void const off = linkedit.inner.fileoff + linkedit.inner.filesize; const vmaddr = linkedit.inner.vmaddr + linkedit.inner.vmsize; - log.debug("found dSym __DWARF segment free space 0x{x} to 0x{x}", .{ off, off + needed_size }); + log.debug("found __DWARF segment free space 0x{x} to 0x{x}", .{ off, off + needed_size }); try self.load_commands.append(allocator, .{ .Segment = SegmentCommand.empty("__DWARF", .{ @@ -218,7 +192,6 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void .filesize = needed_size, }), }); - self.header_dirty = true; self.load_commands_dirty = true; } if (self.debug_str_section_index == null) { @@ -232,7 +205,6 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void .offset = @intCast(u32, dwarf_segment.inner.fileoff), .@"align" = 1, }); - self.header_dirty = true; self.load_commands_dirty = true; self.debug_string_table_dirty = true; } @@ -244,7 +216,7 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void const p_align = 1; const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null); - log.debug("found dSym __debug_info free space 0x{x} to 0x{x}", .{ off, off + file_size_hint }); + log.debug("found __debug_info free space 0x{x} to 0x{x}", .{ off, off + file_size_hint }); try dwarf_segment.addSection(allocator, "__debug_info", .{ .addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff, @@ -252,7 +224,6 @@ pub fn populateMissingMetadata(self: 
*DebugSymbols, allocator: *Allocator) !void .offset = @intCast(u32, off), .@"align" = p_align, }); - self.header_dirty = true; self.load_commands_dirty = true; self.debug_info_header_dirty = true; } @@ -264,7 +235,7 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void const p_align = 1; const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null); - log.debug("found dSym __debug_abbrev free space 0x{x} to 0x{x}", .{ off, off + file_size_hint }); + log.debug("found __debug_abbrev free space 0x{x} to 0x{x}", .{ off, off + file_size_hint }); try dwarf_segment.addSection(allocator, "__debug_abbrev", .{ .addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff, @@ -272,7 +243,6 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void .offset = @intCast(u32, off), .@"align" = p_align, }); - self.header_dirty = true; self.load_commands_dirty = true; self.debug_abbrev_section_dirty = true; } @@ -284,7 +254,7 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void const p_align = 16; const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null); - log.debug("found dSym __debug_aranges free space 0x{x} to 0x{x}", .{ off, off + file_size_hint }); + log.debug("found __debug_aranges free space 0x{x} to 0x{x}", .{ off, off + file_size_hint }); try dwarf_segment.addSection(allocator, "__debug_aranges", .{ .addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff, @@ -292,7 +262,6 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void .offset = @intCast(u32, off), .@"align" = p_align, }); - self.header_dirty = true; self.load_commands_dirty = true; self.debug_aranges_section_dirty = true; } @@ -304,7 +273,7 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void const p_align = 1; const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null); - log.debug("found dSym __debug_line free 
space 0x{x} to 0x{x}", .{ off, off + file_size_hint }); + log.debug("found __debug_line free space 0x{x} to 0x{x}", .{ off, off + file_size_hint }); try dwarf_segment.addSection(allocator, "__debug_line", .{ .addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff, @@ -312,7 +281,6 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void .offset = @intCast(u32, off), .@"align" = p_align, }); - self.header_dirty = true; self.load_commands_dirty = true; self.debug_line_header_dirty = true; } @@ -624,7 +592,6 @@ pub fn flushModule(self: *DebugSymbols, allocator: *Allocator, options: link.Opt try self.writeLoadCommands(allocator); try self.writeHeader(); - assert(!self.header_dirty); assert(!self.load_commands_dirty); assert(!self.string_table_dirty); assert(!self.debug_abbrev_section_dirty); @@ -716,23 +683,38 @@ fn writeLoadCommands(self: *DebugSymbols, allocator: *Allocator) !void { } const off = @sizeOf(macho.mach_header_64); - log.debug("writing {} dSym load commands from 0x{x} to 0x{x}", .{ self.load_commands.items.len, off, off + sizeofcmds }); + log.debug("writing {} load commands from 0x{x} to 0x{x}", .{ self.load_commands.items.len, off, off + sizeofcmds }); try self.file.pwriteAll(buffer, off); self.load_commands_dirty = false; } fn writeHeader(self: *DebugSymbols) !void { - if (!self.header_dirty) return; + var header = emptyHeader(.{ + .filetype = macho.MH_DSYM, + }); - self.header.?.ncmds = @intCast(u32, self.load_commands.items.len); - var sizeofcmds: u32 = 0; - for (self.load_commands.items) |cmd| { - sizeofcmds += cmd.cmdsize(); + switch (self.base.base.options.target.cpu.arch) { + .aarch64 => { + header.cputype = macho.CPU_TYPE_ARM64; + header.cpusubtype = macho.CPU_SUBTYPE_ARM_ALL; + }, + .x86_64 => { + header.cputype = macho.CPU_TYPE_X86_64; + header.cpusubtype = macho.CPU_SUBTYPE_X86_64_ALL; + }, + else => return error.UnsupportedCpuArchitecture, } - self.header.?.sizeofcmds = sizeofcmds; - 
log.debug("writing Mach-O dSym header {}", .{self.header.?}); - try self.file.pwriteAll(mem.asBytes(&self.header.?), 0); - self.header_dirty = false; + + header.ncmds = @intCast(u32, self.load_commands.items.len); + header.sizeofcmds = 0; + + for (self.load_commands.items) |cmd| { + header.sizeofcmds += cmd.cmdsize(); + } + + log.debug("writing Mach-O header {}", .{header}); + + try self.file.pwriteAll(mem.asBytes(&header), 0); } fn allocatedSizeLinkedit(self: *DebugSymbols, start: u64) u64 { @@ -798,7 +780,7 @@ fn relocateSymbolTable(self: *DebugSymbols) !void { const existing_size = symtab.nsyms * @sizeOf(macho.nlist_64); assert(new_symoff + existing_size <= self.linkedit_off + self.linkedit_size); // TODO expand LINKEDIT segment. - log.debug("relocating dSym symbol table from 0x{x}-0x{x} to 0x{x}-0x{x}", .{ + log.debug("relocating symbol table from 0x{x}-0x{x} to 0x{x}-0x{x}", .{ symtab.symoff, symtab.symoff + existing_size, new_symoff, @@ -820,7 +802,7 @@ pub fn writeLocalSymbol(self: *DebugSymbols, index: usize) !void { try self.relocateSymbolTable(); const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; const off = symtab.symoff + @sizeOf(macho.nlist_64) * index; - log.debug("writing dSym local symbol {} at 0x{x}", .{ index, off }); + log.debug("writing local symbol {} at 0x{x}", .{ index, off }); try self.file.pwriteAll(mem.asBytes(&self.base.locals.items[index]), off); } @@ -839,7 +821,7 @@ fn writeStringTable(self: *DebugSymbols) !void { symtab.stroff = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1)); } symtab.strsize = @intCast(u32, needed_size); - log.debug("writing dSym string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); + log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); try self.file.pwriteAll(self.base.string_table.items, symtab.stroff); self.load_commands_dirty = true; diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig 
index 454b5dbcfe..d223e04fd7 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -3132,54 +3132,44 @@ fn writeLoadCommands(self: *Zld) !void { } fn writeHeader(self: *Zld) !void { - var header: macho.mach_header_64 = undefined; - header.magic = macho.MH_MAGIC_64; + var header = emptyHeader(.{ + .flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE | macho.MH_TWOLEVEL, + }); - const CpuInfo = struct { - cpu_type: macho.cpu_type_t, - cpu_subtype: macho.cpu_subtype_t, - }; - - const cpu_info: CpuInfo = switch (self.target.?.cpu.arch) { - .aarch64 => .{ - .cpu_type = macho.CPU_TYPE_ARM64, - .cpu_subtype = macho.CPU_SUBTYPE_ARM_ALL, + switch (self.target.?.cpu.arch) { + .aarch64 => { + header.cputype = macho.CPU_TYPE_ARM64; + header.cpusubtype = macho.CPU_SUBTYPE_ARM_ALL; }, - .x86_64 => .{ - .cpu_type = macho.CPU_TYPE_X86_64, - .cpu_subtype = macho.CPU_SUBTYPE_X86_64_ALL, + .x86_64 => { + header.cputype = macho.CPU_TYPE_X86_64; + header.cpusubtype = macho.CPU_SUBTYPE_X86_64_ALL; }, else => return error.UnsupportedCpuArchitecture, - }; - header.cputype = cpu_info.cpu_type; - header.cpusubtype = cpu_info.cpu_subtype; + } switch (self.output.?.tag) { .exe => { header.filetype = macho.MH_EXECUTE; - header.flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE | macho.MH_TWOLEVEL; }, .dylib => { header.filetype = macho.MH_DYLIB; - header.flags = macho.MH_NOUNDEFS | - macho.MH_DYLDLINK | - macho.MH_PIE | - macho.MH_TWOLEVEL | - macho.MH_NO_REEXPORTED_DYLIBS; + header.flags |= macho.MH_NO_REEXPORTED_DYLIBS; }, } - header.reserved = 0; - if (self.tlv_section_index) |_| header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; header.ncmds = @intCast(u32, self.load_commands.items.len); header.sizeofcmds = 0; + for (self.load_commands.items) |cmd| { header.sizeofcmds += cmd.cmdsize(); } + log.debug("writing Mach-O header {}", .{header}); + try self.file.?.pwriteAll(mem.asBytes(&header), 0); } diff --git a/src/link/MachO/commands.zig 
b/src/link/MachO/commands.zig index 6958b8d1e6..5919496526 100644 --- a/src/link/MachO/commands.zig +++ b/src/link/MachO/commands.zig @@ -11,6 +11,28 @@ const Allocator = std.mem.Allocator; const MachO = @import("../MachO.zig"); const padToIdeal = MachO.padToIdeal; +pub const HeaderArgs = struct { + magic: u32 = macho.MH_MAGIC_64, + cputype: macho.cpu_type_t = 0, + cpusubtype: macho.cpu_subtype_t = 0, + filetype: u32 = 0, + flags: u32 = 0, + reserved: u32 = 0, +}; + +pub fn emptyHeader(args: HeaderArgs) macho.mach_header_64 { + return .{ + .magic = args.magic, + .cputype = args.cputype, + .cpusubtype = args.cpusubtype, + .filetype = args.filetype, + .ncmds = 0, + .sizeofcmds = 0, + .flags = args.flags, + .reserved = args.reserved, + }; +} + pub const LoadCommand = union(enum) { Segment: SegmentCommand, DyldInfoOnly: macho.dyld_info_command, From 3622fe08dbdcaccb04204b48257e1d5fcbe0d164 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 1 Jul 2021 15:11:55 +0200 Subject: [PATCH 03/81] zld: abstract away string table with fewer allocs --- CMakeLists.txt | 1 + src/link/MachO.zig | 100 +++++++++----------------------- src/link/MachO/DebugSymbols.zig | 14 ++--- src/link/MachO/StringTable.zig | 84 +++++++++++++++++++++++++++ 4 files changed, 119 insertions(+), 80 deletions(-) create mode 100644 src/link/MachO/StringTable.zig diff --git a/CMakeLists.txt b/CMakeLists.txt index 44417e4159..d47a285e31 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -581,6 +581,7 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/link/MachO/DebugSymbols.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Dylib.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Object.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/StringTable.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Symbol.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Trie.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Zld.zig" diff --git a/src/link/MachO.zig b/src/link/MachO.zig index f6ff03e92b..59cfab4292 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ 
-26,6 +26,7 @@ const target_util = @import("../target.zig"); const DebugSymbols = @import("MachO/DebugSymbols.zig"); const Trie = @import("MachO/Trie.zig"); const CodeSignature = @import("MachO/CodeSignature.zig"); +const StringTable = @import("MachO/StringTable.zig"); const Zld = @import("MachO/Zld.zig"); usingnamespace @import("MachO/commands.zig"); @@ -116,9 +117,7 @@ offset_table_free_list: std.ArrayListUnmanaged(u32) = .{}, stub_helper_stubs_start_off: ?u64 = null, -/// Table of symbol names aka the string table. -string_table: std.ArrayListUnmanaged(u8) = .{}, -string_table_directory: std.StringHashMapUnmanaged(u32) = .{}, +strtab: StringTable = undefined, /// Table of GOT entries. offset_table: std.ArrayListUnmanaged(GOTEntry) = .{}, @@ -131,9 +130,9 @@ rebase_info_dirty: bool = false, binding_info_dirty: bool = false, lazy_binding_info_dirty: bool = false, export_info_dirty: bool = false, -string_table_dirty: bool = false, -string_table_needs_relocation: bool = false, +strtab_dirty: bool = false, +strtab_needs_relocation: bool = false, /// A list of text blocks that have surplus capacity. 
This list can have false /// positives, as functions grow and shrink over time, only sometimes being added @@ -413,6 +412,7 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio pub fn createEmpty(gpa: *Allocator, options: link.Options) !*MachO { const self = try gpa.create(MachO); + self.* = .{ .base = .{ .tag = .macho, @@ -421,7 +421,9 @@ pub fn createEmpty(gpa: *Allocator, options: link.Options) !*MachO { .file = null, }, .page_size = if (options.target.cpu.arch == .aarch64) 0x4000 else 0x1000, + .strtab = try StringTable.init(gpa), }; + return self; } @@ -499,8 +501,8 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { assert(!self.binding_info_dirty); assert(!self.lazy_binding_info_dirty); assert(!self.export_info_dirty); - assert(!self.string_table_dirty); - assert(!self.string_table_needs_relocation); + assert(!self.strtab_dirty); + assert(!self.strtab_needs_relocation); if (target.cpu.arch == .aarch64) { switch (output_mode) { @@ -977,14 +979,7 @@ pub fn deinit(self: *MachO) void { self.text_block_free_list.deinit(self.base.allocator); self.offset_table.deinit(self.base.allocator); self.offset_table_free_list.deinit(self.base.allocator); - { - var it = self.string_table_directory.keyIterator(); - while (it.next()) |key| { - self.base.allocator.free(key.*); - } - } - self.string_table_directory.deinit(self.base.allocator); - self.string_table.deinit(self.base.allocator); + self.strtab.deinit(); self.globals.deinit(self.base.allocator); self.globals_free_list.deinit(self.base.allocator); self.locals.deinit(self.base.allocator); @@ -1202,7 +1197,7 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { const new_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{mem.spanZ(decl.name)}); defer self.base.allocator.free(new_name); - symbol.n_strx = try self.updateString(symbol.n_strx, new_name); + symbol.n_strx = try self.strtab.getOrPut(new_name); symbol.n_type = macho.N_SECT; 
symbol.n_sect = @intCast(u8, self.text_section_index.?) + 1; symbol.n_desc = 0; @@ -1214,7 +1209,7 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { const decl_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{mem.spanZ(decl.name)}); defer self.base.allocator.free(decl_name); - const name_str_index = try self.makeString(decl_name); + const name_str_index = try self.strtab.getOrPut(decl_name); const addr = try self.allocateTextBlock(&decl.link.macho, code.len, required_alignment); log.debug("allocated text block for {s} at 0x{x}", .{ decl_name, addr }); @@ -1404,14 +1399,14 @@ pub fn updateDeclExports( if (exp.link.macho.sym_index) |i| { const sym = &self.globals.items[i]; sym.* = .{ - .n_strx = try self.updateString(sym.n_strx, exp_name), + .n_strx = try self.strtab.getOrPut(exp_name), .n_type = n_type, .n_sect = @intCast(u8, self.text_section_index.?) + 1, .n_desc = n_desc, .n_value = decl_sym.n_value, }; } else { - const name_str_index = try self.makeString(exp_name); + const name_str_index = try self.strtab.getOrPut(exp_name); const i = if (self.globals_free_list.popOrNull()) |i| i else blk: { _ = self.globals.addOneAssumeCapacity(); self.export_info_dirty = true; @@ -1787,15 +1782,14 @@ pub fn populateMissingMetadata(self: *MachO) !void { symtab.symoff = @intCast(u32, symtab_off); symtab.nsyms = @intCast(u32, self.base.options.symbol_count_hint); - try self.string_table.append(self.base.allocator, 0); // Need a null at position 0. 
- const strtab_size = self.string_table.items.len; + const strtab_size = self.strtab.size(); const strtab_off = self.findFreeSpaceLinkedit(strtab_size, 1, symtab_off); log.debug("found string table free space 0x{x} to 0x{x}", .{ strtab_off, strtab_off + strtab_size }); symtab.stroff = @intCast(u32, strtab_off); symtab.strsize = @intCast(u32, strtab_size); self.load_commands_dirty = true; - self.string_table_dirty = true; + self.strtab_dirty = true; } if (self.dysymtab_cmd_index == null) { self.dysymtab_cmd_index = @intCast(u16, self.load_commands.items.len); @@ -1930,7 +1924,7 @@ pub fn populateMissingMetadata(self: *MachO) !void { if (!self.nonlazy_imports.contains("dyld_stub_binder")) { const index = @intCast(u32, self.nonlazy_imports.count()); const name = try self.base.allocator.dupe(u8, "dyld_stub_binder"); - const offset = try self.makeString("dyld_stub_binder"); + const offset = try self.strtab.getOrPut("dyld_stub_binder"); try self.nonlazy_imports.putNoClobber(self.base.allocator, name, .{ .symbol = .{ .n_strx = offset, @@ -2061,49 +2055,9 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, return vaddr; } -fn makeString(self: *MachO, bytes: []const u8) !u32 { - if (self.string_table_directory.get(bytes)) |offset| { - log.debug("reusing '{s}' from string table at offset 0x{x}", .{ bytes, offset }); - return offset; - } - - try self.string_table.ensureCapacity(self.base.allocator, self.string_table.items.len + bytes.len + 1); - const offset = @intCast(u32, self.string_table.items.len); - - log.debug("writing new string '{s}' into string table at offset 0x{x}", .{ bytes, offset }); - - self.string_table.appendSliceAssumeCapacity(bytes); - self.string_table.appendAssumeCapacity(0); - - try self.string_table_directory.putNoClobber( - self.base.allocator, - try self.base.allocator.dupe(u8, bytes), - offset, - ); - - self.string_table_dirty = true; - if (self.d_sym) |*ds| - ds.string_table_dirty = true; - - return offset; -} - -fn 
getString(self: *MachO, str_off: u32) []const u8 { - assert(str_off < self.string_table.items.len); - return mem.spanZ(@ptrCast([*:0]const u8, self.string_table.items.ptr + str_off)); -} - -fn updateString(self: *MachO, old_str_off: u32, new_name: []const u8) !u32 { - const existing_name = self.getString(old_str_off); - if (mem.eql(u8, existing_name, new_name)) { - return old_str_off; - } - return self.makeString(new_name); -} - pub fn addExternSymbol(self: *MachO, name: []const u8) !u32 { const index = @intCast(u32, self.lazy_imports.count()); - const offset = try self.makeString(name); + const offset = try self.strtab.getOrPut(name); const sym_name = try self.base.allocator.dupe(u8, name); const dylib_ordinal = 1; // TODO this is now hardcoded, since we only support libSystem. try self.lazy_imports.putNoClobber(self.base.allocator, sym_name, .{ @@ -2293,7 +2247,7 @@ fn writeOffsetTableEntry(self: *MachO, index: usize) !void { }, } }; - const sym_name = self.getString(sym.n_strx); + const sym_name = self.strtab.get(sym.n_strx) orelse unreachable; log.debug("writing offset table entry [ 0x{x} => 0x{x} ({s}) ]", .{ off, sym.n_value, sym_name }); try self.base.file.?.pwriteAll(mem.asBytes(&sym.n_value), off); } @@ -2592,7 +2546,7 @@ fn relocateSymbolTable(self: *MachO) !void { const amt = try self.base.file.?.copyRangeAll(symtab.symoff, self.base.file.?, new_symoff, existing_size); if (amt != existing_size) return error.InputOutput; symtab.symoff = @intCast(u32, new_symoff); - self.string_table_needs_relocation = true; + self.strtab_needs_relocation = true; } symtab.nsyms = @intCast(u32, nsyms); self.load_commands_dirty = true; @@ -2791,7 +2745,7 @@ fn writeExportTrie(self: *MachO) !void { const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; for (self.globals.items) |symbol| { // TODO figure out if we should put all global symbols into the export trie - const name = self.getString(symbol.n_strx); + const name = 
self.strtab.get(symbol.n_strx) orelse unreachable; assert(symbol.n_value >= text_segment.inner.vmaddr); try trie.put(.{ .name = name, @@ -3065,26 +3019,26 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { } fn writeStringTable(self: *MachO) !void { - if (!self.string_table_dirty) return; + if (!self.strtab_dirty) return; const tracy = trace(@src()); defer tracy.end(); const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; const allocated_size = self.allocatedSizeLinkedit(symtab.stroff); - const needed_size = mem.alignForwardGeneric(u64, self.string_table.items.len, @alignOf(u64)); + const needed_size = mem.alignForwardGeneric(u64, self.strtab.size(), @alignOf(u64)); - if (needed_size > allocated_size or self.string_table_needs_relocation) { + if (needed_size > allocated_size or self.strtab_needs_relocation) { symtab.strsize = 0; symtab.stroff = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1, symtab.symoff)); - self.string_table_needs_relocation = false; + self.strtab_needs_relocation = false; } symtab.strsize = @intCast(u32, needed_size); log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); - try self.base.file.?.pwriteAll(self.string_table.items, symtab.stroff); + try self.base.file.?.pwriteAll(self.strtab.asSlice(), symtab.stroff); self.load_commands_dirty = true; - self.string_table_dirty = false; + self.strtab_dirty = false; } fn updateLinkeditSegmentSizes(self: *MachO) !void { diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index 62c2faad4b..38e13800a6 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -76,7 +76,7 @@ dbg_info_decl_last: ?*TextBlock = null, debug_string_table: std.ArrayListUnmanaged(u8) = .{}, load_commands_dirty: bool = false, -string_table_dirty: bool = false, +strtab_dirty: bool = false, debug_string_table_dirty: bool = false, debug_abbrev_section_dirty: bool = 
false, debug_aranges_section_dirty: bool = false, @@ -131,7 +131,7 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void }, }); self.load_commands_dirty = true; - self.string_table_dirty = true; + self.strtab_dirty = true; } if (self.pagezero_segment_cmd_index == null) { self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); @@ -593,7 +593,7 @@ pub fn flushModule(self: *DebugSymbols, allocator: *Allocator, options: link.Opt try self.writeHeader(); assert(!self.load_commands_dirty); - assert(!self.string_table_dirty); + assert(!self.strtab_dirty); assert(!self.debug_abbrev_section_dirty); assert(!self.debug_aranges_section_dirty); assert(!self.debug_string_table_dirty); @@ -807,14 +807,14 @@ pub fn writeLocalSymbol(self: *DebugSymbols, index: usize) !void { } fn writeStringTable(self: *DebugSymbols) !void { - if (!self.string_table_dirty) return; + if (!self.strtab_dirty) return; const tracy = trace(@src()); defer tracy.end(); const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; const allocated_size = self.allocatedSizeLinkedit(symtab.stroff); - const needed_size = mem.alignForwardGeneric(u64, self.base.string_table.items.len, @alignOf(u64)); + const needed_size = mem.alignForwardGeneric(u64, self.base.strtab.size(), @alignOf(u64)); if (needed_size > allocated_size) { symtab.strsize = 0; @@ -823,9 +823,9 @@ fn writeStringTable(self: *DebugSymbols) !void { symtab.strsize = @intCast(u32, needed_size); log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); - try self.file.pwriteAll(self.base.string_table.items, symtab.stroff); + try self.file.pwriteAll(self.base.strtab.asSlice(), symtab.stroff); self.load_commands_dirty = true; - self.string_table_dirty = false; + self.strtab_dirty = false; } pub fn updateDeclLineNumber(self: *DebugSymbols, module: *Module, decl: *const Module.Decl) !void { diff --git a/src/link/MachO/StringTable.zig 
b/src/link/MachO/StringTable.zig new file mode 100644 index 0000000000..5437c70476 --- /dev/null +++ b/src/link/MachO/StringTable.zig @@ -0,0 +1,84 @@ +const StringTable = @This(); + +const std = @import("std"); +const log = std.log.scoped(.strtab); +const mem = std.mem; + +const Allocator = mem.Allocator; + +allocator: *Allocator, +buffer: std.ArrayListUnmanaged(u8) = .{}, +used_offsets: std.ArrayListUnmanaged(u32) = .{}, +cache: std.StringHashMapUnmanaged(u32) = .{}, + +pub const Error = error{OutOfMemory}; + +pub fn init(allocator: *Allocator) Error!StringTable { + var strtab = StringTable{ + .allocator = allocator, + }; + try strtab.buffer.append(allocator, 0); + return strtab; +} + +pub fn deinit(self: *StringTable) void { + self.cache.deinit(self.allocator); + self.used_offsets.deinit(self.allocator); + self.buffer.deinit(self.allocator); +} + +pub fn getOrPut(self: *StringTable, string: []const u8) Error!u32 { + if (self.cache.get(string)) |off| { + log.debug("reusing string '{s}' at offset 0x{x}", .{ string, off }); + return off; + } + + const invalidate_cache = self.needsToGrow(string.len + 1); + + try self.buffer.ensureUnusedCapacity(self.allocator, string.len + 1); + const new_off = @intCast(u32, self.buffer.items.len); + + log.debug("writing new string '{s}' at offset 0x{x}", .{ string, new_off }); + + self.buffer.appendSliceAssumeCapacity(string); + self.buffer.appendAssumeCapacity(0); + + if (invalidate_cache) { + log.debug("invalidating cache", .{}); + // Re-create the cache. 
+ self.cache.clearRetainingCapacity(); + for (self.used_offsets.items) |off| { + try self.cache.putNoClobber(self.allocator, self.get(off).?, off); + } + } + + { + log.debug("cache:", .{}); + var it = self.cache.iterator(); + while (it.next()) |entry| { + log.debug(" | {s} => {}", .{ entry.key_ptr.*, entry.value_ptr.* }); + } + } + + try self.cache.putNoClobber(self.allocator, self.get(new_off).?, new_off); + try self.used_offsets.append(self.allocator, new_off); + + return new_off; +} + +pub fn get(self: StringTable, off: u32) ?[]const u8 { + if (off >= self.buffer.items.len) return null; + return mem.spanZ(@ptrCast([*:0]const u8, self.buffer.items.ptr + off)); +} + +pub fn asSlice(self: StringTable) []const u8 { + return self.buffer.items; +} + +pub fn size(self: StringTable) u64 { + return self.buffer.items.len; +} + +fn needsToGrow(self: StringTable, needed_space: u64) bool { + return self.buffer.capacity < needed_space + self.size(); +} From 2b3bda43e352152f0150bf2e795419cf1bcfcd90 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 1 Jul 2021 17:25:51 +0200 Subject: [PATCH 04/81] zld: abstract Symbol creation logic --- src/link/MachO.zig | 2 +- src/link/MachO/Dylib.zig | 22 ++-- src/link/MachO/Object.zig | 35 ++---- src/link/MachO/StringTable.zig | 34 ++---- src/link/MachO/Symbol.zig | 189 ++++++++++++++++++++++++++++++--- src/link/MachO/Zld.zig | 133 ++++++----------------- 6 files changed, 231 insertions(+), 184 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 59cfab4292..8095366c15 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -724,7 +724,7 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { try fs.cwd().copyFile(the_object_path, fs.cwd(), full_out_path, .{}); } } else { - var zld = Zld.init(self.base.allocator); + var zld = try Zld.init(self.base.allocator); defer { zld.closeFiles(); zld.deinit(); diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index 324c54d362..cfd5ae18d3 100644 --- 
a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -146,7 +146,12 @@ pub const CreateOpts = struct { id: ?Id = null, }; -pub fn createAndParseFromPath(allocator: *Allocator, arch: Arch, path: []const u8, opts: CreateOpts) Error!?[]*Dylib { +pub fn createAndParseFromPath( + allocator: *Allocator, + arch: Arch, + path: []const u8, + opts: CreateOpts, +) Error!?[]*Dylib { const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { error.FileNotFound => return null, else => |e| return e, @@ -505,18 +510,7 @@ pub fn parseDependentLibs(self: *Dylib, out: *std.ArrayList(*Dylib)) !void { pub fn createProxy(self: *Dylib, sym_name: []const u8) !?*Symbol { if (!self.symbols.contains(sym_name)) return null; - - const name = try self.allocator.dupe(u8, sym_name); - const proxy = try self.allocator.create(Symbol.Proxy); - errdefer self.allocator.destroy(proxy); - - proxy.* = .{ - .base = .{ - .@"type" = .proxy, - .name = name, - }, + return Symbol.Proxy.new(self.allocator, sym_name, .{ .file = self, - }; - - return &proxy.base; + }); } diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index cb55dd1fd8..197c302316 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -9,12 +9,12 @@ const log = std.log.scoped(.object); const macho = std.macho; const mem = std.mem; const reloc = @import("reloc.zig"); +const parseName = @import("Zld.zig").parseName; const Allocator = mem.Allocator; const Arch = std.Target.Cpu.Arch; const Relocation = reloc.Relocation; const Symbol = @import("Symbol.zig"); -const parseName = @import("Zld.zig").parseName; usingnamespace @import("commands.zig"); @@ -437,47 +437,26 @@ pub fn parseSymbols(self: *Object) !void { if (Symbol.isWeakDef(sym) or Symbol.isPext(sym)) break :linkage .linkage_unit; break :linkage .global; }; - const regular = try self.allocator.create(Symbol.Regular); - errdefer self.allocator.destroy(regular); - regular.* = .{ - .base = .{ - .@"type" = .regular, - .name = name, - }, + break 
:symbol try Symbol.Regular.new(self.allocator, name, .{ .linkage = linkage, .address = sym.n_value, .section = sym.n_sect - 1, .weak_ref = Symbol.isWeakRef(sym), .file = self, - }; - break :symbol ®ular.base; + }); } if (sym.n_value != 0) { - const tentative = try self.allocator.create(Symbol.Tentative); - errdefer self.allocator.destroy(tentative); - tentative.* = .{ - .base = .{ - .@"type" = .tentative, - .name = name, - }, + break :symbol try Symbol.Tentative.new(self.allocator, name, .{ .size = sym.n_value, .alignment = (sym.n_desc >> 8) & 0x0f, .file = self, - }; - break :symbol &tentative.base; + }); } - const undef = try self.allocator.create(Symbol.Unresolved); - errdefer self.allocator.destroy(undef); - undef.* = .{ - .base = .{ - .@"type" = .unresolved, - .name = name, - }, + break :symbol try Symbol.Unresolved.new(self.allocator, name, .{ .file = self, - }; - break :symbol &undef.base; + }); }; try self.symbols.append(self.allocator, symbol); diff --git a/src/link/MachO/StringTable.zig b/src/link/MachO/StringTable.zig index 5437c70476..43770afdc1 100644 --- a/src/link/MachO/StringTable.zig +++ b/src/link/MachO/StringTable.zig @@ -8,7 +8,6 @@ const Allocator = mem.Allocator; allocator: *Allocator, buffer: std.ArrayListUnmanaged(u8) = .{}, -used_offsets: std.ArrayListUnmanaged(u32) = .{}, cache: std.StringHashMapUnmanaged(u32) = .{}, pub const Error = error{OutOfMemory}; @@ -22,8 +21,13 @@ pub fn init(allocator: *Allocator) Error!StringTable { } pub fn deinit(self: *StringTable) void { + { + var it = self.cache.keyIterator(); + while (it.next()) |key| { + self.allocator.free(key.*); + } + } self.cache.deinit(self.allocator); - self.used_offsets.deinit(self.allocator); self.buffer.deinit(self.allocator); } @@ -33,8 +37,6 @@ pub fn getOrPut(self: *StringTable, string: []const u8) Error!u32 { return off; } - const invalidate_cache = self.needsToGrow(string.len + 1); - try self.buffer.ensureUnusedCapacity(self.allocator, string.len + 1); const new_off = 
@intCast(u32, self.buffer.items.len); @@ -43,25 +45,7 @@ pub fn getOrPut(self: *StringTable, string: []const u8) Error!u32 { self.buffer.appendSliceAssumeCapacity(string); self.buffer.appendAssumeCapacity(0); - if (invalidate_cache) { - log.debug("invalidating cache", .{}); - // Re-create the cache. - self.cache.clearRetainingCapacity(); - for (self.used_offsets.items) |off| { - try self.cache.putNoClobber(self.allocator, self.get(off).?, off); - } - } - - { - log.debug("cache:", .{}); - var it = self.cache.iterator(); - while (it.next()) |entry| { - log.debug(" | {s} => {}", .{ entry.key_ptr.*, entry.value_ptr.* }); - } - } - - try self.cache.putNoClobber(self.allocator, self.get(new_off).?, new_off); - try self.used_offsets.append(self.allocator, new_off); + try self.cache.putNoClobber(self.allocator, try self.allocator.dupe(u8, string), new_off); return new_off; } @@ -78,7 +62,3 @@ pub fn asSlice(self: StringTable) []const u8 { pub fn size(self: StringTable) u64 { return self.buffer.items.len; } - -fn needsToGrow(self: StringTable, needed_space: u64) bool { - return self.buffer.capacity < needed_space + self.size(); -} diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index 8da4704909..59a6f3d836 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -7,6 +7,7 @@ const mem = std.mem; const Allocator = mem.Allocator; const Dylib = @import("Dylib.zig"); const Object = @import("Object.zig"); +const StringTable = @import("StringTable.zig"); pub const Type = enum { regular, @@ -19,7 +20,7 @@ pub const Type = enum { @"type": Type, /// Symbol name. Owned slice. -name: []u8, +name: []const u8, /// Alias of. alias: ?*Symbol = null, @@ -43,23 +44,14 @@ pub const Regular = struct { section: u8, /// Whether the symbol is a weak ref. - weak_ref: bool, + weak_ref: bool = false, /// Object file where to locate this symbol. - file: *Object, + /// null means self-reference. + file: ?*Object = null, /// Debug stab if defined. 
- stab: ?struct { - /// Stab kind - kind: enum { - function, - global, - static, - }, - - /// Size of the stab. - size: u64, - } = null, + stab: ?Stab = null, /// True if symbol was already committed into the final /// symbol table. @@ -73,6 +65,68 @@ pub const Regular = struct { global, }; + pub const Stab = struct { + /// Stab kind + kind: enum { + function, + global, + static, + }, + + /// Size of the stab. + size: u64, + }; + + const Opts = struct { + linkage: Linkage = .translation_unit, + address: u64 = 0, + section: u8 = 0, + weak_ref: bool = false, + file: ?*Object = null, + stab: ?Stab = null, + }; + + pub fn new(allocator: *Allocator, name: []const u8, opts: Opts) !*Symbol { + const reg = try allocator.create(Regular); + errdefer allocator.destroy(reg); + + reg.* = .{ + .base = .{ + .@"type" = .regular, + .name = try allocator.dupe(u8, name), + }, + .linkage = opts.linkage, + .address = opts.address, + .section = opts.section, + .weak_ref = opts.weak_ref, + .file = opts.file, + .stab = opts.stab, + }; + + return ®.base; + } + + pub fn asNlist(regular: *Regular, strtab: *StringTable) !macho.nlist_64 { + const n_strx = try strtab.getOrPut(regular.base.name); + var nlist = macho.nlist_64{ + .n_strx = n_strx, + .n_type = macho.N_SECT, + .n_sect = regular.section, + .n_desc = 0, + .n_value = regular.address, + }; + + if (regular.linkage != .translation_unit) { + nlist.n_type |= macho.N_EXT; + } + if (regular.linkage == .linkage_unit) { + nlist.n_type |= macho.N_PEXT; + nlist.n_desc |= macho.N_WEAK_DEF; + } + + return nlist; + } + pub fn isTemp(regular: *Regular) bool { if (regular.linkage == .translation_unit) { return mem.startsWith(u8, regular.base.name, "l") or mem.startsWith(u8, regular.base.name, "L"); @@ -97,6 +151,36 @@ pub const Proxy = struct { pub const base_type: Symbol.Type = .proxy; + const Opts = struct { + file: ?*Dylib = null, + }; + + pub fn new(allocator: *Allocator, name: []const u8, opts: Opts) !*Symbol { + const proxy = try 
allocator.create(Proxy); + errdefer allocator.destroy(proxy); + + proxy.* = .{ + .base = .{ + .@"type" = .proxy, + .name = try allocator.dupe(u8, name), + }, + .file = opts.file, + }; + + return &proxy.base; + } + + pub fn asNlist(proxy: *Proxy, strtab: *StringTable) !macho.nlist_64 { + const n_strx = try strtab.getOrPut(proxy.base.name); + return macho.nlist_64{ + .n_strx = n_strx, + .n_type = macho.N_UNDF | macho.N_EXT, + .n_sect = 0, + .n_desc = (proxy.dylibOrdinal() * macho.N_SYMBOL_RESOLVER) | macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY, + .n_value = 0, + }; + } + pub fn deinit(proxy: *Proxy, allocator: *Allocator) void { proxy.bind_info.deinit(allocator); } @@ -115,6 +199,36 @@ pub const Unresolved = struct { file: ?*Object = null, pub const base_type: Symbol.Type = .unresolved; + + const Opts = struct { + file: ?*Object = null, + }; + + pub fn new(allocator: *Allocator, name: []const u8, opts: Opts) !*Symbol { + const undef = try allocator.create(Unresolved); + errdefer allocator.destroy(undef); + + undef.* = .{ + .base = .{ + .@"type" = .unresolved, + .name = try allocator.dupe(u8, name), + }, + .file = opts.file, + }; + + return &undef.base; + } + + pub fn asNlist(undef: *Unresolved, strtab: *StringTable) !macho.nlist_64 { + const n_strx = try strtab.getOrPut(undef.base.name); + return macho.nlist_64{ + .n_strx = n_strx, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + } }; pub const Tentative = struct { @@ -127,13 +241,49 @@ pub const Tentative = struct { alignment: u16, /// File where this symbol was referenced. 
- file: *Object, + file: ?*Object = null, pub const base_type: Symbol.Type = .tentative; + + const Opts = struct { + size: u64 = 0, + alignment: u16 = 0, + file: ?*Object = null, + }; + + pub fn new(allocator: *Allocator, name: []const u8, opts: Opts) !*Symbol { + const tent = try allocator.create(Tentative); + errdefer allocator.destroy(tent); + + tent.* = .{ + .base = .{ + .@"type" = .tentative, + .name = try allocator.dupe(u8, name), + }, + .size = opts.size, + .alignment = opts.alignment, + .file = opts.file, + }; + + return &tent.base; + } + + pub fn asNlist(tent: *Tentative, strtab: *StringTable) !macho.nlist_64 { + // TODO + const n_strx = try strtab.getOrPut(tent.base.name); + return macho.nlist_64{ + .n_strx = n_strx, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + } }; pub fn deinit(base: *Symbol, allocator: *Allocator) void { allocator.free(base.name); + switch (base.@"type") { .proxy => @fieldParentPtr(Proxy, "base", base).deinit(allocator), else => {}, @@ -154,6 +304,15 @@ pub fn getTopmostAlias(base: *Symbol) *Symbol { return base; } +pub fn asNlist(base: *Symbol, strtab: *StringTable) !macho.nlist_64 { + return switch (base.tag) { + .regular => @fieldParentPtr(Regular, "base", base).asNlist(strtab), + .proxy => @fieldParentPtr(Proxy, "base", base).asNlist(strtab), + .unresolved => @fieldParentPtr(Unresolved, "base", base).asNlist(strtab), + .tentative => @fieldParentPtr(Tentative, "base", base).asNlist(strtab), + }; +} + pub fn isStab(sym: macho.nlist_64) bool { return (macho.N_STAB & sym.n_type) != 0; } diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index d223e04fd7..a20879f856 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -17,6 +17,7 @@ const Archive = @import("Archive.zig"); const CodeSignature = @import("CodeSignature.zig"); const Dylib = @import("Dylib.zig"); const Object = @import("Object.zig"); +const StringTable = @import("StringTable.zig"); const Symbol = 
@import("Symbol.zig"); const Trie = @import("Trie.zig"); @@ -24,6 +25,7 @@ usingnamespace @import("commands.zig"); usingnamespace @import("bind.zig"); allocator: *Allocator, +strtab: StringTable, target: ?std.Target = null, page_size: ?u16 = null, @@ -109,9 +111,6 @@ tentatives: std.StringArrayHashMapUnmanaged(*Symbol) = .{}, /// Set if the linker found tentative definitions in any of the objects. tentative_defs_offset: u64 = 0, -strtab: std.ArrayListUnmanaged(u8) = .{}, -strtab_dir: std.StringHashMapUnmanaged(u32) = .{}, - threadlocal_offsets: std.ArrayListUnmanaged(TlvOffset) = .{}, // TODO merge with Symbol abstraction local_rebases: std.ArrayListUnmanaged(Pointer) = .{}, stubs: std.ArrayListUnmanaged(*Symbol) = .{}, @@ -138,8 +137,11 @@ const TlvOffset = struct { /// Default path to dyld const DEFAULT_DYLD_PATH: [*:0]const u8 = "/usr/lib/dyld"; -pub fn init(allocator: *Allocator) Zld { - return .{ .allocator = allocator }; +pub fn init(allocator: *Allocator) !Zld { + return Zld{ + .allocator = allocator, + .strtab = try StringTable.init(allocator), + }; } pub fn deinit(self: *Zld) void { @@ -180,15 +182,7 @@ pub fn deinit(self: *Zld) void { self.tentatives.deinit(self.allocator); self.globals.deinit(self.allocator); self.unresolved.deinit(self.allocator); - self.strtab.deinit(self.allocator); - - { - var it = self.strtab_dir.keyIterator(); - while (it.next()) |key| { - self.allocator.free(key.*); - } - } - self.strtab_dir.deinit(self.allocator); + self.strtab.deinit(); } pub fn closeFiles(self: Zld) void { @@ -1137,16 +1131,7 @@ fn allocateTentativeSymbols(self: *Zld) !void { // Convert tentative definitions into regular symbols. 
for (self.tentatives.values()) |sym| { const tent = sym.cast(Symbol.Tentative) orelse unreachable; - const reg = try self.allocator.create(Symbol.Regular); - errdefer self.allocator.destroy(reg); - - reg.* = .{ - .base = .{ - .@"type" = .regular, - .name = try self.allocator.dupe(u8, tent.base.name), - .got_index = tent.base.got_index, - .stubs_index = tent.base.stubs_index, - }, + const reg = try Symbol.Regular.new(self.allocator, tent.base.name, .{ .linkage = .global, .address = base_address, .section = section, @@ -1156,16 +1141,18 @@ fn allocateTentativeSymbols(self: *Zld) !void { .kind = .global, .size = 0, }, - }; + }); + reg.got_index = tent.base.got_index; + reg.stubs_index = tent.base.stubs_index; - try self.globals.putNoClobber(self.allocator, reg.base.name, ®.base); - tent.base.alias = ®.base; + try self.globals.putNoClobber(self.allocator, reg.name, reg); + tent.base.alias = reg; if (tent.base.got_index) |idx| { - self.got_entries.items[idx] = ®.base; + self.got_entries.items[idx] = reg; } if (tent.base.stubs_index) |idx| { - self.stubs.items[idx] = ®.base; + self.stubs.items[idx] = reg; } const address = mem.alignForwardGeneric(u64, base_address + tent.size, alignment); @@ -1615,15 +1602,8 @@ fn resolveSymbols(self: *Zld) !void { { const name = try self.allocator.dupe(u8, "dyld_stub_binder"); errdefer self.allocator.free(name); - const undef = try self.allocator.create(Symbol.Unresolved); - errdefer self.allocator.destroy(undef); - undef.* = .{ - .base = .{ - .@"type" = .unresolved, - .name = name, - }, - }; - try unresolved.append(&undef.base); + const undef = try Symbol.Unresolved.new(self.allocator, name, .{}); + try unresolved.append(undef); } var referenced = std.AutoHashMap(*Dylib, void).init(self.allocator); @@ -1641,16 +1621,7 @@ fn resolveSymbols(self: *Zld) !void { // TODO this is just a temp patch until I work out what to actually // do with ___dso_handle and __mh_execute_header symbols which are // synthetically created by the linker on 
macOS. - const name = try self.allocator.dupe(u8, undef.name); - const proxy = try self.allocator.create(Symbol.Proxy); - errdefer self.allocator.destroy(proxy); - proxy.* = .{ - .base = .{ - .@"type" = .proxy, - .name = name, - }, - }; - break :inner &proxy.base; + break :inner try Symbol.Proxy.new(self.allocator, undef.name, .{}); } self.unresolved.putAssumeCapacityNoClobber(undef.name, undef); @@ -2126,7 +2097,6 @@ fn populateMetadata(self: *Zld) !void { .strsize = 0, }, }); - try self.strtab.append(self.allocator, 0); } if (self.dysymtab_cmd_index == null) { @@ -2752,7 +2722,7 @@ fn writeDebugInfo(self: *Zld) !void { const dirname = std.fs.path.dirname(tu_path) orelse "./"; // Current dir try stabs.append(.{ - .n_strx = try self.makeString(tu_path[0 .. dirname.len + 1]), + .n_strx = try self.strtab.getOrPut(tu_path[0 .. dirname.len + 1]), .n_type = macho.N_SO, .n_sect = 0, .n_desc = 0, @@ -2760,7 +2730,7 @@ fn writeDebugInfo(self: *Zld) !void { }); // Artifact name try stabs.append(.{ - .n_strx = try self.makeString(tu_path[dirname.len + 1 ..]), + .n_strx = try self.strtab.getOrPut(tu_path[dirname.len + 1 ..]), .n_type = macho.N_SO, .n_sect = 0, .n_desc = 0, @@ -2768,7 +2738,7 @@ fn writeDebugInfo(self: *Zld) !void { }); // Path to object file with debug info try stabs.append(.{ - .n_strx = try self.makeString(object.name.?), + .n_strx = try self.strtab.getOrPut(object.name.?), .n_type = macho.N_OSO, .n_sect = 0, .n_desc = 1, @@ -2801,7 +2771,7 @@ fn writeDebugInfo(self: *Zld) !void { .n_value = reg.address, }); try stabs.append(.{ - .n_strx = try self.makeString(sym.name), + .n_strx = try self.strtab.getOrPut(sym.name), .n_type = macho.N_FUN, .n_sect = reg.section, .n_desc = 0, @@ -2824,7 +2794,7 @@ fn writeDebugInfo(self: *Zld) !void { }, .global => { try stabs.append(.{ - .n_strx = try self.makeString(sym.name), + .n_strx = try self.strtab.getOrPut(sym.name), .n_type = macho.N_GSYM, .n_sect = 0, .n_desc = 0, @@ -2833,7 +2803,7 @@ fn writeDebugInfo(self: 
*Zld) !void { }, .static => { try stabs.append(.{ - .n_strx = try self.makeString(sym.name), + .n_strx = try self.strtab.getOrPut(sym.name), .n_type = macho.N_STSYM, .n_sect = reg.section, .n_desc = 0, @@ -2892,24 +2862,14 @@ fn writeSymbolTable(self: *Zld) !void { if (reg.isTemp()) continue; if (reg.visited) continue; + const nlist = try reg.asNlist(&self.strtab); + switch (reg.linkage) { .translation_unit => { - try locals.append(.{ - .n_strx = try self.makeString(sym.name), - .n_type = macho.N_SECT, - .n_sect = reg.section, - .n_desc = 0, - .n_value = reg.address, - }); + try locals.append(nlist); }, else => { - try exports.append(.{ - .n_strx = try self.makeString(sym.name), - .n_type = macho.N_SECT | macho.N_EXT, - .n_sect = reg.section, - .n_desc = 0, - .n_value = reg.address, - }); + try exports.append(nlist); }, } @@ -2922,13 +2882,8 @@ fn writeSymbolTable(self: *Zld) !void { for (self.imports.values()) |sym| { const proxy = sym.cast(Symbol.Proxy) orelse unreachable; - try undefs.append(.{ - .n_strx = try self.makeString(sym.name), - .n_type = macho.N_UNDF | macho.N_EXT, - .n_sect = 0, - .n_desc = (proxy.dylibOrdinal() * macho.N_SYMBOL_RESOLVER) | macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY, - .n_value = 0, - }); + const nlist = try proxy.asNlist(&self.strtab); + try undefs.append(nlist); } const nlocals = locals.items.len; @@ -3017,14 +2972,14 @@ fn writeStringTable(self: *Zld) !void { const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; symtab.stroff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - symtab.strsize = @intCast(u32, mem.alignForwardGeneric(u64, self.strtab.items.len, @alignOf(u64))); + symtab.strsize = @intCast(u32, mem.alignForwardGeneric(u64, self.strtab.size(), @alignOf(u64))); seg.inner.filesize += symtab.strsize; log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); - try 
self.file.?.pwriteAll(self.strtab.items, symtab.stroff); + try self.file.?.pwriteAll(self.strtab.asSlice(), symtab.stroff); - if (symtab.strsize > self.strtab.items.len and self.target.?.cpu.arch == .x86_64) { + if (symtab.strsize > self.strtab.size() and self.target.?.cpu.arch == .x86_64) { // This is the last section, so we need to pad it out. try self.file.?.pwriteAll(&[_]u8{0}, seg.inner.fileoff + seg.inner.filesize - 1); } @@ -3173,26 +3128,6 @@ fn writeHeader(self: *Zld) !void { try self.file.?.pwriteAll(mem.asBytes(&header), 0); } -fn makeString(self: *Zld, bytes: []const u8) !u32 { - if (self.strtab_dir.get(bytes)) |offset| { - log.debug("reusing '{s}' from string table at offset 0x{x}", .{ bytes, offset }); - return offset; - } - - try self.strtab.ensureCapacity(self.allocator, self.strtab.items.len + bytes.len + 1); - const offset = @intCast(u32, self.strtab.items.len); - log.debug("writing new string '{s}' into string table at offset 0x{x}", .{ bytes, offset }); - self.strtab.appendSliceAssumeCapacity(bytes); - self.strtab.appendAssumeCapacity(0); - try self.strtab_dir.putNoClobber(self.allocator, try self.allocator.dupe(u8, bytes), offset); - return offset; -} - -fn getString(self: *const Zld, str_off: u32) []const u8 { - assert(str_off < self.strtab.items.len); - return mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr + str_off)); -} - pub fn parseName(name: *const [16]u8) []const u8 { const len = mem.indexOfScalar(u8, name, @as(u8, 0)) orelse name.len; return name[0..len]; From ee6e25bc13b3f23b5f2fd0c8b57f0d115c239fc2 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 2 Jul 2021 00:13:46 +0200 Subject: [PATCH 05/81] zld: add Symbol.Stab and move nlist creation logic there --- src/link/MachO/Archive.zig | 6 ++ src/link/MachO/Dylib.zig | 1 - src/link/MachO/Object.zig | 69 ++++++++++++--- src/link/MachO/Symbol.zig | 172 +++++++++++++++++++++++++++++++------ src/link/MachO/Zld.zig | 146 ++----------------------------- 5 files changed, 216 
insertions(+), 178 deletions(-) diff --git a/src/link/MachO/Archive.zig b/src/link/MachO/Archive.zig index 8f047b4968..4004cdaefc 100644 --- a/src/link/MachO/Archive.zig +++ b/src/link/MachO/Archive.zig @@ -81,6 +81,11 @@ const ar_hdr = extern struct { } } + fn date(self: ar_hdr) !u64 { + const value = getValue(&self.ar_date); + return std.fmt.parseInt(u64, value, 10); + } + fn size(self: ar_hdr) !u32 { const value = getValue(&self.ar_size); return std.fmt.parseInt(u32, value, 10); @@ -264,6 +269,7 @@ pub fn parseObject(self: Archive, offset: u32) !*Object { .file = try fs.cwd().openFile(self.name.?, .{}), .name = name, .file_offset = @intCast(u32, try reader.context.getPos()), + .mtime = try self.header.?.date(), }; try object.parse(); try reader.context.seekTo(0); diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index cfd5ae18d3..8fd4498931 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -18,7 +18,6 @@ const LibStub = @import("../tapi.zig").LibStub; usingnamespace @import("commands.zig"); allocator: *Allocator, - arch: ?Arch = null, header: ?macho.mach_header_64 = null, file: ?fs.File = null, diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 197c302316..952fbb794c 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -24,6 +24,7 @@ header: ?macho.mach_header_64 = null, file: ?fs.File = null, file_offset: ?u32 = null, name: ?[]const u8 = null, +mtime: ?u64 = null, load_commands: std.ArrayListUnmanaged(LoadCommand) = .{}, sections: std.ArrayListUnmanaged(Section) = .{}, @@ -45,12 +46,10 @@ dwarf_debug_line_index: ?u16 = null, dwarf_debug_ranges_index: ?u16 = null, symbols: std.ArrayListUnmanaged(*Symbol) = .{}, +stabs: std.ArrayListUnmanaged(*Symbol) = .{}, initializers: std.ArrayListUnmanaged(*Symbol) = .{}, data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, -tu_path: ?[]const u8 = null, -tu_mtime: ?u64 = null, - pub const Section = struct { inner: 
macho.section_64, code: []u8, @@ -223,16 +222,18 @@ pub fn deinit(self: *Object) void { } self.symbols.deinit(self.allocator); + for (self.stabs.items) |stab| { + stab.deinit(self.allocator); + self.allocator.destroy(stab); + } + self.stabs.deinit(self.allocator); + self.data_in_code_entries.deinit(self.allocator); self.initializers.deinit(self.allocator); if (self.name) |n| { self.allocator.free(n); } - - if (self.tu_path) |tu_path| { - self.allocator.free(tu_path); - } } pub fn closeFile(self: Object) void { @@ -484,11 +485,33 @@ pub fn parseDebugInfo(self: *Object) !void { const name = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT_name); const comp_dir = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT_comp_dir); - self.tu_path = try std.fs.path.join(self.allocator, &[_][]const u8{ comp_dir, name }); - self.tu_mtime = mtime: { - const stat = try self.file.?.stat(); - break :mtime @intCast(u64, @divFloor(stat.mtime, 1_000_000_000)); - }; + if (self.mtime == null) { + self.mtime = mtime: { + const file = self.file orelse break :mtime 0; + const stat = file.stat() catch break :mtime 0; + break :mtime @intCast(u64, @divFloor(stat.mtime, 1_000_000_000)); + }; + } + + try self.stabs.ensureUnusedCapacity(self.allocator, self.symbols.items.len + 4); + + // Current dir + self.stabs.appendAssumeCapacity(try Symbol.Stab.new(self.allocator, comp_dir, .{ + .kind = .so, + .file = self, + })); + + // Artifact name + self.stabs.appendAssumeCapacity(try Symbol.Stab.new(self.allocator, name, .{ + .kind = .so, + .file = self, + })); + + // Path to object file with debug info + self.stabs.appendAssumeCapacity(try Symbol.Stab.new(self.allocator, self.name.?, .{ + .kind = .oso, + .file = self, + })); for (self.symbols.items) |sym| { if (sym.cast(Symbol.Regular)) |reg| { @@ -500,7 +523,7 @@ pub fn parseDebugInfo(self: *Object) !void { } } else 0; - reg.stab = .{ + const stab = try Symbol.Stab.new(self.allocator, sym.name, .{ .kind = kind: { if (size > 0) 
break :kind .function; switch (reg.linkage) { @@ -509,9 +532,27 @@ pub fn parseDebugInfo(self: *Object) !void { } }, .size = size, - }; + .symbol = sym, + .file = self, + }); + self.stabs.appendAssumeCapacity(stab); + } else if (sym.cast(Symbol.Tentative)) |_| { + const stab = try Symbol.Stab.new(self.allocator, sym.name, .{ + .kind = .global, + .size = 0, + .symbol = sym, + .file = self, + }); + self.stabs.appendAssumeCapacity(stab); } } + + // Closing delimiter. + const delim_stab = try Symbol.Stab.new(self.allocator, "", .{ + .kind = .so, + .file = self, + }); + self.stabs.appendAssumeCapacity(delim_stab); } fn readSection(self: Object, allocator: *Allocator, index: u16) ![]u8 { diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index 59a6f3d836..023e2ed7a8 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -10,6 +10,7 @@ const Object = @import("Object.zig"); const StringTable = @import("StringTable.zig"); pub const Type = enum { + stab, regular, proxy, unresolved, @@ -31,6 +32,151 @@ got_index: ?u32 = null, /// Index in stubs table for late binding. stubs_index: ?u32 = null, +pub const Stab = struct { + base: Symbol, + + // Symbol kind: function, etc. + kind: Kind, + + // Size of stab. + size: u64, + + // Base regular symbol for this stub if defined. + symbol: ?*Symbol = null, + + // null means self-reference. 
+ file: ?*Object = null, + + pub const base_type: Symbol.Type = .stab; + + pub const Kind = enum { + so, + oso, + function, + global, + static, + }; + + const Opts = struct { + kind: Kind = .so, + size: u64 = 0, + symbol: ?*Symbol = null, + file: ?*Object = null, + }; + + pub fn new(allocator: *Allocator, name: []const u8, opts: Opts) !*Symbol { + const stab = try allocator.create(Stab); + errdefer allocator.destroy(stab); + + stab.* = .{ + .base = .{ + .@"type" = .stab, + .name = try allocator.dupe(u8, name), + }, + .kind = opts.kind, + .size = opts.size, + .symbol = opts.symbol, + .file = opts.file, + }; + + return &stab.base; + } + + pub fn asNlists(stab: *Stab, allocator: *Allocator, strtab: *StringTable) ![]macho.nlist_64 { + var out = std.ArrayList(macho.nlist_64).init(allocator); + defer out.deinit(); + if (stab.kind == .so) { + try out.append(.{ + .n_strx = try strtab.getOrPut(stab.base.name), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + } else if (stab.kind == .oso) { + const mtime = mtime: { + const object = stab.file orelse break :mtime 0; + break :mtime object.mtime orelse 0; + }; + try out.append(.{ + .n_strx = try strtab.getOrPut(stab.base.name), + .n_type = macho.N_OSO, + .n_sect = 0, + .n_desc = 1, + .n_value = mtime, + }); + } else outer: { + const symbol = stab.symbol orelse unreachable; + const regular = symbol.getTopmostAlias().cast(Regular) orelse unreachable; + const is_match = blk: { + if (regular.file == null and stab.file == null) break :blk true; + if (regular.file) |f1| { + if (stab.file) |f2| { + if (f1 == f2) break :blk true; + } + } + break :blk false; + }; + if (!is_match) break :outer; + + switch (stab.kind) { + .function => { + try out.ensureUnusedCapacity(4); + out.appendAssumeCapacity(.{ + .n_strx = 0, + .n_type = macho.N_BNSYM, + .n_sect = regular.section, + .n_desc = 0, + .n_value = regular.address, + }); + out.appendAssumeCapacity(.{ + .n_strx = try strtab.getOrPut(stab.base.name), + .n_type = 
macho.N_FUN, + .n_sect = regular.section, + .n_desc = 0, + .n_value = regular.address, + }); + out.appendAssumeCapacity(.{ + .n_strx = 0, + .n_type = macho.N_FUN, + .n_sect = 0, + .n_desc = 0, + .n_value = stab.size, + }); + out.appendAssumeCapacity(.{ + .n_strx = 0, + .n_type = macho.N_ENSYM, + .n_sect = regular.section, + .n_desc = 0, + .n_value = stab.size, + }); + }, + .global => { + try out.append(.{ + .n_strx = try strtab.getOrPut(stab.base.name), + .n_type = macho.N_GSYM, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + }, + .static => { + try out.append(.{ + .n_strx = try strtab.getOrPut(stab.base.name), + .n_type = macho.N_STSYM, + .n_sect = regular.section, + .n_desc = 0, + .n_value = regular.address, + }); + }, + .so, .oso => unreachable, + } + } + + return out.toOwnedSlice(); + } +}; + pub const Regular = struct { base: Symbol, @@ -50,9 +196,6 @@ pub const Regular = struct { /// null means self-reference. file: ?*Object = null, - /// Debug stab if defined. - stab: ?Stab = null, - /// True if symbol was already committed into the final /// symbol table. visited: bool = false, @@ -65,25 +208,12 @@ pub const Regular = struct { global, }; - pub const Stab = struct { - /// Stab kind - kind: enum { - function, - global, - static, - }, - - /// Size of the stab. 
- size: u64, - }; - const Opts = struct { linkage: Linkage = .translation_unit, address: u64 = 0, section: u8 = 0, weak_ref: bool = false, file: ?*Object = null, - stab: ?Stab = null, }; pub fn new(allocator: *Allocator, name: []const u8, opts: Opts) !*Symbol { @@ -100,7 +230,6 @@ pub const Regular = struct { .section = opts.section, .weak_ref = opts.weak_ref, .file = opts.file, - .stab = opts.stab, }; return ®.base; @@ -304,15 +433,6 @@ pub fn getTopmostAlias(base: *Symbol) *Symbol { return base; } -pub fn asNlist(base: *Symbol, strtab: *StringTable) !macho.nlist_64 { - return switch (base.tag) { - .regular => @fieldParentPtr(Regular, "base", base).asNlist(strtab), - .proxy => @fieldParentPtr(Proxy, "base", base).asNlist(strtab), - .unresolved => @fieldParentPtr(Unresolved, "base", base).asNlist(strtab), - .tentative => @fieldParentPtr(Tentative, "base", base).asNlist(strtab), - }; -} - pub fn isStab(sym: macho.nlist_64) bool { return (macho.N_STAB & sym.n_type) != 0; } diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index a20879f856..64e5a2af20 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -1137,10 +1137,6 @@ fn allocateTentativeSymbols(self: *Zld) !void { .section = section, .weak_ref = false, .file = tent.file, - .stab = .{ - .kind = .global, - .size = 0, - }, }); reg.got_index = tent.base.got_index; reg.stubs_index = tent.base.stubs_index; @@ -2338,7 +2334,6 @@ fn flush(self: *Zld) !void { symtab.symoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); } - try self.writeDebugInfo(); try self.writeSymbolTable(); try self.writeStringTable(); @@ -2711,138 +2706,6 @@ fn writeExportInfo(self: *Zld) !void { try self.file.?.pwriteAll(buffer, dyld_info.export_off); } -fn writeDebugInfo(self: *Zld) !void { - var stabs = std.ArrayList(macho.nlist_64).init(self.allocator); - defer stabs.deinit(); - - for (self.objects.items) |object| { - const tu_path = object.tu_path orelse continue; - const tu_mtime = object.tu_mtime orelse 
continue; - _ = tu_mtime; - const dirname = std.fs.path.dirname(tu_path) orelse "./"; - // Current dir - try stabs.append(.{ - .n_strx = try self.strtab.getOrPut(tu_path[0 .. dirname.len + 1]), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - // Artifact name - try stabs.append(.{ - .n_strx = try self.strtab.getOrPut(tu_path[dirname.len + 1 ..]), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - // Path to object file with debug info - try stabs.append(.{ - .n_strx = try self.strtab.getOrPut(object.name.?), - .n_type = macho.N_OSO, - .n_sect = 0, - .n_desc = 1, - .n_value = 0, //tu_mtime, TODO figure out why precalculated mtime value doesn't work - }); - - for (object.symbols.items) |sym| { - const reg = reg: { - switch (sym.@"type") { - .regular => break :reg sym.cast(Symbol.Regular) orelse unreachable, - .tentative => { - const final = sym.getTopmostAlias().cast(Symbol.Regular) orelse unreachable; - if (object != final.file) continue; - break :reg final; - }, - else => continue, - } - }; - - if (reg.isTemp() or reg.stab == null) continue; - const stab = reg.stab orelse unreachable; - - switch (stab.kind) { - .function => { - try stabs.append(.{ - .n_strx = 0, - .n_type = macho.N_BNSYM, - .n_sect = reg.section, - .n_desc = 0, - .n_value = reg.address, - }); - try stabs.append(.{ - .n_strx = try self.strtab.getOrPut(sym.name), - .n_type = macho.N_FUN, - .n_sect = reg.section, - .n_desc = 0, - .n_value = reg.address, - }); - try stabs.append(.{ - .n_strx = 0, - .n_type = macho.N_FUN, - .n_sect = 0, - .n_desc = 0, - .n_value = stab.size, - }); - try stabs.append(.{ - .n_strx = 0, - .n_type = macho.N_ENSYM, - .n_sect = reg.section, - .n_desc = 0, - .n_value = stab.size, - }); - }, - .global => { - try stabs.append(.{ - .n_strx = try self.strtab.getOrPut(sym.name), - .n_type = macho.N_GSYM, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - }, - .static => { - try stabs.append(.{ - .n_strx = try 
self.strtab.getOrPut(sym.name), - .n_type = macho.N_STSYM, - .n_sect = reg.section, - .n_desc = 0, - .n_value = reg.address, - }); - }, - } - } - - // Close the source file! - try stabs.append(.{ - .n_strx = 0, - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - } - - if (stabs.items.len == 0) return; - - // Write stabs into the symbol table - const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - - symtab.nsyms = @intCast(u32, stabs.items.len); - - const stabs_off = symtab.symoff; - const stabs_size = symtab.nsyms * @sizeOf(macho.nlist_64); - log.debug("writing symbol stabs from 0x{x} to 0x{x}", .{ stabs_off, stabs_size + stabs_off }); - try self.file.?.pwriteAll(mem.sliceAsBytes(stabs.items), stabs_off); - - linkedit.inner.filesize += stabs_size; - - // Update dynamic symbol table. - const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; - dysymtab.nlocalsym = symtab.nsyms; -} - fn writeSymbolTable(self: *Zld) !void { const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; @@ -2854,6 +2717,15 @@ fn writeSymbolTable(self: *Zld) !void { defer exports.deinit(); for (self.objects.items) |object| { + for (object.stabs.items) |sym| { + const stab = sym.cast(Symbol.Stab) orelse unreachable; + + const nlists = try stab.asNlists(self.allocator, &self.strtab); + defer self.allocator.free(nlists); + + try locals.appendSlice(nlists); + } + for (object.symbols.items) |sym| { const final = sym.getTopmostAlias(); if (final.@"type" != .regular) continue; From 980f2915fa15ab35029e8f3cab21d309811f6e30 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 2 Jul 2021 11:58:28 +0200 Subject: [PATCH 06/81] zld: use index to symbol in reloc instead of pointer to the Symbol struct in the hope that we can overwrite the Symbol in 
the object's symbol table with the resolved Symbol later down the line. --- src/link/MachO/Object.zig | 4 ++-- src/link/MachO/Zld.zig | 16 +++++++++------- src/link/MachO/reloc.zig | 10 +++------- src/link/MachO/reloc/aarch64.zig | 17 ++++++++--------- src/link/MachO/reloc/x86_64.zig | 16 +++++++--------- 5 files changed, 29 insertions(+), 34 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 952fbb794c..1e169e93eb 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -377,7 +377,6 @@ pub fn parseSections(self: *Object) !void { self.arch.?, section.code, mem.bytesAsSlice(macho.relocation_info, raw_relocs), - self.symbols.items, ); } @@ -395,7 +394,8 @@ pub fn parseInitializers(self: *Object) !void { const relocs = section.relocs orelse unreachable; try self.initializers.ensureCapacity(self.allocator, relocs.len); for (relocs) |rel| { - self.initializers.appendAssumeCapacity(rel.target.symbol); + const sym = self.symbols.items[rel.target.symbol]; + self.initializers.appendAssumeCapacity(sym); } mem.reverse(*Symbol, self.initializers.items); diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 64e5a2af20..b0677f2604 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -1172,7 +1172,7 @@ fn allocateProxyBindAddresses(self: *Zld) !void { if (rel.@"type" != .unsigned) continue; // GOT is currently special-cased if (rel.target != .symbol) continue; - const sym = rel.target.symbol.getTopmostAlias(); + const sym = object.symbols.items[rel.target.symbol].getTopmostAlias(); if (sym.cast(Symbol.Proxy)) |proxy| { const target_map = sect.target_map orelse continue; const target_seg = self.load_commands.items[target_map.segment_id].Segment; @@ -1670,7 +1670,7 @@ fn resolveStubsAndGotEntries(self: *Zld) !void { switch (rel.@"type") { .unsigned => continue, .got_page, .got_page_off, .got_load, .got, .pointer_to_got => { - const sym = rel.target.symbol.getTopmostAlias(); + const sym = 
object.symbols.items[rel.target.symbol].getTopmostAlias(); if (sym.got_index != null) continue; const index = @intCast(u32, self.got_entries.items.len); @@ -1682,7 +1682,7 @@ fn resolveStubsAndGotEntries(self: *Zld) !void { else => { if (rel.target != .symbol) continue; - const sym = rel.target.symbol.getTopmostAlias(); + const sym = object.symbols.items[rel.target.symbol].getTopmostAlias(); assert(sym.@"type" != .unresolved); if (sym.stubs_index != null) continue; @@ -1781,7 +1781,7 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void { break :rebase false; } if (rel.target == .symbol) { - const final = rel.target.symbol.getTopmostAlias(); + const final = object.symbols.items[rel.target.symbol].getTopmostAlias(); if (final.cast(Symbol.Proxy)) |_| { break :rebase false; } @@ -1801,7 +1801,7 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void { // Calculate the offset to the initializer. if (flags == macho.S_THREAD_LOCAL_VARIABLES) tlv: { // TODO we don't want to save offset to tlv_bootstrap - if (mem.eql(u8, rel.target.symbol.name, "__tlv_bootstrap")) break :tlv; + if (mem.eql(u8, object.symbols.items[rel.target.symbol].name, "__tlv_bootstrap")) break :tlv; const base_addr = blk: { if (self.tlv_data_section_index) |index| { @@ -1823,7 +1823,8 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void { .got_page, .got_page_off, .got_load, .got, .pointer_to_got => { const dc_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; const got = dc_seg.sections.items[self.got_section_index.?]; - const final = rel.target.symbol.getTopmostAlias(); + const sym = object.symbols.items[rel.target.symbol]; + const final = sym.getTopmostAlias(); const got_index = final.got_index orelse { log.err("expected GOT index relocating symbol '{s}'", .{final.name}); log.err("this is an internal linker error", .{}); @@ -1879,7 +1880,8 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void { fn relocTargetAddr(self: *Zld, object: *const Object, target: 
reloc.Relocation.Target) !u64 { const target_addr = blk: { switch (target) { - .symbol => |sym| { + .symbol => |sym_id| { + const sym = object.symbols.items[sym_id]; const final = sym.getTopmostAlias(); if (final.cast(Symbol.Regular)) |reg| { log.debug(" | regular '{s}'", .{sym.name}); diff --git a/src/link/MachO/reloc.zig b/src/link/MachO/reloc.zig index 1e1b938196..02484923ff 100644 --- a/src/link/MachO/reloc.zig +++ b/src/link/MachO/reloc.zig @@ -10,7 +10,6 @@ const aarch64 = @import("reloc/aarch64.zig"); const x86_64 = @import("reloc/x86_64.zig"); const Allocator = mem.Allocator; -const Symbol = @import("Symbol.zig"); pub const Relocation = struct { @"type": Type, @@ -81,12 +80,12 @@ pub const Relocation = struct { }; pub const Target = union(enum) { - symbol: *Symbol, + symbol: u32, section: u16, - pub fn from_reloc(reloc: macho.relocation_info, symbols: []*Symbol) Target { + pub fn fromReloc(reloc: macho.relocation_info) Target { return if (reloc.r_extern == 1) .{ - .symbol = symbols[reloc.r_symbolnum], + .symbol = reloc.r_symbolnum, } else .{ .section = @intCast(u16, reloc.r_symbolnum - 1), }; @@ -142,7 +141,6 @@ pub fn parse( arch: std.Target.Cpu.Arch, code: []u8, relocs: []const macho.relocation_info, - symbols: []*Symbol, ) ![]*Relocation { var it = RelocIterator{ .buffer = relocs, @@ -155,7 +153,6 @@ pub fn parse( .it = &it, .code = code, .parsed = std.ArrayList(*Relocation).init(allocator), - .symbols = symbols, }; defer parser.deinit(); try parser.parse(); @@ -168,7 +165,6 @@ pub fn parse( .it = &it, .code = code, .parsed = std.ArrayList(*Relocation).init(allocator), - .symbols = symbols, }; defer parser.deinit(); try parser.parse(); diff --git a/src/link/MachO/reloc/aarch64.zig b/src/link/MachO/reloc/aarch64.zig index 16b982bf90..3eaeb65a9d 100644 --- a/src/link/MachO/reloc/aarch64.zig +++ b/src/link/MachO/reloc/aarch64.zig @@ -203,7 +203,6 @@ pub const Parser = struct { it: *reloc.RelocIterator, code: []u8, parsed: std.ArrayList(*Relocation), - 
symbols: []*Symbol, addend: ?u32 = null, subtractor: ?Relocation.Target = null, @@ -287,7 +286,7 @@ pub const Parser = struct { var branch = try parser.allocator.create(Branch); errdefer parser.allocator.destroy(branch); - const target = Relocation.Target.from_reloc(rel, parser.symbols); + const target = Relocation.Target.fromReloc(rel); branch.* = .{ .base = .{ @@ -308,7 +307,7 @@ pub const Parser = struct { assert(rel.r_length == 2); const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - const target = Relocation.Target.from_reloc(rel, parser.symbols); + const target = Relocation.Target.fromReloc(rel); const offset = @intCast(u32, rel.r_address); const inst = parser.code[offset..][0..4]; @@ -414,7 +413,7 @@ pub const Parser = struct { aarch64.Instruction.load_store_register, ), inst) }; } - const target = Relocation.Target.from_reloc(rel, parser.symbols); + const target = Relocation.Target.fromReloc(rel); var page_off = try parser.allocator.create(PageOff); errdefer parser.allocator.destroy(page_off); @@ -451,7 +450,7 @@ pub const Parser = struct { ), inst); assert(parsed_inst.size == 3); - const target = Relocation.Target.from_reloc(rel, parser.symbols); + const target = Relocation.Target.fromReloc(rel); var page_off = try parser.allocator.create(GotPageOff); errdefer parser.allocator.destroy(page_off); @@ -510,7 +509,7 @@ pub const Parser = struct { } }; - const target = Relocation.Target.from_reloc(rel, parser.symbols); + const target = Relocation.Target.fromReloc(rel); var page_off = try parser.allocator.create(TlvpPageOff); errdefer parser.allocator.destroy(page_off); @@ -545,7 +544,7 @@ pub const Parser = struct { assert(rel.r_pcrel == 0); assert(parser.subtractor == null); - parser.subtractor = Relocation.Target.from_reloc(rel, parser.symbols); + parser.subtractor = Relocation.Target.fromReloc(rel); // Verify SUBTRACTOR is followed by UNSIGNED. 
const next = @intToEnum(macho.reloc_type_arm64, parser.it.peek().r_type); @@ -568,7 +567,7 @@ pub const Parser = struct { var unsigned = try parser.allocator.create(reloc.Unsigned); errdefer parser.allocator.destroy(unsigned); - const target = Relocation.Target.from_reloc(rel, parser.symbols); + const target = Relocation.Target.fromReloc(rel); const is_64bit: bool = switch (rel.r_length) { 3 => true, 2 => false, @@ -605,7 +604,7 @@ pub const Parser = struct { var ptr_to_got = try parser.allocator.create(PointerToGot); errdefer parser.allocator.destroy(ptr_to_got); - const target = Relocation.Target.from_reloc(rel, parser.symbols); + const target = Relocation.Target.fromReloc(rel); const offset = @intCast(u32, rel.r_address); ptr_to_got.* = .{ diff --git a/src/link/MachO/reloc/x86_64.zig b/src/link/MachO/reloc/x86_64.zig index 6df68b6b3e..9f3c4702c8 100644 --- a/src/link/MachO/reloc/x86_64.zig +++ b/src/link/MachO/reloc/x86_64.zig @@ -9,7 +9,6 @@ const reloc = @import("../reloc.zig"); const Allocator = mem.Allocator; const Relocation = reloc.Relocation; -const Symbol = @import("../Symbol.zig"); pub const Branch = struct { base: Relocation, @@ -103,7 +102,6 @@ pub const Parser = struct { it: *reloc.RelocIterator, code: []u8, parsed: std.ArrayList(*Relocation), - symbols: []*Symbol, subtractor: ?Relocation.Target = null, pub fn deinit(parser: *Parser) void { @@ -154,7 +152,7 @@ pub const Parser = struct { var branch = try parser.allocator.create(Branch); errdefer parser.allocator.destroy(branch); - const target = Relocation.Target.from_reloc(rel, parser.symbols); + const target = Relocation.Target.fromReloc(rel); branch.* = .{ .base = .{ @@ -174,7 +172,7 @@ pub const Parser = struct { assert(rel.r_length == 2); const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - const target = Relocation.Target.from_reloc(rel, parser.symbols); + const target = Relocation.Target.fromReloc(rel); const offset = @intCast(u32, rel.r_address); const inst = 
parser.code[offset..][0..4]; @@ -213,7 +211,7 @@ pub const Parser = struct { const offset = @intCast(u32, rel.r_address); const inst = parser.code[offset..][0..4]; - const target = Relocation.Target.from_reloc(rel, parser.symbols); + const target = Relocation.Target.fromReloc(rel); var got_load = try parser.allocator.create(GotLoad); errdefer parser.allocator.destroy(got_load); @@ -239,7 +237,7 @@ pub const Parser = struct { const offset = @intCast(u32, rel.r_address); const inst = parser.code[offset..][0..4]; - const target = Relocation.Target.from_reloc(rel, parser.symbols); + const target = Relocation.Target.fromReloc(rel); const addend = mem.readIntLittle(i32, inst); var got = try parser.allocator.create(Got); @@ -267,7 +265,7 @@ pub const Parser = struct { const offset = @intCast(u32, rel.r_address); const inst = parser.code[offset..][0..4]; - const target = Relocation.Target.from_reloc(rel, parser.symbols); + const target = Relocation.Target.fromReloc(rel); var tlv = try parser.allocator.create(Tlv); errdefer parser.allocator.destroy(tlv); @@ -292,7 +290,7 @@ pub const Parser = struct { assert(rel.r_pcrel == 0); assert(parser.subtractor == null); - parser.subtractor = Relocation.Target.from_reloc(rel, parser.symbols); + parser.subtractor = Relocation.Target.fromReloc(rel); // Verify SUBTRACTOR is followed by UNSIGNED. 
const next = @intToEnum(macho.reloc_type_x86_64, parser.it.peek().r_type); @@ -315,7 +313,7 @@ pub const Parser = struct { var unsigned = try parser.allocator.create(reloc.Unsigned); errdefer parser.allocator.destroy(unsigned); - const target = Relocation.Target.from_reloc(rel, parser.symbols); + const target = Relocation.Target.fromReloc(rel); const is_64bit: bool = switch (rel.r_length) { 3 => true, 2 => false, From 989639efba0a7098819c3eb85130cb50413cbf7c Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 3 Jul 2021 10:48:39 +0200 Subject: [PATCH 07/81] zld: coalesce symbols on creation --- src/link/MachO/Object.zig | 98 ++------ src/link/MachO/Symbol.zig | 481 +++++++++++--------------------------- src/link/MachO/Zld.zig | 413 +++++++++++++++++--------------- 3 files changed, 385 insertions(+), 607 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 1e169e93eb..c7150c2edc 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -45,9 +45,12 @@ dwarf_debug_str_index: ?u16 = null, dwarf_debug_line_index: ?u16 = null, dwarf_debug_ranges_index: ?u16 = null, +symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{}, +strtab: std.ArrayListUnmanaged(u8) = .{}, + symbols: std.ArrayListUnmanaged(*Symbol) = .{}, stabs: std.ArrayListUnmanaged(*Symbol) = .{}, -initializers: std.ArrayListUnmanaged(*Symbol) = .{}, +initializers: std.ArrayListUnmanaged(u32) = .{}, data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, pub const Section = struct { @@ -216,20 +219,13 @@ pub fn deinit(self: *Object) void { } self.sections.deinit(self.allocator); - for (self.symbols.items) |sym| { - sym.deinit(self.allocator); - self.allocator.destroy(sym); - } self.symbols.deinit(self.allocator); - - for (self.stabs.items) |stab| { - stab.deinit(self.allocator); - self.allocator.destroy(stab); - } self.stabs.deinit(self.allocator); self.data_in_code_entries.deinit(self.allocator); self.initializers.deinit(self.allocator); + 
self.symtab.deinit(self.allocator); + self.strtab.deinit(self.allocator); if (self.name) |n| { self.allocator.free(n); @@ -271,11 +267,10 @@ pub fn parse(self: *Object) !void { self.header = header; try self.readLoadCommands(reader); - try self.parseSymbols(); try self.parseSections(); + try self.parseSymtab(); try self.parseDataInCode(); try self.parseInitializers(); - try self.parseDebugInfo(); } pub fn readLoadCommands(self: *Object, reader: anytype) !void { @@ -394,14 +389,13 @@ pub fn parseInitializers(self: *Object) !void { const relocs = section.relocs orelse unreachable; try self.initializers.ensureCapacity(self.allocator, relocs.len); for (relocs) |rel| { - const sym = self.symbols.items[rel.target.symbol]; - self.initializers.appendAssumeCapacity(sym); + self.initializers.appendAssumeCapacity(rel.target.symbol); } - mem.reverse(*Symbol, self.initializers.items); + mem.reverse(u32, self.initializers.items); } -pub fn parseSymbols(self: *Object) !void { +fn parseSymtab(self: *Object) !void { const index = self.symtab_cmd_index orelse return; const symtab_cmd = self.load_commands.items[index].Symtab; @@ -409,59 +403,12 @@ pub fn parseSymbols(self: *Object) !void { defer self.allocator.free(symtab); _ = try self.file.?.preadAll(symtab, symtab_cmd.symoff); const slice = @alignCast(@alignOf(macho.nlist_64), mem.bytesAsSlice(macho.nlist_64, symtab)); + try self.symtab.appendSlice(self.allocator, slice); var strtab = try self.allocator.alloc(u8, symtab_cmd.strsize); defer self.allocator.free(strtab); _ = try self.file.?.preadAll(strtab, symtab_cmd.stroff); - - for (slice) |sym| { - const sym_name = mem.spanZ(@ptrCast([*:0]const u8, strtab.ptr + sym.n_strx)); - - if (Symbol.isStab(sym)) { - log.err("unhandled symbol type: stab {s} in {s}", .{ sym_name, self.name.? }); - return error.UnhandledSymbolType; - } - if (Symbol.isIndr(sym)) { - log.err("unhandled symbol type: indirect {s} in {s}", .{ sym_name, self.name.? 
}); - return error.UnhandledSymbolType; - } - if (Symbol.isAbs(sym)) { - log.err("unhandled symbol type: absolute {s} in {s}", .{ sym_name, self.name.? }); - return error.UnhandledSymbolType; - } - - const name = try self.allocator.dupe(u8, sym_name); - const symbol: *Symbol = symbol: { - if (Symbol.isSect(sym)) { - const linkage: Symbol.Regular.Linkage = linkage: { - if (!Symbol.isExt(sym)) break :linkage .translation_unit; - if (Symbol.isWeakDef(sym) or Symbol.isPext(sym)) break :linkage .linkage_unit; - break :linkage .global; - }; - break :symbol try Symbol.Regular.new(self.allocator, name, .{ - .linkage = linkage, - .address = sym.n_value, - .section = sym.n_sect - 1, - .weak_ref = Symbol.isWeakRef(sym), - .file = self, - }); - } - - if (sym.n_value != 0) { - break :symbol try Symbol.Tentative.new(self.allocator, name, .{ - .size = sym.n_value, - .alignment = (sym.n_desc >> 8) & 0x0f, - .file = self, - }); - } - - break :symbol try Symbol.Unresolved.new(self.allocator, name, .{ - .file = self, - }); - }; - - try self.symbols.append(self.allocator, symbol); - } + try self.strtab.appendSlice(self.allocator, strtab); } pub fn parseDebugInfo(self: *Object) !void { @@ -555,14 +502,6 @@ pub fn parseDebugInfo(self: *Object) !void { self.stabs.appendAssumeCapacity(delim_stab); } -fn readSection(self: Object, allocator: *Allocator, index: u16) ![]u8 { - const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; - const sect = seg.sections.items[index]; - var buffer = try allocator.alloc(u8, @intCast(usize, sect.size)); - _ = try self.file.?.preadAll(buffer, sect.offset); - return buffer; -} - pub fn parseDataInCode(self: *Object) !void { const index = self.data_in_code_cmd_index orelse return; const data_in_code = self.load_commands.items[index].LinkeditData; @@ -582,3 +521,16 @@ pub fn parseDataInCode(self: *Object) !void { try self.data_in_code_entries.append(self.allocator, dice); } } + +fn readSection(self: Object, allocator: *Allocator, index: u16) 
![]u8 { + const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; + const sect = seg.sections.items[index]; + var buffer = try allocator.alloc(u8, @intCast(usize, sect.size)); + _ = try self.file.?.preadAll(buffer, sect.offset); + return buffer; +} + +pub fn getString(self: Object, off: u32) []const u8 { + assert(off < self.strtab.items.len); + return mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr + off)); +} diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index 023e2ed7a8..5a0bfe9762 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -1,6 +1,7 @@ const Symbol = @This(); const std = @import("std"); +const assert = std.debug.assert; const macho = std.macho; const mem = std.mem; @@ -9,177 +10,32 @@ const Dylib = @import("Dylib.zig"); const Object = @import("Object.zig"); const StringTable = @import("StringTable.zig"); -pub const Type = enum { - stab, - regular, - proxy, - unresolved, - tentative, -}; - -/// Symbol type. -@"type": Type, - /// Symbol name. Owned slice. name: []const u8, -/// Alias of. -alias: ?*Symbol = null, - /// Index in GOT table for indirection. got_index: ?u32 = null, /// Index in stubs table for late binding. stubs_index: ?u32 = null, -pub const Stab = struct { - base: Symbol, +payload: union(enum) { + regular: Regular, + tentative: Tentative, + proxy: Proxy, + undef: Undefined, - // Symbol kind: function, etc. - kind: Kind, - - // Size of stab. - size: u64, - - // Base regular symbol for this stub if defined. - symbol: ?*Symbol = null, - - // null means self-reference. 
- file: ?*Object = null, - - pub const base_type: Symbol.Type = .stab; - - pub const Kind = enum { - so, - oso, - function, - global, - static, - }; - - const Opts = struct { - kind: Kind = .so, - size: u64 = 0, - symbol: ?*Symbol = null, - file: ?*Object = null, - }; - - pub fn new(allocator: *Allocator, name: []const u8, opts: Opts) !*Symbol { - const stab = try allocator.create(Stab); - errdefer allocator.destroy(stab); - - stab.* = .{ - .base = .{ - .@"type" = .stab, - .name = try allocator.dupe(u8, name), - }, - .kind = opts.kind, - .size = opts.size, - .symbol = opts.symbol, - .file = opts.file, + pub fn format(self: @This(), comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + return switch (self) { + .regular => |p| p.format(fmt, options, writer), + .tentative => |p| p.format(fmt, options, writer), + .proxy => |p| p.format(fmt, options, writer), + .undef => |p| p.format(fmt, options, writer), }; - - return &stab.base; } - - pub fn asNlists(stab: *Stab, allocator: *Allocator, strtab: *StringTable) ![]macho.nlist_64 { - var out = std.ArrayList(macho.nlist_64).init(allocator); - defer out.deinit(); - if (stab.kind == .so) { - try out.append(.{ - .n_strx = try strtab.getOrPut(stab.base.name), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - } else if (stab.kind == .oso) { - const mtime = mtime: { - const object = stab.file orelse break :mtime 0; - break :mtime object.mtime orelse 0; - }; - try out.append(.{ - .n_strx = try strtab.getOrPut(stab.base.name), - .n_type = macho.N_OSO, - .n_sect = 0, - .n_desc = 1, - .n_value = mtime, - }); - } else outer: { - const symbol = stab.symbol orelse unreachable; - const regular = symbol.getTopmostAlias().cast(Regular) orelse unreachable; - const is_match = blk: { - if (regular.file == null and stab.file == null) break :blk true; - if (regular.file) |f1| { - if (stab.file) |f2| { - if (f1 == f2) break :blk true; - } - } - break :blk false; - }; - if (!is_match) 
break :outer; - - switch (stab.kind) { - .function => { - try out.ensureUnusedCapacity(4); - out.appendAssumeCapacity(.{ - .n_strx = 0, - .n_type = macho.N_BNSYM, - .n_sect = regular.section, - .n_desc = 0, - .n_value = regular.address, - }); - out.appendAssumeCapacity(.{ - .n_strx = try strtab.getOrPut(stab.base.name), - .n_type = macho.N_FUN, - .n_sect = regular.section, - .n_desc = 0, - .n_value = regular.address, - }); - out.appendAssumeCapacity(.{ - .n_strx = 0, - .n_type = macho.N_FUN, - .n_sect = 0, - .n_desc = 0, - .n_value = stab.size, - }); - out.appendAssumeCapacity(.{ - .n_strx = 0, - .n_type = macho.N_ENSYM, - .n_sect = regular.section, - .n_desc = 0, - .n_value = stab.size, - }); - }, - .global => { - try out.append(.{ - .n_strx = try strtab.getOrPut(stab.base.name), - .n_type = macho.N_GSYM, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - }, - .static => { - try out.append(.{ - .n_strx = try strtab.getOrPut(stab.base.name), - .n_type = macho.N_STSYM, - .n_sect = regular.section, - .n_desc = 0, - .n_value = regular.address, - }); - }, - .so, .oso => unreachable, - } - } - - return out.toOwnedSlice(); - } -}; +}, pub const Regular = struct { - base: Symbol, - /// Linkage type. linkage: Linkage, @@ -196,77 +52,56 @@ pub const Regular = struct { /// null means self-reference. file: ?*Object = null, - /// True if symbol was already committed into the final - /// symbol table. 
- visited: bool = false, - - pub const base_type: Symbol.Type = .regular; - pub const Linkage = enum { translation_unit, linkage_unit, global, }; - const Opts = struct { - linkage: Linkage = .translation_unit, - address: u64 = 0, - section: u8 = 0, - weak_ref: bool = false, - file: ?*Object = null, - }; - - pub fn new(allocator: *Allocator, name: []const u8, opts: Opts) !*Symbol { - const reg = try allocator.create(Regular); - errdefer allocator.destroy(reg); - - reg.* = .{ - .base = .{ - .@"type" = .regular, - .name = try allocator.dupe(u8, name), - }, - .linkage = opts.linkage, - .address = opts.address, - .section = opts.section, - .weak_ref = opts.weak_ref, - .file = opts.file, - }; - - return ®.base; - } - - pub fn asNlist(regular: *Regular, strtab: *StringTable) !macho.nlist_64 { - const n_strx = try strtab.getOrPut(regular.base.name); - var nlist = macho.nlist_64{ - .n_strx = n_strx, - .n_type = macho.N_SECT, - .n_sect = regular.section, - .n_desc = 0, - .n_value = regular.address, - }; - - if (regular.linkage != .translation_unit) { - nlist.n_type |= macho.N_EXT; - } - if (regular.linkage == .linkage_unit) { - nlist.n_type |= macho.N_PEXT; - nlist.n_desc |= macho.N_WEAK_DEF; - } - - return nlist; - } - - pub fn isTemp(regular: *Regular) bool { + pub fn isTemp(regular: Regular) bool { if (regular.linkage == .translation_unit) { return mem.startsWith(u8, regular.base.name, "l") or mem.startsWith(u8, regular.base.name, "L"); } return false; } + + pub fn format(self: Regular, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + try std.fmt.format(writer, "Regular {{ ", .{}); + try std.fmt.format(writer, ".linkage = {s}, ", .{self.linkage}); + try std.fmt.format(writer, ".address = 0x{x}, ", .{self.address}); + try std.fmt.format(writer, ".section = {}, ", .{self.section}); + if (self.weak_ref) { + try std.fmt.format(writer, ".weak_ref, ", .{}); + } + if (self.file) |file| { + try std.fmt.format(writer, ".file = {s}, ", 
.{file.name.?}); + } + try std.fmt.format(writer, "}}", .{}); + } +}; + +pub const Tentative = struct { + /// Symbol size. + size: u64, + + /// Symbol alignment as power of two. + alignment: u16, + + /// File where this symbol was referenced. + file: ?*Object = null, + + pub fn format(self: Tentative, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + try std.fmt.format(writer, "Tentative {{ ", .{}); + try std.fmt.format(writer, ".size = 0x{x}, ", .{self.size}); + try std.fmt.format(writer, ".alignment = 0x{x}, ", .{self.alignment}); + if (self.file) |file| { + try std.fmt.format(writer, ".file = {s}, ", .{file.name.?}); + } + try std.fmt.format(writer, "}}", .{}); + } }; pub const Proxy = struct { - base: Symbol, - /// Dynamic binding info - spots within the final /// executable where this proxy is referenced from. bind_info: std.ArrayListUnmanaged(struct { @@ -278,161 +113,123 @@ pub const Proxy = struct { /// null means self-reference. file: ?*Dylib = null, - pub const base_type: Symbol.Type = .proxy; - - const Opts = struct { - file: ?*Dylib = null, - }; - - pub fn new(allocator: *Allocator, name: []const u8, opts: Opts) !*Symbol { - const proxy = try allocator.create(Proxy); - errdefer allocator.destroy(proxy); - - proxy.* = .{ - .base = .{ - .@"type" = .proxy, - .name = try allocator.dupe(u8, name), - }, - .file = opts.file, - }; - - return &proxy.base; - } - - pub fn asNlist(proxy: *Proxy, strtab: *StringTable) !macho.nlist_64 { - const n_strx = try strtab.getOrPut(proxy.base.name); - return macho.nlist_64{ - .n_strx = n_strx, - .n_type = macho.N_UNDF | macho.N_EXT, - .n_sect = 0, - .n_desc = (proxy.dylibOrdinal() * macho.N_SYMBOL_RESOLVER) | macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY, - .n_value = 0, - }; - } - pub fn deinit(proxy: *Proxy, allocator: *Allocator) void { proxy.bind_info.deinit(allocator); } - pub fn dylibOrdinal(proxy: *Proxy) u16 { + pub fn dylibOrdinal(proxy: Proxy) u16 { const dylib = proxy.file orelse return 
0; return dylib.ordinal.?; } + + pub fn format(self: Proxy, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + try std.fmt.format(writer, "Proxy {{ ", .{}); + if (self.bind_info.items.len > 0) { + // TODO + try std.fmt.format(writer, ".bind_info = {}, ", .{self.bind_info.items.len}); + } + if (self.file) |file| { + try std.fmt.format(writer, ".file = {s}, ", .{file.name.?}); + } + try std.fmt.format(writer, "}}", .{}); + } }; -pub const Unresolved = struct { - base: Symbol, - +pub const Undefined = struct { /// File where this symbol was referenced. /// null means synthetic, e.g., dyld_stub_binder. file: ?*Object = null, - pub const base_type: Symbol.Type = .unresolved; - - const Opts = struct { - file: ?*Object = null, - }; - - pub fn new(allocator: *Allocator, name: []const u8, opts: Opts) !*Symbol { - const undef = try allocator.create(Unresolved); - errdefer allocator.destroy(undef); - - undef.* = .{ - .base = .{ - .@"type" = .unresolved, - .name = try allocator.dupe(u8, name), - }, - .file = opts.file, - }; - - return &undef.base; - } - - pub fn asNlist(undef: *Unresolved, strtab: *StringTable) !macho.nlist_64 { - const n_strx = try strtab.getOrPut(undef.base.name); - return macho.nlist_64{ - .n_strx = n_strx, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; + pub fn format(self: Undefined, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + try std.fmt.format(writer, "Undefined {{ ", .{}); + if (self.file) |file| { + try std.fmt.format(writer, ".file = {s}, ", .{file.name.?}); + } + try std.fmt.format(writer, "}}", .{}); } }; -pub const Tentative = struct { - base: Symbol, +/// Create new undefined symbol. +pub fn new(allocator: *Allocator, name: []const u8) !*Symbol { + const new_sym = try allocator.create(Symbol); + errdefer allocator.destroy(new_sym); - /// Symbol size. - size: u64, - - /// Symbol alignment as power of two. 
- alignment: u16, - - /// File where this symbol was referenced. - file: ?*Object = null, - - pub const base_type: Symbol.Type = .tentative; - - const Opts = struct { - size: u64 = 0, - alignment: u16 = 0, - file: ?*Object = null, + new_sym.* = .{ + .name = try allocator.dupe(u8, name), + .payload = .{ + .undef = .{}, + }, }; - pub fn new(allocator: *Allocator, name: []const u8, opts: Opts) !*Symbol { - const tent = try allocator.create(Tentative); - errdefer allocator.destroy(tent); + return new_sym; +} - tent.* = .{ - .base = .{ - .@"type" = .tentative, - .name = try allocator.dupe(u8, name), +pub fn asNlist(symbol: *Symbol, strtab: *StringTable) macho.nlist_64 { + const n_strx = try strtab.getOrPut(symbol.name); + const nlist = nlist: { + switch (symbol.payload) { + .regular => |regular| { + var nlist = macho.nlist_64{ + .n_strx = n_strx, + .n_type = macho.N_SECT, + .n_sect = regular.section, + .n_desc = 0, + .n_value = regular.address, + }; + + if (regular.linkage != .translation_unit) { + nlist.n_type |= macho.N_EXT; + } + if (regular.linkage == .linkage_unit) { + nlist.n_type |= macho.N_PEXT; + nlist.n_desc |= macho.N_WEAK_DEF; + } + + break :nlist nlist; }, - .size = opts.size, - .alignment = opts.alignment, - .file = opts.file, - }; + .tentative => |tentative| { + // TODO + break :nlist macho.nlist_64{ + .n_strx = n_strx, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + }, + .proxy => |proxy| { + break :nlist macho.nlist_64{ + .n_strx = n_strx, + .n_type = macho.N_UNDF | macho.N_EXT, + .n_sect = 0, + .n_desc = (proxy.dylibOrdinal() * macho.N_SYMBOL_RESOLVER) | macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY, + .n_value = 0, + }; + }, + .undef => |undef| { + // TODO + break :nlist macho.nlist_64{ + .n_strx = n_strx, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + }, + } + }; + return nlist; +} - return &tent.base; - } +pub fn deinit(symbol: *Symbol, allocator: *Allocator) void { + 
allocator.free(symbol.name); - pub fn asNlist(tent: *Tentative, strtab: *StringTable) !macho.nlist_64 { - // TODO - const n_strx = try strtab.getOrPut(tent.base.name); - return macho.nlist_64{ - .n_strx = n_strx, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - } -}; - -pub fn deinit(base: *Symbol, allocator: *Allocator) void { - allocator.free(base.name); - - switch (base.@"type") { - .proxy => @fieldParentPtr(Proxy, "base", base).deinit(allocator), + switch (symbol.payload) { + .proxy => |*proxy| proxy.deinit(allocator), else => {}, } } -pub fn cast(base: *Symbol, comptime T: type) ?*T { - if (base.@"type" != T.base_type) { - return null; - } - return @fieldParentPtr(T, "base", base); -} - -pub fn getTopmostAlias(base: *Symbol) *Symbol { - if (base.alias) |alias| { - return alias.getTopmostAlias(); - } - return base; -} - pub fn isStab(sym: macho.nlist_64) bool { return (macho.N_STAB & sym.n_type) != 0; } diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index b0677f2604..3eeaa3f181 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -102,10 +102,9 @@ objc_selrefs_section_index: ?u16 = null, objc_classrefs_section_index: ?u16 = null, objc_data_section_index: ?u16 = null, +locals: std.ArrayListUnmanaged(*Symbol) = .{}, globals: std.StringArrayHashMapUnmanaged(*Symbol) = .{}, imports: std.StringArrayHashMapUnmanaged(*Symbol) = .{}, -unresolved: std.StringArrayHashMapUnmanaged(*Symbol) = .{}, -tentatives: std.StringArrayHashMapUnmanaged(*Symbol) = .{}, /// Offset into __DATA,__common section. /// Set if the linker found tentative definitions in any of the objects. 
@@ -173,15 +172,24 @@ pub fn deinit(self: *Zld) void { } self.dylibs.deinit(self.allocator); - for (self.imports.values()) |proxy| { - proxy.deinit(self.allocator); - self.allocator.destroy(proxy); + for (self.imports.values()) |sym| { + sym.deinit(self.allocator); + self.allocator.destroy(sym); } self.imports.deinit(self.allocator); - self.tentatives.deinit(self.allocator); + for (self.globals.values()) |sym| { + sym.deinit(self.allocator); + self.allocator.destroy(sym); + } self.globals.deinit(self.allocator); - self.unresolved.deinit(self.allocator); + + for (self.locals.items) |sym| { + sym.deinit(self.allocator); + self.allocator.destroy(sym); + } + self.locals.deinit(self.allocator); + self.strtab.deinit(); } @@ -221,20 +229,21 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg try self.parseInputFiles(files, args.syslibroot); try self.parseLibs(args.libs, args.syslibroot); try self.resolveSymbols(); - try self.resolveStubsAndGotEntries(); - try self.updateMetadata(); - try self.sortSections(); - try self.addRpaths(args.rpaths); - try self.addDataInCodeLC(); - try self.addCodeSignatureLC(); - try self.allocateTextSegment(); - try self.allocateDataConstSegment(); - try self.allocateDataSegment(); - self.allocateLinkeditSegment(); - try self.allocateSymbols(); - try self.allocateTentativeSymbols(); - try self.allocateProxyBindAddresses(); - try self.flush(); + return error.TODO; + // try self.resolveStubsAndGotEntries(); + // try self.updateMetadata(); + // try self.sortSections(); + // try self.addRpaths(args.rpaths); + // try self.addDataInCodeLC(); + // try self.addCodeSignatureLC(); + // try self.allocateTextSegment(); + // try self.allocateDataConstSegment(); + // try self.allocateDataSegment(); + // self.allocateLinkeditSegment(); + // try self.allocateSymbols(); + // try self.allocateTentativeSymbols(); + // try self.allocateProxyBindAddresses(); + // try self.flush(); } fn parseInputFiles(self: *Zld, files: []const 
[]const u8, syslibroot: ?[]const u8) !void { @@ -1458,92 +1467,100 @@ fn writeStubInStubHelper(self: *Zld, index: u32) !void { fn resolveSymbolsInObject(self: *Zld, object: *Object) !void { log.debug("resolving symbols in '{s}'", .{object.name}); - for (object.symbols.items) |sym| { - if (sym.cast(Symbol.Regular)) |reg| { - if (reg.linkage == .translation_unit) continue; // Symbol local to TU. + for (object.symtab.items) |sym| { + const sym_name = object.getString(sym.n_strx); - if (self.tentatives.fetchSwapRemove(sym.name)) |kv| { - // Create link to the global. - kv.value.alias = sym; - } - if (self.unresolved.fetchSwapRemove(sym.name)) |kv| { - // Create link to the global. - kv.value.alias = sym; - } - const sym_ptr = self.globals.getPtr(sym.name) orelse { - // Put new global symbol into the symbol table. - try self.globals.putNoClobber(self.allocator, sym.name, sym); - continue; + if (Symbol.isStab(sym)) { + log.err("unhandled symbol type: stab {s}", .{sym_name}); + log.err(" | first definition in {s}", .{object.name.?}); + return error.UnhandledSymbolType; + } + + if (Symbol.isIndr(sym)) { + log.err("unhandled symbol type: indirect {s}", .{sym_name}); + log.err(" | first definition in {s}", .{object.name.?}); + return error.UnhandledSymbolType; + } + + if (Symbol.isAbs(sym)) { + log.err("unhandled symbol type: absolute {s}", .{sym_name}); + log.err(" | first definition in {s}", .{object.name.?}); + return error.UnhandledSymbolType; + } + + if (Symbol.isSect(sym) and !Symbol.isExt(sym)) { + // Regular symbol local to translation unit + const symbol = try Symbol.new(self.allocator, sym_name); + symbol.payload = .{ + .regular = .{ + .linkage = .translation_unit, + .address = sym.n_value, + .section = sym.n_sect - 1, + .weak_ref = Symbol.isWeakRef(sym), + .file = object, + }, }; - const g_sym = sym_ptr.*; - const g_reg = g_sym.cast(Symbol.Regular) orelse unreachable; + try self.locals.append(self.allocator, symbol); + try object.symbols.append(self.allocator, 
symbol); + continue; + } - switch (g_reg.linkage) { - .translation_unit => unreachable, - .linkage_unit => { - if (reg.linkage == .linkage_unit) { - // Create link to the first encountered linkage_unit symbol. - sym.alias = g_sym; - continue; - } - }, - .global => { - if (reg.linkage == .global) { - log.debug("symbol '{s}' defined multiple times", .{reg.base.name}); - return error.MultipleSymbolDefinitions; - } - sym.alias = g_sym; - continue; - }, + const symbol = self.globals.get(sym_name) orelse symbol: { + // Insert new global symbol. + const symbol = try Symbol.new(self.allocator, sym_name); + symbol.payload.undef.file = object; + try self.globals.putNoClobber(self.allocator, symbol.name, symbol); + break :symbol symbol; + }; + + if (Symbol.isSect(sym)) { + // Global symbol + const linkage: Symbol.Regular.Linkage = if (Symbol.isWeakDef(sym) or Symbol.isPext(sym)) + .linkage_unit + else + .global; + + const should_update = if (symbol.payload == .regular) blk: { + if (symbol.payload.regular.linkage == .global and linkage == .global) { + log.err("symbol '{s}' defined multiple times", .{sym_name}); + log.err(" | first definition in {s}", .{symbol.payload.regular.file.?.name.?}); + log.err(" | next definition in {s}", .{object.name.?}); + return error.MultipleSymbolDefinitions; + } + break :blk symbol.payload.regular.linkage != .global; + } else true; + + if (should_update) { + symbol.payload = .{ + .regular = .{ + .linkage = linkage, + .address = sym.n_value, + .section = sym.n_sect - 1, + .weak_ref = Symbol.isWeakRef(sym), + .file = object, + }, + }; } - - g_sym.alias = sym; - sym_ptr.* = sym; - } else if (sym.cast(Symbol.Tentative)) |tent| { - if (self.globals.get(sym.name)) |g_sym| { - sym.alias = g_sym; - continue; - } - - if (self.unresolved.fetchSwapRemove(sym.name)) |kv| { - kv.value.alias = sym; - } - - const sym_ptr = self.tentatives.getPtr(sym.name) orelse { - // Put new tentative definition symbol into symbol table. 
- try self.tentatives.putNoClobber(self.allocator, sym.name, sym); - continue; + } else if (sym.n_value != 0) { + // Tentative definition + const should_update = switch (symbol.payload) { + .tentative => |tent| tent.size < sym.n_value, + .undef => true, + else => false, }; - // Compare by size and pick the largest tentative definition. - // We model this like a heap where the tentative definition with the - // largest size always washes up on top. - const t_sym = sym_ptr.*; - const t_tent = t_sym.cast(Symbol.Tentative) orelse unreachable; + if (should_update) { + symbol.payload = .{ + .tentative = .{ + .size = sym.n_value, + .alignment = (sym.n_desc >> 8) & 0x0f, + .file = object, + }, + }; + } + } - if (tent.size < t_tent.size) { - sym.alias = t_sym; - continue; - } - - t_sym.alias = sym; - sym_ptr.* = sym; - } else if (sym.cast(Symbol.Unresolved)) |_| { - if (self.globals.get(sym.name)) |g_sym| { - sym.alias = g_sym; - continue; - } - if (self.tentatives.get(sym.name)) |t_sym| { - sym.alias = t_sym; - continue; - } - if (self.unresolved.get(sym.name)) |u_sym| { - sym.alias = u_sym; - continue; - } - - try self.unresolved.putNoClobber(self.allocator, sym.name, sym); - } else unreachable; + try object.symbols.append(self.allocator, symbol); } } @@ -1553,111 +1570,123 @@ fn resolveSymbols(self: *Zld) !void { try self.resolveSymbolsInObject(object); } - // Second pass, resolve symbols in static libraries. - var next_sym: usize = 0; - while (true) { - if (next_sym == self.unresolved.count()) break; + log.warn("globals", .{}); + for (self.globals.values()) |value| { + log.warn(" | {s}: {}", .{ value.name, value.payload }); + } - const sym = self.unresolved.values()[next_sym]; - - var reset: bool = false; - for (self.archives.items) |archive| { - // Check if the entry exists in a static archive. - const offsets = archive.toc.get(sym.name) orelse { - // No hit. 
- continue; - }; - assert(offsets.items.len > 0); - - const object = try archive.parseObject(offsets.items[0]); - try self.objects.append(self.allocator, object); - try self.resolveSymbolsInObject(object); - - reset = true; - break; - } - - if (reset) { - next_sym = 0; - } else { - next_sym += 1; + for (self.objects.items) |object| { + log.warn("object {s}", .{object.name.?}); + for (object.symbols.items) |sym| { + log.warn(" | {s}: {}", .{ sym.name, sym.payload }); } } - // Third pass, resolve symbols in dynamic libraries. - var unresolved = std.ArrayList(*Symbol).init(self.allocator); - defer unresolved.deinit(); + // // Second pass, resolve symbols in static libraries. + // var next_sym: usize = 0; + // while (true) { + // if (next_sym == self.unresolved.count()) break; - try unresolved.ensureCapacity(self.unresolved.count()); - for (self.unresolved.values()) |value| { - unresolved.appendAssumeCapacity(value); - } - self.unresolved.clearRetainingCapacity(); + // const sym = self.unresolved.values()[next_sym]; - // Put dyld_stub_binder as an unresolved special symbol. - { - const name = try self.allocator.dupe(u8, "dyld_stub_binder"); - errdefer self.allocator.free(name); - const undef = try Symbol.Unresolved.new(self.allocator, name, .{}); - try unresolved.append(undef); - } + // var reset: bool = false; + // for (self.archives.items) |archive| { + // // Check if the entry exists in a static archive. + // const offsets = archive.toc.get(sym.name) orelse { + // // No hit. 
+ // continue; + // }; + // assert(offsets.items.len > 0); - var referenced = std.AutoHashMap(*Dylib, void).init(self.allocator); - defer referenced.deinit(); + // const object = try archive.parseObject(offsets.items[0]); + // try self.objects.append(self.allocator, object); + // try self.resolveSymbolsInObject(object); - loop: while (unresolved.popOrNull()) |undef| { - const proxy = self.imports.get(undef.name) orelse outer: { - const proxy = inner: { - for (self.dylibs.items) |dylib| { - const proxy = (try dylib.createProxy(undef.name)) orelse continue; - try referenced.put(dylib, {}); - break :inner proxy; - } - if (mem.eql(u8, undef.name, "___dso_handle")) { - // TODO this is just a temp patch until I work out what to actually - // do with ___dso_handle and __mh_execute_header symbols which are - // synthetically created by the linker on macOS. - break :inner try Symbol.Proxy.new(self.allocator, undef.name, .{}); - } + // reset = true; + // break; + // } - self.unresolved.putAssumeCapacityNoClobber(undef.name, undef); - continue :loop; - }; + // if (reset) { + // next_sym = 0; + // } else { + // next_sym += 1; + // } + // } - try self.imports.putNoClobber(self.allocator, proxy.name, proxy); - break :outer proxy; - }; - undef.alias = proxy; - } + // // Third pass, resolve symbols in dynamic libraries. + // var unresolved = std.ArrayList(*Symbol).init(self.allocator); + // defer unresolved.deinit(); - // Add LC_LOAD_DYLIB load command for each referenced dylib/stub. 
- var it = referenced.iterator(); - while (it.next()) |entry| { - const dylib = entry.key_ptr.*; - dylib.ordinal = self.next_dylib_ordinal; - const dylib_id = dylib.id orelse unreachable; - var dylib_cmd = try createLoadDylibCommand( - self.allocator, - dylib_id.name, - dylib_id.timestamp, - dylib_id.current_version, - dylib_id.compatibility_version, - ); - errdefer dylib_cmd.deinit(self.allocator); - try self.load_commands.append(self.allocator, .{ .Dylib = dylib_cmd }); - self.next_dylib_ordinal += 1; - } + // try unresolved.ensureCapacity(self.unresolved.count()); + // for (self.unresolved.values()) |value| { + // unresolved.appendAssumeCapacity(value); + // } + // self.unresolved.clearRetainingCapacity(); - if (self.unresolved.count() > 0) { - for (self.unresolved.values()) |undef| { - log.err("undefined reference to symbol '{s}'", .{undef.name}); - if (undef.cast(Symbol.Unresolved).?.file) |file| { - log.err(" | referenced in {s}", .{file.name.?}); - } - } + // // Put dyld_stub_binder as an unresolved special symbol. + // { + // const name = try self.allocator.dupe(u8, "dyld_stub_binder"); + // errdefer self.allocator.free(name); + // const undef = try Symbol.Unresolved.new(self.allocator, name, .{}); + // try unresolved.append(undef); + // } - return error.UndefinedSymbolReference; - } + // var referenced = std.AutoHashMap(*Dylib, void).init(self.allocator); + // defer referenced.deinit(); + + // loop: while (unresolved.popOrNull()) |undef| { + // const proxy = self.imports.get(undef.name) orelse outer: { + // const proxy = inner: { + // for (self.dylibs.items) |dylib| { + // const proxy = (try dylib.createProxy(undef.name)) orelse continue; + // try referenced.put(dylib, {}); + // break :inner proxy; + // } + // if (mem.eql(u8, undef.name, "___dso_handle")) { + // // TODO this is just a temp patch until I work out what to actually + // // do with ___dso_handle and __mh_execute_header symbols which are + // // synthetically created by the linker on macOS. 
+ // break :inner try Symbol.Proxy.new(self.allocator, undef.name, .{}); + // } + + // self.unresolved.putAssumeCapacityNoClobber(undef.name, undef); + // continue :loop; + // }; + + // try self.imports.putNoClobber(self.allocator, proxy.name, proxy); + // break :outer proxy; + // }; + // undef.alias = proxy; + // } + + // // Add LC_LOAD_DYLIB load command for each referenced dylib/stub. + // var it = referenced.iterator(); + // while (it.next()) |entry| { + // const dylib = entry.key_ptr.*; + // dylib.ordinal = self.next_dylib_ordinal; + // const dylib_id = dylib.id orelse unreachable; + // var dylib_cmd = try createLoadDylibCommand( + // self.allocator, + // dylib_id.name, + // dylib_id.timestamp, + // dylib_id.current_version, + // dylib_id.compatibility_version, + // ); + // errdefer dylib_cmd.deinit(self.allocator); + // try self.load_commands.append(self.allocator, .{ .Dylib = dylib_cmd }); + // self.next_dylib_ordinal += 1; + // } + + // if (self.unresolved.count() > 0) { + // for (self.unresolved.values()) |undef| { + // log.err("undefined reference to symbol '{s}'", .{undef.name}); + // if (undef.cast(Symbol.Unresolved).?.file) |file| { + // log.err(" | referenced in {s}", .{file.name.?}); + // } + // } + + // return error.UndefinedSymbolReference; + // } } fn resolveStubsAndGotEntries(self: *Zld) !void { From ceb431507d827f5ac53a18d8904886708325c0e7 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 3 Jul 2021 14:31:49 +0200 Subject: [PATCH 08/81] zld: resolve symbols in dylibs using new scheme --- src/link/MachO/Dylib.zig | 7 -- src/link/MachO/Zld.zig | 202 +++++++++++++++++---------------------- 2 files changed, 89 insertions(+), 120 deletions(-) diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index 8fd4498931..b751249ce4 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -506,10 +506,3 @@ pub fn parseDependentLibs(self: *Dylib, out: *std.ArrayList(*Dylib)) !void { } } } - -pub fn createProxy(self: *Dylib, 
sym_name: []const u8) !?*Symbol { - if (!self.symbols.contains(sym_name)) return null; - return Symbol.Proxy.new(self.allocator, sym_name, .{ - .file = self, - }); -} diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 3eeaa3f181..0adac2aeb6 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -104,7 +104,6 @@ objc_data_section_index: ?u16 = null, locals: std.ArrayListUnmanaged(*Symbol) = .{}, globals: std.StringArrayHashMapUnmanaged(*Symbol) = .{}, -imports: std.StringArrayHashMapUnmanaged(*Symbol) = .{}, /// Offset into __DATA,__common section. /// Set if the linker found tentative definitions in any of the objects. @@ -172,12 +171,6 @@ pub fn deinit(self: *Zld) void { } self.dylibs.deinit(self.allocator); - for (self.imports.values()) |sym| { - sym.deinit(self.allocator); - self.allocator.destroy(sym); - } - self.imports.deinit(self.allocator); - for (self.globals.values()) |sym| { sym.deinit(self.allocator); self.allocator.destroy(sym); @@ -1570,6 +1563,95 @@ fn resolveSymbols(self: *Zld) !void { try self.resolveSymbolsInObject(object); } + // Second pass, resolve symbols in static libraries. + var sym_it = self.globals.iterator(); + while (sym_it.next()) |entry| { + const symbol = entry.value_ptr.*; + if (symbol.payload != .undef) continue; + + for (self.archives.items) |archive| { + // Check if the entry exists in a static archive. + const offsets = archive.toc.get(symbol.name) orelse { + // No hit. + continue; + }; + assert(offsets.items.len > 0); + + const object = try archive.parseObject(offsets.items[0]); + try self.objects.append(self.allocator, object); + try self.resolveSymbolsInObject(object); + + sym_it = self.globals.iterator(); + break; + } + } + + // Third pass, resolve symbols in dynamic libraries. + { + // Put dyld_stub_binder as an undefined special symbol. 
+ const symbol = try Symbol.new(self.allocator, "dyld_stub_binder"); + try self.globals.putNoClobber(self.allocator, symbol.name, symbol); + } + + var referenced = std.AutoHashMap(*Dylib, void).init(self.allocator); + defer referenced.deinit(); + + loop: for (self.globals.values()) |symbol| { + if (symbol.payload != .undef) continue; + + for (self.dylibs.items) |dylib| { + if (!dylib.symbols.contains(symbol.name)) continue; + + try referenced.put(dylib, {}); + symbol.payload = .{ + .proxy = .{ + .file = dylib, + }, + }; + continue :loop; + } + } + + // Add LC_LOAD_DYLIB load command for each referenced dylib/stub. + var it = referenced.iterator(); + while (it.next()) |entry| { + const dylib = entry.key_ptr.*; + dylib.ordinal = self.next_dylib_ordinal; + const dylib_id = dylib.id orelse unreachable; + var dylib_cmd = try createLoadDylibCommand( + self.allocator, + dylib_id.name, + dylib_id.timestamp, + dylib_id.current_version, + dylib_id.compatibility_version, + ); + errdefer dylib_cmd.deinit(self.allocator); + try self.load_commands.append(self.allocator, .{ .Dylib = dylib_cmd }); + self.next_dylib_ordinal += 1; + } + + // Fourth pass, handle synthetic symbols and flag any undefined references. 
+ if (self.globals.get("___dso_handle")) |symbol| { + if (symbol.payload == .undef) { + symbol.payload = .{ + .proxy = .{}, + }; + } + } + + var has_undefined = false; + for (self.globals.values()) |symbol| { + if (symbol.payload != .undef) continue; + + log.err("undefined reference to symbol '{s}'", .{symbol.name}); + if (symbol.payload.undef.file) |file| { + log.err(" | referenced in {s}", .{file.name.?}); + } + has_undefined = true; + } + + if (has_undefined) return error.UndefinedSymbolReference; + log.warn("globals", .{}); for (self.globals.values()) |value| { log.warn(" | {s}: {}", .{ value.name, value.payload }); @@ -1581,112 +1663,6 @@ fn resolveSymbols(self: *Zld) !void { log.warn(" | {s}: {}", .{ sym.name, sym.payload }); } } - - // // Second pass, resolve symbols in static libraries. - // var next_sym: usize = 0; - // while (true) { - // if (next_sym == self.unresolved.count()) break; - - // const sym = self.unresolved.values()[next_sym]; - - // var reset: bool = false; - // for (self.archives.items) |archive| { - // // Check if the entry exists in a static archive. - // const offsets = archive.toc.get(sym.name) orelse { - // // No hit. - // continue; - // }; - // assert(offsets.items.len > 0); - - // const object = try archive.parseObject(offsets.items[0]); - // try self.objects.append(self.allocator, object); - // try self.resolveSymbolsInObject(object); - - // reset = true; - // break; - // } - - // if (reset) { - // next_sym = 0; - // } else { - // next_sym += 1; - // } - // } - - // // Third pass, resolve symbols in dynamic libraries. - // var unresolved = std.ArrayList(*Symbol).init(self.allocator); - // defer unresolved.deinit(); - - // try unresolved.ensureCapacity(self.unresolved.count()); - // for (self.unresolved.values()) |value| { - // unresolved.appendAssumeCapacity(value); - // } - // self.unresolved.clearRetainingCapacity(); - - // // Put dyld_stub_binder as an unresolved special symbol. 
- // { - // const name = try self.allocator.dupe(u8, "dyld_stub_binder"); - // errdefer self.allocator.free(name); - // const undef = try Symbol.Unresolved.new(self.allocator, name, .{}); - // try unresolved.append(undef); - // } - - // var referenced = std.AutoHashMap(*Dylib, void).init(self.allocator); - // defer referenced.deinit(); - - // loop: while (unresolved.popOrNull()) |undef| { - // const proxy = self.imports.get(undef.name) orelse outer: { - // const proxy = inner: { - // for (self.dylibs.items) |dylib| { - // const proxy = (try dylib.createProxy(undef.name)) orelse continue; - // try referenced.put(dylib, {}); - // break :inner proxy; - // } - // if (mem.eql(u8, undef.name, "___dso_handle")) { - // // TODO this is just a temp patch until I work out what to actually - // // do with ___dso_handle and __mh_execute_header symbols which are - // // synthetically created by the linker on macOS. - // break :inner try Symbol.Proxy.new(self.allocator, undef.name, .{}); - // } - - // self.unresolved.putAssumeCapacityNoClobber(undef.name, undef); - // continue :loop; - // }; - - // try self.imports.putNoClobber(self.allocator, proxy.name, proxy); - // break :outer proxy; - // }; - // undef.alias = proxy; - // } - - // // Add LC_LOAD_DYLIB load command for each referenced dylib/stub. 
- // var it = referenced.iterator(); - // while (it.next()) |entry| { - // const dylib = entry.key_ptr.*; - // dylib.ordinal = self.next_dylib_ordinal; - // const dylib_id = dylib.id orelse unreachable; - // var dylib_cmd = try createLoadDylibCommand( - // self.allocator, - // dylib_id.name, - // dylib_id.timestamp, - // dylib_id.current_version, - // dylib_id.compatibility_version, - // ); - // errdefer dylib_cmd.deinit(self.allocator); - // try self.load_commands.append(self.allocator, .{ .Dylib = dylib_cmd }); - // self.next_dylib_ordinal += 1; - // } - - // if (self.unresolved.count() > 0) { - // for (self.unresolved.values()) |undef| { - // log.err("undefined reference to symbol '{s}'", .{undef.name}); - // if (undef.cast(Symbol.Unresolved).?.file) |file| { - // log.err(" | referenced in {s}", .{file.name.?}); - // } - // } - - // return error.UndefinedSymbolReference; - // } } fn resolveStubsAndGotEntries(self: *Zld) !void { From 7c82079d2cfb2f8c299707aa4c79455a73601914 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 3 Jul 2021 14:54:53 +0200 Subject: [PATCH 09/81] zld: allocate symbols using the new scheme --- src/link/MachO/Zld.zig | 213 +++++++++++++++++++++-------------------- 1 file changed, 108 insertions(+), 105 deletions(-) diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 0adac2aeb6..7f12c5ed25 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -108,6 +108,7 @@ globals: std.StringArrayHashMapUnmanaged(*Symbol) = .{}, /// Offset into __DATA,__common section. /// Set if the linker found tentative definitions in any of the objects. 
tentative_defs_offset: u64 = 0, +has_tentative_defs: bool = false, threadlocal_offsets: std.ArrayListUnmanaged(TlvOffset) = .{}, // TODO merge with Symbol abstraction local_rebases: std.ArrayListUnmanaged(Pointer) = .{}, @@ -222,20 +223,33 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg try self.parseInputFiles(files, args.syslibroot); try self.parseLibs(args.libs, args.syslibroot); try self.resolveSymbols(); + try self.resolveStubsAndGotEntries(); + try self.updateMetadata(); + try self.sortSections(); + try self.addRpaths(args.rpaths); + try self.addDataInCodeLC(); + try self.addCodeSignatureLC(); + try self.allocateTextSegment(); + try self.allocateDataConstSegment(); + try self.allocateDataSegment(); + self.allocateLinkeditSegment(); + try self.allocateSymbols(); + try self.allocateTentativeSymbols(); + try self.allocateProxyBindAddresses(); + + log.warn("globals", .{}); + for (self.globals.values()) |value| { + log.warn(" | {s}: {}", .{ value.name, value.payload }); + } + + for (self.objects.items) |object| { + log.warn("object {s}", .{object.name.?}); + for (object.symbols.items) |sym| { + log.warn(" | {s}: {}", .{ sym.name, sym.payload }); + } + } + return error.TODO; - // try self.resolveStubsAndGotEntries(); - // try self.updateMetadata(); - // try self.sortSections(); - // try self.addRpaths(args.rpaths); - // try self.addDataInCodeLC(); - // try self.addCodeSignatureLC(); - // try self.allocateTextSegment(); - // try self.allocateDataConstSegment(); - // try self.allocateDataSegment(); - // self.allocateLinkeditSegment(); - // try self.allocateSymbols(); - // try self.allocateTentativeSymbols(); - // try self.allocateProxyBindAddresses(); // try self.flush(); } @@ -351,7 +365,7 @@ fn updateMetadata(self: *Zld) !void { // Ensure we have __DATA,__common section if we have tentative definitions. // Update size and alignment of __DATA,__common section. 
- if (self.tentatives.values().len > 0) { + if (self.has_tentative_defs) { const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; const common_section_index = self.common_section_index orelse ind: { self.common_section_index = @intCast(u16, data_seg.sections.items.len); @@ -364,10 +378,10 @@ fn updateMetadata(self: *Zld) !void { var max_align: u16 = 0; var added_size: u64 = 0; - for (self.tentatives.values()) |sym| { - const tent = sym.cast(Symbol.Tentative) orelse unreachable; - max_align = math.max(max_align, tent.alignment); - added_size += tent.size; + for (self.globals.values()) |sym| { + if (sym.payload != .tentative) continue; + max_align = math.max(max_align, sym.payload.tentative.alignment); + added_size += sym.payload.tentative.size; } common_sect.@"align" = math.max(common_sect.@"align", max_align); @@ -1069,47 +1083,55 @@ fn allocateSegment(self: *Zld, index: u16, offset: u64) !void { seg.inner.vmsize = seg_size_aligned; } -fn allocateSymbols(self: *Zld) !void { - for (self.objects.items) |object| { - for (object.symbols.items) |sym| { - const reg = sym.cast(Symbol.Regular) orelse continue; +fn allocateSymbol(self: *Zld, symbol: *Symbol) !void { + const reg = &symbol.payload.regular; + const object = reg.file orelse return; + const source_sect = &object.sections.items[reg.section]; + const target_map = source_sect.target_map orelse { + log.debug("section '{s},{s}' not mapped for symbol '{s}'", .{ + parseName(&source_sect.inner.segname), + parseName(&source_sect.inner.sectname), + symbol.name, + }); + return; + }; - const source_sect = &object.sections.items[reg.section]; - const target_map = source_sect.target_map orelse { - log.debug("section '{s},{s}' not mapped for symbol '{s}'", .{ - parseName(&source_sect.inner.segname), - parseName(&source_sect.inner.sectname), - sym.name, - }); - continue; - }; + const target_seg = self.load_commands.items[target_map.segment_id].Segment; + const target_sect = 
target_seg.sections.items[target_map.section_id]; + const target_addr = target_sect.addr + target_map.offset; + const address = reg.address - source_sect.inner.addr + target_addr; - const target_seg = self.load_commands.items[target_map.segment_id].Segment; - const target_sect = target_seg.sections.items[target_map.section_id]; - const target_addr = target_sect.addr + target_map.offset; - const address = reg.address - source_sect.inner.addr + target_addr; + log.debug("resolving symbol '{s}' at 0x{x}", .{ symbol.name, address }); - log.debug("resolving symbol '{s}' at 0x{x}", .{ sym.name, address }); - - // TODO there might be a more generic way of doing this. - var section: u8 = 0; - for (self.load_commands.items) |cmd, cmd_id| { - if (cmd != .Segment) break; - if (cmd_id == target_map.segment_id) { - section += @intCast(u8, target_map.section_id) + 1; - break; - } - section += @intCast(u8, cmd.Segment.sections.items.len); - } - - reg.address = address; - reg.section = section; + // TODO there might be a more generic way of doing this. 
+ var section: u8 = 0; + for (self.load_commands.items) |cmd, cmd_id| { + if (cmd != .Segment) break; + if (cmd_id == target_map.segment_id) { + section += @intCast(u8, target_map.section_id) + 1; + break; } + section += @intCast(u8, cmd.Segment.sections.items.len); + } + + reg.address = address; + reg.section = section; +} + +fn allocateSymbols(self: *Zld) !void { + for (self.locals.items) |symbol| { + if (symbol.payload != .regular) continue; + try self.allocateSymbol(symbol); + } + + for (self.globals.values()) |symbol| { + if (symbol.payload != .regular) continue; + try self.allocateSymbol(symbol); } } fn allocateTentativeSymbols(self: *Zld) !void { - if (self.tentatives.values().len == 0) return; + if (!self.has_tentative_defs) return; const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; const common_sect = &data_seg.sections.items[self.common_section_index.?]; @@ -1131,36 +1153,21 @@ fn allocateTentativeSymbols(self: *Zld) !void { } // Convert tentative definitions into regular symbols. 
- for (self.tentatives.values()) |sym| { - const tent = sym.cast(Symbol.Tentative) orelse unreachable; - const reg = try Symbol.Regular.new(self.allocator, tent.base.name, .{ - .linkage = .global, - .address = base_address, - .section = section, - .weak_ref = false, - .file = tent.file, - }); - reg.got_index = tent.base.got_index; - reg.stubs_index = tent.base.stubs_index; + for (self.globals.values()) |sym| { + if (sym.payload != .tentative) continue; - try self.globals.putNoClobber(self.allocator, reg.name, reg); - tent.base.alias = reg; + const address = mem.alignForwardGeneric(u64, base_address + sym.payload.tentative.size, alignment); - if (tent.base.got_index) |idx| { - self.got_entries.items[idx] = reg; - } - if (tent.base.stubs_index) |idx| { - self.stubs.items[idx] = reg; - } - - const address = mem.alignForwardGeneric(u64, base_address + tent.size, alignment); - - log.debug("tentative definition '{s}' allocated from 0x{x} to 0x{x}", .{ - tent.base.name, - base_address, - address, - }); + log.debug("tentative definition '{s}' allocated from 0x{x} to 0x{x}", .{ sym.name, base_address, address }); + sym.payload = .{ + .regular = .{ + .linkage = .global, + .address = base_address, + .section = section, + .weak_ref = false, + }, + }; base_address = address; } } @@ -1174,17 +1181,17 @@ fn allocateProxyBindAddresses(self: *Zld) !void { if (rel.@"type" != .unsigned) continue; // GOT is currently special-cased if (rel.target != .symbol) continue; - const sym = object.symbols.items[rel.target.symbol].getTopmostAlias(); - if (sym.cast(Symbol.Proxy)) |proxy| { - const target_map = sect.target_map orelse continue; - const target_seg = self.load_commands.items[target_map.segment_id].Segment; - const target_sect = target_seg.sections.items[target_map.section_id]; + const sym = object.symbols.items[rel.target.symbol]; + if (sym.payload != .proxy) continue; - try proxy.bind_info.append(self.allocator, .{ - .segment_id = target_map.segment_id, - .address = target_sect.addr 
+ target_map.offset + rel.offset, - }); - } + const target_map = sect.target_map orelse continue; + const target_seg = self.load_commands.items[target_map.segment_id].Segment; + const target_sect = target_seg.sections.items[target_map.section_id]; + + try sym.payload.proxy.bind_info.append(self.allocator, .{ + .segment_id = target_map.segment_id, + .address = target_sect.addr + target_map.offset + rel.offset, + }); } } } @@ -1586,6 +1593,14 @@ fn resolveSymbols(self: *Zld) !void { } } + // Mark if we need to allocate zerofill section for tentative definitions + for (self.globals.values()) |symbol| { + if (symbol.payload == .tentative) { + self.has_tentative_defs = true; + break; + } + } + // Third pass, resolve symbols in dynamic libraries. { // Put dyld_stub_binder as an undefined special symbol. @@ -1651,18 +1666,6 @@ fn resolveSymbols(self: *Zld) !void { } if (has_undefined) return error.UndefinedSymbolReference; - - log.warn("globals", .{}); - for (self.globals.values()) |value| { - log.warn(" | {s}: {}", .{ value.name, value.payload }); - } - - for (self.objects.items) |object| { - log.warn("object {s}", .{object.name.?}); - for (object.symbols.items) |sym| { - log.warn(" | {s}: {}", .{ sym.name, sym.payload }); - } - } } fn resolveStubsAndGotEntries(self: *Zld) !void { @@ -1675,7 +1678,7 @@ fn resolveStubsAndGotEntries(self: *Zld) !void { switch (rel.@"type") { .unsigned => continue, .got_page, .got_page_off, .got_load, .got, .pointer_to_got => { - const sym = object.symbols.items[rel.target.symbol].getTopmostAlias(); + const sym = object.symbols.items[rel.target.symbol]; if (sym.got_index != null) continue; const index = @intCast(u32, self.got_entries.items.len); @@ -1687,11 +1690,11 @@ fn resolveStubsAndGotEntries(self: *Zld) !void { else => { if (rel.target != .symbol) continue; - const sym = object.symbols.items[rel.target.symbol].getTopmostAlias(); - assert(sym.@"type" != .unresolved); + const sym = object.symbols.items[rel.target.symbol]; + 
assert(sym.payload != .undef); if (sym.stubs_index != null) continue; - if (sym.@"type" != .proxy) continue; + if (sym.payload != .proxy) continue; const index = @intCast(u32, self.stubs.items.len); sym.stubs_index = index; @@ -1705,7 +1708,7 @@ fn resolveStubsAndGotEntries(self: *Zld) !void { } // Finally, put dyld_stub_binder as the final GOT entry - const sym = self.imports.get("dyld_stub_binder") orelse unreachable; + const sym = self.globals.get("dyld_stub_binder") orelse unreachable; const index = @intCast(u32, self.got_entries.items.len); sym.got_index = index; try self.got_entries.append(self.allocator, sym); From 3bd9f3801771189ad1aa9d9b41786026add38075 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 3 Jul 2021 15:30:42 +0200 Subject: [PATCH 10/81] zld: reenable entire linker in the new scheme without the stabs... They are tricky and need a bit more work. --- src/link/MachO/Symbol.zig | 21 ++-- src/link/MachO/Zld.zig | 244 ++++++++++++++++++-------------------- 2 files changed, 131 insertions(+), 134 deletions(-) diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index 5a0bfe9762..9c17c5c833 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -58,13 +58,6 @@ pub const Regular = struct { global, }; - pub fn isTemp(regular: Regular) bool { - if (regular.linkage == .translation_unit) { - return mem.startsWith(u8, regular.base.name, "l") or mem.startsWith(u8, regular.base.name, "L"); - } - return false; - } - pub fn format(self: Regular, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { try std.fmt.format(writer, "Regular {{ ", .{}); try std.fmt.format(writer, ".linkage = {s}, ", .{self.linkage}); @@ -164,7 +157,19 @@ pub fn new(allocator: *Allocator, name: []const u8) !*Symbol { return new_sym; } -pub fn asNlist(symbol: *Symbol, strtab: *StringTable) macho.nlist_64 { +pub fn isTemp(symbol: Symbol) bool { + switch (symbol.payload) { + .regular => |regular| { + if (regular.linkage == 
.translation_unit) { + return mem.startsWith(u8, symbol.name, "l") or mem.startsWith(u8, symbol.name, "L"); + } + }, + else => {}, + } + return false; +} + +pub fn asNlist(symbol: *Symbol, strtab: *StringTable) !macho.nlist_64 { const n_strx = try strtab.getOrPut(symbol.name); const nlist = nlist: { switch (symbol.payload) { diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 7f12c5ed25..f37bf7b696 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -237,20 +237,19 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg try self.allocateTentativeSymbols(); try self.allocateProxyBindAddresses(); - log.warn("globals", .{}); - for (self.globals.values()) |value| { - log.warn(" | {s}: {}", .{ value.name, value.payload }); - } + // log.warn("globals", .{}); + // for (self.globals.values()) |value| { + // log.warn(" | {s}: {}", .{ value.name, value.payload }); + // } - for (self.objects.items) |object| { - log.warn("object {s}", .{object.name.?}); - for (object.symbols.items) |sym| { - log.warn(" | {s}: {}", .{ sym.name, sym.payload }); - } - } + // for (self.objects.items) |object| { + // log.warn("object {s}", .{object.name.?}); + // for (object.symbols.items) |sym| { + // log.warn(" | {s}: {}", .{ sym.name, sym.payload }); + // } + // } - return error.TODO; - // try self.flush(); + try self.flush(); } fn parseInputFiles(self: *Zld, files: []const []const u8, syslibroot: ?[]const u8) !void { @@ -1226,7 +1225,7 @@ fn writeStubHelperCommon(self: *Zld) !void { code[9] = 0xff; code[10] = 0x25; { - const dyld_stub_binder = self.imports.get("dyld_stub_binder").?; + const dyld_stub_binder = self.globals.get("dyld_stub_binder").?; const addr = (got.addr + dyld_stub_binder.got_index.? 
* @sizeOf(u64)); const displacement = try math.cast(u32, addr - stub_helper.addr - code_size); mem.writeIntLittle(u32, code[11..], displacement); @@ -1270,7 +1269,7 @@ fn writeStubHelperCommon(self: *Zld) !void { code[10] = 0xbf; code[11] = 0xa9; binder_blk_outer: { - const dyld_stub_binder = self.imports.get("dyld_stub_binder").?; + const dyld_stub_binder = self.globals.get("dyld_stub_binder").?; const this_addr = stub_helper.addr + 3 * @sizeOf(u32); const target_addr = (got.addr + dyld_stub_binder.got_index.? * @sizeOf(u64)); binder_blk: { @@ -1789,8 +1788,8 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void { break :rebase false; } if (rel.target == .symbol) { - const final = object.symbols.items[rel.target.symbol].getTopmostAlias(); - if (final.cast(Symbol.Proxy)) |_| { + const sym = object.symbols.items[rel.target.symbol]; + if (sym.payload == .proxy) { break :rebase false; } } @@ -1832,9 +1831,8 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void { const dc_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; const got = dc_seg.sections.items[self.got_section_index.?]; const sym = object.symbols.items[rel.target.symbol]; - const final = sym.getTopmostAlias(); - const got_index = final.got_index orelse { - log.err("expected GOT index relocating symbol '{s}'", .{final.name}); + const got_index = sym.got_index orelse { + log.err("expected GOT index relocating symbol '{s}'", .{sym.name}); log.err("this is an internal linker error", .{}); return error.FailedToResolveRelocationTarget; }; @@ -1890,37 +1888,40 @@ fn relocTargetAddr(self: *Zld, object: *const Object, target: reloc.Relocation.T switch (target) { .symbol => |sym_id| { const sym = object.symbols.items[sym_id]; - const final = sym.getTopmostAlias(); - if (final.cast(Symbol.Regular)) |reg| { - log.debug(" | regular '{s}'", .{sym.name}); - break :blk reg.address; - } else if (final.cast(Symbol.Proxy)) |proxy| { - if (mem.eql(u8, sym.name, "__tlv_bootstrap")) { - log.debug(" | 
symbol '__tlv_bootstrap'", .{}); - const segment = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const tlv = segment.sections.items[self.tlv_section_index.?]; - break :blk tlv.addr; - } - - log.debug(" | symbol stub '{s}'", .{sym.name}); - const segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stubs = segment.sections.items[self.stubs_section_index.?]; - const stubs_index = proxy.base.stubs_index orelse { - if (proxy.bind_info.items.len > 0) { - break :blk 0; // Dynamically bound by dyld. + switch (sym.payload) { + .regular => |reg| { + log.debug(" | regular '{s}'", .{sym.name}); + break :blk reg.address; + }, + .proxy => |proxy| { + if (mem.eql(u8, sym.name, "__tlv_bootstrap")) { + log.debug(" | symbol '__tlv_bootstrap'", .{}); + const segment = self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const tlv = segment.sections.items[self.tlv_section_index.?]; + break :blk tlv.addr; } - log.err( - "expected stubs index or dynamic bind address when relocating symbol '{s}'", - .{final.name}, - ); + + log.debug(" | symbol stub '{s}'", .{sym.name}); + const segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const stubs = segment.sections.items[self.stubs_section_index.?]; + const stubs_index = sym.stubs_index orelse { + if (proxy.bind_info.items.len > 0) { + break :blk 0; // Dynamically bound by dyld. 
+ } + log.err( + "expected stubs index or dynamic bind address when relocating symbol '{s}'", + .{sym.name}, + ); + log.err("this is an internal linker error", .{}); + return error.FailedToResolveRelocationTarget; + }; + break :blk stubs.addr + stubs_index * stubs.reserved2; + }, + else => { + log.err("failed to resolve symbol '{s}' as a relocation target", .{sym.name}); log.err("this is an internal linker error", .{}); return error.FailedToResolveRelocationTarget; - }; - break :blk stubs.addr + stubs_index * stubs.reserved2; - } else { - log.err("failed to resolve symbol '{s}' as a relocation target", .{sym.name}); - log.err("this is an internal linker error", .{}); - return error.FailedToResolveRelocationTarget; + }, } }, .section => |sect_id| { @@ -2320,8 +2321,8 @@ fn flush(self: *Zld) !void { defer initializers.deinit(); for (self.objects.items) |object| { - for (object.initializers.items) |initializer| { - const address = initializer.cast(Symbol.Regular).?.address; + for (object.initializers.items) |sym_id| { + const address = object.symbols.items[sym_id].payload.regular.address; try initializers.append(address); } } @@ -2381,7 +2382,10 @@ fn writeGotEntries(self: *Zld) !void { var writer = stream.writer(); for (self.got_entries.items) |sym| { - const address: u64 = if (sym.cast(Symbol.Regular)) |reg| reg.address else 0; + const address: u64 = switch (sym.payload) { + .regular => |reg| reg.address, + else => 0, + }; try writer.writeIntLittle(u64, address); } @@ -2397,9 +2401,8 @@ fn setEntryPoint(self: *Zld) !void { // entrypoint. For now, assume default of `_main`. 
const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; const sym = self.globals.get("_main") orelse return error.MissingMainEntrypoint; - const entry_sym = sym.cast(Symbol.Regular) orelse unreachable; const ec = &self.load_commands.items[self.main_cmd_index.?].Main; - ec.entryoff = @intCast(u32, entry_sym.address - seg.inner.vmaddr); + ec.entryoff = @intCast(u32, sym.payload.regular.address - seg.inner.vmaddr); ec.stacksize = self.stack_size; } @@ -2417,7 +2420,8 @@ fn writeRebaseInfoTable(self: *Zld) !void { const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); for (self.got_entries.items) |sym| { - if (sym.@"type" == .proxy) continue; + if (sym.payload == .proxy) continue; + try pointers.append(.{ .offset = base_offset + sym.got_index.? * @sizeOf(u64), .segment_id = segment_id, @@ -2489,28 +2493,30 @@ fn writeBindInfoTable(self: *Zld) !void { const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); for (self.got_entries.items) |sym| { - if (sym.cast(Symbol.Proxy)) |proxy| { - try pointers.append(.{ - .offset = base_offset + proxy.base.got_index.? * @sizeOf(u64), - .segment_id = segment_id, - .dylib_ordinal = proxy.dylibOrdinal(), - .name = proxy.base.name, - }); - } + if (sym.payload != .proxy) continue; + + const proxy = sym.payload.proxy; + try pointers.append(.{ + .offset = base_offset + sym.got_index.? 
* @sizeOf(u64), + .segment_id = segment_id, + .dylib_ordinal = proxy.dylibOrdinal(), + .name = sym.name, + }); } } - for (self.imports.values()) |sym| { - if (sym.cast(Symbol.Proxy)) |proxy| { - for (proxy.bind_info.items) |info| { - const seg = self.load_commands.items[info.segment_id].Segment; - try pointers.append(.{ - .offset = info.address - seg.inner.vmaddr, - .segment_id = info.segment_id, - .dylib_ordinal = proxy.dylibOrdinal(), - .name = proxy.base.name, - }); - } + for (self.globals.values()) |sym| { + if (sym.payload != .proxy) continue; + + const proxy = sym.payload.proxy; + for (proxy.bind_info.items) |info| { + const seg = self.load_commands.items[info.segment_id].Segment; + try pointers.append(.{ + .offset = info.address - seg.inner.vmaddr, + .segment_id = info.segment_id, + .dylib_ordinal = proxy.dylibOrdinal(), + .name = sym.name, + }); } } @@ -2520,14 +2526,13 @@ fn writeBindInfoTable(self: *Zld) !void { const base_offset = sect.addr - seg.inner.vmaddr; const segment_id = @intCast(u16, self.data_segment_cmd_index.?); - const sym = self.imports.get("__tlv_bootstrap") orelse unreachable; - const proxy = sym.cast(Symbol.Proxy) orelse unreachable; - + const sym = self.globals.get("__tlv_bootstrap") orelse unreachable; + const proxy = sym.payload.proxy; try pointers.append(.{ .offset = base_offset, .segment_id = segment_id, .dylib_ordinal = proxy.dylibOrdinal(), - .name = proxy.base.name, + .name = sym.name, }); } @@ -2562,7 +2567,7 @@ fn writeLazyBindInfoTable(self: *Zld) !void { try pointers.ensureCapacity(self.stubs.items.len); for (self.stubs.items) |sym| { - const proxy = sym.cast(Symbol.Proxy) orelse unreachable; + const proxy = sym.payload.proxy; pointers.appendAssumeCapacity(.{ .offset = base_offset + sym.stubs_index.? 
* @sizeOf(u64), .segment_id = segment_id, @@ -2676,7 +2681,8 @@ fn writeExportInfo(self: *Zld) !void { defer sorted_globals.deinit(); for (self.globals.values()) |sym| { - const reg = sym.cast(Symbol.Regular) orelse continue; + if (sym.payload != .regular) continue; + const reg = sym.payload.regular; if (reg.linkage != .global) continue; try sorted_globals.append(sym.name); } @@ -2685,9 +2691,9 @@ fn writeExportInfo(self: *Zld) !void { for (sorted_globals.items) |sym_name| { const sym = self.globals.get(sym_name) orelse unreachable; - const reg = sym.cast(Symbol.Regular) orelse unreachable; + const reg = sym.payload.regular; - log.debug(" | putting '{s}' defined at 0x{x}", .{ reg.base.name, reg.address }); + log.debug(" | putting '{s}' defined at 0x{x}", .{ sym.name, reg.address }); try trie.put(.{ .name = sym.name, @@ -2722,50 +2728,33 @@ fn writeSymbolTable(self: *Zld) !void { var locals = std.ArrayList(macho.nlist_64).init(self.allocator); defer locals.deinit(); + try locals.ensureTotalCapacity(self.locals.items.len); + + for (self.locals.items) |symbol| { + if (symbol.isTemp()) continue; // TODO when merging codepaths, this should go into freelist + const nlist = try symbol.asNlist(&self.strtab); + locals.appendAssumeCapacity(nlist); + } var exports = std.ArrayList(macho.nlist_64).init(self.allocator); defer exports.deinit(); - for (self.objects.items) |object| { - for (object.stabs.items) |sym| { - const stab = sym.cast(Symbol.Stab) orelse unreachable; - - const nlists = try stab.asNlists(self.allocator, &self.strtab); - defer self.allocator.free(nlists); - - try locals.appendSlice(nlists); - } - - for (object.symbols.items) |sym| { - const final = sym.getTopmostAlias(); - if (final.@"type" != .regular) continue; - - const reg = final.cast(Symbol.Regular) orelse unreachable; - if (reg.isTemp()) continue; - if (reg.visited) continue; - - const nlist = try reg.asNlist(&self.strtab); - - switch (reg.linkage) { - .translation_unit => { - try locals.append(nlist); 
- }, - else => { - try exports.append(nlist); - }, - } - - reg.visited = true; - } - } - var undefs = std.ArrayList(macho.nlist_64).init(self.allocator); defer undefs.deinit(); + var undef_dir = std.StringHashMap(u32).init(self.allocator); + defer undef_dir.deinit(); - for (self.imports.values()) |sym| { - const proxy = sym.cast(Symbol.Proxy) orelse unreachable; - const nlist = try proxy.asNlist(&self.strtab); - try undefs.append(nlist); + for (self.globals.values()) |sym| { + const nlist = try sym.asNlist(&self.strtab); + switch (sym.payload) { + .regular => try exports.append(nlist), + .proxy => { + const id = @intCast(u32, undefs.items.len); + try undefs.append(nlist); + try undef_dir.putNoClobber(sym.name, id); + }, + else => unreachable, + } } const nlocals = locals.items.len; @@ -2827,24 +2816,27 @@ fn writeSymbolTable(self: *Zld) !void { stubs.reserved1 = 0; for (self.stubs.items) |sym| { - const id = self.imports.getIndex(sym.name) orelse unreachable; - try writer.writeIntLittle(u32, dysymtab.iundefsym + @intCast(u32, id)); + const id = undef_dir.get(sym.name) orelse unreachable; + try writer.writeIntLittle(u32, dysymtab.iundefsym + id); } got.reserved1 = nstubs; for (self.got_entries.items) |sym| { - if (sym.@"type" == .proxy) { - const id = self.imports.getIndex(sym.name) orelse unreachable; - try writer.writeIntLittle(u32, dysymtab.iundefsym + @intCast(u32, id)); - } else { - try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); + switch (sym.payload) { + .proxy => { + const id = undef_dir.get(sym.name) orelse unreachable; + try writer.writeIntLittle(u32, dysymtab.iundefsym + id); + }, + else => { + try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); + }, } } la_symbol_ptr.reserved1 = got.reserved1 + ngot_entries; for (self.stubs.items) |sym| { - const id = self.imports.getIndex(sym.name) orelse unreachable; - try writer.writeIntLittle(u32, dysymtab.iundefsym + @intCast(u32, id)); + const id = undef_dir.get(sym.name) orelse unreachable; + 
try writer.writeIntLittle(u32, dysymtab.iundefsym + id); } try self.file.?.pwriteAll(buf, dysymtab.indirectsymoff); From 669ac92af0626b77442415e54be3f604c5b49535 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 3 Jul 2021 16:09:23 +0200 Subject: [PATCH 11/81] zld: fix ast errors --- src/link/MachO/Symbol.zig | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index 9c17c5c833..0a35101eab 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -59,6 +59,8 @@ pub const Regular = struct { }; pub fn format(self: Regular, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; try std.fmt.format(writer, "Regular {{ ", .{}); try std.fmt.format(writer, ".linkage = {s}, ", .{self.linkage}); try std.fmt.format(writer, ".address = 0x{x}, ", .{self.address}); @@ -84,6 +86,8 @@ pub const Tentative = struct { file: ?*Object = null, pub fn format(self: Tentative, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; try std.fmt.format(writer, "Tentative {{ ", .{}); try std.fmt.format(writer, ".size = 0x{x}, ", .{self.size}); try std.fmt.format(writer, ".alignment = 0x{x}, ", .{self.alignment}); @@ -116,6 +120,8 @@ pub const Proxy = struct { } pub fn format(self: Proxy, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; try std.fmt.format(writer, "Proxy {{ ", .{}); if (self.bind_info.items.len > 0) { // TODO @@ -134,6 +140,8 @@ pub const Undefined = struct { file: ?*Object = null, pub fn format(self: Undefined, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; try std.fmt.format(writer, "Undefined {{ ", .{}); if (self.file) |file| { try std.fmt.format(writer, ".file = {s}, ", .{file.name.?}); @@ -192,7 +200,7 @@ pub fn asNlist(symbol: *Symbol, strtab: 
*StringTable) !macho.nlist_64 { break :nlist nlist; }, - .tentative => |tentative| { + .tentative => { // TODO break :nlist macho.nlist_64{ .n_strx = n_strx, @@ -211,7 +219,7 @@ pub fn asNlist(symbol: *Symbol, strtab: *StringTable) !macho.nlist_64 { .n_value = 0, }; }, - .undef => |undef| { + .undef => { // TODO break :nlist macho.nlist_64{ .n_strx = n_strx, From 453c16d8acead9bc5ef19155474ba8af37d151cf Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 3 Jul 2021 20:30:18 +0200 Subject: [PATCH 12/81] zld: draft out splitting sections into blocks --- src/link/MachO/Object.zig | 106 ++++++++++++++++++++++++++++++++++++++ src/link/MachO/Zld.zig | 24 ++++----- 2 files changed, 117 insertions(+), 13 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index c7150c2edc..8627a3917e 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -15,6 +15,7 @@ const Allocator = mem.Allocator; const Arch = std.Target.Cpu.Arch; const Relocation = reloc.Relocation; const Symbol = @import("Symbol.zig"); +const TextBlock = @import("Zld.zig").TextBlock; usingnamespace @import("commands.zig"); @@ -271,6 +272,7 @@ pub fn parse(self: *Object) !void { try self.parseSymtab(); try self.parseDataInCode(); try self.parseInitializers(); + try self.parseDummy(); } pub fn readLoadCommands(self: *Object, reader: anytype) !void { @@ -379,6 +381,110 @@ pub fn parseSections(self: *Object) !void { } } +fn cmpNlist(_: void, lhs: macho.nlist_64, rhs: macho.nlist_64) bool { + return lhs.n_value < rhs.n_value; +} + +fn filterSymsInSection(symbols: []macho.nlist_64, sect_id: u8) []macho.nlist_64 { + var start: usize = 0; + var end: usize = symbols.len; + + while (true) { + var change = false; + if (symbols[start].n_sect != sect_id) { + start += 1; + change = true; + } + if (symbols[end - 1].n_sect != sect_id) { + end -= 1; + change = true; + } + + if (start == end) break; + if (!change) break; + } + + return symbols[start..end]; +} + +pub fn parseDummy(self: 
*Object) !void { + const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; + + log.warn("analysing {s}", .{self.name.?}); + + const dysymtab = self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; + + var sorted_syms = std.ArrayList(macho.nlist_64).init(self.allocator); + defer sorted_syms.deinit(); + try sorted_syms.appendSlice(self.symtab.items[dysymtab.ilocalsym..dysymtab.iundefsym]); + + std.sort.sort(macho.nlist_64, sorted_syms.items, {}, cmpNlist); + + for (seg.sections.items) |sect, sect_id| { + log.warn("section {s},{s}", .{ parseName(§.segname), parseName(§.sectname) }); + // Read code + var code = try self.allocator.alloc(u8, @intCast(usize, sect.size)); + defer self.allocator.free(code); + _ = try self.file.?.preadAll(code, sect.offset); + + // Read and parse relocs + const raw_relocs = try self.allocator.alloc(u8, @sizeOf(macho.relocation_info) * sect.nreloc); + defer self.allocator.free(raw_relocs); + _ = try self.file.?.preadAll(raw_relocs, sect.reloff); + + const relocs = try reloc.parse( + self.allocator, + self.arch.?, + code, + mem.bytesAsSlice(macho.relocation_info, raw_relocs), + ); + + if (self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0) { + const syms = filterSymsInSection(sorted_syms.items, @intCast(u8, sect_id + 1)); + + var indices = std.ArrayList(u32).init(self.allocator); + defer indices.deinit(); + + var i: u32 = 0; + while (i < syms.len) : (i += 1) { + const curr = syms[i]; + try indices.append(i); + + const next: ?macho.nlist_64 = if (i + 1 < syms.len) + syms[i + 1] + else + null; + + if (next) |n| { + if (curr.n_value == n.n_value) { + continue; + } + } + + const start_addr = curr.n_value - sect.addr; + const end_addr = if (next) |n| n.n_value - sect.addr else sect.size; + const alignment = sect.@"align"; + + const tb_code = code[start_addr..end_addr]; + const size = tb_code.len; + + log.warn("TextBlock", .{}); + for (indices.items) |id| { + log.warn(" | symbol {s}", 
.{self.getString(syms[id].n_strx)}); + } + log.warn(" | start_addr = 0x{x}", .{start_addr}); + log.warn(" | end_addr = 0x{x}", .{end_addr}); + log.warn(" | size = {}", .{size}); + log.warn(" | alignment = 0x{x}", .{alignment}); + + indices.clearRetainingCapacity(); + } + } else { + return error.TODOOneLargeTextBlock; + } + } +} + pub fn parseInitializers(self: *Object) !void { const index = self.mod_init_func_section_index orelse return; const section = self.sections.items[index]; diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index f37bf7b696..152995c931 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -17,6 +17,7 @@ const Archive = @import("Archive.zig"); const CodeSignature = @import("CodeSignature.zig"); const Dylib = @import("Dylib.zig"); const Object = @import("Object.zig"); +const Relocation = reloc.Relocation; const StringTable = @import("StringTable.zig"); const Symbol = @import("Symbol.zig"); const Trie = @import("Trie.zig"); @@ -133,6 +134,16 @@ const TlvOffset = struct { } }; +pub const TextBlock = struct { + local_sym_index: ?u32 = null, + size: u64, + alignment: u32, + code: []u8, + relocs: []*Relocation, + segment_id: u16, + section_id: u16, +}; + /// Default path to dyld const DEFAULT_DYLD_PATH: [*:0]const u8 = "/usr/lib/dyld"; @@ -236,19 +247,6 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg try self.allocateSymbols(); try self.allocateTentativeSymbols(); try self.allocateProxyBindAddresses(); - - // log.warn("globals", .{}); - // for (self.globals.values()) |value| { - // log.warn(" | {s}: {}", .{ value.name, value.payload }); - // } - - // for (self.objects.items) |object| { - // log.warn("object {s}", .{object.name.?}); - // for (object.symbols.items) |sym| { - // log.warn(" | {s}: {}", .{ sym.name, sym.payload }); - // } - // } - try self.flush(); } From 5b3c4691e628cb288e6595974781ffbadb717c28 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 4 Jul 2021 12:56:14 +0200 
Subject: [PATCH 13/81] zld: put relocs in a TextBlock --- src/link/MachO/Object.zig | 198 ++++++++++++++++++++---------------- src/link/MachO/Zld.zig | 85 ++++++++-------- src/link/MachO/commands.zig | 38 +++++++ 3 files changed, 190 insertions(+), 131 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 8627a3917e..8e9a3075d5 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -9,13 +9,13 @@ const log = std.log.scoped(.object); const macho = std.macho; const mem = std.mem; const reloc = @import("reloc.zig"); -const parseName = @import("Zld.zig").parseName; const Allocator = mem.Allocator; const Arch = std.Target.Cpu.Arch; const Relocation = reloc.Relocation; const Symbol = @import("Symbol.zig"); -const TextBlock = @import("Zld.zig").TextBlock; +const TextBlock = Zld.TextBlock; +const Zld = @import("Zld.zig"); usingnamespace @import("commands.zig"); @@ -74,43 +74,6 @@ pub const Section = struct { allocator.free(relocs); } } - - pub fn segname(self: Section) []const u8 { - return parseName(&self.inner.segname); - } - - pub fn sectname(self: Section) []const u8 { - return parseName(&self.inner.sectname); - } - - pub fn flags(self: Section) u32 { - return self.inner.flags; - } - - pub fn sectionType(self: Section) u8 { - return @truncate(u8, self.flags() & 0xff); - } - - pub fn sectionAttrs(self: Section) u32 { - return self.flags() & 0xffffff00; - } - - pub fn isCode(self: Section) bool { - const attr = self.sectionAttrs(); - return attr & macho.S_ATTR_PURE_INSTRUCTIONS != 0 or attr & macho.S_ATTR_SOME_INSTRUCTIONS != 0; - } - - pub fn isDebug(self: Section) bool { - return self.sectionAttrs() & macho.S_ATTR_DEBUG != 0; - } - - pub fn dontDeadStrip(self: Section) bool { - return self.sectionAttrs() & macho.S_ATTR_NO_DEAD_STRIP != 0; - } - - pub fn dontDeadStripIfReferencesLive(self: Section) bool { - return self.sectionAttrs() & macho.S_ATTR_LIVE_SUPPORT != 0; - } }; const DebugInfo = struct { @@ -272,7 +235,6 @@ 
pub fn parse(self: *Object) !void { try self.parseSymtab(); try self.parseDataInCode(); try self.parseInitializers(); - try self.parseDummy(); } pub fn readLoadCommands(self: *Object, reader: anytype) !void { @@ -288,8 +250,8 @@ pub fn readLoadCommands(self: *Object, reader: anytype) !void { var seg = cmd.Segment; for (seg.sections.items) |*sect, j| { const index = @intCast(u16, j); - const segname = parseName(§.segname); - const sectname = parseName(§.sectname); + const segname = segmentName(sect.*); + const sectname = sectionName(sect.*); if (mem.eql(u8, segname, "__DWARF")) { if (mem.eql(u8, sectname, "__debug_info")) { self.dwarf_debug_info_index = index; @@ -351,7 +313,7 @@ pub fn parseSections(self: *Object) !void { try self.sections.ensureCapacity(self.allocator, seg.sections.items.len); for (seg.sections.items) |sect| { - log.debug("parsing section '{s},{s}'", .{ parseName(§.segname), parseName(§.sectname) }); + log.debug("parsing section '{s},{s}'", .{ segmentName(sect), sectionName(sect) }); // Read sections' code var code = try self.allocator.alloc(u8, @intCast(usize, sect.size)); _ = try self.file.?.preadAll(code, sect.offset); @@ -381,47 +343,91 @@ pub fn parseSections(self: *Object) !void { } } -fn cmpNlist(_: void, lhs: macho.nlist_64, rhs: macho.nlist_64) bool { - return lhs.n_value < rhs.n_value; -} - -fn filterSymsInSection(symbols: []macho.nlist_64, sect_id: u8) []macho.nlist_64 { - var start: usize = 0; - var end: usize = symbols.len; - - while (true) { - var change = false; - if (symbols[start].n_sect != sect_id) { - start += 1; - change = true; - } - if (symbols[end - 1].n_sect != sect_id) { - end -= 1; - change = true; - } - - if (start == end) break; - if (!change) break; - } - - return symbols[start..end]; -} - -pub fn parseDummy(self: *Object) !void { +pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; log.warn("analysing {s}", .{self.name.?}); const dysymtab = 
self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; - var sorted_syms = std.ArrayList(macho.nlist_64).init(self.allocator); - defer sorted_syms.deinit(); - try sorted_syms.appendSlice(self.symtab.items[dysymtab.ilocalsym..dysymtab.iundefsym]); + const SymWithIndex = struct { + nlist: macho.nlist_64, + index: u32, - std.sort.sort(macho.nlist_64, sorted_syms.items, {}, cmpNlist); + pub fn cmp(_: void, lhs: @This(), rhs: @This()) bool { + return lhs.nlist.n_value < rhs.nlist.n_value; + } + + fn filterSymsInSection(symbols: []@This(), sect_id: u8) []@This() { + var start: usize = 0; + var end: usize = symbols.len; + + while (true) { + var change = false; + if (symbols[start].nlist.n_sect != sect_id) { + start += 1; + change = true; + } + if (symbols[end - 1].nlist.n_sect != sect_id) { + end -= 1; + change = true; + } + + if (start == end) break; + if (!change) break; + } + + return symbols[start..end]; + } + + fn filterRelocs(relocs: []macho.relocation_info, start: u64, end: u64) []macho.relocation_info { + if (relocs.len == 0) return relocs; + + var start_id: usize = 0; + var end_id: usize = relocs.len; + + while (true) { + var change = false; + if (relocs[start_id].r_address > end) { + start_id += 1; + change = true; + } + if (relocs[end_id - 1].r_address < start) { + end_id -= 1; + change = true; + } + + if (start_id == end_id) break; + if (!change) break; + } + + return relocs[start_id..end_id]; + } + }; + + const nlists = self.symtab.items[dysymtab.ilocalsym..dysymtab.iundefsym]; + + var sorted_syms = std.ArrayList(SymWithIndex).init(self.allocator); + defer sorted_syms.deinit(); + try sorted_syms.ensureTotalCapacity(nlists.len); + + for (nlists) |nlist, index| { + sorted_syms.appendAssumeCapacity(.{ + .nlist = nlist, + .index = @intCast(u32, index + dysymtab.ilocalsym), + }); + } + + std.sort.sort(SymWithIndex, sorted_syms.items, {}, SymWithIndex.cmp); for (seg.sections.items) |sect, sect_id| { - log.warn("section {s},{s}", .{ parseName(§.segname), 
parseName(§.sectname) }); + log.warn("section {s},{s}", .{ segmentName(sect), sectionName(sect) }); + + const match = (try zld.getMatchingSection(sect)) orelse { + log.warn("unhandled section", .{}); + continue; + }; + // Read code var code = try self.allocator.alloc(u8, @intCast(usize, sect.size)); defer self.allocator.free(code); @@ -431,16 +437,25 @@ pub fn parseDummy(self: *Object) !void { const raw_relocs = try self.allocator.alloc(u8, @sizeOf(macho.relocation_info) * sect.nreloc); defer self.allocator.free(raw_relocs); _ = try self.file.?.preadAll(raw_relocs, sect.reloff); + const relocs = mem.bytesAsSlice(macho.relocation_info, raw_relocs); - const relocs = try reloc.parse( - self.allocator, - self.arch.?, - code, - mem.bytesAsSlice(macho.relocation_info, raw_relocs), - ); + const alignment = sect.@"align"; if (self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0) { - const syms = filterSymsInSection(sorted_syms.items, @intCast(u8, sect_id + 1)); + const syms = SymWithIndex.filterSymsInSection(sorted_syms.items, @intCast(u8, sect_id + 1)); + + if (syms.len == 0) { + // One large text block referenced by section offsets only + log.warn("TextBlock", .{}); + log.warn(" | referenced by section offsets", .{}); + log.warn(" | start_addr = {}", .{sect.addr}); + log.warn(" | end_addr = {}", .{sect.size}); + log.warn(" | size = {}", .{sect.size}); + log.warn(" | alignment = 0x{x}", .{alignment}); + log.warn(" | segment_id = {}", .{match.seg}); + log.warn(" | section_id = {}", .{match.sect}); + log.warn(" | relocs: {any}", .{relocs}); + } var indices = std.ArrayList(u32).init(self.allocator); defer indices.deinit(); @@ -450,32 +465,35 @@ pub fn parseDummy(self: *Object) !void { const curr = syms[i]; try indices.append(i); - const next: ?macho.nlist_64 = if (i + 1 < syms.len) + const next: ?SymWithIndex = if (i + 1 < syms.len) syms[i + 1] else null; if (next) |n| { - if (curr.n_value == n.n_value) { + if (curr.nlist.n_value == n.nlist.n_value) { continue; } } - 
const start_addr = curr.n_value - sect.addr; - const end_addr = if (next) |n| n.n_value - sect.addr else sect.size; - const alignment = sect.@"align"; + const start_addr = curr.nlist.n_value - sect.addr; + const end_addr = if (next) |n| n.nlist.n_value - sect.addr else sect.size; const tb_code = code[start_addr..end_addr]; const size = tb_code.len; log.warn("TextBlock", .{}); for (indices.items) |id| { - log.warn(" | symbol {s}", .{self.getString(syms[id].n_strx)}); + const sym = self.symbols.items[syms[id].index]; + log.warn(" | symbol = {s}", .{sym.name}); } - log.warn(" | start_addr = 0x{x}", .{start_addr}); - log.warn(" | end_addr = 0x{x}", .{end_addr}); + log.warn(" | start_addr = {}", .{start_addr}); + log.warn(" | end_addr = {}", .{end_addr}); log.warn(" | size = {}", .{size}); log.warn(" | alignment = 0x{x}", .{alignment}); + log.warn(" | segment_id = {}", .{match.seg}); + log.warn(" | section_id = {}", .{match.sect}); + log.warn(" | relocs: {any}", .{SymWithIndex.filterRelocs(relocs, start_addr, end_addr)}); indices.clearRetainingCapacity(); } diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 152995c931..2b7b905b89 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -234,6 +234,7 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg try self.parseInputFiles(files, args.syslibroot); try self.parseLibs(args.libs, args.syslibroot); try self.resolveSymbols(); + try self.parseTextBlocks(); try self.resolveStubsAndGotEntries(); try self.updateMetadata(); try self.sortSections(); @@ -322,10 +323,10 @@ fn mapAndUpdateSections( log.debug("{s}: '{s},{s}' mapped to '{s},{s}' from 0x{x} to 0x{x}", .{ object.name.?, - parseName(&source_sect.inner.segname), - parseName(&source_sect.inner.sectname), - parseName(&target_sect.segname), - parseName(&target_sect.sectname), + segmentName(source_sect.inner), + sectionName(source_sect.inner), + segmentName(target_sect.*), + sectionName(target_sect.*), offset, offset 
+ size, }); @@ -343,12 +344,12 @@ fn updateMetadata(self: *Zld) !void { for (self.objects.items) |object| { // Find ideal section alignment and update section mappings for (object.sections.items) |sect, sect_id| { - const match = (try self.getMatchingSection(sect)) orelse { + const match = (try self.getMatchingSection(sect.inner)) orelse { log.debug("{s}: unhandled section type 0x{x} for '{s},{s}'", .{ object.name.?, - sect.flags(), - sect.segname(), - sect.sectname(), + sect.inner.flags, + segmentName(sect.inner), + sectionName(sect.inner), }); continue; }; @@ -441,15 +442,15 @@ const MatchingSection = struct { sect: u16, }; -fn getMatchingSection(self: *Zld, sect: Object.Section) !?MatchingSection { +pub fn getMatchingSection(self: *Zld, sect: macho.section_64) !?MatchingSection { const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const segname = sect.segname(); - const sectname = sect.sectname(); + const segname = segmentName(sect); + const sectname = sectionName(sect); const res: ?MatchingSection = blk: { - switch (sect.sectionType()) { + switch (sectionType(sect)) { macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => { if (self.text_const_section_index == null) { self.text_const_section_index = @intCast(u16, text_seg.sections.items.len); @@ -649,7 +650,7 @@ fn getMatchingSection(self: *Zld, sect: Object.Section) !?MatchingSection { }; }, macho.S_REGULAR => { - if (sect.isCode()) { + if (sectionIsCode(sect)) { if (self.text_section_index == null) { self.text_section_index = @intCast(u16, text_seg.sections.items.len); try text_seg.addSection(self.allocator, "__text", .{ @@ -662,11 +663,11 @@ fn getMatchingSection(self: *Zld, sect: Object.Section) !?MatchingSection { .sect = self.text_section_index.?, }; } - if (sect.isDebug()) { + 
if (sectionIsDebug(sect)) { // TODO debug attributes if (mem.eql(u8, "__LD", segname) and mem.eql(u8, "__compact_unwind", sectname)) { log.debug("TODO compact unwind section: type 0x{x}, name '{s},{s}'", .{ - sect.flags(), segname, sectname, + sect.flags, segname, sectname, }); } break :blk null; @@ -829,7 +830,7 @@ fn getMatchingSection(self: *Zld, sect: Object.Section) !?MatchingSection { if (mem.eql(u8, "__LLVM", segname) and mem.eql(u8, "__asm", sectname)) { log.debug("TODO LLVM asm section: type 0x{x}, name '{s},{s}'", .{ - sect.flags(), segname, sectname, + sect.flags, segname, sectname, }); } @@ -956,8 +957,8 @@ fn sortSections(self: *Zld) !void { log.debug("remapping in {s}: '{s},{s}': {} => {}", .{ object.name.?, - parseName(§.inner.segname), - parseName(§.inner.sectname), + segmentName(sect.inner), + sectionName(sect.inner), target_map.section_id, new_index, }); @@ -1086,8 +1087,8 @@ fn allocateSymbol(self: *Zld, symbol: *Symbol) !void { const source_sect = &object.sections.items[reg.section]; const target_map = source_sect.target_map orelse { log.debug("section '{s},{s}' not mapped for symbol '{s}'", .{ - parseName(&source_sect.inner.segname), - parseName(&source_sect.inner.sectname), + segmentName(source_sect.inner), + sectionName(source_sect.inner), symbol.name, }); return; @@ -1464,7 +1465,7 @@ fn writeStubInStubHelper(self: *Zld, index: u32) !void { fn resolveSymbolsInObject(self: *Zld, object: *Object) !void { log.debug("resolving symbols in '{s}'", .{object.name}); - for (object.symtab.items) |sym| { + for (object.symtab.items) |sym, sym_id| { const sym_name = object.getString(sym.n_strx); if (Symbol.isStab(sym)) { @@ -1497,6 +1498,7 @@ fn resolveSymbolsInObject(self: *Zld, object: *Object) !void { .file = object, }, }; + const index = @intCast(u32, self.locals.items.len); try self.locals.append(self.allocator, symbol); try object.symbols.append(self.allocator, symbol); continue; @@ -1665,6 +1667,12 @@ fn resolveSymbols(self: *Zld) !void { if 
(has_undefined) return error.UndefinedSymbolReference; } +fn parseTextBlocks(self: *Zld) !void { + for (self.objects.items) |object| { + try object.parseTextBlocks(self); + } +} + fn resolveStubsAndGotEntries(self: *Zld) !void { for (self.objects.items) |object| { log.debug("resolving stubs and got entries from {s}", .{object.name}); @@ -1718,11 +1726,11 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void { log.debug("relocating object {s}", .{object.name}); for (object.sections.items) |sect| { - if (sect.inner.flags == macho.S_MOD_INIT_FUNC_POINTERS or - sect.inner.flags == macho.S_MOD_TERM_FUNC_POINTERS) continue; + if (sectionType(sect.inner) == macho.S_MOD_INIT_FUNC_POINTERS or + sectionType(sect.inner) == macho.S_MOD_TERM_FUNC_POINTERS) continue; - const segname = parseName(§.inner.segname); - const sectname = parseName(§.inner.sectname); + const segname = segmentName(sect.inner); + const sectname = sectionName(sect.inner); log.debug("relocating section '{s},{s}'", .{ segname, sectname }); @@ -1759,7 +1767,7 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void { args.source_target_sect_addr = source_sect.inner.addr; } - const flags = @truncate(u8, target_sect.flags & 0xff); + const sect_type = sectionType(target_sect); const should_rebase = rebase: { if (!unsigned.is_64bit) break :rebase false; @@ -1780,8 +1788,8 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void { }; if (!is_right_segment) break :rebase false; - if (flags != macho.S_LITERAL_POINTERS and - flags != macho.S_REGULAR) + if (sect_type != macho.S_LITERAL_POINTERS and + sect_type != macho.S_REGULAR) { break :rebase false; } @@ -1804,7 +1812,7 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void { // TLV is handled via a separate offset mechanism. // Calculate the offset to the initializer. 
- if (flags == macho.S_THREAD_LOCAL_VARIABLES) tlv: { + if (sect_type == macho.S_THREAD_LOCAL_VARIABLES) tlv: { // TODO we don't want to save offset to tlv_bootstrap if (mem.eql(u8, object.symbols.items[rel.target.symbol].name, "__tlv_bootstrap")) break :tlv; @@ -1858,13 +1866,13 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void { target_sect_off + sect.code.len, }); - if (target_sect.flags == macho.S_ZEROFILL or - target_sect.flags == macho.S_THREAD_LOCAL_ZEROFILL or - target_sect.flags == macho.S_THREAD_LOCAL_VARIABLES) + if (sectionType(target_sect) == macho.S_ZEROFILL or + sectionType(target_sect) == macho.S_THREAD_LOCAL_ZEROFILL or + sectionType(target_sect) == macho.S_THREAD_LOCAL_VARIABLES) { log.debug("zeroing out '{s},{s}' from 0x{x} to 0x{x}", .{ - parseName(&target_sect.segname), - parseName(&target_sect.sectname), + segmentName(target_sect), + sectionName(target_sect), target_sect_off, target_sect_off + sect.code.len, }); @@ -1926,8 +1934,8 @@ fn relocTargetAddr(self: *Zld, object: *const Object, target: reloc.Relocation.T log.debug(" | section offset", .{}); const source_sect = object.sections.items[sect_id]; log.debug(" | section '{s},{s}'", .{ - parseName(&source_sect.inner.segname), - parseName(&source_sect.inner.sectname), + segmentName(source_sect.inner), + sectionName(source_sect.inner), }); const target_map = source_sect.target_map orelse unreachable; const target_seg = self.load_commands.items[target_map.segment_id].Segment; @@ -2999,8 +3007,3 @@ fn writeHeader(self: *Zld) !void { try self.file.?.pwriteAll(mem.asBytes(&header), 0); } - -pub fn parseName(name: *const [16]u8) []const u8 { - const len = mem.indexOfScalar(u8, name, @as(u8, 0)) orelse name.len; - return name[0..len]; -} diff --git a/src/link/MachO/commands.zig b/src/link/MachO/commands.zig index 5919496526..f7a2fd3eda 100644 --- a/src/link/MachO/commands.zig +++ b/src/link/MachO/commands.zig @@ -425,6 +425,44 @@ fn makeStaticString(bytes: []const u8) [16]u8 { return buf; } +fn 
parseName(name: *const [16]u8) []const u8 { + const len = mem.indexOfScalar(u8, name, @as(u8, 0)) orelse name.len; + return name[0..len]; +} + +pub fn segmentName(sect: macho.section_64) []const u8 { + return parseName(§.segname); +} + +pub fn sectionName(sect: macho.section_64) []const u8 { + return parseName(§.sectname); +} + +pub fn sectionType(sect: macho.section_64) u8 { + return @truncate(u8, sect.flags & 0xff); +} + +pub fn sectionAttrs(sect: macho.section_64) u32 { + return sect.flags & 0xffffff00; +} + +pub fn sectionIsCode(sect: macho.section_64) bool { + const attr = sectionAttrs(sect); + return attr & macho.S_ATTR_PURE_INSTRUCTIONS != 0 or attr & macho.S_ATTR_SOME_INSTRUCTIONS != 0; +} + +pub fn sectionIsDebug(sect: macho.section_64) bool { + return sectionAttrs(sect) & macho.S_ATTR_DEBUG != 0; +} + +pub fn sectionIsDontDeadStrip(sect: macho.section_64) bool { + return sectionAttrs(sect) & macho.S_ATTR_NO_DEAD_STRIP != 0; +} + +pub fn sectionIsDontDeadStripIfReferencesLive(sect: macho.section_64) bool { + return sectionAttrs(sect) & macho.S_ATTR_LIVE_SUPPORT != 0; +} + fn testRead(allocator: *Allocator, buffer: []const u8, expected: anytype) !void { var stream = io.fixedBufferStream(buffer); var given = try LoadCommand.read(allocator, stream.reader()); From 5649242025cd885a6a2f0607d96f54b1926b0a5a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 5 Jul 2021 07:39:49 +0200 Subject: [PATCH 14/81] zld: draft up final format of TextBlock --- src/link/MachO/Object.zig | 50 ++++++++++++++++++++++++++++++++++++++- src/link/MachO/Zld.zig | 25 ++++++++++++++++---- 2 files changed, 70 insertions(+), 5 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 8e9a3075d5..43bfea67d5 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -343,7 +343,7 @@ pub fn parseSections(self: *Object) !void { } } -pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { +pub fn parseTextBlocks(self: *Object, zld: *Zld) 
!*TextBlock { const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; log.warn("analysing {s}", .{self.name.?}); @@ -503,6 +503,54 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { } } +const SectionAsTextBlocksArgs = struct { + sect: macho.section_64, + code: []u8, + subsections_via_symbols: bool = false, + relocs: ?[]macho.relocation_info = null, + segment_id: u16 = 0, + section_id: u16 = 0, +}; + +fn sectionAsTextBlocks(self: *Object, args: SectionAsTextBlocksArgs) !*TextBlock { + const sect = args.sect; + + log.warn("putting section '{s},{s}' as a TextBlock", .{ segmentName(sect), sectionName(sect) }); + + // Section alignment will be the assumed alignment per symbol. + const alignment = sect.@"align"; + + const first_block: *TextBlock = blk: { + if (args.subsections_via_symbols) { + return error.TODO; + } else { + const block = try self.allocator.create(TextBlock); + errdefer self.allocator.destroy(block); + + block.* = .{ + .ref = .{ + .section = undefined, // Will be populated when we allocated final sections. 
+ }, + .code = args.code, + .relocs = null, + .size = sect.size, + .alignment = alignment, + .segment_id = args.segment_id, + .section_id = args.section_id, + }; + + // TODO parse relocs + if (args.relocs) |relocs| { + block.relocs = try reloc.parse(self.allocator, self.arch.?, args.code, relocs, symbols); + } + + break :blk block; + } + }; + + return first_block; +} + pub fn parseInitializers(self: *Object) !void { const index = self.mod_init_func_section_index orelse return; const section = self.sections.items[index]; diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 2b7b905b89..9d7eea042a 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -135,13 +135,30 @@ const TlvOffset = struct { }; pub const TextBlock = struct { - local_sym_index: ?u32 = null, + allocator: *Allocator, + local_sym_index: u32, + aliases: std.ArrayList(u32), + references: std.ArrayList(u32), + code: []u8, + relocs: ?std.ArrayList(*Relocation) = null, size: u64, alignment: u32, - code: []u8, - relocs: []*Relocation, segment_id: u16, section_id: u16, + next: ?*TextBlock = null, + prev: ?*TextBlock = null, + + pub fn deinit(block: *TextBlock, allocator: *Allocator) void { + block.aliases.deinit(); + block.references.deinit(); + if (block.relocs) |relocs| { + for (relocs.items) |reloc| { + allocator.destroy(reloc); + } + relocs.deinit(); + } + allocator.free(code); + } }; /// Default path to dyld @@ -1669,7 +1686,7 @@ fn resolveSymbols(self: *Zld) !void { fn parseTextBlocks(self: *Zld) !void { for (self.objects.items) |object| { - try object.parseTextBlocks(self); + _ = try object.parseTextBlocks(self); } } From 7b4063d55899b0e35711c848f7b19de6f928282b Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 5 Jul 2021 16:31:20 +0200 Subject: [PATCH 15/81] zld: convert section in linked list of TextBlocks --- src/link/MachO/Object.zig | 406 ++++++++++++++++++-------------------- src/link/MachO/Symbol.zig | 11 +- src/link/MachO/Zld.zig | 108 +++++++--- 3 files 
changed, 272 insertions(+), 253 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 43bfea67d5..951e91a408 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -28,7 +28,6 @@ name: ?[]const u8 = null, mtime: ?u64 = null, load_commands: std.ArrayListUnmanaged(LoadCommand) = .{}, -sections: std.ArrayListUnmanaged(Section) = .{}, segment_cmd_index: ?u16 = null, symtab_cmd_index: ?u16 = null, @@ -49,32 +48,10 @@ dwarf_debug_ranges_index: ?u16 = null, symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{}, strtab: std.ArrayListUnmanaged(u8) = .{}, -symbols: std.ArrayListUnmanaged(*Symbol) = .{}, -stabs: std.ArrayListUnmanaged(*Symbol) = .{}, initializers: std.ArrayListUnmanaged(u32) = .{}, data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, -pub const Section = struct { - inner: macho.section_64, - code: []u8, - relocs: ?[]*Relocation, - target_map: ?struct { - segment_id: u16, - section_id: u16, - offset: u32, - } = null, - - pub fn deinit(self: *Section, allocator: *Allocator) void { - allocator.free(self.code); - - if (self.relocs) |relocs| { - for (relocs) |rel| { - allocator.destroy(rel); - } - allocator.free(relocs); - } - } -}; +symbols: std.ArrayListUnmanaged(*Symbol) = .{}, const DebugInfo = struct { inner: dwarf.DwarfInfo, @@ -177,19 +154,11 @@ pub fn deinit(self: *Object) void { lc.deinit(self.allocator); } self.load_commands.deinit(self.allocator); - - for (self.sections.items) |*sect| { - sect.deinit(self.allocator); - } - self.sections.deinit(self.allocator); - - self.symbols.deinit(self.allocator); - self.stabs.deinit(self.allocator); - self.data_in_code_entries.deinit(self.allocator); self.initializers.deinit(self.allocator); self.symtab.deinit(self.allocator); self.strtab.deinit(self.allocator); + self.symbols.deinit(self.allocator); if (self.name) |n| { self.allocator.free(n); @@ -231,10 +200,8 @@ pub fn parse(self: *Object) !void { self.header = header; try 
self.readLoadCommands(reader); - try self.parseSections(); try self.parseSymtab(); try self.parseDataInCode(); - try self.parseInitializers(); } pub fn readLoadCommands(self: *Object, reader: anytype) !void { @@ -305,250 +272,253 @@ pub fn readLoadCommands(self: *Object, reader: anytype) !void { } } -pub fn parseSections(self: *Object) !void { - const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; +const NlistWithIndex = struct { + nlist: macho.nlist_64, + index: u32, - log.debug("parsing sections in {s}", .{self.name.?}); + pub fn cmp(_: void, lhs: @This(), rhs: @This()) bool { + return lhs.nlist.n_value < rhs.nlist.n_value; + } - try self.sections.ensureCapacity(self.allocator, seg.sections.items.len); + fn filterNlistsInSection(symbols: []@This(), sect_id: u8) []@This() { + var start: usize = 0; + var end: usize = symbols.len; - for (seg.sections.items) |sect| { - log.debug("parsing section '{s},{s}'", .{ segmentName(sect), sectionName(sect) }); - // Read sections' code - var code = try self.allocator.alloc(u8, @intCast(usize, sect.size)); - _ = try self.file.?.preadAll(code, sect.offset); + while (true) { + var change = false; + if (symbols[start].nlist.n_sect != sect_id) { + start += 1; + change = true; + } + if (symbols[end - 1].nlist.n_sect != sect_id) { + end -= 1; + change = true; + } - var section = Section{ - .inner = sect, - .code = code, - .relocs = null, - }; - - // Parse relocations - if (sect.nreloc > 0) { - var raw_relocs = try self.allocator.alloc(u8, @sizeOf(macho.relocation_info) * sect.nreloc); - defer self.allocator.free(raw_relocs); - - _ = try self.file.?.preadAll(raw_relocs, sect.reloff); - - section.relocs = try reloc.parse( - self.allocator, - self.arch.?, - section.code, - mem.bytesAsSlice(macho.relocation_info, raw_relocs), - ); + if (start == end) break; + if (!change) break; } - self.sections.appendAssumeCapacity(section); + return symbols[start..end]; } +}; + +fn filterRelocs(relocs: []macho.relocation_info, start: 
u64, end: u64) []macho.relocation_info { + if (relocs.len == 0) return relocs; + + var start_id: usize = 0; + var end_id: usize = relocs.len; + + while (true) { + var change = false; + if (relocs[start_id].r_address > end) { + start_id += 1; + change = true; + } + if (relocs[end_id - 1].r_address < start) { + end_id -= 1; + change = true; + } + + if (start_id == end_id) break; + if (!change) break; + } + + return relocs[start_id..end_id]; } -pub fn parseTextBlocks(self: *Object, zld: *Zld) !*TextBlock { +const SeniorityContext = struct { + zld: *Zld, +}; +fn cmpSymBySeniority(context: SeniorityContext, lhs: u32, rhs: u32) bool { + const lreg = context.zld.locals.items[lhs].payload.regular; + const rreg = context.zld.locals.items[rhs].payload.regular; + + return switch (rreg.linkage) { + .global => true, + .linkage_unit => lreg.linkage == .translation_unit, + else => false, + }; +} + +pub fn parseTextBlocks(self: *Object, zld: *Zld) !?*TextBlock { const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; log.warn("analysing {s}", .{self.name.?}); const dysymtab = self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; - - const SymWithIndex = struct { - nlist: macho.nlist_64, - index: u32, - - pub fn cmp(_: void, lhs: @This(), rhs: @This()) bool { - return lhs.nlist.n_value < rhs.nlist.n_value; - } - - fn filterSymsInSection(symbols: []@This(), sect_id: u8) []@This() { - var start: usize = 0; - var end: usize = symbols.len; - - while (true) { - var change = false; - if (symbols[start].nlist.n_sect != sect_id) { - start += 1; - change = true; - } - if (symbols[end - 1].nlist.n_sect != sect_id) { - end -= 1; - change = true; - } - - if (start == end) break; - if (!change) break; - } - - return symbols[start..end]; - } - - fn filterRelocs(relocs: []macho.relocation_info, start: u64, end: u64) []macho.relocation_info { - if (relocs.len == 0) return relocs; - - var start_id: usize = 0; - var end_id: usize = relocs.len; - - while (true) { - var change 
= false; - if (relocs[start_id].r_address > end) { - start_id += 1; - change = true; - } - if (relocs[end_id - 1].r_address < start) { - end_id -= 1; - change = true; - } - - if (start_id == end_id) break; - if (!change) break; - } - - return relocs[start_id..end_id]; - } - }; - + // We only care about defined symbols, so filter every other out. const nlists = self.symtab.items[dysymtab.ilocalsym..dysymtab.iundefsym]; - var sorted_syms = std.ArrayList(SymWithIndex).init(self.allocator); - defer sorted_syms.deinit(); - try sorted_syms.ensureTotalCapacity(nlists.len); + var sorted_nlists = std.ArrayList(NlistWithIndex).init(self.allocator); + defer sorted_nlists.deinit(); + try sorted_nlists.ensureTotalCapacity(nlists.len); for (nlists) |nlist, index| { - sorted_syms.appendAssumeCapacity(.{ + sorted_nlists.appendAssumeCapacity(.{ .nlist = nlist, .index = @intCast(u32, index + dysymtab.ilocalsym), }); } - std.sort.sort(SymWithIndex, sorted_syms.items, {}, SymWithIndex.cmp); + std.sort.sort(NlistWithIndex, sorted_nlists.items, {}, NlistWithIndex.cmp); + + var last_block: ?*TextBlock = null; for (seg.sections.items) |sect, sect_id| { - log.warn("section {s},{s}", .{ segmentName(sect), sectionName(sect) }); + log.warn("putting section '{s},{s}' as a TextBlock", .{ + segmentName(sect), + sectionName(sect), + }); + // Get matching segment/section in the final artifact. 
const match = (try zld.getMatchingSection(sect)) orelse { log.warn("unhandled section", .{}); continue; }; - // Read code + // Read section's code var code = try self.allocator.alloc(u8, @intCast(usize, sect.size)); defer self.allocator.free(code); _ = try self.file.?.preadAll(code, sect.offset); - // Read and parse relocs - const raw_relocs = try self.allocator.alloc(u8, @sizeOf(macho.relocation_info) * sect.nreloc); - defer self.allocator.free(raw_relocs); - _ = try self.file.?.preadAll(raw_relocs, sect.reloff); - const relocs = mem.bytesAsSlice(macho.relocation_info, raw_relocs); + // Is there any padding between symbols within the section? + const is_padded = self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; + // Section alignment will be the assumed alignment per symbol. const alignment = sect.@"align"; - if (self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0) { - const syms = SymWithIndex.filterSymsInSection(sorted_syms.items, @intCast(u8, sect_id + 1)); + next: { + if (is_padded) blocks: { + const filtered_nlists = NlistWithIndex.filterNlistsInSection( + sorted_nlists.items, + @intCast(u8, sect_id + 1), + ); - if (syms.len == 0) { - // One large text block referenced by section offsets only - log.warn("TextBlock", .{}); - log.warn(" | referenced by section offsets", .{}); - log.warn(" | start_addr = {}", .{sect.addr}); - log.warn(" | end_addr = {}", .{sect.size}); - log.warn(" | size = {}", .{sect.size}); - log.warn(" | alignment = 0x{x}", .{alignment}); - log.warn(" | segment_id = {}", .{match.seg}); - log.warn(" | section_id = {}", .{match.sect}); - log.warn(" | relocs: {any}", .{relocs}); - } + if (filtered_nlists.len == 0) break :blocks; - var indices = std.ArrayList(u32).init(self.allocator); - defer indices.deinit(); + var nlist_indices = std.ArrayList(u32).init(self.allocator); + defer nlist_indices.deinit(); - var i: u32 = 0; - while (i < syms.len) : (i += 1) { - const curr = syms[i]; - try indices.append(i); + var i: u32 = 0; + 
while (i < filtered_nlists.len) : (i += 1) { + const curr = filtered_nlists[i]; + try nlist_indices.append(curr.index); - const next: ?SymWithIndex = if (i + 1 < syms.len) - syms[i + 1] - else - null; + const next: ?NlistWithIndex = if (i + 1 < filtered_nlists.len) + filtered_nlists[i + 1] + else + null; - if (next) |n| { - if (curr.nlist.n_value == n.nlist.n_value) { - continue; + if (next) |n| { + if (curr.nlist.n_value == n.nlist.n_value) { + continue; + } } + + // Bubble-up senior symbol as the main link to the text block. + for (nlist_indices.items) |*index| { + const sym = self.symbols.items[index.*]; + if (sym.payload != .regular) { + log.err("expected a regular symbol, found {s}", .{sym.payload}); + log.err(" when remapping {s}", .{sym.name}); + return error.SymbolIsNotRegular; + } + assert(sym.payload.regular.local_sym_index != 0); // This means the symbol has not been properly resolved. + index.* = sym.payload.regular.local_sym_index; + } + + std.sort.sort(u32, nlist_indices.items, SeniorityContext{ .zld = zld }, cmpSymBySeniority); + + const local_sym_index = nlist_indices.pop(); + const sym = zld.locals.items[local_sym_index]; + if (sym.payload.regular.file) |file| { + if (file != self) { + log.warn("deduping definition of {s} in {s}", .{ sym.name, self.name.? 
}); + continue; + } + } + + const start_addr = curr.nlist.n_value - sect.addr; + const end_addr = if (next) |n| n.nlist.n_value - sect.addr else sect.size; + + const tb_code = code[start_addr..end_addr]; + const size = tb_code.len; + + const block = try self.allocator.create(TextBlock); + errdefer self.allocator.destroy(block); + + block.* = .{ + .local_sym_index = local_sym_index, + .aliases = std.ArrayList(u32).init(self.allocator), + .references = std.ArrayList(u32).init(self.allocator), + .code = tb_code, + .relocs = std.ArrayList(*Relocation).init(self.allocator), + .size = size, + .alignment = alignment, + .segment_id = match.seg, + .section_id = match.sect, + }; + try block.aliases.appendSlice(nlist_indices.items); + + // TODO parse relocs + + if (last_block) |last| { + last.next = block; + block.prev = last; + } + last_block = block; + + nlist_indices.clearRetainingCapacity(); } - const start_addr = curr.nlist.n_value - sect.addr; - const end_addr = if (next) |n| n.nlist.n_value - sect.addr else sect.size; - - const tb_code = code[start_addr..end_addr]; - const size = tb_code.len; - - log.warn("TextBlock", .{}); - for (indices.items) |id| { - const sym = self.symbols.items[syms[id].index]; - log.warn(" | symbol = {s}", .{sym.name}); - } - log.warn(" | start_addr = {}", .{start_addr}); - log.warn(" | end_addr = {}", .{end_addr}); - log.warn(" | size = {}", .{size}); - log.warn(" | alignment = 0x{x}", .{alignment}); - log.warn(" | segment_id = {}", .{match.seg}); - log.warn(" | section_id = {}", .{match.sect}); - log.warn(" | relocs: {any}", .{SymWithIndex.filterRelocs(relocs, start_addr, end_addr)}); - - indices.clearRetainingCapacity(); + break :next; } - } else { - return error.TODOOneLargeTextBlock; - } - } -} -const SectionAsTextBlocksArgs = struct { - sect: macho.section_64, - code: []u8, - subsections_via_symbols: bool = false, - relocs: ?[]macho.relocation_info = null, - segment_id: u16 = 0, - section_id: u16 = 0, -}; + // Since there is no symbol to 
refer to this block, we create + // a temp one. + const name = try std.fmt.allocPrint(self.allocator, "l_{s}_{s}_{s}", .{ + self.name.?, + segmentName(sect), + sectionName(sect), + }); + defer self.allocator.free(name); + const symbol = try Symbol.new(self.allocator, name); + symbol.payload = .{ + .regular = .{ + .linkage = .translation_unit, + .file = self, + }, + }; + const local_sym_index = @intCast(u32, zld.locals.items.len); + try zld.locals.append(zld.allocator, symbol); -fn sectionAsTextBlocks(self: *Object, args: SectionAsTextBlocksArgs) !*TextBlock { - const sect = args.sect; - - log.warn("putting section '{s},{s}' as a TextBlock", .{ segmentName(sect), sectionName(sect) }); - - // Section alignment will be the assumed alignment per symbol. - const alignment = sect.@"align"; - - const first_block: *TextBlock = blk: { - if (args.subsections_via_symbols) { - return error.TODO; - } else { const block = try self.allocator.create(TextBlock); errdefer self.allocator.destroy(block); block.* = .{ - .ref = .{ - .section = undefined, // Will be populated when we allocated final sections. 
- }, - .code = args.code, - .relocs = null, + .local_sym_index = local_sym_index, + .aliases = std.ArrayList(u32).init(self.allocator), + .references = std.ArrayList(u32).init(self.allocator), + .code = code, + .relocs = std.ArrayList(*Relocation).init(self.allocator), .size = sect.size, .alignment = alignment, - .segment_id = args.segment_id, - .section_id = args.section_id, + .segment_id = match.seg, + .section_id = match.sect, }; // TODO parse relocs - if (args.relocs) |relocs| { - block.relocs = try reloc.parse(self.allocator, self.arch.?, args.code, relocs, symbols); + + if (last_block) |last| { + last.next = block; + block.prev = last; } - - break :blk block; + last_block = block; } - }; + } - return first_block; + return last_block; } pub fn parseInitializers(self: *Object) !void { diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index 0a35101eab..37a8be946e 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -40,10 +40,13 @@ pub const Regular = struct { linkage: Linkage, /// Symbol address. - address: u64, + address: u64 = 0, - /// Section ID where the symbol resides. - section: u8, + /// Segment ID + segment_id: u16 = 0, + + /// Section ID + section: u16 = 0, /// Whether the symbol is a weak ref. weak_ref: bool = false, @@ -52,6 +55,8 @@ pub const Regular = struct { /// null means self-reference. file: ?*Object = null, + local_sym_index: u32 = 0, + pub const Linkage = enum { translation_unit, linkage_unit, diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 9d7eea042a..07503d13ab 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -104,6 +104,7 @@ objc_classrefs_section_index: ?u16 = null, objc_data_section_index: ?u16 = null, locals: std.ArrayListUnmanaged(*Symbol) = .{}, +imports: std.ArrayListUnmanaged(*Symbol) = .{}, globals: std.StringArrayHashMapUnmanaged(*Symbol) = .{}, /// Offset into __DATA,__common section. 
@@ -118,6 +119,8 @@ got_entries: std.ArrayListUnmanaged(*Symbol) = .{}, stub_helper_stubs_start_off: ?u64 = null, +last_text_block: ?*TextBlock = null, + pub const Output = struct { tag: enum { exe, dylib }, path: []const u8, @@ -135,12 +138,11 @@ const TlvOffset = struct { }; pub const TextBlock = struct { - allocator: *Allocator, local_sym_index: u32, aliases: std.ArrayList(u32), references: std.ArrayList(u32), code: []u8, - relocs: ?std.ArrayList(*Relocation) = null, + relocs: std.ArrayList(*Relocation), size: u64, alignment: u32, segment_id: u16, @@ -151,14 +153,33 @@ pub const TextBlock = struct { pub fn deinit(block: *TextBlock, allocator: *Allocator) void { block.aliases.deinit(); block.references.deinit(); - if (block.relocs) |relocs| { - for (relocs.items) |reloc| { - allocator.destroy(reloc); - } - relocs.deinit(); + for (block.relocs.items) |reloc| { + allocator.destroy(reloc); } + block.relocs.deinit(); allocator.free(code); } + + fn print(self: *const TextBlock, zld: *Zld) void { + if (self.prev) |prev| { + prev.print(zld); + } + + log.warn("TextBlock", .{}); + log.warn(" | {}: '{s}'", .{ self.local_sym_index, zld.locals.items[self.local_sym_index].name }); + log.warn(" | Aliases:", .{}); + for (self.aliases.items) |index| { + log.warn(" | {}: '{s}'", .{ index, zld.locals.items[index].name }); + } + log.warn(" | References:", .{}); + for (self.references.items) |index| { + log.warn(" | {}: '{s}'", .{ index, zld.locals.items[index].name }); + } + log.warn(" | size = {}", .{self.size}); + log.warn(" | align = {}", .{self.alignment}); + log.warn(" | segment_id = {}", .{self.segment_id}); + log.warn(" | section_id = {}", .{self.section_id}); + } }; /// Default path to dyld @@ -200,11 +221,13 @@ pub fn deinit(self: *Zld) void { } self.dylibs.deinit(self.allocator); - for (self.globals.values()) |sym| { + self.globals.deinit(self.allocator); + + for (self.imports.items) |sym| { sym.deinit(self.allocator); self.allocator.destroy(sym); } - 
self.globals.deinit(self.allocator); + self.imports.deinit(self.allocator); for (self.locals.items) |sym| { sym.deinit(self.allocator); @@ -252,20 +275,21 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg try self.parseLibs(args.libs, args.syslibroot); try self.resolveSymbols(); try self.parseTextBlocks(); - try self.resolveStubsAndGotEntries(); - try self.updateMetadata(); - try self.sortSections(); - try self.addRpaths(args.rpaths); - try self.addDataInCodeLC(); - try self.addCodeSignatureLC(); - try self.allocateTextSegment(); - try self.allocateDataConstSegment(); - try self.allocateDataSegment(); - self.allocateLinkeditSegment(); - try self.allocateSymbols(); - try self.allocateTentativeSymbols(); - try self.allocateProxyBindAddresses(); - try self.flush(); + return error.TODO; + // try self.resolveStubsAndGotEntries(); + // try self.updateMetadata(); + // try self.sortSections(); + // try self.addRpaths(args.rpaths); + // try self.addDataInCodeLC(); + // try self.addCodeSignatureLC(); + // try self.allocateTextSegment(); + // try self.allocateDataConstSegment(); + // try self.allocateDataSegment(); + // self.allocateLinkeditSegment(); + // try self.allocateSymbols(); + // try self.allocateTentativeSymbols(); + // try self.allocateProxyBindAddresses(); + // try self.flush(); } fn parseInputFiles(self: *Zld, files: []const []const u8, syslibroot: ?[]const u8) !void { @@ -1509,13 +1533,11 @@ fn resolveSymbolsInObject(self: *Zld, object: *Object) !void { symbol.payload = .{ .regular = .{ .linkage = .translation_unit, - .address = sym.n_value, - .section = sym.n_sect - 1, .weak_ref = Symbol.isWeakRef(sym), .file = object, + .local_sym_index = @intCast(u32, self.locals.items.len), }, }; - const index = @intCast(u32, self.locals.items.len); try self.locals.append(self.allocator, symbol); try object.symbols.append(self.allocator, symbol); continue; @@ -1550,8 +1572,6 @@ fn resolveSymbolsInObject(self: *Zld, object: *Object) !void { 
symbol.payload = .{ .regular = .{ .linkage = linkage, - .address = sym.n_value, - .section = sym.n_sect - 1, .weak_ref = Symbol.isWeakRef(sym), .file = object, }, @@ -1581,6 +1601,11 @@ fn resolveSymbolsInObject(self: *Zld, object: *Object) !void { } fn resolveSymbols(self: *Zld) !void { + // TODO mimicking insertion of null symbol from incremental linker. + // This will need to moved. + const null_sym = try Symbol.new(self.allocator, ""); + try self.locals.append(self.allocator, null_sym); + // First pass, resolve symbols in provided objects. for (self.objects.items) |object| { try self.resolveSymbolsInObject(object); @@ -1609,11 +1634,18 @@ fn resolveSymbols(self: *Zld) !void { } } + // Put any globally defined regular symbol as local. // Mark if we need to allocate zerofill section for tentative definitions for (self.globals.values()) |symbol| { - if (symbol.payload == .tentative) { - self.has_tentative_defs = true; - break; + switch (symbol.payload) { + .regular => |*reg| { + reg.local_sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(self.allocator, symbol); + }, + .tentative => { + self.has_tentative_defs = true; + }, + else => {}, } } @@ -1639,6 +1671,7 @@ fn resolveSymbols(self: *Zld) !void { .file = dylib, }, }; + try self.imports.append(self.allocator, symbol); continue :loop; } } @@ -1667,6 +1700,7 @@ fn resolveSymbols(self: *Zld) !void { symbol.payload = .{ .proxy = .{}, }; + try self.imports.append(self.allocator, symbol); } } @@ -1686,7 +1720,17 @@ fn resolveSymbols(self: *Zld) !void { fn parseTextBlocks(self: *Zld) !void { for (self.objects.items) |object| { - _ = try object.parseTextBlocks(self); + if (try object.parseTextBlocks(self)) |block| { + if (self.last_text_block) |last| { + last.next = block; + block.prev = last; + } + self.last_text_block = block; + } + } + + if (self.last_text_block) |block| { + block.print(self); } } From 51e334af447b126862238f0743342755d719f897 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: 
Mon, 5 Jul 2021 20:20:07 +0200 Subject: [PATCH 16/81] zld: refactor section into TextBlocks conversion --- src/link/MachO/Object.zig | 197 +++++++++++++++++++++++--------------- src/link/MachO/Zld.zig | 51 ++++++---- 2 files changed, 151 insertions(+), 97 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 951e91a408..e52e0276be 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -276,11 +276,11 @@ const NlistWithIndex = struct { nlist: macho.nlist_64, index: u32, - pub fn cmp(_: void, lhs: @This(), rhs: @This()) bool { + fn lessThan(_: void, lhs: @This(), rhs: @This()) bool { return lhs.nlist.n_value < rhs.nlist.n_value; } - fn filterNlistsInSection(symbols: []@This(), sect_id: u8) []@This() { + fn filterInSection(symbols: []@This(), sect_id: u8) []@This() { var start: usize = 0; var end: usize = symbols.len; @@ -327,19 +327,111 @@ fn filterRelocs(relocs: []macho.relocation_info, start: u64, end: u64) []macho.r return relocs[start_id..end_id]; } -const SeniorityContext = struct { +const TextBlockParser = struct { + allocator: *Allocator, + section: macho.section_64, + code: []u8, + object: *Object, zld: *Zld, -}; -fn cmpSymBySeniority(context: SeniorityContext, lhs: u32, rhs: u32) bool { - const lreg = context.zld.locals.items[lhs].payload.regular; - const rreg = context.zld.locals.items[rhs].payload.regular; + nlists: []NlistWithIndex, + index: u32 = 0, - return switch (rreg.linkage) { - .global => true, - .linkage_unit => lreg.linkage == .translation_unit, - else => false, + fn peek(self: *TextBlockParser) ?NlistWithIndex { + return if (self.index + 1 < self.nlists.len) self.nlists[self.index + 1] else null; + } + + const SeniorityContext = struct { + zld: *Zld, }; -} + + fn lessThanBySeniority(context: SeniorityContext, lhs: NlistWithIndex, rhs: NlistWithIndex) bool { + const lreg = context.zld.locals.items[lhs.index].payload.regular; + const rreg = context.zld.locals.items[rhs.index].payload.regular; + + 
return switch (rreg.linkage) { + .global => true, + .linkage_unit => lreg.linkage == .translation_unit, + else => false, + }; + } + + pub fn next(self: *TextBlockParser) !?*TextBlock { + if (self.index == self.nlists.len) return null; + + var aliases = std.ArrayList(NlistWithIndex).init(self.allocator); + defer aliases.deinit(); + + const next_nlist: ?NlistWithIndex = blk: while (true) { + const curr_nlist = self.nlists[self.index]; + try aliases.append(curr_nlist); + + if (self.peek()) |next_nlist| { + if (curr_nlist.nlist.n_value == next_nlist.nlist.n_value) { + self.index += 1; + continue; + } + break :blk next_nlist; + } + break :blk null; + } else null; + + for (aliases.items) |*nlist_with_index| { + const sym = self.object.symbols.items[nlist_with_index.index]; + if (sym.payload != .regular) { + log.err("expected a regular symbol, found {s}", .{sym.payload}); + log.err(" when remapping {s}", .{sym.name}); + return error.SymbolIsNotRegular; + } + assert(sym.payload.regular.local_sym_index != 0); // This means the symbol has not been properly resolved. + nlist_with_index.index = sym.payload.regular.local_sym_index; + } + + if (aliases.items.len > 1) { + // Bubble-up senior symbol as the main link to the text block. 
+ std.sort.sort( + NlistWithIndex, + aliases.items, + SeniorityContext{ .zld = self.zld }, + @This().lessThanBySeniority, + ); + } + + const senior_nlist = aliases.pop(); + const senior_sym = self.zld.locals.items[senior_nlist.index]; + assert(senior_sym.payload == .regular); + + const start_addr = senior_nlist.nlist.n_value - self.section.addr; + const end_addr = if (next_nlist) |n| n.nlist.n_value - self.section.addr else self.section.size; + + const code = self.code[start_addr..end_addr]; + const size = code.len; + + const alias_only_indices = if (aliases.items.len > 0) blk: { + var out = std.ArrayList(u32).init(self.allocator); + try out.ensureTotalCapacity(aliases.items.len); + for (aliases.items) |alias| { + out.appendAssumeCapacity(alias.index); + } + break :blk out.toOwnedSlice(); + } else null; + + const block = try self.allocator.create(TextBlock); + errdefer self.allocator.destroy(block); + + block.* = .{ + .local_sym_index = senior_nlist.index, + .aliases = alias_only_indices, + .code = code, + .size = size, + .alignment = self.section.@"align", + }; + + self.index += 1; + block.print_this(self.zld); + + return block; + } +}; pub fn parseTextBlocks(self: *Object, zld: *Zld) !?*TextBlock { const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; @@ -361,7 +453,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !?*TextBlock { }); } - std.sort.sort(NlistWithIndex, sorted_nlists.items, {}, NlistWithIndex.cmp); + std.sort.sort(NlistWithIndex, sorted_nlists.items, {}, NlistWithIndex.lessThan); var last_block: ?*TextBlock = null; @@ -385,53 +477,26 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !?*TextBlock { // Is there any padding between symbols within the section? const is_padded = self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; - // Section alignment will be the assumed alignment per symbol. 
- const alignment = sect.@"align"; - next: { if (is_padded) blocks: { - const filtered_nlists = NlistWithIndex.filterNlistsInSection( + const filtered_nlists = NlistWithIndex.filterInSection( sorted_nlists.items, @intCast(u8, sect_id + 1), ); if (filtered_nlists.len == 0) break :blocks; - var nlist_indices = std.ArrayList(u32).init(self.allocator); - defer nlist_indices.deinit(); + var parser = TextBlockParser{ + .allocator = self.allocator, + .section = sect, + .code = code, + .object = self, + .zld = zld, + .nlists = filtered_nlists, + }; - var i: u32 = 0; - while (i < filtered_nlists.len) : (i += 1) { - const curr = filtered_nlists[i]; - try nlist_indices.append(curr.index); - - const next: ?NlistWithIndex = if (i + 1 < filtered_nlists.len) - filtered_nlists[i + 1] - else - null; - - if (next) |n| { - if (curr.nlist.n_value == n.nlist.n_value) { - continue; - } - } - - // Bubble-up senior symbol as the main link to the text block. - for (nlist_indices.items) |*index| { - const sym = self.symbols.items[index.*]; - if (sym.payload != .regular) { - log.err("expected a regular symbol, found {s}", .{sym.payload}); - log.err(" when remapping {s}", .{sym.name}); - return error.SymbolIsNotRegular; - } - assert(sym.payload.regular.local_sym_index != 0); // This means the symbol has not been properly resolved. - index.* = sym.payload.regular.local_sym_index; - } - - std.sort.sort(u32, nlist_indices.items, SeniorityContext{ .zld = zld }, cmpSymBySeniority); - - const local_sym_index = nlist_indices.pop(); - const sym = zld.locals.items[local_sym_index]; + while (try parser.next()) |block| { + const sym = zld.locals.items[block.local_sym_index]; if (sym.payload.regular.file) |file| { if (file != self) { log.warn("deduping definition of {s} in {s}", .{ sym.name, self.name.? 
}); @@ -439,27 +504,8 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !?*TextBlock { } } - const start_addr = curr.nlist.n_value - sect.addr; - const end_addr = if (next) |n| n.nlist.n_value - sect.addr else sect.size; - - const tb_code = code[start_addr..end_addr]; - const size = tb_code.len; - - const block = try self.allocator.create(TextBlock); - errdefer self.allocator.destroy(block); - - block.* = .{ - .local_sym_index = local_sym_index, - .aliases = std.ArrayList(u32).init(self.allocator), - .references = std.ArrayList(u32).init(self.allocator), - .code = tb_code, - .relocs = std.ArrayList(*Relocation).init(self.allocator), - .size = size, - .alignment = alignment, - .segment_id = match.seg, - .section_id = match.sect, - }; - try block.aliases.appendSlice(nlist_indices.items); + block.segment_id = match.seg; + block.section_id = match.sect; // TODO parse relocs @@ -468,8 +514,6 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !?*TextBlock { block.prev = last; } last_block = block; - - nlist_indices.clearRetainingCapacity(); } break :next; @@ -498,12 +542,9 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !?*TextBlock { block.* = .{ .local_sym_index = local_sym_index, - .aliases = std.ArrayList(u32).init(self.allocator), - .references = std.ArrayList(u32).init(self.allocator), .code = code, - .relocs = std.ArrayList(*Relocation).init(self.allocator), .size = sect.size, - .alignment = alignment, + .alignment = sect.@"align", .segment_id = match.seg, .section_id = match.sect, }; diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 07503d13ab..8e712e7a8b 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -139,47 +139,60 @@ const TlvOffset = struct { pub const TextBlock = struct { local_sym_index: u32, - aliases: std.ArrayList(u32), - references: std.ArrayList(u32), + aliases: ?[]u32 = null, + references: ?[]u32 = null, code: []u8, - relocs: std.ArrayList(*Relocation), + relocs: ?[]*Relocation = null, size: u64, alignment: 
u32, - segment_id: u16, - section_id: u16, + segment_id: u16 = 0, + section_id: u16 = 0, next: ?*TextBlock = null, prev: ?*TextBlock = null, pub fn deinit(block: *TextBlock, allocator: *Allocator) void { - block.aliases.deinit(); - block.references.deinit(); + if (block.aliases) |aliases| { + allocator.free(aliases); + } + if (block.references) |references| { + allocator.free(references); + } for (block.relocs.items) |reloc| { allocator.destroy(reloc); } - block.relocs.deinit(); + if (block.relocs) |relocs| { + allocator.free(relocs); + } allocator.free(code); } - fn print(self: *const TextBlock, zld: *Zld) void { - if (self.prev) |prev| { - prev.print(zld); - } - + pub fn print_this(self: *const TextBlock, zld: *Zld) void { log.warn("TextBlock", .{}); log.warn(" | {}: '{s}'", .{ self.local_sym_index, zld.locals.items[self.local_sym_index].name }); - log.warn(" | Aliases:", .{}); - for (self.aliases.items) |index| { - log.warn(" | {}: '{s}'", .{ index, zld.locals.items[index].name }); + if (self.aliases) |aliases| { + log.warn(" | Aliases:", .{}); + for (aliases) |index| { + log.warn(" | {}: '{s}'", .{ index, zld.locals.items[index].name }); + } } - log.warn(" | References:", .{}); - for (self.references.items) |index| { - log.warn(" | {}: '{s}'", .{ index, zld.locals.items[index].name }); + if (self.references) |references| { + log.warn(" | References:", .{}); + for (references) |index| { + log.warn(" | {}: '{s}'", .{ index, zld.locals.items[index].name }); + } } log.warn(" | size = {}", .{self.size}); log.warn(" | align = {}", .{self.alignment}); log.warn(" | segment_id = {}", .{self.segment_id}); log.warn(" | section_id = {}", .{self.section_id}); } + + pub fn print(self: *const TextBlock, zld: *Zld) void { + if (self.prev) |prev| { + prev.print(zld); + } + self.print_this(zld); + } }; /// Default path to dyld From 54888c6f4699b07eadd21b744d797006fb96a284 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 6 Jul 2021 00:00:11 +0200 Subject: [PATCH 17/81] zld: 
create TextBlocks for tentative definitions and fix the links in the `TextBlock`s linked list! --- src/link/MachO/Object.zig | 59 +++++++++------- src/link/MachO/Symbol.zig | 20 +++++- src/link/MachO/Zld.zig | 144 +++++++++++++------------------------- 3 files changed, 100 insertions(+), 123 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index e52e0276be..1b3df72412 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -9,6 +9,7 @@ const log = std.log.scoped(.object); const macho = std.macho; const mem = std.mem; const reloc = @import("reloc.zig"); +const sort = std.sort; const Allocator = mem.Allocator; const Arch = std.Target.Cpu.Arch; @@ -345,13 +346,15 @@ const TextBlockParser = struct { }; fn lessThanBySeniority(context: SeniorityContext, lhs: NlistWithIndex, rhs: NlistWithIndex) bool { - const lreg = context.zld.locals.items[lhs.index].payload.regular; - const rreg = context.zld.locals.items[rhs.index].payload.regular; + const lsym = context.zld.locals.items[lhs.index]; + const rsym = context.zld.locals.items[rhs.index]; + const lreg = lsym.payload.regular; + const rreg = rsym.payload.regular; return switch (rreg.linkage) { .global => true, .linkage_unit => lreg.linkage == .translation_unit, - else => false, + else => lsym.isTemp(), }; } @@ -388,7 +391,7 @@ const TextBlockParser = struct { if (aliases.items.len > 1) { // Bubble-up senior symbol as the main link to the text block. 
- std.sort.sort( + sort.sort( NlistWithIndex, aliases.items, SeniorityContext{ .zld = self.zld }, @@ -427,13 +430,12 @@ const TextBlockParser = struct { }; self.index += 1; - block.print_this(self.zld); return block; } }; -pub fn parseTextBlocks(self: *Object, zld: *Zld) !?*TextBlock { +pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; log.warn("analysing {s}", .{self.name.?}); @@ -453,9 +455,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !?*TextBlock { }); } - std.sort.sort(NlistWithIndex, sorted_nlists.items, {}, NlistWithIndex.lessThan); - - var last_block: ?*TextBlock = null; + sort.sort(NlistWithIndex, sorted_nlists.items, {}, NlistWithIndex.lessThan); for (seg.sections.items) |sect, sect_id| { log.warn("putting section '{s},{s}' as a TextBlock", .{ @@ -496,24 +496,35 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !?*TextBlock { }; while (try parser.next()) |block| { - const sym = zld.locals.items[block.local_sym_index]; - if (sym.payload.regular.file) |file| { - if (file != self) { - log.warn("deduping definition of {s} in {s}", .{ sym.name, self.name.? }); - continue; + { + const sym = zld.locals.items[block.local_sym_index]; + const reg = &sym.payload.regular; + if (reg.file) |file| { + if (file != self) { + log.warn("deduping definition of {s} in {s}", .{ sym.name, self.name.? 
}); + continue; + } + } + reg.segment_id = match.seg; + reg.section_id = match.sect; + } + + if (block.aliases) |aliases| { + for (aliases) |alias| { + const sym = zld.locals.items[alias]; + const reg = &sym.payload.regular; + reg.segment_id = match.seg; + reg.section_id = match.sect; } } - block.segment_id = match.seg; - block.section_id = match.sect; - // TODO parse relocs - if (last_block) |last| { + if (zld.last_text_block) |last| { last.next = block; block.prev = last; } - last_block = block; + zld.last_text_block = block; } break :next; @@ -531,6 +542,8 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !?*TextBlock { symbol.payload = .{ .regular = .{ .linkage = .translation_unit, + .segment_id = match.seg, + .section_id = match.sect, .file = self, }, }; @@ -545,21 +558,17 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !?*TextBlock { .code = code, .size = sect.size, .alignment = sect.@"align", - .segment_id = match.seg, - .section_id = match.sect, }; // TODO parse relocs - if (last_block) |last| { + if (zld.last_text_block) |last| { last.next = block; block.prev = last; } - last_block = block; + zld.last_text_block = block; } } - - return last_block; } pub fn parseInitializers(self: *Object) !void { diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index 37a8be946e..16cd0c9ecc 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -46,7 +46,7 @@ pub const Regular = struct { segment_id: u16 = 0, /// Section ID - section: u16 = 0, + section_id: u16 = 0, /// Whether the symbol is a weak ref. 
weak_ref: bool = false, @@ -69,7 +69,8 @@ pub const Regular = struct { try std.fmt.format(writer, "Regular {{ ", .{}); try std.fmt.format(writer, ".linkage = {s}, ", .{self.linkage}); try std.fmt.format(writer, ".address = 0x{x}, ", .{self.address}); - try std.fmt.format(writer, ".section = {}, ", .{self.section}); + try std.fmt.format(writer, ".segment_id = {}, ", .{self.segment_id}); + try std.fmt.format(writer, ".section_id = {}, ", .{self.section_id}); if (self.weak_ref) { try std.fmt.format(writer, ".weak_ref, ", .{}); } @@ -170,6 +171,21 @@ pub fn new(allocator: *Allocator, name: []const u8) !*Symbol { return new_sym; } +pub fn format(self: Symbol, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "Symbol {{", .{}); + try std.fmt.format(writer, ".name = {s}, ", .{self.name}); + if (self.got_index) |got_index| { + try std.fmt.format(writer, ".got_index = {}, ", .{got_index}); + } + if (self.stubs_index) |stubs_index| { + try std.fmt.format(writer, ".stubs_index = {}, ", .{stubs_index}); + } + try std.fmt.format(writer, "{}, ", .{self.payload}); + try std.fmt.format(writer, "}}", .{}); +} + pub fn isTemp(symbol: Symbol) bool { switch (symbol.payload) { .regular => |regular| { diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 8e712e7a8b..bfd4e4b2f4 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -107,11 +107,6 @@ locals: std.ArrayListUnmanaged(*Symbol) = .{}, imports: std.ArrayListUnmanaged(*Symbol) = .{}, globals: std.StringArrayHashMapUnmanaged(*Symbol) = .{}, -/// Offset into __DATA,__common section. -/// Set if the linker found tentative definitions in any of the objects. 
-tentative_defs_offset: u64 = 0, -has_tentative_defs: bool = false, - threadlocal_offsets: std.ArrayListUnmanaged(TlvOffset) = .{}, // TODO merge with Symbol abstraction local_rebases: std.ArrayListUnmanaged(Pointer) = .{}, stubs: std.ArrayListUnmanaged(*Symbol) = .{}, @@ -145,8 +140,6 @@ pub const TextBlock = struct { relocs: ?[]*Relocation = null, size: u64, alignment: u32, - segment_id: u16 = 0, - section_id: u16 = 0, next: ?*TextBlock = null, prev: ?*TextBlock = null, @@ -168,23 +161,21 @@ pub const TextBlock = struct { pub fn print_this(self: *const TextBlock, zld: *Zld) void { log.warn("TextBlock", .{}); - log.warn(" | {}: '{s}'", .{ self.local_sym_index, zld.locals.items[self.local_sym_index].name }); + log.warn(" | {}: {}", .{ self.local_sym_index, zld.locals.items[self.local_sym_index] }); if (self.aliases) |aliases| { log.warn(" | Aliases:", .{}); for (aliases) |index| { - log.warn(" | {}: '{s}'", .{ index, zld.locals.items[index].name }); + log.warn(" | {}: {}", .{ index, zld.locals.items[index] }); } } if (self.references) |references| { log.warn(" | References:", .{}); for (references) |index| { - log.warn(" | {}: '{s}'", .{ index, zld.locals.items[index].name }); + log.warn(" | {}: {}", .{ index, zld.locals.items[index] }); } } log.warn(" | size = {}", .{self.size}); log.warn(" | align = {}", .{self.alignment}); - log.warn(" | segment_id = {}", .{self.segment_id}); - log.warn(" | section_id = {}", .{self.section_id}); } pub fn print(self: *const TextBlock, zld: *Zld) void { @@ -300,7 +291,6 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg // try self.allocateDataSegment(); // self.allocateLinkeditSegment(); // try self.allocateSymbols(); - // try self.allocateTentativeSymbols(); // try self.allocateProxyBindAddresses(); // try self.flush(); } @@ -415,37 +405,6 @@ fn updateMetadata(self: *Zld) !void { } } - // Ensure we have __DATA,__common section if we have tentative definitions. 
- // Update size and alignment of __DATA,__common section. - if (self.has_tentative_defs) { - const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const common_section_index = self.common_section_index orelse ind: { - self.common_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__common", .{ - .flags = macho.S_ZEROFILL, - }); - break :ind self.common_section_index.?; - }; - const common_sect = &data_seg.sections.items[common_section_index]; - - var max_align: u16 = 0; - var added_size: u64 = 0; - for (self.globals.values()) |sym| { - if (sym.payload != .tentative) continue; - max_align = math.max(max_align, sym.payload.tentative.alignment); - added_size += sym.payload.tentative.size; - } - - common_sect.@"align" = math.max(common_sect.@"align", max_align); - - const alignment = try math.powi(u32, 2, common_sect.@"align"); - const offset = mem.alignForwardGeneric(u64, common_sect.size, alignment); - const size = mem.alignForwardGeneric(u64, added_size, alignment); - - common_sect.size = offset + size; - self.tentative_defs_offset = offset; - } - tlv_align: { const has_tlv = self.tlv_section_index != null or @@ -1182,48 +1141,6 @@ fn allocateSymbols(self: *Zld) !void { } } -fn allocateTentativeSymbols(self: *Zld) !void { - if (!self.has_tentative_defs) return; - - const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const common_sect = &data_seg.sections.items[self.common_section_index.?]; - - const alignment = try math.powi(u32, 2, common_sect.@"align"); - var base_address: u64 = common_sect.addr + self.tentative_defs_offset; - - log.debug("base address for tentative definitions 0x{x}", .{base_address}); - - // TODO there might be a more generic way of doing this. - var section: u8 = 0; - for (self.load_commands.items) |cmd, cmd_id| { - if (cmd != .Segment) break; - if (cmd_id == self.data_segment_cmd_index.?) 
{ - section += @intCast(u8, self.common_section_index.?) + 1; - break; - } - section += @intCast(u8, cmd.Segment.sections.items.len); - } - - // Convert tentative definitions into regular symbols. - for (self.globals.values()) |sym| { - if (sym.payload != .tentative) continue; - - const address = mem.alignForwardGeneric(u64, base_address + sym.payload.tentative.size, alignment); - - log.debug("tentative definition '{s}' allocated from 0x{x} to 0x{x}", .{ sym.name, base_address, address }); - - sym.payload = .{ - .regular = .{ - .linkage = .global, - .address = base_address, - .section = section, - .weak_ref = false, - }, - }; - base_address = address; - } -} - fn allocateProxyBindAddresses(self: *Zld) !void { for (self.objects.items) |object| { for (object.sections.items) |sect| { @@ -1648,15 +1565,56 @@ fn resolveSymbols(self: *Zld) !void { } // Put any globally defined regular symbol as local. - // Mark if we need to allocate zerofill section for tentative definitions + // Convert any tentative definition into a regular symbol and allocate + // text blocks for each tentative defintion. 
for (self.globals.values()) |symbol| { switch (symbol.payload) { .regular => |*reg| { reg.local_sym_index = @intCast(u32, self.locals.items.len); try self.locals.append(self.allocator, symbol); }, - .tentative => { - self.has_tentative_defs = true; + .tentative => |tent| { + if (self.common_section_index == null) { + const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + self.common_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.allocator, "__common", .{ + .flags = macho.S_ZEROFILL, + }); + } + + const size = tent.size; + const code = try self.allocator.alloc(u8, size); + mem.set(u8, code, 0); + const alignment = tent.alignment; + const local_sym_index = @intCast(u32, self.locals.items.len); + + symbol.payload = .{ + .regular = .{ + .linkage = .global, + .segment_id = self.data_segment_cmd_index.?, + .section_id = self.common_section_index.?, + .local_sym_index = local_sym_index, + }, + }; + try self.locals.append(self.allocator, symbol); + + const block = try self.allocator.create(TextBlock); + errdefer self.allocator.destroy(block); + + block.* = .{ + .local_sym_index = local_sym_index, + .code = code, + .size = size, + .alignment = alignment, + }; + + // TODO I'm not 100% sure about this yet, but I believe we should keep a separate list of + // TextBlocks per segment. 
+ if (self.last_text_block) |last| { + last.next = block; + block.prev = last; + } + self.last_text_block = block; }, else => {}, } @@ -1733,13 +1691,7 @@ fn resolveSymbols(self: *Zld) !void { fn parseTextBlocks(self: *Zld) !void { for (self.objects.items) |object| { - if (try object.parseTextBlocks(self)) |block| { - if (self.last_text_block) |last| { - last.next = block; - block.prev = last; - } - self.last_text_block = block; - } + try object.parseTextBlocks(self); } if (self.last_text_block) |block| { From 15b85df3dd8a754bc26159ea2202781b748a613e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 6 Jul 2021 16:31:47 +0200 Subject: [PATCH 18/81] zld: parse relocs per generated TextBlock --- src/link/MachO/Object.zig | 122 ++++++- src/link/MachO/Zld.zig | 80 +---- src/link/MachO/reloc.zig | 209 +++++------- src/link/MachO/reloc/aarch64.zig | 561 +++++++++++++++---------------- src/link/MachO/reloc/x86_64.zig | 312 +++++++++-------- 5 files changed, 662 insertions(+), 622 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 1b3df72412..06d5a260cd 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -53,6 +53,7 @@ initializers: std.ArrayListUnmanaged(u32) = .{}, data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, symbols: std.ArrayListUnmanaged(*Symbol) = .{}, +sections_as_symbols: std.AutoHashMapUnmanaged(u8, *Symbol) = .{}, const DebugInfo = struct { inner: dwarf.DwarfInfo, @@ -160,6 +161,7 @@ pub fn deinit(self: *Object) void { self.symtab.deinit(self.allocator); self.strtab.deinit(self.allocator); self.symbols.deinit(self.allocator); + self.sections_as_symbols.deinit(self.allocator); if (self.name) |n| { self.allocator.free(n); @@ -312,7 +314,7 @@ fn filterRelocs(relocs: []macho.relocation_info, start: u64, end: u64) []macho.r while (true) { var change = false; - if (relocs[start_id].r_address > end) { + if (relocs[start_id].r_address >= end) { start_id += 1; change = true; } @@ 
-332,6 +334,7 @@ const TextBlockParser = struct { allocator: *Allocator, section: macho.section_64, code: []u8, + relocs: []macho.relocation_info, object: *Object, zld: *Zld, nlists: []NlistWithIndex, @@ -405,6 +408,7 @@ const TextBlockParser = struct { const start_addr = senior_nlist.nlist.n_value - self.section.addr; const end_addr = if (next_nlist) |n| n.nlist.n_value - self.section.addr else self.section.size; + log.warn("{} - {}", .{ start_addr, end_addr }); const code = self.code[start_addr..end_addr]; const size = code.len; @@ -424,11 +428,18 @@ const TextBlockParser = struct { block.* = .{ .local_sym_index = senior_nlist.index, .aliases = alias_only_indices, - .code = code, + .references = std.AutoArrayHashMap(u32, void).init(self.allocator), + .code = try self.allocator.dupe(u8, code), + .relocs = std.ArrayList(*Relocation).init(self.allocator), .size = size, .alignment = self.section.@"align", }; + const relocs = filterRelocs(self.relocs, start_addr, end_addr); + if (relocs.len > 0) { + try self.object.parseRelocs(self.zld, relocs, block, start_addr); + } + self.index += 1; return block; @@ -457,7 +468,8 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { sort.sort(NlistWithIndex, sorted_nlists.items, {}, NlistWithIndex.lessThan); - for (seg.sections.items) |sect, sect_id| { + for (seg.sections.items) |sect, id| { + const sect_id = @intCast(u8, id); log.warn("putting section '{s},{s}' as a TextBlock", .{ segmentName(sect), sectionName(sect), @@ -474,6 +486,12 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { defer self.allocator.free(code); _ = try self.file.?.preadAll(code, sect.offset); + // Read section's list of relocations + var raw_relocs = try self.allocator.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info)); + defer self.allocator.free(raw_relocs); + _ = try self.file.?.preadAll(raw_relocs, sect.reloff); + const relocs = mem.bytesAsSlice(macho.relocation_info, raw_relocs); + // Is there any padding between symbols within the 
section? const is_padded = self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; @@ -481,7 +499,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { if (is_padded) blocks: { const filtered_nlists = NlistWithIndex.filterInSection( sorted_nlists.items, - @intCast(u8, sect_id + 1), + sect_id + 1, ); if (filtered_nlists.len == 0) break :blocks; @@ -490,6 +508,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { .allocator = self.allocator, .section = sect, .code = code, + .relocs = relocs, .object = self, .zld = zld, .nlists = filtered_nlists, @@ -518,8 +537,6 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { } } - // TODO parse relocs - if (zld.last_text_block) |last| { last.next = block; block.prev = last; @@ -531,14 +548,19 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { } // Since there is no symbol to refer to this block, we create - // a temp one. - const name = try std.fmt.allocPrint(self.allocator, "l_{s}_{s}_{s}", .{ - self.name.?, - segmentName(sect), - sectionName(sect), - }); - defer self.allocator.free(name); - const symbol = try Symbol.new(self.allocator, name); + // a temp one, unless we already did that when working out the relocations + // of other text blocks. 
+ const symbol = self.sections_as_symbols.get(sect_id) orelse symbol: { + const name = try std.fmt.allocPrint(self.allocator, "l_{s}_{s}_{s}", .{ + self.name.?, + segmentName(sect), + sectionName(sect), + }); + defer self.allocator.free(name); + const symbol = try Symbol.new(self.allocator, name); + try self.sections_as_symbols.putNoClobber(self.allocator, sect_id, symbol); + break :symbol symbol; + }; symbol.payload = .{ .regular = .{ .linkage = .translation_unit, @@ -555,12 +577,16 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { block.* = .{ .local_sym_index = local_sym_index, - .code = code, + .references = std.AutoArrayHashMap(u32, void).init(self.allocator), + .code = try self.allocator.dupe(u8, code), + .relocs = std.ArrayList(*Relocation).init(self.allocator), .size = sect.size, .alignment = sect.@"align", }; - // TODO parse relocs + if (relocs.len > 0) { + try self.parseRelocs(zld, relocs, block, 0); + } if (zld.last_text_block) |last| { last.next = block; @@ -571,6 +597,70 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { } } +fn parseRelocs( + self: *Object, + zld: *Zld, + relocs: []const macho.relocation_info, + block: *TextBlock, + base_addr: u64, +) !void { + var it = reloc.RelocIterator{ + .buffer = relocs, + }; + + switch (self.arch.?) 
{ + .aarch64 => { + var parser = reloc.aarch64.Parser{ + .object = self, + .zld = zld, + .it = &it, + .block = block, + .base_addr = base_addr, + }; + try parser.parse(); + }, + .x86_64 => { + var parser = reloc.x86_64.Parser{ + .object = self, + .zld = zld, + .it = &it, + .block = block, + .base_addr = base_addr, + }; + try parser.parse(); + }, + else => unreachable, + } +} + +pub fn symbolFromReloc(self: *Object, rel: macho.relocation_info) !*Symbol { + const symbol = blk: { + if (rel.r_extern == 1) { + break :blk self.symbols.items[rel.r_symbolnum]; + } else { + const sect_id = @intCast(u8, rel.r_symbolnum - 1); + const symbol = self.sections_as_symbols.get(sect_id) orelse symbol: { + // We need a valid pointer to Symbol even if there is no symbol, so we create a + // dummy symbol upfront which will later be populated when created a TextBlock from + // the target section here. + const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; + const sect = seg.sections.items[sect_id]; + const name = try std.fmt.allocPrint(self.allocator, "l_{s}_{s}_{s}", .{ + self.name.?, + segmentName(sect), + sectionName(sect), + }); + defer self.allocator.free(name); + const symbol = try Symbol.new(self.allocator, name); + try self.sections_as_symbols.putNoClobber(self.allocator, sect_id, symbol); + break :symbol symbol; + }; + break :blk symbol; + } + }; + return symbol; +} + pub fn parseInitializers(self: *Object) !void { const index = self.mod_init_func_section_index orelse return; const section = self.sections.items[index]; diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index bfd4e4b2f4..7f7997bc17 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -135,9 +135,9 @@ const TlvOffset = struct { pub const TextBlock = struct { local_sym_index: u32, aliases: ?[]u32 = null, - references: ?[]u32 = null, + references: std.AutoArrayHashMap(u32, void), code: []u8, - relocs: ?[]*Relocation = null, + relocs: std.ArrayList(*Relocation), size: u64, 
alignment: u32, next: ?*TextBlock = null, @@ -147,15 +147,8 @@ pub const TextBlock = struct { if (block.aliases) |aliases| { allocator.free(aliases); } - if (block.references) |references| { - allocator.free(references); - } - for (block.relocs.items) |reloc| { - allocator.destroy(reloc); - } - if (block.relocs) |relocs| { - allocator.free(relocs); - } + block.relocs.deinit(); + block.references.deinit(); allocator.free(code); } @@ -168,12 +161,19 @@ pub const TextBlock = struct { log.warn(" | {}: {}", .{ index, zld.locals.items[index] }); } } - if (self.references) |references| { + if (self.references.count() > 0) { log.warn(" | References:", .{}); - for (references) |index| { + for (self.references.keys()) |index| { log.warn(" | {}: {}", .{ index, zld.locals.items[index] }); } } + log.warn(" | code.len = {}", .{self.code.len}); + if (self.relocs.items.len > 0) { + log.warn("Relocations:", .{}); + for (self.relocs.items) |rel| { + log.warn(" | {}", .{rel}); + } + } log.warn(" | size = {}", .{self.size}); log.warn(" | align = {}", .{self.alignment}); } @@ -280,7 +280,6 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg try self.resolveSymbols(); try self.parseTextBlocks(); return error.TODO; - // try self.resolveStubsAndGotEntries(); // try self.updateMetadata(); // try self.sortSections(); // try self.addRpaths(args.rpaths); @@ -1603,7 +1602,9 @@ fn resolveSymbols(self: *Zld) !void { block.* = .{ .local_sym_index = local_sym_index, + .references = std.AutoArrayHashMap(u32, void).init(self.allocator), .code = code, + .relocs = std.ArrayList(*Relocation).init(self.allocator), .size = size, .alignment = alignment, }; @@ -1624,6 +1625,9 @@ fn resolveSymbols(self: *Zld) !void { { // Put dyld_stub_binder as an undefined special symbol. 
const symbol = try Symbol.new(self.allocator, "dyld_stub_binder"); + const index = @intCast(u32, self.got_entries.items.len); + symbol.got_index = index; + try self.got_entries.append(self.allocator, symbol); try self.globals.putNoClobber(self.allocator, symbol.name, symbol); } @@ -1699,54 +1703,6 @@ fn parseTextBlocks(self: *Zld) !void { } } -fn resolveStubsAndGotEntries(self: *Zld) !void { - for (self.objects.items) |object| { - log.debug("resolving stubs and got entries from {s}", .{object.name}); - - for (object.sections.items) |sect| { - const relocs = sect.relocs orelse continue; - for (relocs) |rel| { - switch (rel.@"type") { - .unsigned => continue, - .got_page, .got_page_off, .got_load, .got, .pointer_to_got => { - const sym = object.symbols.items[rel.target.symbol]; - if (sym.got_index != null) continue; - - const index = @intCast(u32, self.got_entries.items.len); - sym.got_index = index; - try self.got_entries.append(self.allocator, sym); - - log.debug(" | found GOT entry {s}: {*}", .{ sym.name, sym }); - }, - else => { - if (rel.target != .symbol) continue; - - const sym = object.symbols.items[rel.target.symbol]; - assert(sym.payload != .undef); - - if (sym.stubs_index != null) continue; - if (sym.payload != .proxy) continue; - - const index = @intCast(u32, self.stubs.items.len); - sym.stubs_index = index; - try self.stubs.append(self.allocator, sym); - - log.debug(" | found stub {s}: {*}", .{ sym.name, sym }); - }, - } - } - } - } - - // Finally, put dyld_stub_binder as the final GOT entry - const sym = self.globals.get("dyld_stub_binder") orelse unreachable; - const index = @intCast(u32, self.got_entries.items.len); - sym.got_index = index; - try self.got_entries.append(self.allocator, sym); - - log.debug(" | found GOT entry {s}: {*}", .{ sym.name, sym }); -} - fn resolveRelocsAndWriteSections(self: *Zld) !void { for (self.objects.items) |object| { log.debug("relocating object {s}", .{object.name}); diff --git a/src/link/MachO/reloc.zig 
b/src/link/MachO/reloc.zig index 02484923ff..e11e850aa6 100644 --- a/src/link/MachO/reloc.zig +++ b/src/link/MachO/reloc.zig @@ -6,16 +6,18 @@ const math = std.math; const mem = std.mem; const meta = std.meta; -const aarch64 = @import("reloc/aarch64.zig"); -const x86_64 = @import("reloc/x86_64.zig"); +pub const aarch64 = @import("reloc/aarch64.zig"); +pub const x86_64 = @import("reloc/x86_64.zig"); const Allocator = mem.Allocator; +const Symbol = @import("Symbol.zig"); +const TextBlock = @import("Zld.zig").TextBlock; pub const Relocation = struct { @"type": Type, - code: []u8, offset: u32, - target: Target, + block: *TextBlock, + target: *Symbol, pub fn cast(base: *Relocation, comptime T: type) ?*T { if (base.@"type" != T.base_type) @@ -24,43 +26,24 @@ pub const Relocation = struct { return @fieldParentPtr(T, "base", base); } - pub const ResolveArgs = struct { - source_addr: u64, - target_addr: u64, - subtractor: ?u64 = null, - source_source_sect_addr: ?u64 = null, - source_target_sect_addr: ?u64 = null, - }; - - pub fn resolve(base: *Relocation, args: ResolveArgs) !void { - log.debug("{s}", .{base.@"type"}); - log.debug(" | offset 0x{x}", .{base.offset}); - log.debug(" | source address 0x{x}", .{args.source_addr}); - log.debug(" | target address 0x{x}", .{args.target_addr}); - if (args.subtractor) |sub| - log.debug(" | subtractor address 0x{x}", .{sub}); - if (args.source_source_sect_addr) |addr| - log.debug(" | source source section address 0x{x}", .{addr}); - if (args.source_target_sect_addr) |addr| - log.debug(" | source target section address 0x{x}", .{addr}); - - return switch (base.@"type") { - .unsigned => @fieldParentPtr(Unsigned, "base", base).resolve(args), - .branch_aarch64 => @fieldParentPtr(aarch64.Branch, "base", base).resolve(args), - .page => @fieldParentPtr(aarch64.Page, "base", base).resolve(args), - .page_off => @fieldParentPtr(aarch64.PageOff, "base", base).resolve(args), - .got_page => @fieldParentPtr(aarch64.GotPage, "base", 
base).resolve(args), - .got_page_off => @fieldParentPtr(aarch64.GotPageOff, "base", base).resolve(args), - .pointer_to_got => @fieldParentPtr(aarch64.PointerToGot, "base", base).resolve(args), - .tlvp_page => @fieldParentPtr(aarch64.TlvpPage, "base", base).resolve(args), - .tlvp_page_off => @fieldParentPtr(aarch64.TlvpPageOff, "base", base).resolve(args), - .branch_x86_64 => @fieldParentPtr(x86_64.Branch, "base", base).resolve(args), - .signed => @fieldParentPtr(x86_64.Signed, "base", base).resolve(args), - .got_load => @fieldParentPtr(x86_64.GotLoad, "base", base).resolve(args), - .got => @fieldParentPtr(x86_64.Got, "base", base).resolve(args), - .tlv => @fieldParentPtr(x86_64.Tlv, "base", base).resolve(args), - }; - } + // pub fn resolve(base: *Relocation) !void { + // return switch (base.@"type") { + // .unsigned => @fieldParentPtr(Unsigned, "base", base).resolve(), + // .branch_aarch64 => @fieldParentPtr(aarch64.Branch, "base", base).resolve(), + // .page => @fieldParentPtr(aarch64.Page, "base", base).resolve(), + // .page_off => @fieldParentPtr(aarch64.PageOff, "base", base).resolve(), + // .got_page => @fieldParentPtr(aarch64.GotPage, "base", base).resolve(), + // .got_page_off => @fieldParentPtr(aarch64.GotPageOff, "base", base).resolve(), + // .pointer_to_got => @fieldParentPtr(aarch64.PointerToGot, "base", base).resolve(), + // .tlvp_page => @fieldParentPtr(aarch64.TlvpPage, "base", base).resolve(), + // .tlvp_page_off => @fieldParentPtr(aarch64.TlvpPageOff, "base", base).resolve(), + // .branch_x86_64 => @fieldParentPtr(x86_64.Branch, "base", base).resolve(), + // .signed => @fieldParentPtr(x86_64.Signed, "base", base).resolve(), + // .got_load => @fieldParentPtr(x86_64.GotLoad, "base", base).resolve(), + // .got => @fieldParentPtr(x86_64.Got, "base", base).resolve(), + // .tlv => @fieldParentPtr(x86_64.Tlv, "base", base).resolve(), + // }; + // } pub const Type = enum { branch_aarch64, @@ -79,23 +62,37 @@ pub const Relocation = struct { tlv, }; - pub 
const Target = union(enum) { - symbol: u32, - section: u16, + pub fn format(base: *const Relocation, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + try std.fmt.format(writer, "Relocation {{ ", .{}); + try std.fmt.format(writer, ".type = {s}, ", .{base.@"type"}); + try std.fmt.format(writer, ".offset = {}, ", .{base.offset}); + try std.fmt.format(writer, ".block = {}", .{base.block.local_sym_index}); + try std.fmt.format(writer, ".target = {}, ", .{base.target}); - pub fn fromReloc(reloc: macho.relocation_info) Target { - return if (reloc.r_extern == 1) .{ - .symbol = reloc.r_symbolnum, - } else .{ - .section = @intCast(u16, reloc.r_symbolnum - 1), - }; - } - }; + try switch (base.@"type") { + .unsigned => @fieldParentPtr(Unsigned, "base", base).format(fmt, options, writer), + .branch_aarch64 => @fieldParentPtr(aarch64.Branch, "base", base).format(fmt, options, writer), + .page => @fieldParentPtr(aarch64.Page, "base", base).format(fmt, options, writer), + .page_off => @fieldParentPtr(aarch64.PageOff, "base", base).format(fmt, options, writer), + .got_page => @fieldParentPtr(aarch64.GotPage, "base", base).format(fmt, options, writer), + .got_page_off => @fieldParentPtr(aarch64.GotPageOff, "base", base).format(fmt, options, writer), + .pointer_to_got => @fieldParentPtr(aarch64.PointerToGot, "base", base).format(fmt, options, writer), + .tlvp_page => @fieldParentPtr(aarch64.TlvpPage, "base", base).format(fmt, options, writer), + .tlvp_page_off => @fieldParentPtr(aarch64.TlvpPageOff, "base", base).format(fmt, options, writer), + .branch_x86_64 => @fieldParentPtr(x86_64.Branch, "base", base).format(fmt, options, writer), + .signed => @fieldParentPtr(x86_64.Signed, "base", base).format(fmt, options, writer), + .got_load => @fieldParentPtr(x86_64.GotLoad, "base", base).format(fmt, options, writer), + .got => @fieldParentPtr(x86_64.Got, "base", base).format(fmt, options, writer), + .tlv => @fieldParentPtr(x86_64.Tlv, "base", 
base).format(fmt, options, writer), + }; + + try std.fmt.format(writer, "}}", .{}); + } }; pub const Unsigned = struct { base: Relocation, - subtractor: ?Relocation.Target = null, + subtractor: ?*Symbol = null, /// Addend embedded directly in the relocation slot addend: i64, /// Extracted from r_length: @@ -106,75 +103,47 @@ pub const Unsigned = struct { pub const base_type: Relocation.Type = .unsigned; - pub fn resolve(unsigned: Unsigned, args: Relocation.ResolveArgs) !void { - const addend = if (unsigned.base.target == .section) - unsigned.addend - @intCast(i64, args.source_target_sect_addr.?) - else - unsigned.addend; + // pub fn resolve(unsigned: Unsigned) !void { + // const addend = if (unsigned.base.target == .section) + // unsigned.addend - @intCast(i64, args.source_target_sect_addr.?) + // else + // unsigned.addend; - const result = if (args.subtractor) |subtractor| - @intCast(i64, args.target_addr) - @intCast(i64, subtractor) + addend - else - @intCast(i64, args.target_addr) + addend; + // const result = if (args.subtractor) |subtractor| + // @intCast(i64, args.target_addr) - @intCast(i64, subtractor) + addend + // else + // @intCast(i64, args.target_addr) + addend; - log.debug(" | calculated addend 0x{x}", .{addend}); - log.debug(" | calculated unsigned value 0x{x}", .{result}); + // log.debug(" | calculated addend 0x{x}", .{addend}); + // log.debug(" | calculated unsigned value 0x{x}", .{result}); - if (unsigned.is_64bit) { - mem.writeIntLittle( - u64, - unsigned.base.code[0..8], - @bitCast(u64, result), - ); - } else { - mem.writeIntLittle( - u32, - unsigned.base.code[0..4], - @truncate(u32, @bitCast(u64, result)), - ); + // if (unsigned.is_64bit) { + // mem.writeIntLittle( + // u64, + // unsigned.base.code[0..8], + // @bitCast(u64, result), + // ); + // } else { + // mem.writeIntLittle( + // u32, + // unsigned.base.code[0..4], + // @truncate(u32, @bitCast(u64, result)), + // ); + // } + // } + + pub fn format(self: Unsigned, comptime fmt: []const u8, 
options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + if (self.subtractor) |sub| { + try std.fmt.format(writer, ".subtractor = {}, ", .{sub}); } + try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); + const length: usize = if (self.is_64bit) 8 else 4; + try std.fmt.format(writer, ".length = {}, ", .{length}); } }; -pub fn parse( - allocator: *Allocator, - arch: std.Target.Cpu.Arch, - code: []u8, - relocs: []const macho.relocation_info, -) ![]*Relocation { - var it = RelocIterator{ - .buffer = relocs, - }; - - switch (arch) { - .aarch64 => { - var parser = aarch64.Parser{ - .allocator = allocator, - .it = &it, - .code = code, - .parsed = std.ArrayList(*Relocation).init(allocator), - }; - defer parser.deinit(); - try parser.parse(); - - return parser.parsed.toOwnedSlice(); - }, - .x86_64 => { - var parser = x86_64.Parser{ - .allocator = allocator, - .it = &it, - .code = code, - .parsed = std.ArrayList(*Relocation).init(allocator), - }; - defer parser.deinit(); - try parser.parse(); - - return parser.parsed.toOwnedSlice(); - }, - else => unreachable, - } -} - pub const RelocIterator = struct { buffer: []const macho.relocation_info, index: i32 = -1, @@ -182,15 +151,7 @@ pub const RelocIterator = struct { pub fn next(self: *RelocIterator) ?macho.relocation_info { self.index += 1; if (self.index < self.buffer.len) { - const reloc = self.buffer[@intCast(u32, self.index)]; - log.debug("relocation", .{}); - log.debug(" | type = {}", .{reloc.r_type}); - log.debug(" | offset = {}", .{reloc.r_address}); - log.debug(" | PC = {}", .{reloc.r_pcrel == 1}); - log.debug(" | length = {}", .{reloc.r_length}); - log.debug(" | symbolnum = {}", .{reloc.r_symbolnum}); - log.debug(" | extern = {}", .{reloc.r_extern == 1}); - return reloc; + return self.buffer[@intCast(u32, self.index)]; } return null; } diff --git a/src/link/MachO/reloc/aarch64.zig b/src/link/MachO/reloc/aarch64.zig index 3eaeb65a9d..5105282e43 100644 --- 
a/src/link/MachO/reloc/aarch64.zig +++ b/src/link/MachO/reloc/aarch64.zig @@ -9,24 +9,34 @@ const meta = std.meta; const reloc = @import("../reloc.zig"); const Allocator = mem.Allocator; +const Object = @import("../Object.zig"); const Relocation = reloc.Relocation; const Symbol = @import("../Symbol.zig"); +const TextBlock = Zld.TextBlock; +const Zld = @import("../Zld.zig"); pub const Branch = struct { base: Relocation, /// Always .UnconditionalBranchImmediate - inst: aarch64.Instruction, + // inst: aarch64.Instruction, pub const base_type: Relocation.Type = .branch_aarch64; - pub fn resolve(branch: Branch, args: Relocation.ResolveArgs) !void { - const displacement = try math.cast(i28, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr)); + // pub fn resolve(branch: Branch, args: Relocation.ResolveArgs) !void { + // const displacement = try math.cast(i28, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr)); - log.debug(" | displacement 0x{x}", .{displacement}); + // log.debug(" | displacement 0x{x}", .{displacement}); - var inst = branch.inst; - inst.unconditional_branch_immediate.imm26 = @truncate(u26, @bitCast(u28, displacement >> 2)); - mem.writeIntLittle(u32, branch.base.code[0..4], inst.toU32()); + // var inst = branch.inst; + // inst.unconditional_branch_immediate.imm26 = @truncate(u26, @bitCast(u28, displacement >> 2)); + // mem.writeIntLittle(u32, branch.base.code[0..4], inst.toU32()); + // } + + pub fn format(self: Branch, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = self; + _ = fmt; + _ = options; + _ = writer; } }; @@ -34,24 +44,32 @@ pub const Page = struct { base: Relocation, addend: ?u32 = null, /// Always .PCRelativeAddress - inst: aarch64.Instruction, + // inst: aarch64.Instruction, pub const base_type: Relocation.Type = .page; - pub fn resolve(page: Page, args: Relocation.ResolveArgs) !void { - const target_addr = if (page.addend) |addend| args.target_addr + addend else 
args.target_addr; - const source_page = @intCast(i32, args.source_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); + // pub fn resolve(page: Page, args: Relocation.ResolveArgs) !void { + // const target_addr = if (page.addend) |addend| args.target_addr + addend else args.target_addr; + // const source_page = @intCast(i32, args.source_addr >> 12); + // const target_page = @intCast(i32, target_addr >> 12); + // const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); - log.debug(" | calculated addend 0x{x}", .{page.addend}); - log.debug(" | moving by {} pages", .{pages}); + // log.debug(" | calculated addend 0x{x}", .{page.addend}); + // log.debug(" | moving by {} pages", .{pages}); - var inst = page.inst; - inst.pc_relative_address.immhi = @truncate(u19, pages >> 2); - inst.pc_relative_address.immlo = @truncate(u2, pages); + // var inst = page.inst; + // inst.pc_relative_address.immhi = @truncate(u19, pages >> 2); + // inst.pc_relative_address.immlo = @truncate(u2, pages); - mem.writeIntLittle(u32, page.base.code[0..4], inst.toU32()); + // mem.writeIntLittle(u32, page.base.code[0..4], inst.toU32()); + // } + + pub fn format(self: Page, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + if (self.addend) |addend| { + try std.fmt.format(writer, ".addend = {}, ", .{addend}); + } } }; @@ -59,7 +77,7 @@ pub const PageOff = struct { base: Relocation, addend: ?u32 = null, op_kind: OpKind, - inst: aarch64.Instruction, + // inst: aarch64.Instruction, pub const base_type: Relocation.Type = .page_off; @@ -68,76 +86,99 @@ pub const PageOff = struct { load_store, }; - pub fn resolve(page_off: PageOff, args: Relocation.ResolveArgs) !void { - const target_addr = if (page_off.addend) |addend| args.target_addr + addend else args.target_addr; - const narrowed = @truncate(u12, target_addr); + // pub fn resolve(page_off: 
PageOff, args: Relocation.ResolveArgs) !void { + // const target_addr = if (page_off.addend) |addend| args.target_addr + addend else args.target_addr; + // const narrowed = @truncate(u12, target_addr); - log.debug(" | narrowed address within the page 0x{x}", .{narrowed}); - log.debug(" | {s} opcode", .{page_off.op_kind}); + // log.debug(" | narrowed address within the page 0x{x}", .{narrowed}); + // log.debug(" | {s} opcode", .{page_off.op_kind}); - var inst = page_off.inst; - if (page_off.op_kind == .arithmetic) { - inst.add_subtract_immediate.imm12 = narrowed; - } else { - const offset: u12 = blk: { - if (inst.load_store_register.size == 0) { - if (inst.load_store_register.v == 1) { - // 128-bit SIMD is scaled by 16. - break :blk try math.divExact(u12, narrowed, 16); - } - // Otherwise, 8-bit SIMD or ldrb. - break :blk narrowed; - } else { - const denom: u4 = try math.powi(u4, 2, inst.load_store_register.size); - break :blk try math.divExact(u12, narrowed, denom); - } - }; - inst.load_store_register.offset = offset; + // var inst = page_off.inst; + // if (page_off.op_kind == .arithmetic) { + // inst.add_subtract_immediate.imm12 = narrowed; + // } else { + // const offset: u12 = blk: { + // if (inst.load_store_register.size == 0) { + // if (inst.load_store_register.v == 1) { + // // 128-bit SIMD is scaled by 16. + // break :blk try math.divExact(u12, narrowed, 16); + // } + // // Otherwise, 8-bit SIMD or ldrb. 
+ // break :blk narrowed; + // } else { + // const denom: u4 = try math.powi(u4, 2, inst.load_store_register.size); + // break :blk try math.divExact(u12, narrowed, denom); + // } + // }; + // inst.load_store_register.offset = offset; + // } + + // mem.writeIntLittle(u32, page_off.base.code[0..4], inst.toU32()); + // } + + pub fn format(self: PageOff, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + if (self.addend) |addend| { + try std.fmt.format(writer, ".addend = {}, ", .{addend}); } - - mem.writeIntLittle(u32, page_off.base.code[0..4], inst.toU32()); + try std.fmt.format(writer, ".op_kind = {s}, ", .{self.op_kind}); } }; pub const GotPage = struct { base: Relocation, /// Always .PCRelativeAddress - inst: aarch64.Instruction, + // inst: aarch64.Instruction, pub const base_type: Relocation.Type = .got_page; - pub fn resolve(page: GotPage, args: Relocation.ResolveArgs) !void { - const source_page = @intCast(i32, args.source_addr >> 12); - const target_page = @intCast(i32, args.target_addr >> 12); - const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); + // pub fn resolve(page: GotPage, args: Relocation.ResolveArgs) !void { + // const source_page = @intCast(i32, args.source_addr >> 12); + // const target_page = @intCast(i32, args.target_addr >> 12); + // const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); - log.debug(" | moving by {} pages", .{pages}); + // log.debug(" | moving by {} pages", .{pages}); - var inst = page.inst; - inst.pc_relative_address.immhi = @truncate(u19, pages >> 2); - inst.pc_relative_address.immlo = @truncate(u2, pages); + // var inst = page.inst; + // inst.pc_relative_address.immhi = @truncate(u19, pages >> 2); + // inst.pc_relative_address.immlo = @truncate(u2, pages); - mem.writeIntLittle(u32, page.base.code[0..4], inst.toU32()); + // mem.writeIntLittle(u32, page.base.code[0..4], inst.toU32()); + // } + + pub fn format(self: GotPage, comptime 
fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = self; + _ = fmt; + _ = options; + _ = writer; } }; pub const GotPageOff = struct { base: Relocation, /// Always .LoadStoreRegister with size = 3 for GOT indirection - inst: aarch64.Instruction, + // inst: aarch64.Instruction, pub const base_type: Relocation.Type = .got_page_off; - pub fn resolve(page_off: GotPageOff, args: Relocation.ResolveArgs) !void { - const narrowed = @truncate(u12, args.target_addr); + // pub fn resolve(page_off: GotPageOff, args: Relocation.ResolveArgs) !void { + // const narrowed = @truncate(u12, args.target_addr); - log.debug(" | narrowed address within the page 0x{x}", .{narrowed}); + // log.debug(" | narrowed address within the page 0x{x}", .{narrowed}); - var inst = page_off.inst; - const offset = try math.divExact(u12, narrowed, 8); - inst.load_store_register.offset = offset; + // var inst = page_off.inst; + // const offset = try math.divExact(u12, narrowed, 8); + // inst.load_store_register.offset = offset; - mem.writeIntLittle(u32, page_off.base.code[0..4], inst.toU32()); + // mem.writeIntLittle(u32, page_off.base.code[0..4], inst.toU32()); + // } + + pub fn format(self: GotPageOff, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = self; + _ = fmt; + _ = options; + _ = writer; } }; @@ -146,34 +187,48 @@ pub const PointerToGot = struct { pub const base_type: Relocation.Type = .pointer_to_got; - pub fn resolve(ptr_to_got: PointerToGot, args: Relocation.ResolveArgs) !void { - const result = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr)); + // pub fn resolve(ptr_to_got: PointerToGot, args: Relocation.ResolveArgs) !void { + // const result = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr)); - log.debug(" | calculated value 0x{x}", .{result}); + // log.debug(" | calculated value 0x{x}", .{result}); - mem.writeIntLittle(u32, 
ptr_to_got.base.code[0..4], @bitCast(u32, result)); + // mem.writeIntLittle(u32, ptr_to_got.base.code[0..4], @bitCast(u32, result)); + // } + + pub fn format(self: PointerToGot, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = self; + _ = fmt; + _ = options; + _ = writer; } }; pub const TlvpPage = struct { base: Relocation, /// Always .PCRelativeAddress - inst: aarch64.Instruction, + // inst: aarch64.Instruction, pub const base_type: Relocation.Type = .tlvp_page; - pub fn resolve(page: TlvpPage, args: Relocation.ResolveArgs) !void { - const source_page = @intCast(i32, args.source_addr >> 12); - const target_page = @intCast(i32, args.target_addr >> 12); - const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); + // pub fn resolve(page: TlvpPage, args: Relocation.ResolveArgs) !void { + // const source_page = @intCast(i32, args.source_addr >> 12); + // const target_page = @intCast(i32, args.target_addr >> 12); + // const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); - log.debug(" | moving by {} pages", .{pages}); + // log.debug(" | moving by {} pages", .{pages}); - var inst = page.inst; - inst.pc_relative_address.immhi = @truncate(u19, pages >> 2); - inst.pc_relative_address.immlo = @truncate(u2, pages); + // var inst = page.inst; + // inst.pc_relative_address.immhi = @truncate(u19, pages >> 2); + // inst.pc_relative_address.immlo = @truncate(u2, pages); - mem.writeIntLittle(u32, page.base.code[0..4], inst.toU32()); + // mem.writeIntLittle(u32, page.base.code[0..4], inst.toU32()); + // } + + pub fn format(self: TlvpPage, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = self; + _ = fmt; + _ = options; + _ = writer; } }; @@ -182,82 +237,110 @@ pub const TlvpPageOff = struct { /// Always .AddSubtractImmediate regardless of the source instruction. /// This means, we always rewrite the instruction to add even if the /// source instruction was an ldr. 
- inst: aarch64.Instruction, + // inst: aarch64.Instruction, pub const base_type: Relocation.Type = .tlvp_page_off; - pub fn resolve(page_off: TlvpPageOff, args: Relocation.ResolveArgs) !void { - const narrowed = @truncate(u12, args.target_addr); + // pub fn resolve(page_off: TlvpPageOff, args: Relocation.ResolveArgs) !void { + // const narrowed = @truncate(u12, args.target_addr); - log.debug(" | narrowed address within the page 0x{x}", .{narrowed}); + // log.debug(" | narrowed address within the page 0x{x}", .{narrowed}); - var inst = page_off.inst; - inst.add_subtract_immediate.imm12 = narrowed; + // var inst = page_off.inst; + // inst.add_subtract_immediate.imm12 = narrowed; - mem.writeIntLittle(u32, page_off.base.code[0..4], inst.toU32()); + // mem.writeIntLittle(u32, page_off.base.code[0..4], inst.toU32()); + // } + + pub fn format(self: TlvpPageOff, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = self; + _ = fmt; + _ = options; + _ = writer; } }; pub const Parser = struct { - allocator: *Allocator, + object: *Object, + zld: *Zld, it: *reloc.RelocIterator, - code: []u8, - parsed: std.ArrayList(*Relocation), + block: *TextBlock, + base_addr: u64, addend: ?u32 = null, - subtractor: ?Relocation.Target = null, + subtractor: ?*Symbol = null, - pub fn deinit(parser: *Parser) void { - parser.parsed.deinit(); - } - - pub fn parse(parser: *Parser) !void { - while (parser.it.next()) |rel| { - switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) { - .ARM64_RELOC_BRANCH26 => { - try parser.parseBranch(rel); - }, + pub fn parse(self: *Parser) !void { + while (self.it.next()) |rel| { + const out_rel = switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) { + .ARM64_RELOC_BRANCH26 => try self.parseBranch(rel), .ARM64_RELOC_SUBTRACTOR => { - try parser.parseSubtractor(rel); - }, - .ARM64_RELOC_UNSIGNED => { - try parser.parseUnsigned(rel); + // Subtractor is not a relocation with effect on the TextBlock, so + // parse it and carry 
on. + try self.parseSubtractor(rel); + continue; }, + .ARM64_RELOC_UNSIGNED => try self.parseUnsigned(rel), .ARM64_RELOC_ADDEND => { - try parser.parseAddend(rel); + // Addend is not a relocation with effect on the TextBlock, so + // parse it and carry on. + try self.parseAddend(rel); + continue; }, .ARM64_RELOC_PAGE21, .ARM64_RELOC_GOT_LOAD_PAGE21, .ARM64_RELOC_TLVP_LOAD_PAGE21, - => { - try parser.parsePage(rel); + => try self.parsePage(rel), + .ARM64_RELOC_PAGEOFF12 => try self.parsePageOff(rel), + .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => try self.parseGotLoadPageOff(rel), + .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => try self.parseTlvpLoadPageOff(rel), + .ARM64_RELOC_POINTER_TO_GOT => try self.parsePointerToGot(rel), + }; + try self.block.relocs.append(out_rel); + + if (out_rel.target.payload == .regular) { + try self.block.references.put(out_rel.target.payload.regular.local_sym_index, {}); + } + + switch (out_rel.@"type") { + .got_page, .got_page_off, .pointer_to_got => { + const sym = out_rel.target; + + if (sym.got_index != null) continue; + + const index = @intCast(u32, self.zld.got_entries.items.len); + sym.got_index = index; + try self.zld.got_entries.append(self.zld.allocator, sym); + + log.debug("adding GOT entry for symbol {s} at index {}", .{ sym.name, index }); }, - .ARM64_RELOC_PAGEOFF12 => { - try parser.parsePageOff(rel); - }, - .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => { - try parser.parseGotLoadPageOff(rel); - }, - .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => { - try parser.parseTlvpLoadPageOff(rel); - }, - .ARM64_RELOC_POINTER_TO_GOT => { - try parser.parsePointerToGot(rel); + .branch_aarch64 => { + const sym = out_rel.target; + + if (sym.stubs_index != null) continue; + if (sym.payload != .proxy) continue; + + const index = @intCast(u32, self.zld.stubs.items.len); + sym.stubs_index = index; + try self.zld.stubs.append(self.zld.allocator, sym); + + log.debug("adding stub entry for symbol {s} at index {}", .{ sym.name, index }); }, + else => {}, } } } - fn 
parseAddend(parser: *Parser, rel: macho.relocation_info) !void { + fn parseAddend(self: *Parser, rel: macho.relocation_info) !void { const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); assert(rel_type == .ARM64_RELOC_ADDEND); assert(rel.r_pcrel == 0); assert(rel.r_extern == 0); - assert(parser.addend == null); + assert(self.addend == null); - parser.addend = rel.r_symbolnum; + self.addend = rel.r_symbolnum; // Verify ADDEND is followed by a load. - const next = @intToEnum(macho.reloc_type_arm64, parser.it.peek().r_type); + const next = @intToEnum(macho.reloc_type_arm64, self.it.peek().r_type); switch (next) { .ARM64_RELOC_PAGE21, .ARM64_RELOC_PAGEOFF12 => {}, else => { @@ -267,127 +350,101 @@ pub const Parser = struct { } } - fn parseBranch(parser: *Parser, rel: macho.relocation_info) !void { + fn parseBranch(self: *Parser, rel: macho.relocation_info) !*Relocation { const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); assert(rel_type == .ARM64_RELOC_BRANCH26); assert(rel.r_pcrel == 1); assert(rel.r_length == 2); - const offset = @intCast(u32, rel.r_address); - const inst = parser.code[offset..][0..4]; - const parsed_inst = aarch64.Instruction{ .unconditional_branch_immediate = mem.bytesToValue( - meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.unconditional_branch_immediate, - ), - inst, - ) }; + const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); + const target = try self.object.symbolFromReloc(rel); - var branch = try parser.allocator.create(Branch); - errdefer parser.allocator.destroy(branch); - - const target = Relocation.Target.fromReloc(rel); + var branch = try self.object.allocator.create(Branch); + errdefer self.object.allocator.destroy(branch); branch.* = .{ .base = .{ .@"type" = .branch_aarch64, - .code = inst, .offset = offset, .target = target, + .block = self.block, }, - .inst = parsed_inst, }; - log.debug(" | emitting {}", .{branch}); - try parser.parsed.append(&branch.base); + return 
&branch.base; } - fn parsePage(parser: *Parser, rel: macho.relocation_info) !void { + fn parsePage(self: *Parser, rel: macho.relocation_info) !*Relocation { assert(rel.r_pcrel == 1); assert(rel.r_length == 2); const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - const target = Relocation.Target.fromReloc(rel); - - const offset = @intCast(u32, rel.r_address); - const inst = parser.code[offset..][0..4]; - const parsed_inst = aarch64.Instruction{ .pc_relative_address = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.pc_relative_address, - ), inst) }; + const target = try self.object.symbolFromReloc(rel); + const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); const ptr: *Relocation = ptr: { switch (rel_type) { .ARM64_RELOC_PAGE21 => { defer { // Reset parser's addend state - parser.addend = null; + self.addend = null; } - var page = try parser.allocator.create(Page); - errdefer parser.allocator.destroy(page); + var page = try self.object.allocator.create(Page); + errdefer self.object.allocator.destroy(page); page.* = .{ .base = .{ .@"type" = .page, - .code = inst, .offset = offset, .target = target, + .block = self.block, }, - .addend = parser.addend, - .inst = parsed_inst, + .addend = self.addend, }; - log.debug(" | emitting {}", .{page}); - break :ptr &page.base; }, .ARM64_RELOC_GOT_LOAD_PAGE21 => { - var page = try parser.allocator.create(GotPage); - errdefer parser.allocator.destroy(page); + var page = try self.object.allocator.create(GotPage); + errdefer self.object.allocator.destroy(page); page.* = .{ .base = .{ .@"type" = .got_page, - .code = inst, .offset = offset, .target = target, + .block = self.block, }, - .inst = parsed_inst, }; - log.debug(" | emitting {}", .{page}); - break :ptr &page.base; }, .ARM64_RELOC_TLVP_LOAD_PAGE21 => { - var page = try parser.allocator.create(TlvpPage); - errdefer parser.allocator.destroy(page); + var page = try self.object.allocator.create(TlvpPage); + 
errdefer self.object.allocator.destroy(page); page.* = .{ .base = .{ .@"type" = .tlvp_page, - .code = inst, .offset = offset, .target = target, + .block = self.block, }, - .inst = parsed_inst, }; - log.debug(" | emitting {}", .{page}); - break :ptr &page.base; }, else => unreachable, } }; - try parser.parsed.append(ptr); + return ptr; } - fn parsePageOff(parser: *Parser, rel: macho.relocation_info) !void { + fn parsePageOff(self: *Parser, rel: macho.relocation_info) !*Relocation { defer { // Reset parser's addend state - parser.addend = null; + self.addend = null; } const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); @@ -395,83 +452,56 @@ pub const Parser = struct { assert(rel.r_pcrel == 0); assert(rel.r_length == 2); - const offset = @intCast(u32, rel.r_address); - const inst = parser.code[offset..][0..4]; + const target = try self.object.symbolFromReloc(rel); + const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); + const op_kind: PageOff.OpKind = if (isArithmeticOp(self.block.code[offset..][0..4])) + .arithmetic + else + .load_store; - var op_kind: PageOff.OpKind = undefined; - var parsed_inst: aarch64.Instruction = undefined; - if (isArithmeticOp(inst)) { - op_kind = .arithmetic; - parsed_inst = .{ .add_subtract_immediate = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.add_subtract_immediate, - ), inst) }; - } else { - op_kind = .load_store; - parsed_inst = .{ .load_store_register = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), inst) }; - } - const target = Relocation.Target.fromReloc(rel); - - var page_off = try parser.allocator.create(PageOff); - errdefer parser.allocator.destroy(page_off); + var page_off = try self.object.allocator.create(PageOff); + errdefer self.object.allocator.destroy(page_off); page_off.* = .{ .base = .{ .@"type" = .page_off, - .code = inst, .offset = offset, .target = target, + .block = self.block, }, .op_kind 
= op_kind, - .inst = parsed_inst, - .addend = parser.addend, + .addend = self.addend, }; - log.debug(" | emitting {}", .{page_off}); - try parser.parsed.append(&page_off.base); + return &page_off.base; } - fn parseGotLoadPageOff(parser: *Parser, rel: macho.relocation_info) !void { + fn parseGotLoadPageOff(self: *Parser, rel: macho.relocation_info) !*Relocation { const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); assert(rel_type == .ARM64_RELOC_GOT_LOAD_PAGEOFF12); assert(rel.r_pcrel == 0); assert(rel.r_length == 2); - const offset = @intCast(u32, rel.r_address); - const inst = parser.code[offset..][0..4]; - assert(!isArithmeticOp(inst)); + const target = try self.object.symbolFromReloc(rel); + const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); + assert(!isArithmeticOp(self.block.code[offset..][0..4])); - const parsed_inst = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), inst); - assert(parsed_inst.size == 3); - - const target = Relocation.Target.fromReloc(rel); - - var page_off = try parser.allocator.create(GotPageOff); - errdefer parser.allocator.destroy(page_off); + var page_off = try self.object.allocator.create(GotPageOff); + errdefer self.object.allocator.destroy(page_off); page_off.* = .{ .base = .{ .@"type" = .got_page_off, - .code = inst, .offset = offset, .target = target, - }, - .inst = .{ - .load_store_register = parsed_inst, + .block = self.block, }, }; - log.debug(" | emitting {}", .{page_off}); - try parser.parsed.append(&page_off.base); + return &page_off.base; } - fn parseTlvpLoadPageOff(parser: *Parser, rel: macho.relocation_info) !void { + fn parseTlvpLoadPageOff(self: *Parser, rel: macho.relocation_info) !*Relocation { const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); assert(rel_type == .ARM64_RELOC_TLVP_LOAD_PAGEOFF12); assert(rel.r_pcrel == 0); @@ -483,141 +513,102 @@ pub const Parser = struct { size: u1, }; - const offset = 
@intCast(u32, rel.r_address); - const inst = parser.code[offset..][0..4]; - const parsed: RegInfo = parsed: { - if (isArithmeticOp(inst)) { - const parsed_inst = mem.bytesAsValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.add_subtract_immediate, - ), inst); - break :parsed .{ - .rd = parsed_inst.rd, - .rn = parsed_inst.rn, - .size = parsed_inst.sf, - }; - } else { - const parsed_inst = mem.bytesAsValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), inst); - break :parsed .{ - .rd = parsed_inst.rt, - .rn = parsed_inst.rn, - .size = @truncate(u1, parsed_inst.size), - }; - } - }; + const target = try self.object.symbolFromReloc(rel); + const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); - const target = Relocation.Target.fromReloc(rel); - - var page_off = try parser.allocator.create(TlvpPageOff); - errdefer parser.allocator.destroy(page_off); + var page_off = try self.object.allocator.create(TlvpPageOff); + errdefer self.object.allocator.destroy(page_off); page_off.* = .{ .base = .{ .@"type" = .tlvp_page_off, - .code = inst, .offset = offset, .target = target, - }, - .inst = .{ - .add_subtract_immediate = .{ - .rd = parsed.rd, - .rn = parsed.rn, - .imm12 = 0, // This will be filled when target addresses are known. 
- .sh = 0, - .s = 0, - .op = 0, - .sf = parsed.size, - }, + .block = self.block, }, }; - log.debug(" | emitting {}", .{page_off}); - try parser.parsed.append(&page_off.base); + return &page_off.base; } - fn parseSubtractor(parser: *Parser, rel: macho.relocation_info) !void { + fn parseSubtractor(self: *Parser, rel: macho.relocation_info) !void { const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); assert(rel_type == .ARM64_RELOC_SUBTRACTOR); assert(rel.r_pcrel == 0); - assert(parser.subtractor == null); + assert(self.subtractor == null); - parser.subtractor = Relocation.Target.fromReloc(rel); + self.subtractor = try self.object.symbolFromReloc(rel); // Verify SUBTRACTOR is followed by UNSIGNED. - const next = @intToEnum(macho.reloc_type_arm64, parser.it.peek().r_type); + const next = @intToEnum(macho.reloc_type_arm64, self.it.peek().r_type); if (next != .ARM64_RELOC_UNSIGNED) { log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); return error.UnexpectedRelocationType; } } - fn parseUnsigned(parser: *Parser, rel: macho.relocation_info) !void { + fn parseUnsigned(self: *Parser, rel: macho.relocation_info) !*Relocation { defer { // Reset parser's subtractor state - parser.subtractor = null; + self.subtractor = null; } const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); assert(rel_type == .ARM64_RELOC_UNSIGNED); assert(rel.r_pcrel == 0); - var unsigned = try parser.allocator.create(reloc.Unsigned); - errdefer parser.allocator.destroy(unsigned); - - const target = Relocation.Target.fromReloc(rel); + const target = try self.object.symbolFromReloc(rel); + const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); const is_64bit: bool = switch (rel.r_length) { 3 => true, 2 => false, else => unreachable, }; - const offset = @intCast(u32, rel.r_address); const addend: i64 = if (is_64bit) - mem.readIntLittle(i64, parser.code[offset..][0..8]) + mem.readIntLittle(i64, self.block.code[offset..][0..8]) else - 
mem.readIntLittle(i32, parser.code[offset..][0..4]); + mem.readIntLittle(i32, self.block.code[offset..][0..4]); + + var unsigned = try self.object.allocator.create(reloc.Unsigned); + errdefer self.object.allocator.destroy(unsigned); unsigned.* = .{ .base = .{ .@"type" = .unsigned, - .code = if (is_64bit) parser.code[offset..][0..8] else parser.code[offset..][0..4], .offset = offset, .target = target, + .block = self.block, }, - .subtractor = parser.subtractor, + .subtractor = self.subtractor, .is_64bit = is_64bit, .addend = addend, }; - log.debug(" | emitting {}", .{unsigned}); - try parser.parsed.append(&unsigned.base); + return &unsigned.base; } - fn parsePointerToGot(parser: *Parser, rel: macho.relocation_info) !void { + fn parsePointerToGot(self: *Parser, rel: macho.relocation_info) !*Relocation { const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); assert(rel_type == .ARM64_RELOC_POINTER_TO_GOT); assert(rel.r_pcrel == 1); assert(rel.r_length == 2); - var ptr_to_got = try parser.allocator.create(PointerToGot); - errdefer parser.allocator.destroy(ptr_to_got); + var ptr_to_got = try self.object.allocator.create(PointerToGot); + errdefer self.object.allocator.destroy(ptr_to_got); - const target = Relocation.Target.fromReloc(rel); - const offset = @intCast(u32, rel.r_address); + const target = try self.object.symbolFromReloc(rel); + const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); ptr_to_got.* = .{ .base = .{ .@"type" = .pointer_to_got, - .code = parser.code[offset..][0..4], .offset = offset, .target = target, + .block = self.block, }, }; - log.debug(" | emitting {}", .{ptr_to_got}); - try parser.parsed.append(&ptr_to_got.base); + return &ptr_to_got.base; } }; diff --git a/src/link/MachO/reloc/x86_64.zig b/src/link/MachO/reloc/x86_64.zig index 9f3c4702c8..85c797dcd0 100644 --- a/src/link/MachO/reloc/x86_64.zig +++ b/src/link/MachO/reloc/x86_64.zig @@ -8,17 +8,28 @@ const meta = std.meta; const reloc = @import("../reloc.zig"); 
const Allocator = mem.Allocator; +const Object = @import("../Object.zig"); const Relocation = reloc.Relocation; +const Symbol = @import("../Symbol.zig"); +const TextBlock = Zld.TextBlock; +const Zld = @import("../Zld.zig"); pub const Branch = struct { base: Relocation, pub const base_type: Relocation.Type = .branch_x86_64; - pub fn resolve(branch: Branch, args: Relocation.ResolveArgs) !void { - const displacement = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4); - log.debug(" | displacement 0x{x}", .{displacement}); - mem.writeIntLittle(u32, branch.base.code[0..4], @bitCast(u32, displacement)); + // pub fn resolve(branch: Branch, args: Relocation.ResolveArgs) !void { + // const displacement = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4); + // log.debug(" | displacement 0x{x}", .{displacement}); + // mem.writeIntLittle(u32, branch.base.code[0..4], @bitCast(u32, displacement)); + // } + + pub fn format(self: Branch, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = self; + _ = fmt; + _ = options; + _ = writer; } }; @@ -29,25 +40,32 @@ pub const Signed = struct { pub const base_type: Relocation.Type = .signed; - pub fn resolve(signed: Signed, args: Relocation.ResolveArgs) !void { - const target_addr = target_addr: { - if (signed.base.target == .section) { - const source_target = @intCast(i64, args.source_source_sect_addr.?) 
+ @intCast(i64, signed.base.offset) + signed.addend + 4; - const source_disp = source_target - @intCast(i64, args.source_target_sect_addr.?); - break :target_addr @intCast(i64, args.target_addr) + source_disp; - } - break :target_addr @intCast(i64, args.target_addr) + signed.addend; - }; - const displacement = try math.cast( - i32, - target_addr - @intCast(i64, args.source_addr) - signed.correction - 4, - ); + // pub fn resolve(signed: Signed, args: Relocation.ResolveArgs) !void { + // const target_addr = target_addr: { + // if (signed.base.target == .section) { + // const source_target = @intCast(i64, args.source_source_sect_addr.?) + @intCast(i64, signed.base.offset) + signed.addend + 4; + // const source_disp = source_target - @intCast(i64, args.source_target_sect_addr.?); + // break :target_addr @intCast(i64, args.target_addr) + source_disp; + // } + // break :target_addr @intCast(i64, args.target_addr) + signed.addend; + // }; + // const displacement = try math.cast( + // i32, + // target_addr - @intCast(i64, args.source_addr) - signed.correction - 4, + // ); - log.debug(" | addend 0x{x}", .{signed.addend}); - log.debug(" | correction 0x{x}", .{signed.correction}); - log.debug(" | displacement 0x{x}", .{displacement}); + // log.debug(" | addend 0x{x}", .{signed.addend}); + // log.debug(" | correction 0x{x}", .{signed.correction}); + // log.debug(" | displacement 0x{x}", .{displacement}); - mem.writeIntLittle(u32, signed.base.code[0..4], @bitCast(u32, displacement)); + // mem.writeIntLittle(u32, signed.base.code[0..4], @bitCast(u32, displacement)); + // } + + pub fn format(self: Signed, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); + try std.fmt.format(writer, ".correction = {}, ", .{self.correction}); } }; @@ -56,10 +74,17 @@ pub const GotLoad = struct { pub const base_type: Relocation.Type = .got_load; - pub fn resolve(got_load: 
GotLoad, args: Relocation.ResolveArgs) !void { - const displacement = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4); - log.debug(" | displacement 0x{x}", .{displacement}); - mem.writeIntLittle(u32, got_load.base.code[0..4], @bitCast(u32, displacement)); + // pub fn resolve(got_load: GotLoad, args: Relocation.ResolveArgs) !void { + // const displacement = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4); + // log.debug(" | displacement 0x{x}", .{displacement}); + // mem.writeIntLittle(u32, got_load.base.code[0..4], @bitCast(u32, displacement)); + // } + + pub fn format(self: GotLoad, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = self; + _ = fmt; + _ = options; + _ = writer; } }; @@ -69,113 +94,139 @@ pub const Got = struct { pub const base_type: Relocation.Type = .got; - pub fn resolve(got: Got, args: Relocation.ResolveArgs) !void { - const displacement = try math.cast( - i32, - @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4 + got.addend, - ); - log.debug(" | displacement 0x{x}", .{displacement}); - mem.writeIntLittle(u32, got.base.code[0..4], @bitCast(u32, displacement)); + // pub fn resolve(got: Got, args: Relocation.ResolveArgs) !void { + // const displacement = try math.cast( + // i32, + // @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4 + got.addend, + // ); + // log.debug(" | displacement 0x{x}", .{displacement}); + // mem.writeIntLittle(u32, got.base.code[0..4], @bitCast(u32, displacement)); + // } + + pub fn format(self: Got, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); } }; pub const Tlv = struct { base: Relocation, - op: *u8, pub const base_type: Relocation.Type = .tlv; - pub fn resolve(tlv: Tlv, args: Relocation.ResolveArgs) !void { - // We need to rewrite the 
opcode from movq to leaq. - tlv.op.* = 0x8d; - log.debug(" | rewriting op to leaq", .{}); + // pub fn resolve(tlv: Tlv, args: Relocation.ResolveArgs) !void { + // // We need to rewrite the opcode from movq to leaq. + // tlv.op.* = 0x8d; + // log.debug(" | rewriting op to leaq", .{}); - const displacement = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4); - log.debug(" | displacement 0x{x}", .{displacement}); + // const displacement = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4); + // log.debug(" | displacement 0x{x}", .{displacement}); - mem.writeIntLittle(u32, tlv.base.code[0..4], @bitCast(u32, displacement)); + // mem.writeIntLittle(u32, tlv.base.code[0..4], @bitCast(u32, displacement)); + // } + pub fn format(self: Tlv, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = self; + _ = fmt; + _ = options; + _ = writer; } }; pub const Parser = struct { - allocator: *Allocator, + object: *Object, + zld: *Zld, it: *reloc.RelocIterator, - code: []u8, - parsed: std.ArrayList(*Relocation), - subtractor: ?Relocation.Target = null, + block: *TextBlock, + base_addr: u64, + subtractor: ?*Symbol = null, - pub fn deinit(parser: *Parser) void { - parser.parsed.deinit(); - } - - pub fn parse(parser: *Parser) !void { - while (parser.it.next()) |rel| { - switch (@intToEnum(macho.reloc_type_x86_64, rel.r_type)) { - .X86_64_RELOC_BRANCH => { - try parser.parseBranch(rel); - }, + pub fn parse(self: *Parser) !void { + while (self.it.next()) |rel| { + const out_rel = switch (@intToEnum(macho.reloc_type_x86_64, rel.r_type)) { + .X86_64_RELOC_BRANCH => try self.parseBranch(rel), .X86_64_RELOC_SUBTRACTOR => { - try parser.parseSubtractor(rel); - }, - .X86_64_RELOC_UNSIGNED => { - try parser.parseUnsigned(rel); + // Subtractor is not a relocation with effect on the TextBlock, so + // parse it and carry on. 
+ try self.parseSubtractor(rel); + continue; }, + .X86_64_RELOC_UNSIGNED => try self.parseUnsigned(rel), .X86_64_RELOC_SIGNED, .X86_64_RELOC_SIGNED_1, .X86_64_RELOC_SIGNED_2, .X86_64_RELOC_SIGNED_4, - => { - try parser.parseSigned(rel); + => try self.parseSigned(rel), + .X86_64_RELOC_GOT_LOAD => try self.parseGotLoad(rel), + .X86_64_RELOC_GOT => try self.parseGot(rel), + .X86_64_RELOC_TLV => try self.parseTlv(rel), + }; + try self.block.relocs.append(out_rel); + + if (out_rel.target.payload == .regular) { + try self.block.references.put(out_rel.target.payload.regular.local_sym_index, {}); + } + + switch (out_rel.@"type") { + .got_load, .got => { + const sym = out_rel.target; + + if (sym.got_index != null) continue; + + const index = @intCast(u32, self.zld.got_entries.items.len); + sym.got_index = index; + try self.zld.got_entries.append(self.zld.allocator, sym); + + log.debug("adding GOT entry for symbol {s} at index {}", .{ sym.name, index }); }, - .X86_64_RELOC_GOT_LOAD => { - try parser.parseGotLoad(rel); - }, - .X86_64_RELOC_GOT => { - try parser.parseGot(rel); - }, - .X86_64_RELOC_TLV => { - try parser.parseTlv(rel); + .branch_x86_64 => { + const sym = out_rel.target; + + if (sym.stubs_index != null) continue; + if (sym.payload != .proxy) continue; + + const index = @intCast(u32, self.zld.stubs.items.len); + sym.stubs_index = index; + try self.zld.stubs.append(self.zld.allocator, sym); + + log.debug("adding stub entry for symbol {s} at index {}", .{ sym.name, index }); }, + else => {}, } } } - fn parseBranch(parser: *Parser, rel: macho.relocation_info) !void { + fn parseBranch(self: *Parser, rel: macho.relocation_info) !*Relocation { const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); assert(rel_type == .X86_64_RELOC_BRANCH); assert(rel.r_pcrel == 1); assert(rel.r_length == 2); - const offset = @intCast(u32, rel.r_address); - const inst = parser.code[offset..][0..4]; + const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); + 
const target = try self.object.symbolFromReloc(rel); - var branch = try parser.allocator.create(Branch); - errdefer parser.allocator.destroy(branch); - - const target = Relocation.Target.fromReloc(rel); + var branch = try self.object.allocator.create(Branch); + errdefer self.object.allocator.destroy(branch); branch.* = .{ .base = .{ .@"type" = .branch_x86_64, - .code = inst, .offset = offset, .target = target, + .block = self.block, }, }; - log.debug(" | emitting {}", .{branch}); - try parser.parsed.append(&branch.base); + return &branch.base; } - fn parseSigned(parser: *Parser, rel: macho.relocation_info) !void { + fn parseSigned(self: *Parser, rel: macho.relocation_info) !*Relocation { assert(rel.r_pcrel == 1); assert(rel.r_length == 2); const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - const target = Relocation.Target.fromReloc(rel); - - const offset = @intCast(u32, rel.r_address); - const inst = parser.code[offset..][0..4]; + const target = try self.object.symbolFromReloc(rel); + const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); const correction: i4 = switch (rel_type) { .X86_64_RELOC_SIGNED => 0, .X86_64_RELOC_SIGNED_1 => 1, @@ -183,161 +234,152 @@ pub const Parser = struct { .X86_64_RELOC_SIGNED_4 => 4, else => unreachable, }; - const addend = mem.readIntLittle(i32, inst) + correction; + const addend = mem.readIntLittle(i32, self.block.code[offset..][0..4]) + correction; - var signed = try parser.allocator.create(Signed); - errdefer parser.allocator.destroy(signed); + var signed = try self.object.allocator.create(Signed); + errdefer self.object.allocator.destroy(signed); signed.* = .{ .base = .{ .@"type" = .signed, - .code = inst, .offset = offset, .target = target, + .block = self.block, }, .addend = addend, .correction = correction, }; - log.debug(" | emitting {}", .{signed}); - try parser.parsed.append(&signed.base); + return &signed.base; } - fn parseGotLoad(parser: *Parser, rel: macho.relocation_info) !void { 
+ fn parseGotLoad(self: *Parser, rel: macho.relocation_info) !*Relocation { const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); assert(rel_type == .X86_64_RELOC_GOT_LOAD); assert(rel.r_pcrel == 1); assert(rel.r_length == 2); - const offset = @intCast(u32, rel.r_address); - const inst = parser.code[offset..][0..4]; - const target = Relocation.Target.fromReloc(rel); + const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); + const target = try self.object.symbolFromReloc(rel); - var got_load = try parser.allocator.create(GotLoad); - errdefer parser.allocator.destroy(got_load); + var got_load = try self.object.allocator.create(GotLoad); + errdefer self.object.allocator.destroy(got_load); got_load.* = .{ .base = .{ .@"type" = .got_load, - .code = inst, .offset = offset, .target = target, + .block = self.block, }, }; - log.debug(" | emitting {}", .{got_load}); - try parser.parsed.append(&got_load.base); + return &got_load.base; } - fn parseGot(parser: *Parser, rel: macho.relocation_info) !void { + fn parseGot(self: *Parser, rel: macho.relocation_info) !*Relocation { const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); assert(rel_type == .X86_64_RELOC_GOT); assert(rel.r_pcrel == 1); assert(rel.r_length == 2); - const offset = @intCast(u32, rel.r_address); - const inst = parser.code[offset..][0..4]; - const target = Relocation.Target.fromReloc(rel); - const addend = mem.readIntLittle(i32, inst); + const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); + const target = try self.object.symbolFromReloc(rel); + const addend = mem.readIntLittle(i32, self.block.code[offset..][0..4]); - var got = try parser.allocator.create(Got); - errdefer parser.allocator.destroy(got); + var got = try self.object.allocator.create(Got); + errdefer self.object.allocator.destroy(got); got.* = .{ .base = .{ .@"type" = .got, - .code = inst, .offset = offset, .target = target, + .block = self.block, }, .addend = addend, }; - 
log.debug(" | emitting {}", .{got}); - try parser.parsed.append(&got.base); + return &got.base; } - fn parseTlv(parser: *Parser, rel: macho.relocation_info) !void { + fn parseTlv(self: *Parser, rel: macho.relocation_info) !*Relocation { const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); assert(rel_type == .X86_64_RELOC_TLV); assert(rel.r_pcrel == 1); assert(rel.r_length == 2); - const offset = @intCast(u32, rel.r_address); - const inst = parser.code[offset..][0..4]; - const target = Relocation.Target.fromReloc(rel); + const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); + const target = try self.object.symbolFromReloc(rel); - var tlv = try parser.allocator.create(Tlv); - errdefer parser.allocator.destroy(tlv); + var tlv = try self.object.allocator.create(Tlv); + errdefer self.object.allocator.destroy(tlv); tlv.* = .{ .base = .{ .@"type" = .tlv, - .code = inst, .offset = offset, .target = target, + .block = self.block, }, - .op = &parser.code[offset - 2], }; - log.debug(" | emitting {}", .{tlv}); - try parser.parsed.append(&tlv.base); + return &tlv.base; } - fn parseSubtractor(parser: *Parser, rel: macho.relocation_info) !void { + fn parseSubtractor(self: *Parser, rel: macho.relocation_info) !void { const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); assert(rel_type == .X86_64_RELOC_SUBTRACTOR); assert(rel.r_pcrel == 0); - assert(parser.subtractor == null); + assert(self.subtractor == null); - parser.subtractor = Relocation.Target.fromReloc(rel); + self.subtractor = try self.object.symbolFromReloc(rel); // Verify SUBTRACTOR is followed by UNSIGNED. 
- const next = @intToEnum(macho.reloc_type_x86_64, parser.it.peek().r_type); + const next = @intToEnum(macho.reloc_type_x86_64, self.it.peek().r_type); if (next != .X86_64_RELOC_UNSIGNED) { log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); return error.UnexpectedRelocationType; } } - fn parseUnsigned(parser: *Parser, rel: macho.relocation_info) !void { + fn parseUnsigned(self: *Parser, rel: macho.relocation_info) !*Relocation { defer { // Reset parser's subtractor state - parser.subtractor = null; + self.subtractor = null; } const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); assert(rel_type == .X86_64_RELOC_UNSIGNED); assert(rel.r_pcrel == 0); - var unsigned = try parser.allocator.create(reloc.Unsigned); - errdefer parser.allocator.destroy(unsigned); - - const target = Relocation.Target.fromReloc(rel); + const target = try self.object.symbolFromReloc(rel); const is_64bit: bool = switch (rel.r_length) { 3 => true, 2 => false, else => unreachable, }; - const offset = @intCast(u32, rel.r_address); + const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); const addend: i64 = if (is_64bit) - mem.readIntLittle(i64, parser.code[offset..][0..8]) + mem.readIntLittle(i64, self.block.code[offset..][0..8]) else - mem.readIntLittle(i32, parser.code[offset..][0..4]); + mem.readIntLittle(i32, self.block.code[offset..][0..4]); + + var unsigned = try self.object.allocator.create(reloc.Unsigned); + errdefer self.object.allocator.destroy(unsigned); unsigned.* = .{ .base = .{ .@"type" = .unsigned, - .code = if (is_64bit) parser.code[offset..][0..8] else parser.code[offset..][0..4], .offset = offset, .target = target, + .block = self.block, }, - .subtractor = parser.subtractor, + .subtractor = self.subtractor, .is_64bit = is_64bit, .addend = addend, }; - log.debug(" | emitting {}", .{unsigned}); - try parser.parsed.append(&unsigned.base); + return &unsigned.base; } }; From dbd2eb7c7f9267e8ae508d0995c1d4c5a3b46309 Mon Sep 
17 00:00:00 2001 From: Jakub Konka Date: Tue, 6 Jul 2021 19:09:49 +0200 Subject: [PATCH 19/81] zld: simplify relocation parsing --- CMakeLists.txt | 2 - src/link/MachO/Object.zig | 37 +- src/link/MachO/Zld.zig | 62 +-- src/link/MachO/reloc.zig | 852 +++++++++++++++++++++++++++---- src/link/MachO/reloc/aarch64.zig | 618 ---------------------- src/link/MachO/reloc/x86_64.zig | 385 -------------- 6 files changed, 753 insertions(+), 1203 deletions(-) delete mode 100644 src/link/MachO/reloc/aarch64.zig delete mode 100644 src/link/MachO/reloc/x86_64.zig diff --git a/CMakeLists.txt b/CMakeLists.txt index d47a285e31..828b3ee73e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -588,8 +588,6 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/link/MachO/bind.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/commands.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/reloc.zig" - "${CMAKE_SOURCE_DIR}/src/link/MachO/reloc/aarch64.zig" - "${CMAKE_SOURCE_DIR}/src/link/MachO/reloc/x86_64.zig" "${CMAKE_SOURCE_DIR}/src/link/Wasm.zig" "${CMAKE_SOURCE_DIR}/src/link/tapi.zig" "${CMAKE_SOURCE_DIR}/src/link/tapi/parse.zig" diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 06d5a260cd..6e8925b648 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -408,7 +408,6 @@ const TextBlockParser = struct { const start_addr = senior_nlist.nlist.n_value - self.section.addr; const end_addr = if (next_nlist) |n| n.nlist.n_value - self.section.addr else self.section.size; - log.warn("{} - {}", .{ start_addr, end_addr }); const code = self.code[start_addr..end_addr]; const size = code.len; @@ -430,7 +429,7 @@ const TextBlockParser = struct { .aliases = alias_only_indices, .references = std.AutoArrayHashMap(u32, void).init(self.allocator), .code = try self.allocator.dupe(u8, code), - .relocs = std.ArrayList(*Relocation).init(self.allocator), + .relocs = std.ArrayList(Relocation).init(self.allocator), .size = size, .alignment = self.section.@"align", }; @@ -579,7 +578,7 @@ pub 
fn parseTextBlocks(self: *Object, zld: *Zld) !void { .local_sym_index = local_sym_index, .references = std.AutoArrayHashMap(u32, void).init(self.allocator), .code = try self.allocator.dupe(u8, code), - .relocs = std.ArrayList(*Relocation).init(self.allocator), + .relocs = std.ArrayList(Relocation).init(self.allocator), .size = sect.size, .alignment = sect.@"align", }; @@ -607,30 +606,14 @@ fn parseRelocs( var it = reloc.RelocIterator{ .buffer = relocs, }; - - switch (self.arch.?) { - .aarch64 => { - var parser = reloc.aarch64.Parser{ - .object = self, - .zld = zld, - .it = &it, - .block = block, - .base_addr = base_addr, - }; - try parser.parse(); - }, - .x86_64 => { - var parser = reloc.x86_64.Parser{ - .object = self, - .zld = zld, - .it = &it, - .block = block, - .base_addr = base_addr, - }; - try parser.parse(); - }, - else => unreachable, - } + var parser = reloc.Parser{ + .object = self, + .zld = zld, + .it = &it, + .block = block, + .base_addr = base_addr, + }; + try parser.parse(); } pub fn symbolFromReloc(self: *Object, rel: macho.relocation_info) !*Symbol { diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 7f7997bc17..69f8821cb7 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -137,7 +137,7 @@ pub const TextBlock = struct { aliases: ?[]u32 = null, references: std.AutoArrayHashMap(u32, void), code: []u8, - relocs: std.ArrayList(*Relocation), + relocs: std.ArrayList(Relocation), size: u64, alignment: u32, next: ?*TextBlock = null, @@ -1604,7 +1604,7 @@ fn resolveSymbols(self: *Zld) !void { .local_sym_index = local_sym_index, .references = std.AutoArrayHashMap(u32, void).init(self.allocator), .code = code, - .relocs = std.ArrayList(*Relocation).init(self.allocator), + .relocs = std.ArrayList(Relocation).init(self.allocator), .size = size, .alignment = alignment, }; @@ -1871,64 +1871,6 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void { } } -fn relocTargetAddr(self: *Zld, object: *const Object, target: 
reloc.Relocation.Target) !u64 { - const target_addr = blk: { - switch (target) { - .symbol => |sym_id| { - const sym = object.symbols.items[sym_id]; - switch (sym.payload) { - .regular => |reg| { - log.debug(" | regular '{s}'", .{sym.name}); - break :blk reg.address; - }, - .proxy => |proxy| { - if (mem.eql(u8, sym.name, "__tlv_bootstrap")) { - log.debug(" | symbol '__tlv_bootstrap'", .{}); - const segment = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const tlv = segment.sections.items[self.tlv_section_index.?]; - break :blk tlv.addr; - } - - log.debug(" | symbol stub '{s}'", .{sym.name}); - const segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stubs = segment.sections.items[self.stubs_section_index.?]; - const stubs_index = sym.stubs_index orelse { - if (proxy.bind_info.items.len > 0) { - break :blk 0; // Dynamically bound by dyld. - } - log.err( - "expected stubs index or dynamic bind address when relocating symbol '{s}'", - .{sym.name}, - ); - log.err("this is an internal linker error", .{}); - return error.FailedToResolveRelocationTarget; - }; - break :blk stubs.addr + stubs_index * stubs.reserved2; - }, - else => { - log.err("failed to resolve symbol '{s}' as a relocation target", .{sym.name}); - log.err("this is an internal linker error", .{}); - return error.FailedToResolveRelocationTarget; - }, - } - }, - .section => |sect_id| { - log.debug(" | section offset", .{}); - const source_sect = object.sections.items[sect_id]; - log.debug(" | section '{s},{s}'", .{ - segmentName(source_sect.inner), - sectionName(source_sect.inner), - }); - const target_map = source_sect.target_map orelse unreachable; - const target_seg = self.load_commands.items[target_map.segment_id].Segment; - const target_sect = target_seg.sections.items[target_map.section_id]; - break :blk target_sect.addr + target_map.offset; - }, - } - }; - return target_addr; -} - fn populateMetadata(self: *Zld) !void { if 
(self.pagezero_segment_cmd_index == null) { self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); diff --git a/src/link/MachO/reloc.zig b/src/link/MachO/reloc.zig index e11e850aa6..4693e89787 100644 --- a/src/link/MachO/reloc.zig +++ b/src/link/MachO/reloc.zig @@ -1,4 +1,5 @@ const std = @import("std"); +const aarch64 = @import("../../codegen/aarch64.zig"); const assert = std.debug.assert; const log = std.log.scoped(.reloc); const macho = std.macho; @@ -6,141 +7,431 @@ const math = std.math; const mem = std.mem; const meta = std.meta; -pub const aarch64 = @import("reloc/aarch64.zig"); -pub const x86_64 = @import("reloc/x86_64.zig"); - const Allocator = mem.Allocator; +const Arch = std.Target.Cpu.Arch; +const Object = @import("Object.zig"); const Symbol = @import("Symbol.zig"); -const TextBlock = @import("Zld.zig").TextBlock; +const TextBlock = Zld.TextBlock; +const Zld = @import("Zld.zig"); pub const Relocation = struct { - @"type": Type, + /// Offset within the `block`s code buffer. + /// Note relocation size can be inferred by relocation's kind. offset: u32, + + /// Parent block containing this relocation. block: *TextBlock, + + /// Target symbol: either a regular or a proxy. 
target: *Symbol, - pub fn cast(base: *Relocation, comptime T: type) ?*T { - if (base.@"type" != T.base_type) - return null; + payload: union(enum) { + unsigned: Unsigned, + branch: Branch, + page: Page, + page_off: PageOff, + pointer_to_got: PointerToGot, + signed: Signed, + load: Load, + }, - return @fieldParentPtr(T, "base", base); - } + pub const Unsigned = struct { + subtractor: ?*Symbol = null, - // pub fn resolve(base: *Relocation) !void { - // return switch (base.@"type") { - // .unsigned => @fieldParentPtr(Unsigned, "base", base).resolve(), - // .branch_aarch64 => @fieldParentPtr(aarch64.Branch, "base", base).resolve(), - // .page => @fieldParentPtr(aarch64.Page, "base", base).resolve(), - // .page_off => @fieldParentPtr(aarch64.PageOff, "base", base).resolve(), - // .got_page => @fieldParentPtr(aarch64.GotPage, "base", base).resolve(), - // .got_page_off => @fieldParentPtr(aarch64.GotPageOff, "base", base).resolve(), - // .pointer_to_got => @fieldParentPtr(aarch64.PointerToGot, "base", base).resolve(), - // .tlvp_page => @fieldParentPtr(aarch64.TlvpPage, "base", base).resolve(), - // .tlvp_page_off => @fieldParentPtr(aarch64.TlvpPageOff, "base", base).resolve(), - // .branch_x86_64 => @fieldParentPtr(x86_64.Branch, "base", base).resolve(), - // .signed => @fieldParentPtr(x86_64.Signed, "base", base).resolve(), - // .got_load => @fieldParentPtr(x86_64.GotLoad, "base", base).resolve(), - // .got => @fieldParentPtr(x86_64.Got, "base", base).resolve(), - // .tlv => @fieldParentPtr(x86_64.Tlv, "base", base).resolve(), - // }; - // } + /// Addend embedded directly in the relocation slot + addend: i64, - pub const Type = enum { - branch_aarch64, - unsigned, - page, - page_off, - got_page, - got_page_off, - tlvp_page, - pointer_to_got, - tlvp_page_off, - branch_x86_64, - signed, - got_load, - got, - tlv, + /// Extracted from r_length: + /// => 3 implies true + /// => 2 implies false + /// => * is unreachable + is_64bit: bool, + + pub fn resolve(self: Unsigned, 
base: Relocation, source_addr: u64, target_addr: u64) !void { + // const addend = if (unsigned.base.target == .section) + // unsigned.addend - @intCast(i64, args.source_target_sect_addr.?) + // else + // unsigned.addend; + + // const result = if (args.subtractor) |subtractor| + // @intCast(i64, args.target_addr) - @intCast(i64, subtractor) + addend + // else + // @intCast(i64, args.target_addr) + addend; + + // log.debug(" | calculated addend 0x{x}", .{addend}); + // log.debug(" | calculated unsigned value 0x{x}", .{result}); + + // if (unsigned.is_64bit) { + // mem.writeIntLittle( + // u64, + // unsigned.base.code[0..8], + // @bitCast(u64, result), + // ); + // } else { + // mem.writeIntLittle( + // u32, + // unsigned.base.code[0..4], + // @truncate(u32, @bitCast(u64, result)), + // ); + // } + } + + pub fn format(self: Unsigned, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "Unsigned {{ ", .{}); + if (self.subtractor) |sub| { + try std.fmt.format(writer, ".subtractor = {}, ", .{sub}); + } + try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); + const length: usize = if (self.is_64bit) 8 else 4; + try std.fmt.format(writer, ".length = {}, ", .{length}); + try std.fmt.format(writer, "}}", .{}); + } }; - pub fn format(base: *const Relocation, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - try std.fmt.format(writer, "Relocation {{ ", .{}); - try std.fmt.format(writer, ".type = {s}, ", .{base.@"type"}); - try std.fmt.format(writer, ".offset = {}, ", .{base.offset}); - try std.fmt.format(writer, ".block = {}", .{base.block.local_sym_index}); - try std.fmt.format(writer, ".target = {}, ", .{base.target}); + pub const Branch = struct { + arch: Arch, - try switch (base.@"type") { - .unsigned => @fieldParentPtr(Unsigned, "base", base).format(fmt, options, writer), - .branch_aarch64 => @fieldParentPtr(aarch64.Branch, "base", 
base).format(fmt, options, writer), - .page => @fieldParentPtr(aarch64.Page, "base", base).format(fmt, options, writer), - .page_off => @fieldParentPtr(aarch64.PageOff, "base", base).format(fmt, options, writer), - .got_page => @fieldParentPtr(aarch64.GotPage, "base", base).format(fmt, options, writer), - .got_page_off => @fieldParentPtr(aarch64.GotPageOff, "base", base).format(fmt, options, writer), - .pointer_to_got => @fieldParentPtr(aarch64.PointerToGot, "base", base).format(fmt, options, writer), - .tlvp_page => @fieldParentPtr(aarch64.TlvpPage, "base", base).format(fmt, options, writer), - .tlvp_page_off => @fieldParentPtr(aarch64.TlvpPageOff, "base", base).format(fmt, options, writer), - .branch_x86_64 => @fieldParentPtr(x86_64.Branch, "base", base).format(fmt, options, writer), - .signed => @fieldParentPtr(x86_64.Signed, "base", base).format(fmt, options, writer), - .got_load => @fieldParentPtr(x86_64.GotLoad, "base", base).format(fmt, options, writer), - .got => @fieldParentPtr(x86_64.Got, "base", base).format(fmt, options, writer), - .tlv => @fieldParentPtr(x86_64.Tlv, "base", base).format(fmt, options, writer), + pub fn resolve(self: Branch, base: Relocation, source_addr: u64, target_addr: u64) !void { + switch (self.arch) { + .aarch64 => { + const displacement = try math.cast(i28, @intCast(i64, target_addr) - @intCast(i64, source_addr)); + var inst = aarch64.Instruction{ + .unconditional_branch_immediate = mem.bytesToValue( + meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.unconditional_branch_immediate, + ), + base.block.code[base.offset..][0..4], + ), + }; + inst.unconditional_branch_immediate.imm26 = @truncate(u26, @bitCast(u28, displacement >> 2)); + mem.writeIntLittle(u32, base.block.code[base.offset..][0..4], inst.toU32()); + }, + .x86_64 => { + const displacement = try math.cast(i32, @intCast(i64, target_addr) - @intCast(i64, source_addr) - 4); + mem.writeIntLittle(u32, base.block.code[base.offset..][0..4], @bitCast(u32, displacement)); + 
}, + else => return error.UnsupportedCpuArchitecture, + } + } + + pub fn format(self: Branch, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "Branch {{}}", .{}); + } + }; + + pub const Page = struct { + kind: enum { + page, + got, + tlvp, + }, + addend: ?u32 = null, + + pub fn resolve(self: Page, base: Relocation, source_addr: u64, target_addr: u64) !void { + const actual_target_addr = if (self.addend) |addend| target_addr + addend else target_addr; + const source_page = @intCast(i32, source_addr >> 12); + const target_page = @intCast(i32, actual_target_addr >> 12); + const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); + + var inst = aarch64.Instruction{ + .pc_relative_address = mem.bytesToValue( + meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.pc_relative_address, + ), + base.block.code[base.offset..][0..4], + ), + }; + inst.pc_relative_address.immhi = @truncate(u19, pages >> 2); + inst.pc_relative_address.immlo = @truncate(u2, pages); + + mem.writeIntLittle(u32, base.block.code[base.offset..][0..4], inst.toU32()); + } + + pub fn format(self: Page, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "Page {{ ", .{}); + switch (self.kind) { + .page => {}, + .got => { + try std.fmt.format(writer, ".got, ", .{}); + }, + .tlvp => { + try std.fmt.format(writer, ".tlvp", .{}); + }, + } + if (self.addend) |add| { + try std.fmt.format(writer, ".addend = {}, ", .{add}); + } + try std.fmt.format(writer, "}}", .{}); + } + }; + + pub const PageOff = struct { + kind: enum { + page, + got, + tlvp, + }, + addend: ?u32 = null, + op_kind: ?OpKind = null, + + pub const OpKind = enum { + arithmetic, + load, }; - try std.fmt.format(writer, "}}", .{}); - } -}; + pub fn resolve(self: PageOff, base: Relocation, source_addr: u64, target_addr: u64) !void { + switch (self.kind) { + 
.page => { + // const target_addr = if (page_off.addend) |addend| args.target_addr + addend else args.target_addr; + // const narrowed = @truncate(u12, target_addr); -pub const Unsigned = struct { - base: Relocation, - subtractor: ?*Symbol = null, - /// Addend embedded directly in the relocation slot - addend: i64, - /// Extracted from r_length: - /// => 3 implies true - /// => 2 implies false - /// => * is unreachable - is_64bit: bool, + // log.debug(" | narrowed address within the page 0x{x}", .{narrowed}); + // log.debug(" | {s} opcode", .{page_off.op_kind}); - pub const base_type: Relocation.Type = .unsigned; + // var inst = page_off.inst; + // if (page_off.op_kind == .arithmetic) { + // inst.add_subtract_immediate.imm12 = narrowed; + // } else { + // const offset: u12 = blk: { + // if (inst.load_store_register.size == 0) { + // if (inst.load_store_register.v == 1) { + // // 128-bit SIMD is scaled by 16. + // break :blk try math.divExact(u12, narrowed, 16); + // } + // // Otherwise, 8-bit SIMD or ldrb. + // break :blk narrowed; + // } else { + // const denom: u4 = try math.powi(u4, 2, inst.load_store_register.size); + // break :blk try math.divExact(u12, narrowed, denom); + // } + // }; + // inst.load_store_register.offset = offset; + // } - // pub fn resolve(unsigned: Unsigned) !void { - // const addend = if (unsigned.base.target == .section) - // unsigned.addend - @intCast(i64, args.source_target_sect_addr.?) 
- // else - // unsigned.addend; + // mem.writeIntLittle(u32, page_off.base.code[0..4], inst.toU32()); - // const result = if (args.subtractor) |subtractor| - // @intCast(i64, args.target_addr) - @intCast(i64, subtractor) + addend - // else - // @intCast(i64, args.target_addr) + addend; + }, + .got => { + // const narrowed = @truncate(u12, args.target_addr); - // log.debug(" | calculated addend 0x{x}", .{addend}); - // log.debug(" | calculated unsigned value 0x{x}", .{result}); + // log.debug(" | narrowed address within the page 0x{x}", .{narrowed}); - // if (unsigned.is_64bit) { - // mem.writeIntLittle( - // u64, - // unsigned.base.code[0..8], - // @bitCast(u64, result), - // ); - // } else { - // mem.writeIntLittle( - // u32, - // unsigned.base.code[0..4], - // @truncate(u32, @bitCast(u64, result)), - // ); - // } - // } + // var inst = page_off.inst; + // const offset = try math.divExact(u12, narrowed, 8); + // inst.load_store_register.offset = offset; - pub fn format(self: Unsigned, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - if (self.subtractor) |sub| { - try std.fmt.format(writer, ".subtractor = {}, ", .{sub}); + // mem.writeIntLittle(u32, page_off.base.code[0..4], inst.toU32()); + }, + .tlvp => { + + // const narrowed = @truncate(u12, args.target_addr); + + // log.debug(" | narrowed address within the page 0x{x}", .{narrowed}); + + // var inst = page_off.inst; + // inst.add_subtract_immediate.imm12 = narrowed; + + // mem.writeIntLittle(u32, page_off.base.code[0..4], inst.toU32()); + }, + } } - try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); - const length: usize = if (self.is_64bit) 8 else 4; - try std.fmt.format(writer, ".length = {}, ", .{length}); + + pub fn format(self: PageOff, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "PageOff {{ ", .{}); + switch (self.kind) { + .page => {}, + 
.got => { + try std.fmt.format(writer, ".got, ", .{}); + }, + .tlvp => { + try std.fmt.format(writer, ".tlvp, ", .{}); + }, + } + if (self.addend) |add| { + try std.fmt.format(writer, ".addend = {}, ", .{add}); + } + if (self.op_kind) |op| { + try std.fmt.format(writer, ".op_kind = {s}, ", .{op}); + } + try std.fmt.format(writer, "}}", .{}); + } + }; + + pub const PointerToGot = struct { + pub fn resolve(self: PointerToGot, base: Relocation, source_addr: u64, target_addr: u64) !void { + const result = try math.cast(i32, @intCast(i64, target_addr) - @intCast(i64, source_addr)); + mem.writeIntLittle(u32, base.block.code[base.offset..][0..4], @bitCast(u32, result)); + } + + pub fn format(self: PointerToGot, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "PointerToGot {{}}", .{}); + } + }; + + pub const Signed = struct { + addend: i32, + correction: i4, + + pub fn resolve(self: Signed, base: Relocation, source_addr: u64, target_addr: u64) !void { + // const target_addr = target_addr: { + // if (signed.base.target == .section) { + // const source_target = @intCast(i64, args.source_source_sect_addr.?) 
+ @intCast(i64, signed.base.offset) + signed.addend + 4; + // const source_disp = source_target - @intCast(i64, args.source_target_sect_addr.?); + // break :target_addr @intCast(i64, args.target_addr) + source_disp; + // } + // break :target_addr @intCast(i64, args.target_addr) + signed.addend; + // }; + // const displacement = try math.cast( + // i32, + // target_addr - @intCast(i64, args.source_addr) - signed.correction - 4, + // ); + + // log.debug(" | addend 0x{x}", .{signed.addend}); + // log.debug(" | correction 0x{x}", .{signed.correction}); + // log.debug(" | displacement 0x{x}", .{displacement}); + + // mem.writeIntLittle(u32, signed.base.code[0..4], @bitCast(u32, displacement)); + } + + pub fn format(self: Signed, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "Signed {{ ", .{}); + try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); + try std.fmt.format(writer, ".correction = {}, ", .{self.correction}); + try std.fmt.format(writer, "}}", .{}); + } + }; + + pub const Load = struct { + kind: enum { + got, + tlvp, + }, + addend: ?i32 = null, + + pub fn resolve(self: Load, base: Relocation, source_addr: u64, target_addr: u64) !void { + if (self.kind == .tlvp) { + // We need to rewrite the opcode from movq to leaq. 
+ base.block.code[base.offset - 2] = 0x8d; + } + const addend = if (self.addend) |addend| addend else 0; + const displacement = try math.cast( + i32, + @intCast(i64, target_addr) - @intCast(i64, source_addr) - 4 + addend, + ); + mem.writeIntLittle(u32, base.block.code[base.offset..][0..4], @bitCast(u32, displacement)); + } + + pub fn format(self: Load, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "Load {{ ", .{}); + try std.fmt.format(writer, "{s}, ", .{self.kind}); + if (self.addend) |addend| { + try std.fmt.format(writer, ".addend = {}, ", .{addend}); + } + try std.fmt.format(writer, "}}", .{}); + } + }; + + pub fn resolve(self: Relocation, zld: *Zld) !void { + const source_addr = blk: { + const sym = zld.locals.items[self.block.local_sym_index]; + break :blk sym.payload.regular.address; + }; + const target_addr = blk: { + const is_via_got = inner: { + switch (self.payload) { + .pointer_to_got => break :inner true, + .page => |page| break :inner page.kind == .got, + .page_off => |page_off| break :inner page_off.kind == .got, + .load => |load| break :inner load.kind == .got, + else => break :inner false, + } + }; + + if (is_via_got) { + const dc_seg = zld.load_commands.items[zld.data_const_segment_cmd_index.?].Segment; + const got = dc_seg.sections.items[zld.got_section_index.?]; + const got_index = self.target.got_index orelse { + log.err("expected GOT entry for symbol '{s}'", .{self.target.name}); + log.err(" this is an internal linker error", .{}); + return error.FailedToResolveRelocationTarget; + }; + break :blk got.addr + got_index * @sizeOf(u64); + } + + switch (self.target.payload) { + .regular => |reg| break :blk reg.address, + .proxy => |proxy| { + if (mem.eql(u8, self.target.name, "__tlv_bootstrap")) { + const segment = zld.load_commands.items[zld.data_segment_cmd_index.?].Segment; + const tlv = segment.sections.items[zld.tlv_section_index.?]; + break :blk tlv.addr; + } + + const segment = 
zld.load_commands.items[zld.text_segment_cmd_index.?].Segment; + const stubs = segment.sections.items[zld.stubs_section_index.?]; + const stubs_index = self.target.stubs_index orelse { + if (proxy.bind_info.items.len > 0) { + break :blk 0; // Dynamically bound by dyld. + } + log.err("expected stubs index or dynamic bind address for symbol '{s}'", .{ + self.target.name, + }); + log.err(" this is an internal linker error", .{}); + return error.FailedToResolveRelocationTarget; + }; + break :blk stubs.addr + stubs_index * stubs.reserved2; + }, + else => { + log.err("failed to resolve symbol '{s}' as a relocation target", .{self.target.name}); + log.err(" this is an internal linker error", .{}); + return error.FailedToResolveRelocationTarget; + }, + } + }; + switch (self.payload) { + .unsigned => |unsigned| try unsigned.resolve(self, source_addr, target_addr), + .branch => |branch| try branch.resolve(self, source_addr, target_addr), + .page => |page| try page.resolve(self, source_addr, target_addr), + .page_off => |page_off| try page_off.resolve(self, source_addr, target_addr), + .pointer_to_got => |pointer_to_got| try pointer_to_got.resolve(self, source_addr, target_addr), + .signed => |signed| try signed.resolve(self, source_addr, target_addr), + .load => |load| try load.resolve(self, source_addr, target_addr), + } + } + + pub fn format(self: Relocation, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + try std.fmt.format(writer, "Relocation {{ ", .{}); + try std.fmt.format(writer, ".offset = {}, ", .{self.offset}); + try std.fmt.format(writer, ".block = {}", .{self.block.local_sym_index}); + try std.fmt.format(writer, ".target = {}, ", .{self.target}); + + switch (self.payload) { + .unsigned => |unsigned| try unsigned.format(fmt, options, writer), + .branch => |branch| try branch.format(fmt, options, writer), + .page => |page| try page.format(fmt, options, writer), + .page_off => |page_off| try page_off.format(fmt, options, writer), 
+ .pointer_to_got => |pointer_to_got| try pointer_to_got.format(fmt, options, writer), + .signed => |signed| try signed.format(fmt, options, writer), + .load => |load| try load.format(fmt, options, writer), + } + + try std.fmt.format(writer, "}}", .{}); } }; @@ -161,3 +452,342 @@ pub const RelocIterator = struct { return self.buffer[@intCast(u32, self.index + 1)]; } }; + +pub const Parser = struct { + object: *Object, + zld: *Zld, + it: *RelocIterator, + block: *TextBlock, + + /// Base address of the parsed text block in the source section. + base_addr: u64, + + /// Used only when targeting aarch64 + addend: ?u32 = null, + + /// Parsed subtractor symbol from _RELOC_SUBTRACTOR reloc type. + subtractor: ?*Symbol = null, + + pub fn parse(self: *Parser) !void { + while (self.it.next()) |rel| { + const out_rel = blk: { + switch (self.object.arch.?) { + .aarch64 => { + const out_rel = switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) { + .ARM64_RELOC_BRANCH26 => try self.parseBranch(rel), + .ARM64_RELOC_SUBTRACTOR => { + // Subtractor is not a relocation with effect on the TextBlock, so + // parse it and carry on. + try self.parseSubtractor(rel); + + // Verify SUBTRACTOR is followed by UNSIGNED. + const next = @intToEnum(macho.reloc_type_arm64, self.it.peek().r_type); + if (next != .ARM64_RELOC_UNSIGNED) { + log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); + return error.UnexpectedRelocationType; + } + continue; + }, + .ARM64_RELOC_UNSIGNED => try self.parseUnsigned(rel), + .ARM64_RELOC_ADDEND => { + // Addend is not a relocation with effect on the TextBlock, so + // parse it and carry on. + try self.parseAddend(rel); + + // Verify ADDEND is followed by a load. 
+ const next = @intToEnum(macho.reloc_type_arm64, self.it.peek().r_type); + switch (next) { + .ARM64_RELOC_PAGE21, .ARM64_RELOC_PAGEOFF12 => {}, + else => { + log.err("unexpected relocation type: expected PAGE21 or PAGEOFF12, found {s}", .{next}); + return error.UnexpectedRelocationType; + }, + } + continue; + }, + .ARM64_RELOC_PAGE21, + .ARM64_RELOC_GOT_LOAD_PAGE21, + .ARM64_RELOC_TLVP_LOAD_PAGE21, + => try self.parsePage(rel), + .ARM64_RELOC_PAGEOFF12, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12, + .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, + => try self.parsePageOff(rel), + .ARM64_RELOC_POINTER_TO_GOT => try self.parsePointerToGot(rel), + }; + break :blk out_rel; + }, + .x86_64 => { + const out_rel = switch (@intToEnum(macho.reloc_type_x86_64, rel.r_type)) { + .X86_64_RELOC_BRANCH => try self.parseBranch(rel), + .X86_64_RELOC_SUBTRACTOR => { + // Subtractor is not a relocation with effect on the TextBlock, so + // parse it and carry on. + try self.parseSubtractor(rel); + + // Verify SUBTRACTOR is followed by UNSIGNED. 
+ const next = @intToEnum(macho.reloc_type_x86_64, self.it.peek().r_type); + if (next != .X86_64_RELOC_UNSIGNED) { + log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); + return error.UnexpectedRelocationType; + } + continue; + }, + .X86_64_RELOC_UNSIGNED => try self.parseUnsigned(rel), + .X86_64_RELOC_SIGNED, + .X86_64_RELOC_SIGNED_1, + .X86_64_RELOC_SIGNED_2, + .X86_64_RELOC_SIGNED_4, + => try self.parseSigned(rel), + .X86_64_RELOC_GOT_LOAD, + .X86_64_RELOC_GOT, + .X86_64_RELOC_TLV, + => try self.parseLoad(rel), + }; + break :blk out_rel; + }, + else => unreachable, + } + }; + try self.block.relocs.append(out_rel); + + if (out_rel.target.payload == .regular) { + try self.block.references.put(out_rel.target.payload.regular.local_sym_index, {}); + } + + const is_via_got = switch (out_rel.payload) { + .pointer_to_got => true, + .load => |load| load.kind == .got, + .page => |page| page.kind == .got, + .page_off => |page_off| page_off.kind == .got, + else => false, + }; + + if (is_via_got and out_rel.target.got_index == null) { + const index = @intCast(u32, self.zld.got_entries.items.len); + out_rel.target.got_index = index; + try self.zld.got_entries.append(self.zld.allocator, out_rel.target); + log.debug("adding GOT entry for symbol {s} at index {}", .{ out_rel.target.name, index }); + } + + if (out_rel.payload == .branch) { + const sym = out_rel.target; + + if (sym.stubs_index != null) continue; + if (sym.payload != .proxy) continue; + + const index = @intCast(u32, self.zld.stubs.items.len); + sym.stubs_index = index; + try self.zld.stubs.append(self.zld.allocator, sym); + + log.debug("adding stub entry for symbol {s} at index {}", .{ sym.name, index }); + } + } + } + + fn parseBaseRelInfo(self: *Parser, rel: macho.relocation_info) !Relocation { + const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); + const target = try self.object.symbolFromReloc(rel); + return Relocation{ + .offset = offset, + .target = target, + 
.block = self.block, + .payload = undefined, + }; + } + + fn parseUnsigned(self: *Parser, rel: macho.relocation_info) !Relocation { + defer { + // Reset parser's subtractor state + self.subtractor = null; + } + + assert(rel.r_pcrel == 0); + + var parsed = try self.parseBaseRelInfo(rel); + const is_64bit: bool = switch (rel.r_length) { + 3 => true, + 2 => false, + else => unreachable, + }; + const addend: i64 = if (is_64bit) + mem.readIntLittle(i64, self.block.code[parsed.offset..][0..8]) + else + mem.readIntLittle(i32, self.block.code[parsed.offset..][0..4]); + + parsed.payload = .{ + .unsigned = .{ + .subtractor = self.subtractor, + .is_64bit = is_64bit, + .addend = addend, + }, + }; + + return parsed; + } + + fn parseBranch(self: *Parser, rel: macho.relocation_info) !Relocation { + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + var parsed = try self.parseBaseRelInfo(rel); + parsed.payload = .{ + .branch = .{ + .arch = self.object.arch.?, + }, + }; + return parsed; + } + + fn parsePage(self: *Parser, rel: macho.relocation_info) !Relocation { + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); + + defer if (rel_type == .ARM64_RELOC_PAGE21) { + // Reset parser's addend state + self.addend = null; + }; + + const addend = if (rel_type == .ARM64_RELOC_PAGE21) + self.addend + else + null; + + var parsed = try self.parseBaseRelInfo(rel); + parsed.payload = .{ + .page = .{ + .kind = switch (rel_type) { + .ARM64_RELOC_PAGE21 => .page, + .ARM64_RELOC_GOT_LOAD_PAGE21 => .got, + .ARM64_RELOC_TLVP_LOAD_PAGE21 => .tlvp, + else => unreachable, + }, + .addend = addend, + }, + }; + return parsed; + } + + fn parsePageOff(self: *Parser, rel: macho.relocation_info) !Relocation { + assert(rel.r_pcrel == 0); + assert(rel.r_length == 2); + + const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); + + defer if (rel_type == .ARM64_RELOC_PAGEOFF12) { + // Reset parser's addend state + self.addend 
= null; + }; + + const addend = if (rel_type == .ARM64_RELOC_PAGEOFF12) + self.addend + else + null; + + var parsed = try self.parseBaseRelInfo(rel); + const op_kind: ?Relocation.PageOff.OpKind = blk: { + if (rel_type != .ARM64_RELOC_PAGEOFF12) break :blk null; + const op_kind: Relocation.PageOff.OpKind = if (isArithmeticOp(self.block.code[parsed.offset..][0..4])) + .arithmetic + else + .load; + break :blk op_kind; + }; + + parsed.payload = .{ + .page_off = .{ + .kind = switch (rel_type) { + .ARM64_RELOC_PAGEOFF12 => .page, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => .got, + .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => .tlvp, + else => unreachable, + }, + .addend = addend, + .op_kind = op_kind, + }, + }; + return parsed; + } + + fn parsePointerToGot(self: *Parser, rel: macho.relocation_info) !Relocation { + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + var parsed = try self.parseBaseRelInfo(rel); + parsed.payload = .{ + .pointer_to_got = .{}, + }; + return parsed; + } + + fn parseAddend(self: *Parser, rel: macho.relocation_info) !void { + assert(rel.r_pcrel == 0); + assert(rel.r_extern == 0); + assert(self.addend == null); + + self.addend = rel.r_symbolnum; + } + + fn parseSigned(self: *Parser, rel: macho.relocation_info) !Relocation { + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + var parsed = try self.parseBaseRelInfo(rel); + const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); + const correction: i4 = switch (rel_type) { + .X86_64_RELOC_SIGNED => 0, + .X86_64_RELOC_SIGNED_1 => 1, + .X86_64_RELOC_SIGNED_2 => 2, + .X86_64_RELOC_SIGNED_4 => 4, + else => unreachable, + }; + const addend = mem.readIntLittle(i32, self.block.code[parsed.offset..][0..4]) + correction; + + parsed.payload = .{ + .signed = .{ + .correction = correction, + .addend = addend, + }, + }; + + return parsed; + } + + fn parseSubtractor(self: *Parser, rel: macho.relocation_info) !void { + assert(rel.r_pcrel == 0); + assert(self.subtractor == null); + + self.subtractor = try 
self.object.symbolFromReloc(rel); + } + + fn parseLoad(self: *Parser, rel: macho.relocation_info) !Relocation { + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + var parsed = try self.parseBaseRelInfo(rel); + const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); + const addend = if (rel_type == .X86_64_RELOC_GOT) + mem.readIntLittle(i32, self.block.code[parsed.offset..][0..4]) + else + null; + + parsed.payload = .{ + .load = .{ + .kind = switch (rel_type) { + .X86_64_RELOC_GOT_LOAD, .X86_64_RELOC_GOT => .got, + .X86_64_RELOC_TLV => .tlvp, + else => unreachable, + }, + .addend = addend, + }, + }; + return parsed; + } +}; + +inline fn isArithmeticOp(inst: *const [4]u8) bool { + const group_decode = @truncate(u5, inst[3]); + return ((group_decode >> 2) == 4); +} diff --git a/src/link/MachO/reloc/aarch64.zig b/src/link/MachO/reloc/aarch64.zig deleted file mode 100644 index 5105282e43..0000000000 --- a/src/link/MachO/reloc/aarch64.zig +++ /dev/null @@ -1,618 +0,0 @@ -const std = @import("std"); -const aarch64 = @import("../../../codegen/aarch64.zig"); -const assert = std.debug.assert; -const log = std.log.scoped(.reloc); -const macho = std.macho; -const math = std.math; -const mem = std.mem; -const meta = std.meta; -const reloc = @import("../reloc.zig"); - -const Allocator = mem.Allocator; -const Object = @import("../Object.zig"); -const Relocation = reloc.Relocation; -const Symbol = @import("../Symbol.zig"); -const TextBlock = Zld.TextBlock; -const Zld = @import("../Zld.zig"); - -pub const Branch = struct { - base: Relocation, - /// Always .UnconditionalBranchImmediate - // inst: aarch64.Instruction, - - pub const base_type: Relocation.Type = .branch_aarch64; - - // pub fn resolve(branch: Branch, args: Relocation.ResolveArgs) !void { - // const displacement = try math.cast(i28, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr)); - - // log.debug(" | displacement 0x{x}", .{displacement}); - - // var inst = branch.inst; - // 
inst.unconditional_branch_immediate.imm26 = @truncate(u26, @bitCast(u28, displacement >> 2)); - // mem.writeIntLittle(u32, branch.base.code[0..4], inst.toU32()); - // } - - pub fn format(self: Branch, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = self; - _ = fmt; - _ = options; - _ = writer; - } -}; - -pub const Page = struct { - base: Relocation, - addend: ?u32 = null, - /// Always .PCRelativeAddress - // inst: aarch64.Instruction, - - pub const base_type: Relocation.Type = .page; - - // pub fn resolve(page: Page, args: Relocation.ResolveArgs) !void { - // const target_addr = if (page.addend) |addend| args.target_addr + addend else args.target_addr; - // const source_page = @intCast(i32, args.source_addr >> 12); - // const target_page = @intCast(i32, target_addr >> 12); - // const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); - - // log.debug(" | calculated addend 0x{x}", .{page.addend}); - // log.debug(" | moving by {} pages", .{pages}); - - // var inst = page.inst; - // inst.pc_relative_address.immhi = @truncate(u19, pages >> 2); - // inst.pc_relative_address.immlo = @truncate(u2, pages); - - // mem.writeIntLittle(u32, page.base.code[0..4], inst.toU32()); - // } - - pub fn format(self: Page, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - if (self.addend) |addend| { - try std.fmt.format(writer, ".addend = {}, ", .{addend}); - } - } -}; - -pub const PageOff = struct { - base: Relocation, - addend: ?u32 = null, - op_kind: OpKind, - // inst: aarch64.Instruction, - - pub const base_type: Relocation.Type = .page_off; - - pub const OpKind = enum { - arithmetic, - load_store, - }; - - // pub fn resolve(page_off: PageOff, args: Relocation.ResolveArgs) !void { - // const target_addr = if (page_off.addend) |addend| args.target_addr + addend else args.target_addr; - // const narrowed = @truncate(u12, target_addr); - - // log.debug(" | narrowed 
address within the page 0x{x}", .{narrowed}); - // log.debug(" | {s} opcode", .{page_off.op_kind}); - - // var inst = page_off.inst; - // if (page_off.op_kind == .arithmetic) { - // inst.add_subtract_immediate.imm12 = narrowed; - // } else { - // const offset: u12 = blk: { - // if (inst.load_store_register.size == 0) { - // if (inst.load_store_register.v == 1) { - // // 128-bit SIMD is scaled by 16. - // break :blk try math.divExact(u12, narrowed, 16); - // } - // // Otherwise, 8-bit SIMD or ldrb. - // break :blk narrowed; - // } else { - // const denom: u4 = try math.powi(u4, 2, inst.load_store_register.size); - // break :blk try math.divExact(u12, narrowed, denom); - // } - // }; - // inst.load_store_register.offset = offset; - // } - - // mem.writeIntLittle(u32, page_off.base.code[0..4], inst.toU32()); - // } - - pub fn format(self: PageOff, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - if (self.addend) |addend| { - try std.fmt.format(writer, ".addend = {}, ", .{addend}); - } - try std.fmt.format(writer, ".op_kind = {s}, ", .{self.op_kind}); - } -}; - -pub const GotPage = struct { - base: Relocation, - /// Always .PCRelativeAddress - // inst: aarch64.Instruction, - - pub const base_type: Relocation.Type = .got_page; - - // pub fn resolve(page: GotPage, args: Relocation.ResolveArgs) !void { - // const source_page = @intCast(i32, args.source_addr >> 12); - // const target_page = @intCast(i32, args.target_addr >> 12); - // const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); - - // log.debug(" | moving by {} pages", .{pages}); - - // var inst = page.inst; - // inst.pc_relative_address.immhi = @truncate(u19, pages >> 2); - // inst.pc_relative_address.immlo = @truncate(u2, pages); - - // mem.writeIntLittle(u32, page.base.code[0..4], inst.toU32()); - // } - - pub fn format(self: GotPage, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = self; - _ = 
fmt; - _ = options; - _ = writer; - } -}; - -pub const GotPageOff = struct { - base: Relocation, - /// Always .LoadStoreRegister with size = 3 for GOT indirection - // inst: aarch64.Instruction, - - pub const base_type: Relocation.Type = .got_page_off; - - // pub fn resolve(page_off: GotPageOff, args: Relocation.ResolveArgs) !void { - // const narrowed = @truncate(u12, args.target_addr); - - // log.debug(" | narrowed address within the page 0x{x}", .{narrowed}); - - // var inst = page_off.inst; - // const offset = try math.divExact(u12, narrowed, 8); - // inst.load_store_register.offset = offset; - - // mem.writeIntLittle(u32, page_off.base.code[0..4], inst.toU32()); - // } - - pub fn format(self: GotPageOff, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = self; - _ = fmt; - _ = options; - _ = writer; - } -}; - -pub const PointerToGot = struct { - base: Relocation, - - pub const base_type: Relocation.Type = .pointer_to_got; - - // pub fn resolve(ptr_to_got: PointerToGot, args: Relocation.ResolveArgs) !void { - // const result = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr)); - - // log.debug(" | calculated value 0x{x}", .{result}); - - // mem.writeIntLittle(u32, ptr_to_got.base.code[0..4], @bitCast(u32, result)); - // } - - pub fn format(self: PointerToGot, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = self; - _ = fmt; - _ = options; - _ = writer; - } -}; - -pub const TlvpPage = struct { - base: Relocation, - /// Always .PCRelativeAddress - // inst: aarch64.Instruction, - - pub const base_type: Relocation.Type = .tlvp_page; - - // pub fn resolve(page: TlvpPage, args: Relocation.ResolveArgs) !void { - // const source_page = @intCast(i32, args.source_addr >> 12); - // const target_page = @intCast(i32, args.target_addr >> 12); - // const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); - - // log.debug(" | moving by {} pages", 
.{pages}); - - // var inst = page.inst; - // inst.pc_relative_address.immhi = @truncate(u19, pages >> 2); - // inst.pc_relative_address.immlo = @truncate(u2, pages); - - // mem.writeIntLittle(u32, page.base.code[0..4], inst.toU32()); - // } - - pub fn format(self: TlvpPage, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = self; - _ = fmt; - _ = options; - _ = writer; - } -}; - -pub const TlvpPageOff = struct { - base: Relocation, - /// Always .AddSubtractImmediate regardless of the source instruction. - /// This means, we always rewrite the instruction to add even if the - /// source instruction was an ldr. - // inst: aarch64.Instruction, - - pub const base_type: Relocation.Type = .tlvp_page_off; - - // pub fn resolve(page_off: TlvpPageOff, args: Relocation.ResolveArgs) !void { - // const narrowed = @truncate(u12, args.target_addr); - - // log.debug(" | narrowed address within the page 0x{x}", .{narrowed}); - - // var inst = page_off.inst; - // inst.add_subtract_immediate.imm12 = narrowed; - - // mem.writeIntLittle(u32, page_off.base.code[0..4], inst.toU32()); - // } - - pub fn format(self: TlvpPageOff, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = self; - _ = fmt; - _ = options; - _ = writer; - } -}; - -pub const Parser = struct { - object: *Object, - zld: *Zld, - it: *reloc.RelocIterator, - block: *TextBlock, - base_addr: u64, - addend: ?u32 = null, - subtractor: ?*Symbol = null, - - pub fn parse(self: *Parser) !void { - while (self.it.next()) |rel| { - const out_rel = switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) { - .ARM64_RELOC_BRANCH26 => try self.parseBranch(rel), - .ARM64_RELOC_SUBTRACTOR => { - // Subtractor is not a relocation with effect on the TextBlock, so - // parse it and carry on. 
- try self.parseSubtractor(rel); - continue; - }, - .ARM64_RELOC_UNSIGNED => try self.parseUnsigned(rel), - .ARM64_RELOC_ADDEND => { - // Addend is not a relocation with effect on the TextBlock, so - // parse it and carry on. - try self.parseAddend(rel); - continue; - }, - .ARM64_RELOC_PAGE21, - .ARM64_RELOC_GOT_LOAD_PAGE21, - .ARM64_RELOC_TLVP_LOAD_PAGE21, - => try self.parsePage(rel), - .ARM64_RELOC_PAGEOFF12 => try self.parsePageOff(rel), - .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => try self.parseGotLoadPageOff(rel), - .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => try self.parseTlvpLoadPageOff(rel), - .ARM64_RELOC_POINTER_TO_GOT => try self.parsePointerToGot(rel), - }; - try self.block.relocs.append(out_rel); - - if (out_rel.target.payload == .regular) { - try self.block.references.put(out_rel.target.payload.regular.local_sym_index, {}); - } - - switch (out_rel.@"type") { - .got_page, .got_page_off, .pointer_to_got => { - const sym = out_rel.target; - - if (sym.got_index != null) continue; - - const index = @intCast(u32, self.zld.got_entries.items.len); - sym.got_index = index; - try self.zld.got_entries.append(self.zld.allocator, sym); - - log.debug("adding GOT entry for symbol {s} at index {}", .{ sym.name, index }); - }, - .branch_aarch64 => { - const sym = out_rel.target; - - if (sym.stubs_index != null) continue; - if (sym.payload != .proxy) continue; - - const index = @intCast(u32, self.zld.stubs.items.len); - sym.stubs_index = index; - try self.zld.stubs.append(self.zld.allocator, sym); - - log.debug("adding stub entry for symbol {s} at index {}", .{ sym.name, index }); - }, - else => {}, - } - } - } - - fn parseAddend(self: *Parser, rel: macho.relocation_info) !void { - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - assert(rel_type == .ARM64_RELOC_ADDEND); - assert(rel.r_pcrel == 0); - assert(rel.r_extern == 0); - assert(self.addend == null); - - self.addend = rel.r_symbolnum; - - // Verify ADDEND is followed by a load. 
- const next = @intToEnum(macho.reloc_type_arm64, self.it.peek().r_type); - switch (next) { - .ARM64_RELOC_PAGE21, .ARM64_RELOC_PAGEOFF12 => {}, - else => { - log.err("unexpected relocation type: expected PAGE21 or PAGEOFF12, found {s}", .{next}); - return error.UnexpectedRelocationType; - }, - } - } - - fn parseBranch(self: *Parser, rel: macho.relocation_info) !*Relocation { - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - assert(rel_type == .ARM64_RELOC_BRANCH26); - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); - const target = try self.object.symbolFromReloc(rel); - - var branch = try self.object.allocator.create(Branch); - errdefer self.object.allocator.destroy(branch); - - branch.* = .{ - .base = .{ - .@"type" = .branch_aarch64, - .offset = offset, - .target = target, - .block = self.block, - }, - }; - - return &branch.base; - } - - fn parsePage(self: *Parser, rel: macho.relocation_info) !*Relocation { - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - const target = try self.object.symbolFromReloc(rel); - const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); - - const ptr: *Relocation = ptr: { - switch (rel_type) { - .ARM64_RELOC_PAGE21 => { - defer { - // Reset parser's addend state - self.addend = null; - } - var page = try self.object.allocator.create(Page); - errdefer self.object.allocator.destroy(page); - - page.* = .{ - .base = .{ - .@"type" = .page, - .offset = offset, - .target = target, - .block = self.block, - }, - .addend = self.addend, - }; - - break :ptr &page.base; - }, - .ARM64_RELOC_GOT_LOAD_PAGE21 => { - var page = try self.object.allocator.create(GotPage); - errdefer self.object.allocator.destroy(page); - - page.* = .{ - .base = .{ - .@"type" = .got_page, - .offset = offset, - .target = target, - .block = self.block, - }, - }; - - 
break :ptr &page.base; - }, - .ARM64_RELOC_TLVP_LOAD_PAGE21 => { - var page = try self.object.allocator.create(TlvpPage); - errdefer self.object.allocator.destroy(page); - - page.* = .{ - .base = .{ - .@"type" = .tlvp_page, - .offset = offset, - .target = target, - .block = self.block, - }, - }; - - break :ptr &page.base; - }, - else => unreachable, - } - }; - - return ptr; - } - - fn parsePageOff(self: *Parser, rel: macho.relocation_info) !*Relocation { - defer { - // Reset parser's addend state - self.addend = null; - } - - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - assert(rel_type == .ARM64_RELOC_PAGEOFF12); - assert(rel.r_pcrel == 0); - assert(rel.r_length == 2); - - const target = try self.object.symbolFromReloc(rel); - const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); - const op_kind: PageOff.OpKind = if (isArithmeticOp(self.block.code[offset..][0..4])) - .arithmetic - else - .load_store; - - var page_off = try self.object.allocator.create(PageOff); - errdefer self.object.allocator.destroy(page_off); - - page_off.* = .{ - .base = .{ - .@"type" = .page_off, - .offset = offset, - .target = target, - .block = self.block, - }, - .op_kind = op_kind, - .addend = self.addend, - }; - - return &page_off.base; - } - - fn parseGotLoadPageOff(self: *Parser, rel: macho.relocation_info) !*Relocation { - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - assert(rel_type == .ARM64_RELOC_GOT_LOAD_PAGEOFF12); - assert(rel.r_pcrel == 0); - assert(rel.r_length == 2); - - const target = try self.object.symbolFromReloc(rel); - const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); - assert(!isArithmeticOp(self.block.code[offset..][0..4])); - - var page_off = try self.object.allocator.create(GotPageOff); - errdefer self.object.allocator.destroy(page_off); - - page_off.* = .{ - .base = .{ - .@"type" = .got_page_off, - .offset = offset, - .target = target, - .block = self.block, - }, - }; - - 
return &page_off.base; - } - - fn parseTlvpLoadPageOff(self: *Parser, rel: macho.relocation_info) !*Relocation { - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - assert(rel_type == .ARM64_RELOC_TLVP_LOAD_PAGEOFF12); - assert(rel.r_pcrel == 0); - assert(rel.r_length == 2); - - const RegInfo = struct { - rd: u5, - rn: u5, - size: u1, - }; - - const target = try self.object.symbolFromReloc(rel); - const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); - - var page_off = try self.object.allocator.create(TlvpPageOff); - errdefer self.object.allocator.destroy(page_off); - - page_off.* = .{ - .base = .{ - .@"type" = .tlvp_page_off, - .offset = offset, - .target = target, - .block = self.block, - }, - }; - - return &page_off.base; - } - - fn parseSubtractor(self: *Parser, rel: macho.relocation_info) !void { - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - assert(rel_type == .ARM64_RELOC_SUBTRACTOR); - assert(rel.r_pcrel == 0); - assert(self.subtractor == null); - - self.subtractor = try self.object.symbolFromReloc(rel); - - // Verify SUBTRACTOR is followed by UNSIGNED. 
- const next = @intToEnum(macho.reloc_type_arm64, self.it.peek().r_type); - if (next != .ARM64_RELOC_UNSIGNED) { - log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); - return error.UnexpectedRelocationType; - } - } - - fn parseUnsigned(self: *Parser, rel: macho.relocation_info) !*Relocation { - defer { - // Reset parser's subtractor state - self.subtractor = null; - } - - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - assert(rel_type == .ARM64_RELOC_UNSIGNED); - assert(rel.r_pcrel == 0); - - const target = try self.object.symbolFromReloc(rel); - const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); - const is_64bit: bool = switch (rel.r_length) { - 3 => true, - 2 => false, - else => unreachable, - }; - const addend: i64 = if (is_64bit) - mem.readIntLittle(i64, self.block.code[offset..][0..8]) - else - mem.readIntLittle(i32, self.block.code[offset..][0..4]); - - var unsigned = try self.object.allocator.create(reloc.Unsigned); - errdefer self.object.allocator.destroy(unsigned); - - unsigned.* = .{ - .base = .{ - .@"type" = .unsigned, - .offset = offset, - .target = target, - .block = self.block, - }, - .subtractor = self.subtractor, - .is_64bit = is_64bit, - .addend = addend, - }; - - return &unsigned.base; - } - - fn parsePointerToGot(self: *Parser, rel: macho.relocation_info) !*Relocation { - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - assert(rel_type == .ARM64_RELOC_POINTER_TO_GOT); - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - var ptr_to_got = try self.object.allocator.create(PointerToGot); - errdefer self.object.allocator.destroy(ptr_to_got); - - const target = try self.object.symbolFromReloc(rel); - const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); - - ptr_to_got.* = .{ - .base = .{ - .@"type" = .pointer_to_got, - .offset = offset, - .target = target, - .block = self.block, - }, - }; - - return &ptr_to_got.base; - } -}; - 
-inline fn isArithmeticOp(inst: *const [4]u8) bool { - const group_decode = @truncate(u5, inst[3]); - return ((group_decode >> 2) == 4); -} diff --git a/src/link/MachO/reloc/x86_64.zig b/src/link/MachO/reloc/x86_64.zig deleted file mode 100644 index 85c797dcd0..0000000000 --- a/src/link/MachO/reloc/x86_64.zig +++ /dev/null @@ -1,385 +0,0 @@ -const std = @import("std"); -const assert = std.debug.assert; -const log = std.log.scoped(.reloc); -const macho = std.macho; -const math = std.math; -const mem = std.mem; -const meta = std.meta; -const reloc = @import("../reloc.zig"); - -const Allocator = mem.Allocator; -const Object = @import("../Object.zig"); -const Relocation = reloc.Relocation; -const Symbol = @import("../Symbol.zig"); -const TextBlock = Zld.TextBlock; -const Zld = @import("../Zld.zig"); - -pub const Branch = struct { - base: Relocation, - - pub const base_type: Relocation.Type = .branch_x86_64; - - // pub fn resolve(branch: Branch, args: Relocation.ResolveArgs) !void { - // const displacement = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4); - // log.debug(" | displacement 0x{x}", .{displacement}); - // mem.writeIntLittle(u32, branch.base.code[0..4], @bitCast(u32, displacement)); - // } - - pub fn format(self: Branch, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = self; - _ = fmt; - _ = options; - _ = writer; - } -}; - -pub const Signed = struct { - base: Relocation, - addend: i32, - correction: i4, - - pub const base_type: Relocation.Type = .signed; - - // pub fn resolve(signed: Signed, args: Relocation.ResolveArgs) !void { - // const target_addr = target_addr: { - // if (signed.base.target == .section) { - // const source_target = @intCast(i64, args.source_source_sect_addr.?) 
+ @intCast(i64, signed.base.offset) + signed.addend + 4; - // const source_disp = source_target - @intCast(i64, args.source_target_sect_addr.?); - // break :target_addr @intCast(i64, args.target_addr) + source_disp; - // } - // break :target_addr @intCast(i64, args.target_addr) + signed.addend; - // }; - // const displacement = try math.cast( - // i32, - // target_addr - @intCast(i64, args.source_addr) - signed.correction - 4, - // ); - - // log.debug(" | addend 0x{x}", .{signed.addend}); - // log.debug(" | correction 0x{x}", .{signed.correction}); - // log.debug(" | displacement 0x{x}", .{displacement}); - - // mem.writeIntLittle(u32, signed.base.code[0..4], @bitCast(u32, displacement)); - // } - - pub fn format(self: Signed, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); - try std.fmt.format(writer, ".correction = {}, ", .{self.correction}); - } -}; - -pub const GotLoad = struct { - base: Relocation, - - pub const base_type: Relocation.Type = .got_load; - - // pub fn resolve(got_load: GotLoad, args: Relocation.ResolveArgs) !void { - // const displacement = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4); - // log.debug(" | displacement 0x{x}", .{displacement}); - // mem.writeIntLittle(u32, got_load.base.code[0..4], @bitCast(u32, displacement)); - // } - - pub fn format(self: GotLoad, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = self; - _ = fmt; - _ = options; - _ = writer; - } -}; - -pub const Got = struct { - base: Relocation, - addend: i32, - - pub const base_type: Relocation.Type = .got; - - // pub fn resolve(got: Got, args: Relocation.ResolveArgs) !void { - // const displacement = try math.cast( - // i32, - // @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4 + got.addend, - // ); - // log.debug(" | displacement 0x{x}", 
.{displacement}); - // mem.writeIntLittle(u32, got.base.code[0..4], @bitCast(u32, displacement)); - // } - - pub fn format(self: Got, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); - } -}; - -pub const Tlv = struct { - base: Relocation, - - pub const base_type: Relocation.Type = .tlv; - - // pub fn resolve(tlv: Tlv, args: Relocation.ResolveArgs) !void { - // // We need to rewrite the opcode from movq to leaq. - // tlv.op.* = 0x8d; - // log.debug(" | rewriting op to leaq", .{}); - - // const displacement = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4); - // log.debug(" | displacement 0x{x}", .{displacement}); - - // mem.writeIntLittle(u32, tlv.base.code[0..4], @bitCast(u32, displacement)); - // } - pub fn format(self: Tlv, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = self; - _ = fmt; - _ = options; - _ = writer; - } -}; - -pub const Parser = struct { - object: *Object, - zld: *Zld, - it: *reloc.RelocIterator, - block: *TextBlock, - base_addr: u64, - subtractor: ?*Symbol = null, - - pub fn parse(self: *Parser) !void { - while (self.it.next()) |rel| { - const out_rel = switch (@intToEnum(macho.reloc_type_x86_64, rel.r_type)) { - .X86_64_RELOC_BRANCH => try self.parseBranch(rel), - .X86_64_RELOC_SUBTRACTOR => { - // Subtractor is not a relocation with effect on the TextBlock, so - // parse it and carry on. 
- try self.parseSubtractor(rel); - continue; - }, - .X86_64_RELOC_UNSIGNED => try self.parseUnsigned(rel), - .X86_64_RELOC_SIGNED, - .X86_64_RELOC_SIGNED_1, - .X86_64_RELOC_SIGNED_2, - .X86_64_RELOC_SIGNED_4, - => try self.parseSigned(rel), - .X86_64_RELOC_GOT_LOAD => try self.parseGotLoad(rel), - .X86_64_RELOC_GOT => try self.parseGot(rel), - .X86_64_RELOC_TLV => try self.parseTlv(rel), - }; - try self.block.relocs.append(out_rel); - - if (out_rel.target.payload == .regular) { - try self.block.references.put(out_rel.target.payload.regular.local_sym_index, {}); - } - - switch (out_rel.@"type") { - .got_load, .got => { - const sym = out_rel.target; - - if (sym.got_index != null) continue; - - const index = @intCast(u32, self.zld.got_entries.items.len); - sym.got_index = index; - try self.zld.got_entries.append(self.zld.allocator, sym); - - log.debug("adding GOT entry for symbol {s} at index {}", .{ sym.name, index }); - }, - .branch_x86_64 => { - const sym = out_rel.target; - - if (sym.stubs_index != null) continue; - if (sym.payload != .proxy) continue; - - const index = @intCast(u32, self.zld.stubs.items.len); - sym.stubs_index = index; - try self.zld.stubs.append(self.zld.allocator, sym); - - log.debug("adding stub entry for symbol {s} at index {}", .{ sym.name, index }); - }, - else => {}, - } - } - } - - fn parseBranch(self: *Parser, rel: macho.relocation_info) !*Relocation { - const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - assert(rel_type == .X86_64_RELOC_BRANCH); - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); - const target = try self.object.symbolFromReloc(rel); - - var branch = try self.object.allocator.create(Branch); - errdefer self.object.allocator.destroy(branch); - - branch.* = .{ - .base = .{ - .@"type" = .branch_x86_64, - .offset = offset, - .target = target, - .block = self.block, - }, - }; - - return &branch.base; - } - - fn 
parseSigned(self: *Parser, rel: macho.relocation_info) !*Relocation { - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - const target = try self.object.symbolFromReloc(rel); - const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); - const correction: i4 = switch (rel_type) { - .X86_64_RELOC_SIGNED => 0, - .X86_64_RELOC_SIGNED_1 => 1, - .X86_64_RELOC_SIGNED_2 => 2, - .X86_64_RELOC_SIGNED_4 => 4, - else => unreachable, - }; - const addend = mem.readIntLittle(i32, self.block.code[offset..][0..4]) + correction; - - var signed = try self.object.allocator.create(Signed); - errdefer self.object.allocator.destroy(signed); - - signed.* = .{ - .base = .{ - .@"type" = .signed, - .offset = offset, - .target = target, - .block = self.block, - }, - .addend = addend, - .correction = correction, - }; - - return &signed.base; - } - - fn parseGotLoad(self: *Parser, rel: macho.relocation_info) !*Relocation { - const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - assert(rel_type == .X86_64_RELOC_GOT_LOAD); - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); - const target = try self.object.symbolFromReloc(rel); - - var got_load = try self.object.allocator.create(GotLoad); - errdefer self.object.allocator.destroy(got_load); - - got_load.* = .{ - .base = .{ - .@"type" = .got_load, - .offset = offset, - .target = target, - .block = self.block, - }, - }; - - return &got_load.base; - } - - fn parseGot(self: *Parser, rel: macho.relocation_info) !*Relocation { - const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - assert(rel_type == .X86_64_RELOC_GOT); - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); - const target = try self.object.symbolFromReloc(rel); - const addend = 
mem.readIntLittle(i32, self.block.code[offset..][0..4]); - - var got = try self.object.allocator.create(Got); - errdefer self.object.allocator.destroy(got); - - got.* = .{ - .base = .{ - .@"type" = .got, - .offset = offset, - .target = target, - .block = self.block, - }, - .addend = addend, - }; - - return &got.base; - } - - fn parseTlv(self: *Parser, rel: macho.relocation_info) !*Relocation { - const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - assert(rel_type == .X86_64_RELOC_TLV); - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); - const target = try self.object.symbolFromReloc(rel); - - var tlv = try self.object.allocator.create(Tlv); - errdefer self.object.allocator.destroy(tlv); - - tlv.* = .{ - .base = .{ - .@"type" = .tlv, - .offset = offset, - .target = target, - .block = self.block, - }, - }; - - return &tlv.base; - } - - fn parseSubtractor(self: *Parser, rel: macho.relocation_info) !void { - const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - assert(rel_type == .X86_64_RELOC_SUBTRACTOR); - assert(rel.r_pcrel == 0); - assert(self.subtractor == null); - - self.subtractor = try self.object.symbolFromReloc(rel); - - // Verify SUBTRACTOR is followed by UNSIGNED. 
- const next = @intToEnum(macho.reloc_type_x86_64, self.it.peek().r_type); - if (next != .X86_64_RELOC_UNSIGNED) { - log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); - return error.UnexpectedRelocationType; - } - } - - fn parseUnsigned(self: *Parser, rel: macho.relocation_info) !*Relocation { - defer { - // Reset parser's subtractor state - self.subtractor = null; - } - - const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - assert(rel_type == .X86_64_RELOC_UNSIGNED); - assert(rel.r_pcrel == 0); - - const target = try self.object.symbolFromReloc(rel); - const is_64bit: bool = switch (rel.r_length) { - 3 => true, - 2 => false, - else => unreachable, - }; - const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); - const addend: i64 = if (is_64bit) - mem.readIntLittle(i64, self.block.code[offset..][0..8]) - else - mem.readIntLittle(i32, self.block.code[offset..][0..4]); - - var unsigned = try self.object.allocator.create(reloc.Unsigned); - errdefer self.object.allocator.destroy(unsigned); - - unsigned.* = .{ - .base = .{ - .@"type" = .unsigned, - .offset = offset, - .target = target, - .block = self.block, - }, - .subtractor = self.subtractor, - .is_64bit = is_64bit, - .addend = addend, - }; - - return &unsigned.base; - } -}; From 555b66c25567ab23402e3792bdbe81b7a4e98803 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 7 Jul 2021 10:36:41 +0200 Subject: [PATCH 20/81] zld: move should_rebase logic into Symbol --- src/link/MachO/Object.zig | 58 +++++++++++------ src/link/MachO/Symbol.zig | 21 ++++++- src/link/MachO/Zld.zig | 129 +------------------------------------- src/link/MachO/reloc.zig | 57 +++++++++++++++-- 4 files changed, 112 insertions(+), 153 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 6e8925b648..9925611243 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -339,6 +339,7 @@ const TextBlockParser = struct { zld: *Zld, nlists: 
[]NlistWithIndex, index: u32 = 0, + match: Zld.MatchingSection, fn peek(self: *TextBlockParser) ?NlistWithIndex { return if (self.index + 1 < self.nlists.len) self.nlists[self.index + 1] else null; @@ -405,6 +406,8 @@ const TextBlockParser = struct { const senior_nlist = aliases.pop(); const senior_sym = self.zld.locals.items[senior_nlist.index]; assert(senior_sym.payload == .regular); + senior_sym.payload.regular.segment_id = self.match.seg; + senior_sym.payload.regular.section_id = self.match.sect; const start_addr = senior_nlist.nlist.n_value - self.section.addr; const end_addr = if (next_nlist) |n| n.nlist.n_value - self.section.addr else self.section.size; @@ -417,6 +420,11 @@ const TextBlockParser = struct { try out.ensureTotalCapacity(aliases.items.len); for (aliases.items) |alias| { out.appendAssumeCapacity(alias.index); + + const sym = self.zld.locals.items[alias.index]; + const reg = &sym.payload.regular; + reg.segment_id = self.match.seg; + reg.section_id = self.match.sect; } break :blk out.toOwnedSlice(); } else null; @@ -439,6 +447,18 @@ const TextBlockParser = struct { try self.object.parseRelocs(self.zld, relocs, block, start_addr); } + const is_zerofill = blk: { + const tseg = self.zld.load_commands.items[self.match.seg].Segment; + const tsect = tseg.sections.items[self.match.sect]; + const tsect_type = sectionType(tsect); + break :blk tsect_type == macho.S_ZEROFILL or + tsect_type == macho.S_THREAD_LOCAL_ZEROFILL or + tsect_type == macho.S_THREAD_LOCAL_VARIABLES; + }; + if (is_zerofill) { + mem.set(u8, block.code, 0); + } + self.index += 1; return block; @@ -511,28 +531,16 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { .object = self, .zld = zld, .nlists = filtered_nlists, + .match = match, }; while (try parser.next()) |block| { - { - const sym = zld.locals.items[block.local_sym_index]; - const reg = &sym.payload.regular; - if (reg.file) |file| { - if (file != self) { - log.warn("deduping definition of {s} in {s}", .{ sym.name, 
self.name.? }); - continue; - } - } - reg.segment_id = match.seg; - reg.section_id = match.sect; - } - - if (block.aliases) |aliases| { - for (aliases) |alias| { - const sym = zld.locals.items[alias]; - const reg = &sym.payload.regular; - reg.segment_id = match.seg; - reg.section_id = match.sect; + const sym = zld.locals.items[block.local_sym_index]; + const reg = &sym.payload.regular; + if (reg.file) |file| { + if (file != self) { + log.warn("deduping definition of {s} in {s}", .{ sym.name, self.name.? }); + continue; } } @@ -587,6 +595,18 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { try self.parseRelocs(zld, relocs, block, 0); } + const is_zerofill = blk: { + const tseg = zld.load_commands.items[match.seg].Segment; + const tsect = tseg.sections.items[match.sect]; + const tsect_type = sectionType(tsect); + break :blk tsect_type == macho.S_ZEROFILL or + tsect_type == macho.S_THREAD_LOCAL_ZEROFILL or + tsect_type == macho.S_THREAD_LOCAL_VARIABLES; + }; + if (is_zerofill) { + mem.set(u8, block.code, 0); + } + if (zld.last_text_block) |last| { last.next = block; block.prev = last; diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index 16cd0c9ecc..5f437dc209 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -2,6 +2,7 @@ const Symbol = @This(); const std = @import("std"); const assert = std.debug.assert; +const commands = @import("commands.zig"); const macho = std.macho; const mem = std.mem; @@ -57,6 +58,8 @@ pub const Regular = struct { local_sym_index: u32 = 0, + should_rebase: bool = false, + pub const Linkage = enum { translation_unit, linkage_unit, @@ -74,6 +77,9 @@ pub const Regular = struct { if (self.weak_ref) { try std.fmt.format(writer, ".weak_ref, ", .{}); } + if (self.should_rebase) { + try std.fmt.format(writer, ".should_rebase, ", .{}); + } if (self.file) |file| { try std.fmt.format(writer, ".file = {s}, ", .{file.name.?}); } @@ -108,8 +114,8 @@ pub const Proxy = struct { /// Dynamic binding info - 
spots within the final /// executable where this proxy is referenced from. bind_info: std.ArrayListUnmanaged(struct { - segment_id: u16, - address: u64, + local_sym_index: u32, + offset: u32, }) = .{}, /// Dylib where to locate this symbol. @@ -198,6 +204,17 @@ pub fn isTemp(symbol: Symbol) bool { return false; } +pub fn needsTlvOffset(self: Symbol, zld: *Zld) bool { + if (self.payload != .regular) return false; + + const reg = self.payload.regular; + const seg = zld.load_command.items[reg.segment_id].Segment; + const sect = seg.sections.items[reg.section_id]; + const sect_type = commands.sectionType(sect); + + return sect_type == macho.S_THREAD_LOCAL_VARIABLES; +} + pub fn asNlist(symbol: *Symbol, strtab: *StringTable) !macho.nlist_64 { const n_strx = try strtab.getOrPut(symbol.name); const nlist = nlist: { diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 69f8821cb7..2f28f20253 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -107,8 +107,6 @@ locals: std.ArrayListUnmanaged(*Symbol) = .{}, imports: std.ArrayListUnmanaged(*Symbol) = .{}, globals: std.StringArrayHashMapUnmanaged(*Symbol) = .{}, -threadlocal_offsets: std.ArrayListUnmanaged(TlvOffset) = .{}, // TODO merge with Symbol abstraction -local_rebases: std.ArrayListUnmanaged(Pointer) = .{}, stubs: std.ArrayListUnmanaged(*Symbol) = .{}, got_entries: std.ArrayListUnmanaged(*Symbol) = .{}, @@ -197,8 +195,6 @@ pub fn init(allocator: *Allocator) !Zld { } pub fn deinit(self: *Zld) void { - self.threadlocal_offsets.deinit(self.allocator); - self.local_rebases.deinit(self.allocator); self.stubs.deinit(self.allocator); self.got_entries.deinit(self.allocator); @@ -225,8 +221,6 @@ pub fn deinit(self: *Zld) void { } self.dylibs.deinit(self.allocator); - self.globals.deinit(self.allocator); - for (self.imports.items) |sym| { sym.deinit(self.allocator); self.allocator.destroy(sym); @@ -239,6 +233,7 @@ pub fn deinit(self: *Zld) void { } self.locals.deinit(self.allocator); + 
self.globals.deinit(self.allocator); self.strtab.deinit(); } @@ -290,7 +285,6 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg // try self.allocateDataSegment(); // self.allocateLinkeditSegment(); // try self.allocateSymbols(); - // try self.allocateProxyBindAddresses(); // try self.flush(); } @@ -449,7 +443,7 @@ fn updateMetadata(self: *Zld) !void { } } -const MatchingSection = struct { +pub const MatchingSection = struct { seg: u16, sect: u16, }; @@ -1140,31 +1134,6 @@ fn allocateSymbols(self: *Zld) !void { } } -fn allocateProxyBindAddresses(self: *Zld) !void { - for (self.objects.items) |object| { - for (object.sections.items) |sect| { - const relocs = sect.relocs orelse continue; - - for (relocs) |rel| { - if (rel.@"type" != .unsigned) continue; // GOT is currently special-cased - if (rel.target != .symbol) continue; - - const sym = object.symbols.items[rel.target.symbol]; - if (sym.payload != .proxy) continue; - - const target_map = sect.target_map orelse continue; - const target_seg = self.load_commands.items[target_map.segment_id].Segment; - const target_sect = target_seg.sections.items[target_map.section_id]; - - try sym.payload.proxy.bind_info.append(self.allocator, .{ - .segment_id = target_map.segment_id, - .address = target_sect.addr + target_map.offset + rel.offset, - }); - } - } - } -} - fn writeStubHelperCommon(self: *Zld) !void { const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; const stub_helper = &text_segment.sections.items[self.stub_helper_section_index.?]; @@ -1748,72 +1717,6 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void { args.source_source_sect_addr = sect.inner.addr; args.source_target_sect_addr = source_sect.inner.addr; } - - const sect_type = sectionType(target_sect); - const should_rebase = rebase: { - if (!unsigned.is_64bit) break :rebase false; - - // TODO actually, a check similar to what dyld is doing, that is, verifying - // that the segment is writable 
should be enough here. - const is_right_segment = blk: { - if (self.data_segment_cmd_index) |idx| { - if (target_map.segment_id == idx) { - break :blk true; - } - } - if (self.data_const_segment_cmd_index) |idx| { - if (target_map.segment_id == idx) { - break :blk true; - } - } - break :blk false; - }; - - if (!is_right_segment) break :rebase false; - if (sect_type != macho.S_LITERAL_POINTERS and - sect_type != macho.S_REGULAR) - { - break :rebase false; - } - if (rel.target == .symbol) { - const sym = object.symbols.items[rel.target.symbol]; - if (sym.payload == .proxy) { - break :rebase false; - } - } - - break :rebase true; - }; - - if (should_rebase) { - try self.local_rebases.append(self.allocator, .{ - .offset = source_addr - target_seg.inner.vmaddr, - .segment_id = target_map.segment_id, - }); - } - - // TLV is handled via a separate offset mechanism. - // Calculate the offset to the initializer. - if (sect_type == macho.S_THREAD_LOCAL_VARIABLES) tlv: { - // TODO we don't want to save offset to tlv_bootstrap - if (mem.eql(u8, object.symbols.items[rel.target.symbol].name, "__tlv_bootstrap")) break :tlv; - - const base_addr = blk: { - if (self.tlv_data_section_index) |index| { - const tlv_data = target_seg.sections.items[index]; - break :blk tlv_data.addr; - } else { - const tlv_bss = target_seg.sections.items[self.tlv_bss_section_index.?]; - break :blk tlv_bss.addr; - } - }; - // Since we require TLV data to always preceed TLV bss section, we calculate - // offsets wrt to the former if it is defined; otherwise, wrt to the latter. 
- try self.threadlocal_offsets.append(self.allocator, .{ - .source_addr = args.source_addr, - .offset = args.target_addr - base_addr, - }); - } }, .got_page, .got_page_off, .got_load, .got, .pointer_to_got => { const dc_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; @@ -1839,34 +1742,6 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void { try rel.resolve(args); } } - - log.debug("writing contents of '{s},{s}' section from '{s}' from 0x{x} to 0x{x}", .{ - segname, - sectname, - object.name, - target_sect_off, - target_sect_off + sect.code.len, - }); - - if (sectionType(target_sect) == macho.S_ZEROFILL or - sectionType(target_sect) == macho.S_THREAD_LOCAL_ZEROFILL or - sectionType(target_sect) == macho.S_THREAD_LOCAL_VARIABLES) - { - log.debug("zeroing out '{s},{s}' from 0x{x} to 0x{x}", .{ - segmentName(target_sect), - sectionName(target_sect), - target_sect_off, - target_sect_off + sect.code.len, - }); - - // Zero-out the space - var zeroes = try self.allocator.alloc(u8, sect.code.len); - defer self.allocator.free(zeroes); - mem.set(u8, zeroes, 0); - try self.file.?.pwriteAll(zeroes, target_sect_off); - } else { - try self.file.?.pwriteAll(sect.code, target_sect_off); - } } } } diff --git a/src/link/MachO/reloc.zig b/src/link/MachO/reloc.zig index 4693e89787..ce95b26252 100644 --- a/src/link/MachO/reloc.zig +++ b/src/link/MachO/reloc.zig @@ -1,6 +1,7 @@ const std = @import("std"); const aarch64 = @import("../../codegen/aarch64.zig"); const assert = std.debug.assert; +const commands = @import("commands.zig"); const log = std.log.scoped(.reloc); const macho = std.macho; const math = std.math; @@ -567,14 +568,60 @@ pub const Parser = struct { const index = @intCast(u32, self.zld.got_entries.items.len); out_rel.target.got_index = index; try self.zld.got_entries.append(self.zld.allocator, out_rel.target); - log.debug("adding GOT entry for symbol {s} at index {}", .{ out_rel.target.name, index }); - } - if (out_rel.payload == .branch) { + 
log.debug("adding GOT entry for symbol {s} at index {}", .{ out_rel.target.name, index }); + } else if (out_rel.payload == .unsigned) { + const sym = out_rel.target; + switch (sym.payload) { + .proxy => { + try sym.payload.proxy.bind_info.append(self.zld.allocator, .{ + .local_sym_index = self.block.local_sym_index, + .offset = out_rel.offset, + }); + }, + else => { + const source_sym = self.zld.locals.items[self.block.local_sym_index]; + const source_reg = &source_sym.payload.regular; + const seg = self.zld.load_commands.items[source_reg.segment_id].Segment; + const sect = seg.sections.items[source_reg.section_id]; + const sect_type = commands.sectionType(sect); + + const should_rebase = rebase: { + if (!out_rel.payload.unsigned.is_64bit) break :rebase false; + + // TODO actually, a check similar to what dyld is doing, that is, verifying + // that the segment is writable should be enough here. + const is_right_segment = blk: { + if (self.zld.data_segment_cmd_index) |idx| { + if (source_reg.segment_id == idx) { + break :blk true; + } + } + if (self.zld.data_const_segment_cmd_index) |idx| { + if (source_reg.segment_id == idx) { + break :blk true; + } + } + break :blk false; + }; + + if (!is_right_segment) break :rebase false; + if (sect_type != macho.S_LITERAL_POINTERS and + sect_type != macho.S_REGULAR) + { + break :rebase false; + } + + break :rebase true; + }; + source_reg.should_rebase = should_rebase; + }, + } + } else if (out_rel.payload == .branch) blk: { const sym = out_rel.target; - if (sym.stubs_index != null) continue; - if (sym.payload != .proxy) continue; + if (sym.stubs_index != null) break :blk; + if (sym.payload != .proxy) break :blk; const index = @intCast(u32, self.zld.stubs.items.len); sym.stubs_index = index; From dfa69e3c308fe7a5cb8b78c494312e1d280afa18 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 7 Jul 2021 12:10:19 +0200 Subject: [PATCH 21/81] zld: dealloc TextBlock if omitted --- src/link/MachO/Object.zig | 6 ++++-- 
src/link/MachO/Zld.zig | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 9925611243..5c10c6bd33 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -512,10 +512,10 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { const relocs = mem.bytesAsSlice(macho.relocation_info, raw_relocs); // Is there any padding between symbols within the section? - const is_padded = self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; + const is_splittable = self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; next: { - if (is_padded) blocks: { + if (is_splittable) blocks: { const filtered_nlists = NlistWithIndex.filterInSection( sorted_nlists.items, sect_id + 1, @@ -540,6 +540,8 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { if (reg.file) |file| { if (file != self) { log.warn("deduping definition of {s} in {s}", .{ sym.name, self.name.? }); + block.deinit(self.allocator); + self.allocator.destroy(block); continue; } } diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 2f28f20253..5e6d2c17ce 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -147,7 +147,7 @@ pub const TextBlock = struct { } block.relocs.deinit(); block.references.deinit(); - allocator.free(code); + allocator.free(block.code); } pub fn print_this(self: *const TextBlock, zld: *Zld) void { From a04bc1ed14319bf22769b01709bb7174388734f0 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 7 Jul 2021 13:47:04 +0200 Subject: [PATCH 22/81] zld: update relocs and start prepping for segment allocs --- src/link/MachO/Object.zig | 27 ++--- src/link/MachO/Zld.zig | 225 +------------------------------------- src/link/MachO/reloc.zig | 193 ++++++++++++++++++++------------ 3 files changed, 140 insertions(+), 305 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 5c10c6bd33..65af8166fc 100644 --- a/src/link/MachO/Object.zig +++ 
b/src/link/MachO/Object.zig @@ -48,8 +48,6 @@ dwarf_debug_ranges_index: ?u16 = null, symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{}, strtab: std.ArrayListUnmanaged(u8) = .{}, - -initializers: std.ArrayListUnmanaged(u32) = .{}, data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, symbols: std.ArrayListUnmanaged(*Symbol) = .{}, @@ -157,7 +155,6 @@ pub fn deinit(self: *Object) void { } self.load_commands.deinit(self.allocator); self.data_in_code_entries.deinit(self.allocator); - self.initializers.deinit(self.allocator); self.symtab.deinit(self.allocator); self.strtab.deinit(self.allocator); self.symbols.deinit(self.allocator); @@ -573,6 +570,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { symbol.payload = .{ .regular = .{ .linkage = .translation_unit, + .address = sect.addr, .segment_id = match.seg, .section_id = match.sect, .file = self, @@ -657,6 +655,13 @@ pub fn symbolFromReloc(self: *Object, rel: macho.relocation_info) !*Symbol { }); defer self.allocator.free(name); const symbol = try Symbol.new(self.allocator, name); + symbol.payload = .{ + .regular = .{ + .linkage = .translation_unit, + .address = sect.addr, + .file = self, + }, + }; try self.sections_as_symbols.putNoClobber(self.allocator, sect_id, symbol); break :symbol symbol; }; @@ -666,22 +671,6 @@ pub fn symbolFromReloc(self: *Object, rel: macho.relocation_info) !*Symbol { return symbol; } -pub fn parseInitializers(self: *Object) !void { - const index = self.mod_init_func_section_index orelse return; - const section = self.sections.items[index]; - - log.debug("parsing initializers in {s}", .{self.name.?}); - - // Parse C++ initializers - const relocs = section.relocs orelse unreachable; - try self.initializers.ensureCapacity(self.allocator, relocs.len); - for (relocs) |rel| { - self.initializers.appendAssumeCapacity(rel.target.symbol); - } - - mem.reverse(u32, self.initializers.items); -} - fn parseSymtab(self: *Object) !void { const index = 
self.symtab_cmd_index orelse return; const symtab_cmd = self.load_commands.items[index].Symtab; diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 5e6d2c17ce..36d0e58dc3 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -120,16 +120,6 @@ pub const Output = struct { install_name: ?[]const u8 = null, }; -const TlvOffset = struct { - source_addr: u64, - offset: u64, - - fn cmp(context: void, a: TlvOffset, b: TlvOffset) bool { - _ = context; - return a.source_addr < b.source_addr; - } -}; - pub const TextBlock = struct { local_sym_index: u32, aliases: ?[]u32 = null, @@ -274,12 +264,11 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg try self.parseLibs(args.libs, args.syslibroot); try self.resolveSymbols(); try self.parseTextBlocks(); + try self.sortSections(); + try self.addRpaths(args.rpaths); + try self.addDataInCodeLC(); + try self.addCodeSignatureLC(); return error.TODO; - // try self.updateMetadata(); - // try self.sortSections(); - // try self.addRpaths(args.rpaths); - // try self.addDataInCodeLC(); - // try self.addCodeSignatureLC(); // try self.allocateTextSegment(); // try self.allocateDataConstSegment(); // try self.allocateDataSegment(); @@ -343,106 +332,6 @@ fn parseLibs(self: *Zld, libs: []const []const u8, syslibroot: ?[]const u8) !voi } } -fn mapAndUpdateSections( - self: *Zld, - object: *Object, - source_sect_id: u16, - target_seg_id: u16, - target_sect_id: u16, -) !void { - const source_sect = &object.sections.items[source_sect_id]; - const target_seg = &self.load_commands.items[target_seg_id].Segment; - const target_sect = &target_seg.sections.items[target_sect_id]; - - const alignment = try math.powi(u32, 2, target_sect.@"align"); - const offset = mem.alignForwardGeneric(u64, target_sect.size, alignment); - const size = mem.alignForwardGeneric(u64, source_sect.inner.size, alignment); - - log.debug("{s}: '{s},{s}' mapped to '{s},{s}' from 0x{x} to 0x{x}", .{ - object.name.?, - 
segmentName(source_sect.inner), - sectionName(source_sect.inner), - segmentName(target_sect.*), - sectionName(target_sect.*), - offset, - offset + size, - }); - log.debug(" | flags 0x{x}", .{source_sect.inner.flags}); - - source_sect.target_map = .{ - .segment_id = target_seg_id, - .section_id = target_sect_id, - .offset = @intCast(u32, offset), - }; - target_sect.size = offset + size; -} - -fn updateMetadata(self: *Zld) !void { - for (self.objects.items) |object| { - // Find ideal section alignment and update section mappings - for (object.sections.items) |sect, sect_id| { - const match = (try self.getMatchingSection(sect.inner)) orelse { - log.debug("{s}: unhandled section type 0x{x} for '{s},{s}'", .{ - object.name.?, - sect.inner.flags, - segmentName(sect.inner), - sectionName(sect.inner), - }); - continue; - }; - const target_seg = &self.load_commands.items[match.seg].Segment; - const target_sect = &target_seg.sections.items[match.sect]; - target_sect.@"align" = math.max(target_sect.@"align", sect.inner.@"align"); - - try self.mapAndUpdateSections(object, @intCast(u16, sect_id), match.seg, match.sect); - } - } - - tlv_align: { - const has_tlv = - self.tlv_section_index != null or - self.tlv_data_section_index != null or - self.tlv_bss_section_index != null; - - if (!has_tlv) break :tlv_align; - - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - - if (self.tlv_section_index) |index| { - const sect = &seg.sections.items[index]; - sect.@"align" = 3; // __thread_vars is always 8byte aligned - } - - // Apparently __tlv_data and __tlv_bss need to have matching alignment, so fix it up. 
- // All __thread_data and __thread_bss sections must have same alignment - // https://github.com/apple-opensource/ld64/blob/e28c028b20af187a16a7161d89e91868a450cadc/src/ld/ld.cpp#L1172 - const data_align: u32 = data: { - if (self.tlv_data_section_index) |index| { - const sect = &seg.sections.items[index]; - break :data sect.@"align"; - } - break :tlv_align; - }; - const bss_align: u32 = bss: { - if (self.tlv_bss_section_index) |index| { - const sect = &seg.sections.items[index]; - break :bss sect.@"align"; - } - break :tlv_align; - }; - const max_align = math.max(data_align, bss_align); - - if (self.tlv_data_section_index) |index| { - const sect = &seg.sections.items[index]; - sect.@"align" = max_align; - } - if (self.tlv_bss_section_index) |index| { - const sect = &seg.sections.items[index]; - sect.@"align" = max_align; - } - } -} - pub const MatchingSection = struct { seg: u16, sect: u16, @@ -946,36 +835,6 @@ fn sortSections(self: *Zld) !void { maybe_index.* = new_index; } } - - for (self.objects.items) |object| { - for (object.sections.items) |*sect| { - const target_map = sect.target_map orelse continue; - - const new_index = blk: { - if (self.text_segment_cmd_index.? == target_map.segment_id) { - break :blk text_index_mapping.get(target_map.section_id) orelse unreachable; - } else if (self.data_const_segment_cmd_index.? == target_map.segment_id) { - break :blk data_const_index_mapping.get(target_map.section_id) orelse unreachable; - } else if (self.data_segment_cmd_index.? 
== target_map.segment_id) { - break :blk data_index_mapping.get(target_map.section_id) orelse unreachable; - } else unreachable; - }; - - log.debug("remapping in {s}: '{s},{s}': {} => {}", .{ - object.name.?, - segmentName(sect.inner), - sectionName(sect.inner), - target_map.section_id, - new_index, - }); - - sect.target_map = .{ - .segment_id = target_map.segment_id, - .section_id = new_index, - .offset = target_map.offset, - }; - } - } } fn allocateTextSegment(self: *Zld) !void { @@ -1431,6 +1290,7 @@ fn resolveSymbolsInObject(self: *Zld, object: *Object) !void { symbol.payload = .{ .regular = .{ .linkage = .translation_unit, + .address = sym.n_value, .weak_ref = Symbol.isWeakRef(sym), .file = object, .local_sym_index = @intCast(u32, self.locals.items.len), @@ -1470,6 +1330,7 @@ fn resolveSymbolsInObject(self: *Zld, object: *Object) !void { symbol.payload = .{ .regular = .{ .linkage = linkage, + .address = sym.n_value, .weak_ref = Symbol.isWeakRef(sym), .file = object, }, @@ -1672,80 +1533,6 @@ fn parseTextBlocks(self: *Zld) !void { } } -fn resolveRelocsAndWriteSections(self: *Zld) !void { - for (self.objects.items) |object| { - log.debug("relocating object {s}", .{object.name}); - - for (object.sections.items) |sect| { - if (sectionType(sect.inner) == macho.S_MOD_INIT_FUNC_POINTERS or - sectionType(sect.inner) == macho.S_MOD_TERM_FUNC_POINTERS) continue; - - const segname = segmentName(sect.inner); - const sectname = sectionName(sect.inner); - - log.debug("relocating section '{s},{s}'", .{ segname, sectname }); - - // Get target mapping - const target_map = sect.target_map orelse { - log.debug("no mapping for '{s},{s}'; skipping", .{ segname, sectname }); - continue; - }; - const target_seg = self.load_commands.items[target_map.segment_id].Segment; - const target_sect = target_seg.sections.items[target_map.section_id]; - const target_sect_addr = target_sect.addr + target_map.offset; - const target_sect_off = target_sect.offset + target_map.offset; - - if 
(sect.relocs) |relocs| { - for (relocs) |rel| { - const source_addr = target_sect_addr + rel.offset; - - var args: reloc.Relocation.ResolveArgs = .{ - .source_addr = source_addr, - .target_addr = undefined, - }; - - switch (rel.@"type") { - .unsigned => { - args.target_addr = try self.relocTargetAddr(object, rel.target); - - const unsigned = rel.cast(reloc.Unsigned) orelse unreachable; - if (unsigned.subtractor) |subtractor| { - args.subtractor = try self.relocTargetAddr(object, subtractor); - } - if (rel.target == .section) { - const source_sect = object.sections.items[rel.target.section]; - args.source_source_sect_addr = sect.inner.addr; - args.source_target_sect_addr = source_sect.inner.addr; - } - }, - .got_page, .got_page_off, .got_load, .got, .pointer_to_got => { - const dc_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const got = dc_seg.sections.items[self.got_section_index.?]; - const sym = object.symbols.items[rel.target.symbol]; - const got_index = sym.got_index orelse { - log.err("expected GOT index relocating symbol '{s}'", .{sym.name}); - log.err("this is an internal linker error", .{}); - return error.FailedToResolveRelocationTarget; - }; - args.target_addr = got.addr + got_index * @sizeOf(u64); - }, - else => |tt| { - if (tt == .signed and rel.target == .section) { - const source_sect = object.sections.items[rel.target.section]; - args.source_source_sect_addr = sect.inner.addr; - args.source_target_sect_addr = source_sect.inner.addr; - } - args.target_addr = try self.relocTargetAddr(object, rel.target); - }, - } - - try rel.resolve(args); - } - } - } - } -} - fn populateMetadata(self: *Zld) !void { if (self.pagezero_segment_cmd_index == null) { self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); diff --git a/src/link/MachO/reloc.zig b/src/link/MachO/reloc.zig index ce95b26252..d35344c71b 100644 --- a/src/link/MachO/reloc.zig +++ b/src/link/MachO/reloc.zig @@ -48,33 +48,24 @@ pub const 
Relocation = struct { /// => * is unreachable is_64bit: bool, + source_sect_addr: ?u64 = null, + pub fn resolve(self: Unsigned, base: Relocation, source_addr: u64, target_addr: u64) !void { - // const addend = if (unsigned.base.target == .section) - // unsigned.addend - @intCast(i64, args.source_target_sect_addr.?) - // else - // unsigned.addend; + const addend = if (self.source_sect_addr) |addr| + self.addend - addr + else + self.addend; - // const result = if (args.subtractor) |subtractor| - // @intCast(i64, args.target_addr) - @intCast(i64, subtractor) + addend - // else - // @intCast(i64, args.target_addr) + addend; + const result = if (self.subtractor) |subtractor| + @intCast(i64, target_addr) - @intCast(i64, subtractor.payload.regular.address) + addend + else + @intCast(i64, target_addr) + addend; - // log.debug(" | calculated addend 0x{x}", .{addend}); - // log.debug(" | calculated unsigned value 0x{x}", .{result}); - - // if (unsigned.is_64bit) { - // mem.writeIntLittle( - // u64, - // unsigned.base.code[0..8], - // @bitCast(u64, result), - // ); - // } else { - // mem.writeIntLittle( - // u32, - // unsigned.base.code[0..4], - // @truncate(u32, @bitCast(u64, result)), - // ); - // } + if (self.is_64bit) { + mem.writeIntLittle(u64, base.block.code[base.offset..][0..8], @bitCast(u64, result)); + } else { + mem.writeIntLittle(u32, base.block.code[base.offset..][0..4], @truncate(u32, @bitCast(u64, result))); + } } pub fn format(self: Unsigned, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { @@ -191,56 +182,119 @@ pub const Relocation = struct { pub fn resolve(self: PageOff, base: Relocation, source_addr: u64, target_addr: u64) !void { switch (self.kind) { .page => { - // const target_addr = if (page_off.addend) |addend| args.target_addr + addend else args.target_addr; - // const narrowed = @truncate(u12, target_addr); + const actual_target_addr = if (self.addend) |addend| target_addr + addend else target_addr; + const narrowed 
= @truncate(u12, actual_target_addr); - // log.debug(" | narrowed address within the page 0x{x}", .{narrowed}); - // log.debug(" | {s} opcode", .{page_off.op_kind}); + const op_kind = self.op_kind orelse unreachable; + var inst: aarch64.Instruction = blk: { + switch (op_kind) { + .arithmetic => { + break :blk .{ + .add_subtract_immediate = mem.bytesToValue( + meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.add_subtract_immediate, + ), + base.block.code[base.offset..][0..4], + ), + }; + }, + .load => { + break :blk .{ + .load_store_register = mem.bytesToValue( + meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), + base.block.code[base.offset..][0..4], + ), + }; + }, + } + }; - // var inst = page_off.inst; - // if (page_off.op_kind == .arithmetic) { - // inst.add_subtract_immediate.imm12 = narrowed; - // } else { - // const offset: u12 = blk: { - // if (inst.load_store_register.size == 0) { - // if (inst.load_store_register.v == 1) { - // // 128-bit SIMD is scaled by 16. - // break :blk try math.divExact(u12, narrowed, 16); - // } - // // Otherwise, 8-bit SIMD or ldrb. - // break :blk narrowed; - // } else { - // const denom: u4 = try math.powi(u4, 2, inst.load_store_register.size); - // break :blk try math.divExact(u12, narrowed, denom); - // } - // }; - // inst.load_store_register.offset = offset; - // } - - // mem.writeIntLittle(u32, page_off.base.code[0..4], inst.toU32()); + if (op_kind == .arithmetic) { + inst.add_subtract_immediate.imm12 = narrowed; + } else { + const offset: u12 = blk: { + if (inst.load_store_register.size == 0) { + if (inst.load_store_register.v == 1) { + // 128-bit SIMD is scaled by 16. + break :blk try math.divExact(u12, narrowed, 16); + } + // Otherwise, 8-bit SIMD or ldrb. 
+ break :blk narrowed; + } else { + const denom: u4 = try math.powi(u4, 2, inst.load_store_register.size); + break :blk try math.divExact(u12, narrowed, denom); + } + }; + inst.load_store_register.offset = offset; + } + mem.writeIntLittle(u32, base.block.code[base.offset..][0..4], inst.toU32()); }, .got => { - // const narrowed = @truncate(u12, args.target_addr); - - // log.debug(" | narrowed address within the page 0x{x}", .{narrowed}); - - // var inst = page_off.inst; - // const offset = try math.divExact(u12, narrowed, 8); - // inst.load_store_register.offset = offset; - - // mem.writeIntLittle(u32, page_off.base.code[0..4], inst.toU32()); + const narrowed = @truncate(u12, target_addr); + var inst = mem.bytesToValue( + meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), + base.block.code[base.offset..][0..4], + ); + const offset = try math.divExact(u12, narrowed, 8); + inst.load_store_register.offset = offset; + mem.writeIntLittle(u32, base.block.code[base.offset..][0..4], inst.toU32()); }, .tlvp => { - - // const narrowed = @truncate(u12, args.target_addr); - - // log.debug(" | narrowed address within the page 0x{x}", .{narrowed}); - - // var inst = page_off.inst; - // inst.add_subtract_immediate.imm12 = narrowed; - - // mem.writeIntLittle(u32, page_off.base.code[0..4], inst.toU32()); + const RegInfo = struct { + rd: u5, + rn: u5, + size: u1, + }; + const reg_info: RegInfo = blk: { + if (isArithmeticOp(base.block.code[base.offset..][0..4])) { + const inst = mem.bytesToValue( + meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.add_subtract_immediate, + ), + base.block.code[base.offset..][0..4], + ); + break :blk .{ + .rd = inst.rd, + .rn = inst.rn, + .size = inst.sf, + }; + } else { + const inst = mem.bytesToValue( + meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), + base.block.code[base.offset..][0..4], + ); + break :blk .{ + .rd = inst.rt, + .rn = inst.rn, + .size = 
@truncate(u1, inst.size), + }; + } + }; + const narrowed = @truncate(u12, target_addr); + var inst = aarch64.Instruction{ + .add_subtract_immediate = .{ + .rd = reg_info.rd, + .rn = reg_info.rn, + .imm12 = narrowed, + .sh = 0, + .s = 0, + .op = 0, + .sf = reg_info.size, + }, + }; + mem.writeIntLittle(u32, base.block.code[base.offset..][0..4], inst.toU32()); }, } } @@ -661,12 +715,17 @@ pub const Parser = struct { mem.readIntLittle(i64, self.block.code[parsed.offset..][0..8]) else mem.readIntLittle(i32, self.block.code[parsed.offset..][0..4]); + const source_sect_addr = if (rel.r_extern == 0) blk: { + if (parsed.target.payload == .regular) break :blk parsed.target.payload.regular.address; + break :blk null; + } else null; parsed.payload = .{ .unsigned = .{ .subtractor = self.subtractor, .is_64bit = is_64bit, .addend = addend, + .source_sect_addr = source_sect_addr, }, }; From 7c662db8d95670f8ac0c88e9a2d6f49ef6782f13 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 7 Jul 2021 14:57:05 +0200 Subject: [PATCH 23/81] zld: keep text blocks per segment,section pair --- src/link/MachO/Object.zig | 20 ++++++----- src/link/MachO/Zld.zig | 74 +++++++++++++++++++++++++++++---------- 2 files changed, 68 insertions(+), 26 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 65af8166fc..011aca06c3 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -543,11 +543,13 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { } } - if (zld.last_text_block) |last| { - last.next = block; - block.prev = last; + if (zld.blocks.getPtr(match)) |last| { + last.*.next = block; + block.prev = last.*; + last.* = block; + } else { + try zld.blocks.putNoClobber(zld.allocator, match, block); } - zld.last_text_block = block; } break :next; @@ -607,11 +609,13 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { mem.set(u8, block.code, 0); } - if (zld.last_text_block) |last| { - last.next = block; - block.prev = last; + if 
(zld.blocks.getPtr(match)) |last| { + last.*.next = block; + block.prev = last.*; + last.* = block; + } else { + try zld.blocks.putNoClobber(zld.allocator, match, block); } - zld.last_text_block = block; } } } diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 36d0e58dc3..485f6eda42 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -112,7 +112,7 @@ got_entries: std.ArrayListUnmanaged(*Symbol) = .{}, stub_helper_stubs_start_off: ?u64 = null, -last_text_block: ?*TextBlock = null, +blocks: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}, pub const Output = struct { tag: enum { exe, dylib }, @@ -225,6 +225,9 @@ pub fn deinit(self: *Zld) void { self.globals.deinit(self.allocator); self.strtab.deinit(); + + // TODO dealloc all blocks + self.blocks.deinit(self.allocator); } pub fn closeFiles(self: Zld) void { @@ -268,6 +271,15 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg try self.addRpaths(args.rpaths); try self.addDataInCodeLC(); try self.addCodeSignatureLC(); + + var it = self.blocks.iterator(); + while (it.next()) |entry| { + const seg = self.load_commands.items[entry.key_ptr.seg].Segment; + const sect = seg.sections.items[entry.key_ptr.sect]; + + log.warn("\n\n{s},{s} contents:", .{ segmentName(sect), sectionName(sect) }); + entry.value_ptr.*.print(self); + } return error.TODO; // try self.allocateTextSegment(); // try self.allocateDataConstSegment(); @@ -835,6 +847,30 @@ fn sortSections(self: *Zld) !void { maybe_index.* = new_index; } } + + { + var transient: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}; + try transient.ensureCapacity(self.allocator, self.blocks.count()); + + var it = self.blocks.iterator(); + while (it.next()) |entry| { + const old = entry.key_ptr.*; + const sect = if (old.seg == self.text_segment_cmd_index.?) + text_index_mapping.get(old.sect) + else if (old.seg == self.data_const_segment_cmd_index.?) 
+ data_const_index_mapping.get(old.sect) + else + data_index_mapping.get(old.sect); + transient.putAssumeCapacityNoClobber(.{ + .seg = old.seg, + .sect = old.sect, + }, entry.value_ptr.*); + } + + self.blocks.clearAndFree(self.allocator); + self.blocks.deinit(self.allocator); + self.blocks = transient; + } } fn allocateTextSegment(self: *Zld) !void { @@ -1403,13 +1439,19 @@ fn resolveSymbols(self: *Zld) !void { try self.locals.append(self.allocator, symbol); }, .tentative => |tent| { - if (self.common_section_index == null) { - const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.common_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__common", .{ - .flags = macho.S_ZEROFILL, - }); - } + const match: MatchingSection = blk: { + if (self.common_section_index == null) { + const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + self.common_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.allocator, "__common", .{ + .flags = macho.S_ZEROFILL, + }); + } + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.common_section_index.?, + }; + }; const size = tent.size; const code = try self.allocator.alloc(u8, size); @@ -1439,13 +1481,13 @@ fn resolveSymbols(self: *Zld) !void { .alignment = alignment, }; - // TODO I'm not 100% sure about this yet, but I believe we should keep a separate list of - // TextBlocks per segment. 
- if (self.last_text_block) |last| { - last.next = block; - block.prev = last; + if (self.blocks.getPtr(match)) |last| { + last.*.next = block; + block.prev = last.*; + last.* = block; + } else { + try self.blocks.putNoClobber(self.allocator, match, block); } - self.last_text_block = block; }, else => {}, } @@ -1527,10 +1569,6 @@ fn parseTextBlocks(self: *Zld) !void { for (self.objects.items) |object| { try object.parseTextBlocks(self); } - - if (self.last_text_block) |block| { - block.print(self); - } } fn populateMetadata(self: *Zld) !void { From e524f43a6fbb5189d24aa42667ababdcd92a2ab2 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 7 Jul 2021 22:15:41 +0200 Subject: [PATCH 24/81] zld: save rebase and TLV offset as part of TextBlock instead of as part of the Symbol. This seems to be more optimal way of handling dyld ops in presence of no splittable input sections in object files. --- src/link/MachO/Object.zig | 28 ++++------------------------ src/link/MachO/Symbol.zig | 5 ----- src/link/MachO/Zld.zig | 29 ++++++++++++++++++++--------- src/link/MachO/reloc.zig | 14 ++++++++++++-- 4 files changed, 36 insertions(+), 40 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 011aca06c3..f000119edf 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -435,6 +435,8 @@ const TextBlockParser = struct { .references = std.AutoArrayHashMap(u32, void).init(self.allocator), .code = try self.allocator.dupe(u8, code), .relocs = std.ArrayList(Relocation).init(self.allocator), + .rebases = std.ArrayList(u64).init(self.allocator), + .tlv_offsets = std.ArrayList(u64).init(self.allocator), .size = size, .alignment = self.section.@"align", }; @@ -444,18 +446,6 @@ const TextBlockParser = struct { try self.object.parseRelocs(self.zld, relocs, block, start_addr); } - const is_zerofill = blk: { - const tseg = self.zld.load_commands.items[self.match.seg].Segment; - const tsect = tseg.sections.items[self.match.sect]; - const 
tsect_type = sectionType(tsect); - break :blk tsect_type == macho.S_ZEROFILL or - tsect_type == macho.S_THREAD_LOCAL_ZEROFILL or - tsect_type == macho.S_THREAD_LOCAL_VARIABLES; - }; - if (is_zerofill) { - mem.set(u8, block.code, 0); - } - self.index += 1; return block; @@ -589,6 +579,8 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { .references = std.AutoArrayHashMap(u32, void).init(self.allocator), .code = try self.allocator.dupe(u8, code), .relocs = std.ArrayList(Relocation).init(self.allocator), + .rebases = std.ArrayList(u64).init(self.allocator), + .tlv_offsets = std.ArrayList(u64).init(self.allocator), .size = sect.size, .alignment = sect.@"align", }; @@ -597,18 +589,6 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { try self.parseRelocs(zld, relocs, block, 0); } - const is_zerofill = blk: { - const tseg = zld.load_commands.items[match.seg].Segment; - const tsect = tseg.sections.items[match.sect]; - const tsect_type = sectionType(tsect); - break :blk tsect_type == macho.S_ZEROFILL or - tsect_type == macho.S_THREAD_LOCAL_ZEROFILL or - tsect_type == macho.S_THREAD_LOCAL_VARIABLES; - }; - if (is_zerofill) { - mem.set(u8, block.code, 0); - } - if (zld.blocks.getPtr(match)) |last| { last.*.next = block; block.prev = last.*; diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index 5f437dc209..86624653f5 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -58,8 +58,6 @@ pub const Regular = struct { local_sym_index: u32 = 0, - should_rebase: bool = false, - pub const Linkage = enum { translation_unit, linkage_unit, @@ -77,9 +75,6 @@ pub const Regular = struct { if (self.weak_ref) { try std.fmt.format(writer, ".weak_ref, ", .{}); } - if (self.should_rebase) { - try std.fmt.format(writer, ".should_rebase, ", .{}); - } if (self.file) |file| { try std.fmt.format(writer, ".file = {s}, ", .{file.name.?}); } diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 485f6eda42..53bb31a718 100644 --- 
a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -128,6 +128,8 @@ pub const TextBlock = struct { relocs: std.ArrayList(Relocation), size: u64, alignment: u32, + rebases: std.ArrayList(u64), + tlv_offsets: std.ArrayList(u64), next: ?*TextBlock = null, prev: ?*TextBlock = null, @@ -137,6 +139,8 @@ pub const TextBlock = struct { } block.relocs.deinit(); block.references.deinit(); + block.rebases.deinit(); + block.tlv_offsets.deinit(); allocator.free(block.code); } @@ -144,24 +148,30 @@ pub const TextBlock = struct { log.warn("TextBlock", .{}); log.warn(" | {}: {}", .{ self.local_sym_index, zld.locals.items[self.local_sym_index] }); if (self.aliases) |aliases| { - log.warn(" | Aliases:", .{}); + log.warn(" | aliases:", .{}); for (aliases) |index| { log.warn(" | {}: {}", .{ index, zld.locals.items[index] }); } } if (self.references.count() > 0) { - log.warn(" | References:", .{}); + log.warn(" | references:", .{}); for (self.references.keys()) |index| { log.warn(" | {}: {}", .{ index, zld.locals.items[index] }); } } log.warn(" | code.len = {}", .{self.code.len}); if (self.relocs.items.len > 0) { - log.warn("Relocations:", .{}); + log.warn(" | relocations:", .{}); for (self.relocs.items) |rel| { - log.warn(" | {}", .{rel}); + log.warn(" | {}", .{rel}); } } + if (self.rebases.items.len > 0) { + log.warn(" | rebases: {any}", .{self.rebases.items}); + } + if (self.tlv_offsets.items.len > 0) { + log.warn(" | TLV offsets: {any}", .{self.tlv_offsets.items}); + } log.warn(" | size = {}", .{self.size}); log.warn(" | align = {}", .{self.alignment}); } @@ -271,6 +281,10 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg try self.addRpaths(args.rpaths); try self.addDataInCodeLC(); try self.addCodeSignatureLC(); + // try self.allocateTextSegment(); + // try self.allocateDataConstSegment(); + // try self.allocateDataSegment(); + // self.allocateLinkeditSegment(); var it = self.blocks.iterator(); while (it.next()) |entry| { @@ -281,11 +295,6 @@ 
pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg entry.value_ptr.*.print(self); } return error.TODO; - // try self.allocateTextSegment(); - // try self.allocateDataConstSegment(); - // try self.allocateDataSegment(); - // self.allocateLinkeditSegment(); - // try self.allocateSymbols(); // try self.flush(); } @@ -1477,6 +1486,8 @@ fn resolveSymbols(self: *Zld) !void { .references = std.AutoArrayHashMap(u32, void).init(self.allocator), .code = code, .relocs = std.ArrayList(Relocation).init(self.allocator), + .rebases = std.ArrayList(u64).init(self.allocator), + .tlv_offsets = std.ArrayList(u64).init(self.allocator), .size = size, .alignment = alignment, }; diff --git a/src/link/MachO/reloc.zig b/src/link/MachO/reloc.zig index d35344c71b..b645ec152e 100644 --- a/src/link/MachO/reloc.zig +++ b/src/link/MachO/reloc.zig @@ -405,7 +405,7 @@ pub const Relocation = struct { pub fn resolve(self: Relocation, zld: *Zld) !void { const source_addr = blk: { const sym = zld.locals.items[self.block.local_sym_index]; - break :blk sym.payload.regular.address; + break :blk sym.payload.regular.address + self.offset; }; const target_addr = blk: { const is_via_got = inner: { @@ -668,7 +668,17 @@ pub const Parser = struct { break :rebase true; }; - source_reg.should_rebase = should_rebase; + + if (should_rebase) { + try self.block.rebases.append(out_rel.offset); + } + + // TLV is handled via a separate offset mechanism. + // Save the offset to the initializer. + // TODO I believe this can be simplified a lot! + if (sect_type == macho.S_THREAD_LOCAL_VARIABLES) { + try self.block.tlv_offsets.append(out_rel.offset); + } }, } } else if (out_rel.payload == .branch) blk: { From 7aeedc0912c8218773891ae98a729bb2b1be5231 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 8 Jul 2021 00:29:10 +0200 Subject: [PATCH 25/81] zld: allocate TextBlocks temporarily by iterating over all defined TextBlocks. 
However, once we merge this with MachO incremental, updates will be done at the point of creation and/or update. Also, fix mining TLV knowledge for working out TLV pointers. --- src/link/MachO/Object.zig | 21 ++++++++- src/link/MachO/Symbol.zig | 15 +++++++ src/link/MachO/Zld.zig | 95 +++++++++++++++++++-------------------- src/link/MachO/reloc.zig | 7 +-- 4 files changed, 83 insertions(+), 55 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index f000119edf..867821fa6f 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -7,6 +7,7 @@ const fs = std.fs; const io = std.io; const log = std.log.scoped(.object); const macho = std.macho; +const math = std.math; const mem = std.mem; const reloc = @import("reloc.zig"); const sort = std.sort; @@ -436,7 +437,7 @@ const TextBlockParser = struct { .code = try self.allocator.dupe(u8, code), .relocs = std.ArrayList(Relocation).init(self.allocator), .rebases = std.ArrayList(u64).init(self.allocator), - .tlv_offsets = std.ArrayList(u64).init(self.allocator), + .tlv_offsets = std.ArrayList(TextBlock.TlvOffset).init(self.allocator), .size = size, .alignment = self.section.@"align", }; @@ -533,6 +534,14 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { } } + // Update target section's metadata + // TODO should we update segment's size here too? + // How does it tie with incremental space allocs? 
+ const tseg = &zld.load_commands.items[match.seg].Segment; + const tsect = &tseg.sections.items[match.sect]; + tsect.size += block.size; + tsect.@"align" = math.max(tsect.@"align", block.alignment); + if (zld.blocks.getPtr(match)) |last| { last.*.next = block; block.prev = last.*; @@ -580,7 +589,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { .code = try self.allocator.dupe(u8, code), .relocs = std.ArrayList(Relocation).init(self.allocator), .rebases = std.ArrayList(u64).init(self.allocator), - .tlv_offsets = std.ArrayList(u64).init(self.allocator), + .tlv_offsets = std.ArrayList(TextBlock.TlvOffset).init(self.allocator), .size = sect.size, .alignment = sect.@"align", }; @@ -589,6 +598,14 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { try self.parseRelocs(zld, relocs, block, 0); } + // Update target section's metadata + // TODO should we update segment's size here too? + // How does it tie with incremental space allocs? + const tseg = &zld.load_commands.items[match.seg].Segment; + const tsect = &tseg.sections.items[match.sect]; + tsect.size += block.size; + tsect.@"align" = math.max(tsect.@"align", block.alignment); + if (zld.blocks.getPtr(match)) |last| { last.*.next = block; block.prev = last.*; diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index 86624653f5..0ed122cc95 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -10,6 +10,7 @@ const Allocator = mem.Allocator; const Dylib = @import("Dylib.zig"); const Object = @import("Object.zig"); const StringTable = @import("StringTable.zig"); +const Zld = @import("Zld.zig"); /// Symbol name. Owned slice. name: []const u8, @@ -80,6 +81,20 @@ pub const Regular = struct { } try std.fmt.format(writer, "}}", .{}); } + + pub fn sectionId(self: Regular, zld: *Zld) u8 { + // TODO there might be a more generic way of doing this. 
+ var section: u8 = 0; + for (zld.load_commands.items) |cmd, cmd_id| { + if (cmd != .Segment) break; + if (cmd_id == self.segment_id) { + section += @intCast(u8, self.section_id) + 1; + break; + } + section += @intCast(u8, cmd.Segment.sections.items.len); + } + return section; + } }; pub const Tentative = struct { diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 53bb31a718..e137e16e9b 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -129,10 +129,15 @@ pub const TextBlock = struct { size: u64, alignment: u32, rebases: std.ArrayList(u64), - tlv_offsets: std.ArrayList(u64), + tlv_offsets: std.ArrayList(TlvOffset), next: ?*TextBlock = null, prev: ?*TextBlock = null, + pub const TlvOffset = struct { + local_sym_index: u32, + offset: u64, + }; + pub fn deinit(block: *TextBlock, allocator: *Allocator) void { if (block.aliases) |aliases| { allocator.free(aliases); @@ -281,10 +286,11 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg try self.addRpaths(args.rpaths); try self.addDataInCodeLC(); try self.addCodeSignatureLC(); - // try self.allocateTextSegment(); - // try self.allocateDataConstSegment(); - // try self.allocateDataSegment(); - // self.allocateLinkeditSegment(); + try self.allocateTextSegment(); + try self.allocateDataConstSegment(); + try self.allocateDataSegment(); + self.allocateLinkeditSegment(); + try self.allocateTextBlocks(); var it = self.blocks.iterator(); while (it.next()) |entry| { @@ -292,6 +298,7 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg const sect = seg.sections.items[entry.key_ptr.sect]; log.warn("\n\n{s},{s} contents:", .{ segmentName(sect), sectionName(sect) }); + log.warn("{}", .{sect}); entry.value_ptr.*.print(self); } return error.TODO; @@ -865,14 +872,14 @@ fn sortSections(self: *Zld) !void { while (it.next()) |entry| { const old = entry.key_ptr.*; const sect = if (old.seg == self.text_segment_cmd_index.?) 
- text_index_mapping.get(old.sect) + text_index_mapping.get(old.sect).? else if (old.seg == self.data_const_segment_cmd_index.?) - data_const_index_mapping.get(old.sect) + data_const_index_mapping.get(old.sect).? else - data_index_mapping.get(old.sect); + data_index_mapping.get(old.sect).?; transient.putAssumeCapacityNoClobber(.{ .seg = old.seg, - .sect = old.sect, + .sect = sect, }, entry.value_ptr.*); } @@ -880,6 +887,18 @@ fn sortSections(self: *Zld) !void { self.blocks.deinit(self.allocator); self.blocks = transient; } + + for (self.locals.items) |sym, i| { + if (i == 0) continue; // skip the null symbol + assert(sym.payload == .regular); + const reg = &sym.payload.regular; + reg.section_id = if (reg.segment_id == self.text_segment_cmd_index.?) + text_index_mapping.get(reg.section_id).? + else if (reg.segment_id == self.data_const_segment_cmd_index.?) + data_const_index_mapping.get(reg.section_id).? + else + data_index_mapping.get(reg.section_id).?; + } } fn allocateTextSegment(self: *Zld) !void { @@ -991,50 +1010,26 @@ fn allocateSegment(self: *Zld, index: u16, offset: u64) !void { seg.inner.vmsize = seg_size_aligned; } -fn allocateSymbol(self: *Zld, symbol: *Symbol) !void { - const reg = &symbol.payload.regular; - const object = reg.file orelse return; - const source_sect = &object.sections.items[reg.section]; - const target_map = source_sect.target_map orelse { - log.debug("section '{s},{s}' not mapped for symbol '{s}'", .{ - segmentName(source_sect.inner), - sectionName(source_sect.inner), - symbol.name, - }); - return; - }; +fn allocateTextBlocks(self: *Zld) !void { + var it = self.blocks.iterator(); + while (it.next()) |entry| { + const match = entry.key_ptr.*; + var block: *TextBlock = entry.value_ptr.*; - const target_seg = self.load_commands.items[target_map.segment_id].Segment; - const target_sect = target_seg.sections.items[target_map.section_id]; - const target_addr = target_sect.addr + target_map.offset; - const address = reg.address - 
source_sect.inner.addr + target_addr; + const seg = self.load_commands.items[match.seg].Segment; + const sect = seg.sections.items[match.sect]; + var base_addr: u64 = sect.addr + sect.size; - log.debug("resolving symbol '{s}' at 0x{x}", .{ symbol.name, address }); + while (true) { + const sym = self.locals.items[block.local_sym_index]; + assert(sym.payload == .regular); + sym.payload.regular.address = base_addr - block.size; + base_addr -= block.size; - // TODO there might be a more generic way of doing this. - var section: u8 = 0; - for (self.load_commands.items) |cmd, cmd_id| { - if (cmd != .Segment) break; - if (cmd_id == target_map.segment_id) { - section += @intCast(u8, target_map.section_id) + 1; - break; + if (block.prev) |prev| { + block = prev; + } else break; } - section += @intCast(u8, cmd.Segment.sections.items.len); - } - - reg.address = address; - reg.section = section; -} - -fn allocateSymbols(self: *Zld) !void { - for (self.locals.items) |symbol| { - if (symbol.payload != .regular) continue; - try self.allocateSymbol(symbol); - } - - for (self.globals.values()) |symbol| { - if (symbol.payload != .regular) continue; - try self.allocateSymbol(symbol); } } @@ -1487,7 +1482,7 @@ fn resolveSymbols(self: *Zld) !void { .code = code, .relocs = std.ArrayList(Relocation).init(self.allocator), .rebases = std.ArrayList(u64).init(self.allocator), - .tlv_offsets = std.ArrayList(u64).init(self.allocator), + .tlv_offsets = std.ArrayList(TextBlock.TlvOffset).init(self.allocator), .size = size, .alignment = alignment, }; diff --git a/src/link/MachO/reloc.zig b/src/link/MachO/reloc.zig index b645ec152e..07d5186a63 100644 --- a/src/link/MachO/reloc.zig +++ b/src/link/MachO/reloc.zig @@ -674,10 +674,11 @@ pub const Parser = struct { } // TLV is handled via a separate offset mechanism. - // Save the offset to the initializer. - // TODO I believe this can be simplified a lot! 
if (sect_type == macho.S_THREAD_LOCAL_VARIABLES) { - try self.block.tlv_offsets.append(out_rel.offset); + try self.block.tlv_offsets.append(.{ + .local_sym_index = out_rel.target.payload.regular.local_sym_index, + .offset = out_rel.offset, + }); } }, } From 961b463fad37e00fa8a2ca2bbfcb58a2b1d2bea9 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 8 Jul 2021 11:57:14 +0200 Subject: [PATCH 26/81] zld: track symbols defined within TextBlock in case TextBlock represents an entire section with symbols defined within. --- src/link/MachO/Object.zig | 72 +++++++++++++++++++++++-------------- src/link/MachO/Zld.zig | 76 ++++++++++++++++++++++++++++----------- 2 files changed, 100 insertions(+), 48 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 867821fa6f..a3b1ff4f79 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -430,17 +430,12 @@ const TextBlockParser = struct { const block = try self.allocator.create(TextBlock); errdefer self.allocator.destroy(block); - block.* = .{ - .local_sym_index = senior_nlist.index, - .aliases = alias_only_indices, - .references = std.AutoArrayHashMap(u32, void).init(self.allocator), - .code = try self.allocator.dupe(u8, code), - .relocs = std.ArrayList(Relocation).init(self.allocator), - .rebases = std.ArrayList(u64).init(self.allocator), - .tlv_offsets = std.ArrayList(TextBlock.TlvOffset).init(self.allocator), - .size = size, - .alignment = self.section.@"align", - }; + block.* = TextBlock.init(self.allocator); + block.local_sym_index = senior_nlist.index; + block.aliases = alias_only_indices; + block.code = try self.allocator.dupe(u8, code); + block.size = size; + block.alignment = self.section.@"align"; const relocs = filterRelocs(self.relocs, start_addr, end_addr); if (relocs.len > 0) { @@ -499,16 +494,17 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { _ = try self.file.?.preadAll(raw_relocs, sect.reloff); const relocs = mem.bytesAsSlice(macho.relocation_info, 
raw_relocs); + // Symbols within this section only. + const filtered_nlists = NlistWithIndex.filterInSection( + sorted_nlists.items, + sect_id + 1, + ); + // Is there any padding between symbols within the section? const is_splittable = self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; next: { if (is_splittable) blocks: { - const filtered_nlists = NlistWithIndex.filterInSection( - sorted_nlists.items, - sect_id + 1, - ); - if (filtered_nlists.len == 0) break :blocks; var parser = TextBlockParser{ @@ -528,7 +524,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { if (reg.file) |file| { if (file != self) { log.warn("deduping definition of {s} in {s}", .{ sym.name, self.name.? }); - block.deinit(self.allocator); + block.deinit(); self.allocator.destroy(block); continue; } @@ -583,21 +579,43 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { const block = try self.allocator.create(TextBlock); errdefer self.allocator.destroy(block); - block.* = .{ - .local_sym_index = local_sym_index, - .references = std.AutoArrayHashMap(u32, void).init(self.allocator), - .code = try self.allocator.dupe(u8, code), - .relocs = std.ArrayList(Relocation).init(self.allocator), - .rebases = std.ArrayList(u64).init(self.allocator), - .tlv_offsets = std.ArrayList(TextBlock.TlvOffset).init(self.allocator), - .size = sect.size, - .alignment = sect.@"align", - }; + block.* = TextBlock.init(self.allocator); + block.local_sym_index = local_sym_index; + block.code = try self.allocator.dupe(u8, code); + block.size = sect.size; + block.alignment = sect.@"align"; if (relocs.len > 0) { try self.parseRelocs(zld, relocs, block, 0); } + // Since this is block gets a helper local temporary symbol that didn't exist + // in the object file which encompasses the entire section, we need traverse + // the filtered symbols and note which symbol is contained within so that + // we can properly allocate addresses down the line. 
+ // While we're at it, we need to update segment,section mapping of each symbol too. + if (filtered_nlists.len > 0) { + var contained = std.ArrayList(TextBlock.SymbolAtOffset).init(self.allocator); + defer contained.deinit(); + try contained.ensureTotalCapacity(filtered_nlists.len); + + for (filtered_nlists) |nlist_with_index| { + const sym = self.symbols.items[nlist_with_index.index]; + assert(sym.payload == .regular); + const reg = &sym.payload.regular; + + reg.segment_id = match.seg; + reg.section_id = match.sect; + + contained.appendAssumeCapacity(.{ + .local_sym_index = reg.local_sym_index, + .offset = nlist_with_index.nlist.n_value - sect.addr, + }); + } + + block.contained = contained.toOwnedSlice(); + } + // Update target section's metadata // TODO should we update segment's size here too? // How does it tie with incremental space allocs? diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index e137e16e9b..cdc0732c34 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -121,9 +121,11 @@ pub const Output = struct { }; pub const TextBlock = struct { + allocator: *Allocator, local_sym_index: u32, aliases: ?[]u32 = null, references: std.AutoArrayHashMap(u32, void), + contained: ?[]SymbolAtOffset = null, code: []u8, relocs: std.ArrayList(Relocation), size: u64, @@ -133,20 +135,42 @@ pub const TextBlock = struct { next: ?*TextBlock = null, prev: ?*TextBlock = null, + pub const SymbolAtOffset = struct { + local_sym_index: u32, + offset: u64, + }; + pub const TlvOffset = struct { local_sym_index: u32, offset: u64, }; - pub fn deinit(block: *TextBlock, allocator: *Allocator) void { - if (block.aliases) |aliases| { - allocator.free(aliases); + pub fn init(allocator: *Allocator) TextBlock { + return .{ + .allocator = allocator, + .local_sym_index = undefined, + .references = std.AutoArrayHashMap(u32, void).init(allocator), + .code = undefined, + .relocs = std.ArrayList(Relocation).init(allocator), + .size = undefined, + .alignment = 
undefined, + .rebases = std.ArrayList(u64).init(allocator), + .tlv_offsets = std.ArrayList(TextBlock.TlvOffset).init(allocator), + }; + } + + pub fn deinit(self: *TextBlock) void { + if (self.aliases) |aliases| { + self.allocator.free(aliases); } - block.relocs.deinit(); - block.references.deinit(); - block.rebases.deinit(); - block.tlv_offsets.deinit(); - allocator.free(block.code); + self.references.deinit(); + if (self.contained) |contained| { + self.allocator.free(contained); + } + self.allocator.free(self.code); + self.relocs.deinit(); + self.rebases.deinit(); + self.tlv_offsets.deinit(); } pub fn print_this(self: *const TextBlock, zld: *Zld) void { @@ -164,6 +188,12 @@ pub const TextBlock = struct { log.warn(" | {}: {}", .{ index, zld.locals.items[index] }); } } + if (self.contained) |contained| { + log.warn(" | contained symbols:", .{}); + for (contained) |sym_at_off| { + log.warn(" | {}: {}", .{ sym_at_off.offset, zld.locals.items[sym_at_off.local_sym_index] }); + } + } log.warn(" | code.len = {}", .{self.code.len}); if (self.relocs.items.len > 0) { log.warn(" | relocations:", .{}); @@ -1021,10 +1051,20 @@ fn allocateTextBlocks(self: *Zld) !void { var base_addr: u64 = sect.addr + sect.size; while (true) { + base_addr -= block.size; + const sym = self.locals.items[block.local_sym_index]; assert(sym.payload == .regular); - sym.payload.regular.address = base_addr - block.size; - base_addr -= block.size; + sym.payload.regular.address = base_addr; + + // Update each symbol contained within the TextBlock + if (block.contained) |contained| { + for (contained) |sym_at_off| { + const contained_sym = self.locals.items[sym_at_off.local_sym_index]; + assert(contained_sym.payload == .regular); + contained_sym.payload.regular.address = base_addr + sym_at_off.offset; + } + } if (block.prev) |prev| { block = prev; @@ -1476,16 +1516,11 @@ fn resolveSymbols(self: *Zld) !void { const block = try self.allocator.create(TextBlock); errdefer self.allocator.destroy(block); - 
block.* = .{ - .local_sym_index = local_sym_index, - .references = std.AutoArrayHashMap(u32, void).init(self.allocator), - .code = code, - .relocs = std.ArrayList(Relocation).init(self.allocator), - .rebases = std.ArrayList(u64).init(self.allocator), - .tlv_offsets = std.ArrayList(TextBlock.TlvOffset).init(self.allocator), - .size = size, - .alignment = alignment, - }; + block.* = TextBlock.init(self.allocator); + block.local_sym_index = local_sym_index; + block.code = code; + block.size = size; + block.alignment = alignment; if (self.blocks.getPtr(match)) |last| { last.*.next = block; @@ -1907,7 +1942,6 @@ fn addRpaths(self: *Zld, rpaths: []const []const u8) !void { fn flush(self: *Zld) !void { try self.writeStubHelperCommon(); - try self.resolveRelocsAndWriteSections(); if (self.common_section_index) |index| { const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; From 12187586d15b6eae0330a652a6b1532d2b457991 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 8 Jul 2021 14:37:33 +0200 Subject: [PATCH 27/81] zld: fix alloc alignment and resolve relocs --- src/link/MachO/Object.zig | 14 ++- src/link/MachO/Zld.zig | 222 ++++++++++++++++++++++++-------------- src/link/MachO/reloc.zig | 39 ++++--- 3 files changed, 173 insertions(+), 102 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index a3b1ff4f79..0c8b8f8088 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -535,8 +535,11 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { // How does it tie with incremental space allocs? 
const tseg = &zld.load_commands.items[match.seg].Segment; const tsect = &tseg.sections.items[match.sect]; - tsect.size += block.size; - tsect.@"align" = math.max(tsect.@"align", block.alignment); + const new_alignment = math.max(tsect.@"align", block.alignment); + const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); + const new_size = mem.alignForwardGeneric(u64, tsect.size + block.size, new_alignment_pow_2); + tsect.size = new_size; + tsect.@"align" = new_alignment; if (zld.blocks.getPtr(match)) |last| { last.*.next = block; @@ -621,8 +624,11 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { // How does it tie with incremental space allocs? const tseg = &zld.load_commands.items[match.seg].Segment; const tsect = &tseg.sections.items[match.sect]; - tsect.size += block.size; - tsect.@"align" = math.max(tsect.@"align", block.alignment); + const new_alignment = math.max(tsect.@"align", block.alignment); + const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); + const new_size = mem.alignForwardGeneric(u64, tsect.size + block.size, new_alignment_pow_2); + tsect.size = new_size; + tsect.@"align" = new_alignment; if (zld.blocks.getPtr(match)) |last| { last.*.next = block; diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index cdc0732c34..3b5afaebeb 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -173,6 +173,12 @@ pub const TextBlock = struct { self.tlv_offsets.deinit(); } + pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { + for (self.relocs.items) |rel| { + try rel.resolve(zld); + } + } + pub fn print_this(self: *const TextBlock, zld: *Zld) void { log.warn("TextBlock", .{}); log.warn(" | {}: {}", .{ self.local_sym_index, zld.locals.items[self.local_sym_index] }); @@ -328,11 +334,10 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg const sect = seg.sections.items[entry.key_ptr.sect]; log.warn("\n\n{s},{s} contents:", .{ segmentName(sect), sectionName(sect) }); - 
log.warn("{}", .{sect}); + log.warn(" {}", .{sect}); entry.value_ptr.*.print(self); } - return error.TODO; - // try self.flush(); + try self.flush(); } fn parseInputFiles(self: *Zld, files: []const []const u8, syslibroot: ?[]const u8) !void { @@ -1041,6 +1046,8 @@ fn allocateSegment(self: *Zld, index: u16, offset: u64) !void { } fn allocateTextBlocks(self: *Zld) !void { + log.warn("allocating text blocks", .{}); + var it = self.blocks.iterator(); while (it.next()) |entry| { const match = entry.key_ptr.*; @@ -1050,13 +1057,34 @@ fn allocateTextBlocks(self: *Zld) !void { const sect = seg.sections.items[match.sect]; var base_addr: u64 = sect.addr + sect.size; + log.warn(" within section {s},{s}", .{ segmentName(sect), sectionName(sect) }); + log.warn(" {}", .{sect}); + while (true) { - base_addr -= block.size; + const block_alignment = try math.powi(u32, 2, block.alignment); + base_addr = mem.alignBackwardGeneric(u64, base_addr - block.size, block_alignment); const sym = self.locals.items[block.local_sym_index]; assert(sym.payload == .regular); sym.payload.regular.address = base_addr; + log.warn(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ + sym.name, + base_addr, + base_addr + block.size, + block.size, + block.alignment, + }); + + // Update each alias (if any) + if (block.aliases) |aliases| { + for (aliases) |index| { + const alias_sym = self.locals.items[index]; + assert(alias_sym.payload == .regular); + alias_sym.payload.regular.address = base_addr; + } + } + // Update each symbol contained within the TextBlock if (block.contained) |contained| { for (contained) |sym_at_off| { @@ -1073,6 +1101,37 @@ fn allocateTextBlocks(self: *Zld) !void { } } +fn writeTextBlocks(self: *Zld) !void { + var it = self.blocks.iterator(); + while (it.next()) |entry| { + const match = entry.key_ptr.*; + var block: *TextBlock = entry.value_ptr.*; + + const seg = self.load_commands.items[match.seg].Segment; + const sect = seg.sections.items[match.sect]; + + log.warn("writing text 
blocks for section {s},{s}", .{ segmentName(sect), sectionName(sect) }); + + var code = try self.allocator.alloc(u8, sect.size); + defer self.allocator.free(code); + + var base_off: u64 = sect.size; + + while (true) { + base_off -= block.size; + + try block.resolveRelocs(self); + mem.copy(u8, code[base_off..][0..block.size], block.code); + + if (block.prev) |prev| { + block = prev; + } else break; + } + + try self.file.?.pwriteAll(code, sect.offset); + } +} + fn writeStubHelperCommon(self: *Zld) !void { const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; const stub_helper = &text_segment.sections.items[self.stub_helper_section_index.?]; @@ -1941,104 +2000,105 @@ fn addRpaths(self: *Zld, rpaths: []const []const u8) !void { } fn flush(self: *Zld) !void { - try self.writeStubHelperCommon(); + try self.writeTextBlocks(); + // try self.writeStubHelperCommon(); - if (self.common_section_index) |index| { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[index]; - sect.offset = 0; - } + // if (self.common_section_index) |index| { + // const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + // const sect = &seg.sections.items[index]; + // sect.offset = 0; + // } - if (self.bss_section_index) |index| { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[index]; - sect.offset = 0; - } + // if (self.bss_section_index) |index| { + // const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + // const sect = &seg.sections.items[index]; + // sect.offset = 0; + // } - if (self.tlv_bss_section_index) |index| { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[index]; - sect.offset = 0; - } + // if (self.tlv_bss_section_index) |index| { + // const seg = 
&self.load_commands.items[self.data_segment_cmd_index.?].Segment; + // const sect = &seg.sections.items[index]; + // sect.offset = 0; + // } - if (self.tlv_section_index) |index| { - const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[index]; + // if (self.tlv_section_index) |index| { + // const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; + // const sect = &seg.sections.items[index]; - var buffer = try self.allocator.alloc(u8, @intCast(usize, sect.size)); - defer self.allocator.free(buffer); - _ = try self.file.?.preadAll(buffer, sect.offset); + // var buffer = try self.allocator.alloc(u8, @intCast(usize, sect.size)); + // defer self.allocator.free(buffer); + // _ = try self.file.?.preadAll(buffer, sect.offset); - var stream = std.io.fixedBufferStream(buffer); - var writer = stream.writer(); + // var stream = std.io.fixedBufferStream(buffer); + // var writer = stream.writer(); - std.sort.sort(TlvOffset, self.threadlocal_offsets.items, {}, TlvOffset.cmp); + // std.sort.sort(TlvOffset, self.threadlocal_offsets.items, {}, TlvOffset.cmp); - const seek_amt = 2 * @sizeOf(u64); - for (self.threadlocal_offsets.items) |tlv| { - try writer.context.seekBy(seek_amt); - try writer.writeIntLittle(u64, tlv.offset); - } + // const seek_amt = 2 * @sizeOf(u64); + // for (self.threadlocal_offsets.items) |tlv| { + // try writer.context.seekBy(seek_amt); + // try writer.writeIntLittle(u64, tlv.offset); + // } - try self.file.?.pwriteAll(buffer, sect.offset); - } + // try self.file.?.pwriteAll(buffer, sect.offset); + // } - if (self.mod_init_func_section_index) |index| { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[index]; + // if (self.mod_init_func_section_index) |index| { + // const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + // const sect = &seg.sections.items[index]; - var initializers = 
std.ArrayList(u64).init(self.allocator); - defer initializers.deinit(); + // var initializers = std.ArrayList(u64).init(self.allocator); + // defer initializers.deinit(); - for (self.objects.items) |object| { - for (object.initializers.items) |sym_id| { - const address = object.symbols.items[sym_id].payload.regular.address; - try initializers.append(address); - } - } + // for (self.objects.items) |object| { + // for (object.initializers.items) |sym_id| { + // const address = object.symbols.items[sym_id].payload.regular.address; + // try initializers.append(address); + // } + // } - _ = try self.file.?.pwriteAll(mem.sliceAsBytes(initializers.items), sect.offset); - sect.size = @intCast(u32, initializers.items.len * @sizeOf(u64)); - } + // _ = try self.file.?.pwriteAll(mem.sliceAsBytes(initializers.items), sect.offset); + // sect.size = @intCast(u32, initializers.items.len * @sizeOf(u64)); + // } - try self.writeGotEntries(); - try self.setEntryPoint(); - try self.writeRebaseInfoTable(); - try self.writeBindInfoTable(); - try self.writeLazyBindInfoTable(); - try self.writeExportInfo(); - try self.writeDataInCode(); + // try self.writeGotEntries(); + // try self.setEntryPoint(); + // try self.writeRebaseInfoTable(); + // try self.writeBindInfoTable(); + // try self.writeLazyBindInfoTable(); + // try self.writeExportInfo(); + // try self.writeDataInCode(); - { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - symtab.symoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - } + // { + // const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + // const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; + // symtab.symoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + // } - try self.writeSymbolTable(); - try self.writeStringTable(); + // try self.writeSymbolTable(); + // try 
self.writeStringTable(); - { - // Seal __LINKEDIT size - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size.?); - } + // { + // // Seal __LINKEDIT size + // const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + // seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size.?); + // } - if (self.target.?.cpu.arch == .aarch64) { - try self.writeCodeSignaturePadding(); - } + // if (self.target.?.cpu.arch == .aarch64) { + // try self.writeCodeSignaturePadding(); + // } - try self.writeLoadCommands(); - try self.writeHeader(); + // try self.writeLoadCommands(); + // try self.writeHeader(); - if (self.target.?.cpu.arch == .aarch64) { - try self.writeCodeSignature(); - } + // if (self.target.?.cpu.arch == .aarch64) { + // try self.writeCodeSignature(); + // } - if (comptime std.Target.current.isDarwin() and std.Target.current.cpu.arch == .aarch64) { - const out_path = self.output.?.path; - try fs.cwd().copyFile(out_path, fs.cwd(), out_path, .{}); - } + // if (comptime std.Target.current.isDarwin() and std.Target.current.cpu.arch == .aarch64) { + // const out_path = self.output.?.path; + // try fs.cwd().copyFile(out_path, fs.cwd(), out_path, .{}); + // } } fn writeGotEntries(self: *Zld) !void { diff --git a/src/link/MachO/reloc.zig b/src/link/MachO/reloc.zig index 07d5186a63..bfb2ee905e 100644 --- a/src/link/MachO/reloc.zig +++ b/src/link/MachO/reloc.zig @@ -52,7 +52,7 @@ pub const Relocation = struct { pub fn resolve(self: Unsigned, base: Relocation, source_addr: u64, target_addr: u64) !void { const addend = if (self.source_sect_addr) |addr| - self.addend - addr + self.addend - @intCast(i64, addr) else self.addend; @@ -86,13 +86,13 @@ pub const Relocation = struct { arch: Arch, pub fn resolve(self: Branch, base: Relocation, source_addr: u64, target_addr: u64) !void { - switch (arch) { + switch (self.arch) 
{ .aarch64 => { const displacement = try math.cast(i28, @intCast(i64, target_addr) - @intCast(i64, source_addr)); var inst = aarch64.Instruction{ .unconditional_branch_immediate = mem.bytesToValue( meta.TagPayload( - aarch.Instruction, + aarch64.Instruction, aarch64.Instruction.unconditional_branch_immediate, ), base.block.code[base.offset..][0..4], @@ -236,13 +236,15 @@ pub const Relocation = struct { }, .got => { const narrowed = @truncate(u12, target_addr); - var inst = mem.bytesToValue( - meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, + var inst: aarch64.Instruction = .{ + .load_store_register = mem.bytesToValue( + meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), + base.block.code[base.offset..][0..4], ), - base.block.code[base.offset..][0..4], - ); + }; const offset = try math.divExact(u12, narrowed, 8); inst.load_store_register.offset = offset; mem.writeIntLittle(u32, base.block.code[base.offset..][0..4], inst.toU32()); @@ -408,14 +410,12 @@ pub const Relocation = struct { break :blk sym.payload.regular.address + self.offset; }; const target_addr = blk: { - const is_via_got = inner: { - switch (self.payload) { - .pointer_to_got => break :inner true, - .page => |page| page.kind == .got, - .page_off => |page_off| page_off == .got, - .load => {}, - else => break :inner false, - } + const is_via_got = switch (self.payload) { + .pointer_to_got => true, + .page => |page| page.kind == .got, + .page_off => |page_off| page_off.kind == .got, + .load => |load| load.kind == .got, + else => false, }; if (is_via_got) { @@ -459,6 +459,11 @@ pub const Relocation = struct { }, } }; + + log.warn("relocating {}", .{self}); + log.warn(" | source_addr = 0x{x}", .{source_addr}); + log.warn(" | target_addr = 0x{x}", .{target_addr}); + switch (self.payload) { .unsigned => |unsigned| try unsigned.resolve(self, source_addr, target_addr), .branch => |branch| try branch.resolve(self, source_addr, target_addr), 
From 0cc4938419c52681d7f7d5a48054e3f8aa827840 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 8 Jul 2021 19:22:46 +0200 Subject: [PATCH 28/81] zld: re-enable all of linker after complete rewrite --- src/link/MachO/Symbol.zig | 4 +- src/link/MachO/Zld.zig | 243 ++++++++++++++++++++------------------ 2 files changed, 133 insertions(+), 114 deletions(-) diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index 0ed122cc95..8835bb9a0f 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -225,7 +225,7 @@ pub fn needsTlvOffset(self: Symbol, zld: *Zld) bool { return sect_type == macho.S_THREAD_LOCAL_VARIABLES; } -pub fn asNlist(symbol: *Symbol, strtab: *StringTable) !macho.nlist_64 { +pub fn asNlist(symbol: *Symbol, zld: *Zld, strtab: *StringTable) !macho.nlist_64 { const n_strx = try strtab.getOrPut(symbol.name); const nlist = nlist: { switch (symbol.payload) { @@ -233,7 +233,7 @@ pub fn asNlist(symbol: *Symbol, strtab: *StringTable) !macho.nlist_64 { var nlist = macho.nlist_64{ .n_strx = n_strx, .n_type = macho.N_SECT, - .n_sect = regular.section, + .n_sect = regular.sectionId(zld), .n_desc = 0, .n_value = regular.address, }; diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 3b5afaebeb..e9a8199d9d 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -337,6 +337,7 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg log.warn(" {}", .{sect}); entry.value_ptr.*.print(self); } + try self.flush(); } @@ -1109,23 +1110,31 @@ fn writeTextBlocks(self: *Zld) !void { const seg = self.load_commands.items[match.seg].Segment; const sect = seg.sections.items[match.sect]; + const sect_type = sectionType(sect); log.warn("writing text blocks for section {s},{s}", .{ segmentName(sect), sectionName(sect) }); var code = try self.allocator.alloc(u8, sect.size); defer self.allocator.free(code); - var base_off: u64 = sect.size; + if (sect_type == macho.S_ZEROFILL or + sect_type == 
macho.S_THREAD_LOCAL_ZEROFILL or + sect_type == macho.S_THREAD_LOCAL_VARIABLES) + { + mem.set(u8, code, 0); + } else { + var base_off: u64 = sect.size; - while (true) { - base_off -= block.size; + while (true) { + base_off -= block.size; - try block.resolveRelocs(self); - mem.copy(u8, code[base_off..][0..block.size], block.code); + try block.resolveRelocs(self); + mem.copy(u8, code[base_off..][0..block.size], block.code); - if (block.prev) |prev| { - block = prev; - } else break; + if (block.prev) |prev| { + block = prev; + } else break; + } } try self.file.?.pwriteAll(code, sect.offset); @@ -2001,104 +2010,100 @@ fn addRpaths(self: *Zld, rpaths: []const []const u8) !void { fn flush(self: *Zld) !void { try self.writeTextBlocks(); - // try self.writeStubHelperCommon(); + try self.writeStubHelperCommon(); - // if (self.common_section_index) |index| { - // const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - // const sect = &seg.sections.items[index]; - // sect.offset = 0; - // } + if (self.common_section_index) |index| { + const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = &seg.sections.items[index]; + sect.offset = 0; + } - // if (self.bss_section_index) |index| { - // const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - // const sect = &seg.sections.items[index]; - // sect.offset = 0; - // } + if (self.bss_section_index) |index| { + const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = &seg.sections.items[index]; + sect.offset = 0; + } - // if (self.tlv_bss_section_index) |index| { - // const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - // const sect = &seg.sections.items[index]; - // sect.offset = 0; - // } + if (self.tlv_bss_section_index) |index| { + const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = &seg.sections.items[index]; + sect.offset = 0; + } - // if 
(self.tlv_section_index) |index| { - // const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - // const sect = &seg.sections.items[index]; + if (self.tlv_section_index) |index| { + // TODO this should be part of relocation resolution routine. + const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = &seg.sections.items[index]; - // var buffer = try self.allocator.alloc(u8, @intCast(usize, sect.size)); - // defer self.allocator.free(buffer); - // _ = try self.file.?.preadAll(buffer, sect.offset); + const base_addr = if (self.tlv_data_section_index) |i| + seg.sections.items[i].addr + else + seg.sections.items[self.tlv_bss_section_index.?].addr; - // var stream = std.io.fixedBufferStream(buffer); - // var writer = stream.writer(); + var block: *TextBlock = self.blocks.get(.{ + .seg = self.data_segment_cmd_index.?, + .sect = index, + }) orelse unreachable; - // std.sort.sort(TlvOffset, self.threadlocal_offsets.items, {}, TlvOffset.cmp); + var buffer = try self.allocator.alloc(u8, @intCast(usize, sect.size)); + defer self.allocator.free(buffer); + _ = try self.file.?.preadAll(buffer, sect.offset); - // const seek_amt = 2 * @sizeOf(u64); - // for (self.threadlocal_offsets.items) |tlv| { - // try writer.context.seekBy(seek_amt); - // try writer.writeIntLittle(u64, tlv.offset); - // } + while (true) { + for (block.tlv_offsets.items) |tlv_offset| { + const sym = self.locals.items[tlv_offset.local_sym_index]; + assert(sym.payload == .regular); + const offset = sym.payload.regular.address - base_addr; + mem.writeIntLittle(u64, buffer[tlv_offset.offset..][0..@sizeOf(u64)], offset); + } - // try self.file.?.pwriteAll(buffer, sect.offset); - // } + if (block.prev) |prev| { + block = prev; + } else break; + } - // if (self.mod_init_func_section_index) |index| { - // const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - // const sect = &seg.sections.items[index]; + try 
self.file.?.pwriteAll(buffer, sect.offset); + } - // var initializers = std.ArrayList(u64).init(self.allocator); - // defer initializers.deinit(); - - // for (self.objects.items) |object| { - // for (object.initializers.items) |sym_id| { - // const address = object.symbols.items[sym_id].payload.regular.address; - // try initializers.append(address); - // } - // } - - // _ = try self.file.?.pwriteAll(mem.sliceAsBytes(initializers.items), sect.offset); - // sect.size = @intCast(u32, initializers.items.len * @sizeOf(u64)); - // } - - // try self.writeGotEntries(); - // try self.setEntryPoint(); - // try self.writeRebaseInfoTable(); - // try self.writeBindInfoTable(); - // try self.writeLazyBindInfoTable(); - // try self.writeExportInfo(); + try self.writeGotEntries(); + try self.setEntryPoint(); + try self.writeRebaseInfoTable(); + try self.writeBindInfoTable(); + try self.writeLazyBindInfoTable(); + try self.writeExportInfo(); + // TODO DICE for x86_64 // try self.writeDataInCode(); - // { - // const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - // const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - // symtab.symoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - // } + { + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; + symtab.symoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + } - // try self.writeSymbolTable(); - // try self.writeStringTable(); + try self.writeSymbolTable(); + try self.writeStringTable(); - // { - // // Seal __LINKEDIT size - // const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - // seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size.?); - // } + { + // Seal __LINKEDIT size + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + seg.inner.vmsize = 
mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size.?); + } - // if (self.target.?.cpu.arch == .aarch64) { - // try self.writeCodeSignaturePadding(); - // } + if (self.target.?.cpu.arch == .aarch64) { + try self.writeCodeSignaturePadding(); + } - // try self.writeLoadCommands(); - // try self.writeHeader(); + try self.writeLoadCommands(); + try self.writeHeader(); - // if (self.target.?.cpu.arch == .aarch64) { - // try self.writeCodeSignature(); - // } + if (self.target.?.cpu.arch == .aarch64) { + try self.writeCodeSignature(); + } - // if (comptime std.Target.current.isDarwin() and std.Target.current.cpu.arch == .aarch64) { - // const out_path = self.output.?.path; - // try fs.cwd().copyFile(out_path, fs.cwd(), out_path, .{}); - // } + if (comptime std.Target.current.isDarwin() and std.Target.current.cpu.arch == .aarch64) { + const out_path = self.output.?.path; + try fs.cwd().copyFile(out_path, fs.cwd(), out_path, .{}); + } } fn writeGotEntries(self: *Zld) !void { @@ -2140,8 +2145,35 @@ fn writeRebaseInfoTable(self: *Zld) !void { var pointers = std.ArrayList(Pointer).init(self.allocator); defer pointers.deinit(); - try pointers.ensureCapacity(self.local_rebases.items.len); - pointers.appendSliceAssumeCapacity(self.local_rebases.items); + { + var it = self.blocks.iterator(); + while (it.next()) |entry| { + const match = entry.key_ptr.*; + var block: *TextBlock = entry.value_ptr.*; + + if (match.seg == self.text_segment_cmd_index.?) 
continue; // __TEXT is non-writable + + const seg = self.load_commands.items[match.seg].Segment; + const sect = seg.sections.items[match.sect]; + + while (true) { + const sym = self.locals.items[block.local_sym_index]; + assert(sym.payload == .regular); + const base_offset = sym.payload.regular.address - seg.inner.vmaddr; + + for (block.rebases.items) |offset| { + try pointers.append(.{ + .offset = base_offset + offset, + .segment_id = match.seg, + }); + } + + if (block.prev) |prev| { + block = prev; + } else break; + } + } + } if (self.got_section_index) |idx| { const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; @@ -2159,24 +2191,6 @@ fn writeRebaseInfoTable(self: *Zld) !void { } } - if (self.mod_init_func_section_index) |idx| { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); - - var index: u64 = 0; - for (self.objects.items) |object| { - for (object.initializers.items) |_| { - try pointers.append(.{ - .offset = base_offset + index * @sizeOf(u64), - .segment_id = segment_id, - }); - index += 1; - } - } - } - if (self.la_symbol_ptr_section_index) |idx| { const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; const sect = seg.sections.items[idx]; @@ -2240,10 +2254,15 @@ fn writeBindInfoTable(self: *Zld) !void { const proxy = sym.payload.proxy; for (proxy.bind_info.items) |info| { - const seg = self.load_commands.items[info.segment_id].Segment; + const bind_sym = self.locals.items[info.local_sym_index]; + assert(bind_sym.payload == .regular); + const reg = bind_sym.payload.regular; + const base_address = self.load_commands.items[reg.segment_id].Segment.inner.vmaddr; + const offset = reg.address + info.offset - base_address; + try pointers.append(.{ - .offset = info.address - seg.inner.vmaddr, - .segment_id = 
info.segment_id, + .offset = offset, + .segment_id = reg.segment_id, .dylib_ordinal = proxy.dylibOrdinal(), .name = sym.name, }); @@ -2462,7 +2481,7 @@ fn writeSymbolTable(self: *Zld) !void { for (self.locals.items) |symbol| { if (symbol.isTemp()) continue; // TODO when merging codepaths, this should go into freelist - const nlist = try symbol.asNlist(&self.strtab); + const nlist = try symbol.asNlist(self, &self.strtab); locals.appendAssumeCapacity(nlist); } @@ -2475,7 +2494,7 @@ fn writeSymbolTable(self: *Zld) !void { defer undef_dir.deinit(); for (self.globals.values()) |sym| { - const nlist = try sym.asNlist(&self.strtab); + const nlist = try sym.asNlist(self, &self.strtab); switch (sym.payload) { .regular => try exports.append(nlist), .proxy => { From bb5b82cab9f142bef916e35ec186314a41613356 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 8 Jul 2021 19:51:03 +0200 Subject: [PATCH 29/81] zld: dedup symbols in the symbol table --- src/link/MachO/Zld.zig | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index e9a8199d9d..0022f1dbba 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -2477,33 +2477,32 @@ fn writeSymbolTable(self: *Zld) !void { var locals = std.ArrayList(macho.nlist_64).init(self.allocator); defer locals.deinit(); - try locals.ensureTotalCapacity(self.locals.items.len); - - for (self.locals.items) |symbol| { - if (symbol.isTemp()) continue; // TODO when merging codepaths, this should go into freelist - const nlist = try symbol.asNlist(self, &self.strtab); - locals.appendAssumeCapacity(nlist); - } var exports = std.ArrayList(macho.nlist_64).init(self.allocator); defer exports.deinit(); + for (self.locals.items) |symbol, i| { + if (i == 0) continue; // skip null symbol + if (symbol.isTemp()) continue; // TODO when merging codepaths, this should go into freelist + const reg = symbol.payload.regular; + const nlist = try 
symbol.asNlist(self, &self.strtab); + if (reg.linkage == .translation_unit) { + try locals.append(nlist); + } else { + try exports.append(nlist); + } + } + var undefs = std.ArrayList(macho.nlist_64).init(self.allocator); defer undefs.deinit(); var undef_dir = std.StringHashMap(u32).init(self.allocator); defer undef_dir.deinit(); - for (self.globals.values()) |sym| { + for (self.imports.items) |sym| { const nlist = try sym.asNlist(self, &self.strtab); - switch (sym.payload) { - .regular => try exports.append(nlist), - .proxy => { - const id = @intCast(u32, undefs.items.len); - try undefs.append(nlist); - try undef_dir.putNoClobber(sym.name, id); - }, - else => unreachable, - } + const id = @intCast(u32, undefs.items.len); + try undefs.append(nlist); + try undef_dir.putNoClobber(sym.name, id); } const nlocals = locals.items.len; From 322be2698d78836e94b54da959eb28476530b822 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 10 Jul 2021 09:45:23 +0200 Subject: [PATCH 30/81] zld: TextBlock needs to be written to an aligend offset too --- src/link/MachO/Zld.zig | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 0022f1dbba..cc94b68a57 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -1069,7 +1069,7 @@ fn allocateTextBlocks(self: *Zld) !void { assert(sym.payload == .regular); sym.payload.regular.address = base_addr; - log.warn(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ + log.warn(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ sym.name, base_addr, base_addr + block.size, @@ -1103,6 +1103,8 @@ fn allocateTextBlocks(self: *Zld) !void { } fn writeTextBlocks(self: *Zld) !void { + log.warn("writing text blocks", .{}); + var it = self.blocks.iterator(); while (it.next()) |entry| { const match = entry.key_ptr.*; @@ -1112,7 +1114,8 @@ fn writeTextBlocks(self: *Zld) !void { const sect = seg.sections.items[match.sect]; const sect_type = 
sectionType(sect); - log.warn("writing text blocks for section {s},{s}", .{ segmentName(sect), sectionName(sect) }); + log.warn(" for section {s},{s}", .{ segmentName(sect), sectionName(sect) }); + log.warn(" {}", .{sect}); var code = try self.allocator.alloc(u8, sect.size); defer self.allocator.free(code); @@ -1126,10 +1129,28 @@ fn writeTextBlocks(self: *Zld) !void { var base_off: u64 = sect.size; while (true) { - base_off -= block.size; + const block_alignment = try math.powi(u32, 2, block.alignment); + const unaligned_base_off = base_off - block.size; + const aligned_base_off = mem.alignBackwardGeneric(u64, unaligned_base_off, block_alignment); + + const sym = self.locals.items[block.local_sym_index]; + log.warn(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ + sym.name, + aligned_base_off, + aligned_base_off + block.size, + block.size, + block.alignment, + }); try block.resolveRelocs(self); - mem.copy(u8, code[base_off..][0..block.size], block.code); + mem.copy(u8, code[aligned_base_off..][0..block.size], block.code); + + // TODO NOP for machine code instead of just zeroing out + const padding_off = aligned_base_off + block.size; + const padding_len = unaligned_base_off - aligned_base_off; + mem.set(u8, code[padding_off..][0..padding_len], 0); + + base_off = aligned_base_off; if (block.prev) |prev| { block = prev; From dd5c7588d105c544289b6344b652133538e7e898 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 10 Jul 2021 14:07:56 +0200 Subject: [PATCH 31/81] zld: fix resolving TLV offset relocations --- src/link/MachO/Zld.zig | 51 +--------------------------------------- src/link/MachO/reloc.zig | 49 ++++++++++++++++++++++++++++---------- 2 files changed, 37 insertions(+), 63 deletions(-) diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index cc94b68a57..9afa5007b7 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -131,7 +131,6 @@ pub const TextBlock = struct { size: u64, alignment: u32, rebases: std.ArrayList(u64), - 
tlv_offsets: std.ArrayList(TlvOffset), next: ?*TextBlock = null, prev: ?*TextBlock = null, @@ -140,11 +139,6 @@ pub const TextBlock = struct { offset: u64, }; - pub const TlvOffset = struct { - local_sym_index: u32, - offset: u64, - }; - pub fn init(allocator: *Allocator) TextBlock { return .{ .allocator = allocator, @@ -155,7 +149,6 @@ pub const TextBlock = struct { .size = undefined, .alignment = undefined, .rebases = std.ArrayList(u64).init(allocator), - .tlv_offsets = std.ArrayList(TextBlock.TlvOffset).init(allocator), }; } @@ -170,7 +163,6 @@ pub const TextBlock = struct { self.allocator.free(self.code); self.relocs.deinit(); self.rebases.deinit(); - self.tlv_offsets.deinit(); } pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { @@ -210,9 +202,6 @@ pub const TextBlock = struct { if (self.rebases.items.len > 0) { log.warn(" | rebases: {any}", .{self.rebases.items}); } - if (self.tlv_offsets.items.len > 0) { - log.warn(" | TLV offsets: {any}", .{self.tlv_offsets.items}); - } log.warn(" | size = {}", .{self.size}); log.warn(" | align = {}", .{self.alignment}); } @@ -1120,10 +1109,7 @@ fn writeTextBlocks(self: *Zld) !void { var code = try self.allocator.alloc(u8, sect.size); defer self.allocator.free(code); - if (sect_type == macho.S_ZEROFILL or - sect_type == macho.S_THREAD_LOCAL_ZEROFILL or - sect_type == macho.S_THREAD_LOCAL_VARIABLES) - { + if (sect_type == macho.S_ZEROFILL or sect_type == macho.S_THREAD_LOCAL_ZEROFILL) { mem.set(u8, code, 0); } else { var base_off: u64 = sect.size; @@ -2051,41 +2037,6 @@ fn flush(self: *Zld) !void { sect.offset = 0; } - if (self.tlv_section_index) |index| { - // TODO this should be part of relocation resolution routine. 
- const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[index]; - - const base_addr = if (self.tlv_data_section_index) |i| - seg.sections.items[i].addr - else - seg.sections.items[self.tlv_bss_section_index.?].addr; - - var block: *TextBlock = self.blocks.get(.{ - .seg = self.data_segment_cmd_index.?, - .sect = index, - }) orelse unreachable; - - var buffer = try self.allocator.alloc(u8, @intCast(usize, sect.size)); - defer self.allocator.free(buffer); - _ = try self.file.?.preadAll(buffer, sect.offset); - - while (true) { - for (block.tlv_offsets.items) |tlv_offset| { - const sym = self.locals.items[tlv_offset.local_sym_index]; - assert(sym.payload == .regular); - const offset = sym.payload.regular.address - base_addr; - mem.writeIntLittle(u64, buffer[tlv_offset.offset..][0..@sizeOf(u64)], offset); - } - - if (block.prev) |prev| { - block = prev; - } else break; - } - - try self.file.?.pwriteAll(buffer, sect.offset); - } - try self.writeGotEntries(); try self.setEntryPoint(); try self.writeRebaseInfoTable(); diff --git a/src/link/MachO/reloc.zig b/src/link/MachO/reloc.zig index bfb2ee905e..7cf477a703 100644 --- a/src/link/MachO/reloc.zig +++ b/src/link/MachO/reloc.zig @@ -50,7 +50,7 @@ pub const Relocation = struct { source_sect_addr: ?u64 = null, - pub fn resolve(self: Unsigned, base: Relocation, source_addr: u64, target_addr: u64) !void { + pub fn resolve(self: Unsigned, base: Relocation, _: u64, target_addr: u64) !void { const addend = if (self.source_sect_addr) |addr| self.addend - @intCast(i64, addr) else @@ -430,12 +430,43 @@ pub const Relocation = struct { } switch (self.target.payload) { - .regular => |reg| break :blk reg.address, + .regular => |reg| { + const is_tlv = is_tlv: { + const sym = zld.locals.items[self.block.local_sym_index]; + const seg = zld.load_commands.items[sym.payload.regular.segment_id].Segment; + const sect = seg.sections.items[sym.payload.regular.section_id]; + break :is_tlv 
commands.sectionType(sect) == macho.S_THREAD_LOCAL_VARIABLES; + }; + if (is_tlv) { + // For TLV relocations, the value specified as a relocation is the displacement from the + // TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first + // defined TLV template init section in the following order: + // * wrt to __thread_data if defined, then + // * wrt to __thread_bss + const seg = zld.load_commands.items[zld.data_segment_cmd_index.?].Segment; + const base_address = inner: { + if (zld.tlv_data_section_index) |i| { + break :inner seg.sections.items[i].addr; + } else if (zld.tlv_bss_section_index) |i| { + break :inner seg.sections.items[i].addr; + } else { + log.err("threadlocal variables present but no initializer sections found", .{}); + log.err(" __thread_data not found", .{}); + log.err(" __thread_bss not found", .{}); + return error.FailedToResolveRelocationTarget; + } + }; + break :blk reg.address - base_address; + } + + break :blk reg.address; + }, .proxy => |proxy| { if (mem.eql(u8, self.target.name, "__tlv_bootstrap")) { - const segment = zld.load_commands.items[zld.data_segment_cmd_index.?].Segment; - const tlv = segment.sections.items[zld.tlv_section_index.?]; - break :blk tlv.addr; + break :blk 0; // Dynamically bound by dyld. + // const segment = zld.load_commands.items[zld.data_segment_cmd_index.?].Segment; + // const tlv = segment.sections.items[zld.tlv_section_index.?]; + // break :blk tlv.addr; } const segment = zld.load_commands.items[zld.text_segment_cmd_index.?].Segment; @@ -677,14 +708,6 @@ pub const Parser = struct { if (should_rebase) { try self.block.rebases.append(out_rel.offset); } - - // TLV is handled via a separate offset mechanism. 
- if (sect_type == macho.S_THREAD_LOCAL_VARIABLES) { - try self.block.tlv_offsets.append(.{ - .local_sym_index = out_rel.target.payload.regular.local_sym_index, - .offset = out_rel.offset, - }); - } }, } } else if (out_rel.payload == .branch) blk: { From 3bdb3b574e36962528d203a69ee1de4b39e17830 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 10 Jul 2021 14:33:17 +0200 Subject: [PATCH 32/81] zld: turn logging off --- src/link/MachO/Object.zig | 8 ++--- src/link/MachO/Zld.zig | 70 +++++++++++++++++++++------------------ src/link/MachO/reloc.zig | 6 ++-- 3 files changed, 45 insertions(+), 39 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 0c8b8f8088..b1febed39d 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -451,7 +451,7 @@ const TextBlockParser = struct { pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; - log.warn("analysing {s}", .{self.name.?}); + log.debug("analysing {s}", .{self.name.?}); const dysymtab = self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; // We only care about defined symbols, so filter every other out. @@ -472,14 +472,14 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { for (seg.sections.items) |sect, id| { const sect_id = @intCast(u8, id); - log.warn("putting section '{s},{s}' as a TextBlock", .{ + log.debug("putting section '{s},{s}' as a TextBlock", .{ segmentName(sect), sectionName(sect), }); // Get matching segment/section in the final artifact. const match = (try zld.getMatchingSection(sect)) orelse { - log.warn("unhandled section", .{}); + log.debug("unhandled section", .{}); continue; }; @@ -523,7 +523,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { const reg = &sym.payload.regular; if (reg.file) |file| { if (file != self) { - log.warn("deduping definition of {s} in {s}", .{ sym.name, self.name.? 
}); + log.debug("deduping definition of {s} in {s}", .{ sym.name, self.name.? }); block.deinit(); self.allocator.destroy(block); continue; diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 9afa5007b7..171a9625f1 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -171,46 +171,52 @@ pub const TextBlock = struct { } } - pub fn print_this(self: *const TextBlock, zld: *Zld) void { - log.warn("TextBlock", .{}); - log.warn(" | {}: {}", .{ self.local_sym_index, zld.locals.items[self.local_sym_index] }); + pub fn format(self: *const TextBlock, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "TextBlock {{\n", .{}); + try std.fmt.format(writer, " {}: {}\n", .{ self.local_sym_index, zld.locals.items[self.local_sym_index] }); if (self.aliases) |aliases| { - log.warn(" | aliases:", .{}); + try std.fmt.format(writer, " aliases:\n", .{}); for (aliases) |index| { - log.warn(" | {}: {}", .{ index, zld.locals.items[index] }); + try std.fmt.format(writer, " {}: {}\n", .{ index, zld.locals.items[index] }); } } if (self.references.count() > 0) { - log.warn(" | references:", .{}); + try std.fmt.format(writer, " references:\n", .{}); for (self.references.keys()) |index| { - log.warn(" | {}: {}", .{ index, zld.locals.items[index] }); + try std.fmt.format(writer, " {}: {}\n", .{ index, zld.locals.items[index] }); } } if (self.contained) |contained| { - log.warn(" | contained symbols:", .{}); + try std.fmt.format(writer, " contained symbols:\n", .{}); for (contained) |sym_at_off| { - log.warn(" | {}: {}", .{ sym_at_off.offset, zld.locals.items[sym_at_off.local_sym_index] }); + try std.fmt.format(writer, " {}: {}\n", .{ + sym_at_off.offset, + zld.locals.items[sym_at_off.local_sym_index], + }); } } - log.warn(" | code.len = {}", .{self.code.len}); + try std.fmt.format(writer, " code.len = {}\n", .{self.code.len}); if (self.relocs.items.len > 0) { - log.warn(" | 
relocations:", .{}); + try std.fmt.format(writer, " relocations:\n", .{}); for (self.relocs.items) |rel| { - log.warn(" | {}", .{rel}); + try std.fmt.format(writer, " {}\n", .{rel}); } } if (self.rebases.items.len > 0) { - log.warn(" | rebases: {any}", .{self.rebases.items}); + try std.fmt.format(writer, " rebases: {any}\n", .{self.rebases.items}); } - log.warn(" | size = {}", .{self.size}); - log.warn(" | align = {}", .{self.alignment}); + try std.fmt.format(writer, " size = {}\n", .{self.size}); + try std.fmt.format(writer, " align = {}\n", .{self.alignment}); + try std.fmt.format(writer, "}}", .{}); } pub fn print(self: *const TextBlock, zld: *Zld) void { if (self.prev) |prev| { prev.print(zld); } - self.print_this(zld); + log.warn("{}\n", .{self}); } }; @@ -317,15 +323,15 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg self.allocateLinkeditSegment(); try self.allocateTextBlocks(); - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const seg = self.load_commands.items[entry.key_ptr.seg].Segment; - const sect = seg.sections.items[entry.key_ptr.sect]; + // var it = self.blocks.iterator(); + // while (it.next()) |entry| { + // const seg = self.load_commands.items[entry.key_ptr.seg].Segment; + // const sect = seg.sections.items[entry.key_ptr.sect]; - log.warn("\n\n{s},{s} contents:", .{ segmentName(sect), sectionName(sect) }); - log.warn(" {}", .{sect}); - entry.value_ptr.*.print(self); - } + // log.warn("\n\n{s},{s} contents:", .{ segmentName(sect), sectionName(sect) }); + // log.warn(" {}", .{sect}); + // entry.value_ptr.*.print(self); + // } try self.flush(); } @@ -1036,7 +1042,7 @@ fn allocateSegment(self: *Zld, index: u16, offset: u64) !void { } fn allocateTextBlocks(self: *Zld) !void { - log.warn("allocating text blocks", .{}); + log.debug("allocating text blocks", .{}); var it = self.blocks.iterator(); while (it.next()) |entry| { @@ -1047,8 +1053,8 @@ fn allocateTextBlocks(self: *Zld) !void { const sect = 
seg.sections.items[match.sect]; var base_addr: u64 = sect.addr + sect.size; - log.warn(" within section {s},{s}", .{ segmentName(sect), sectionName(sect) }); - log.warn(" {}", .{sect}); + log.debug(" within section {s},{s}", .{ segmentName(sect), sectionName(sect) }); + log.debug(" {}", .{sect}); while (true) { const block_alignment = try math.powi(u32, 2, block.alignment); @@ -1058,7 +1064,7 @@ fn allocateTextBlocks(self: *Zld) !void { assert(sym.payload == .regular); sym.payload.regular.address = base_addr; - log.warn(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ + log.debug(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ sym.name, base_addr, base_addr + block.size, @@ -1092,7 +1098,7 @@ fn allocateTextBlocks(self: *Zld) !void { } fn writeTextBlocks(self: *Zld) !void { - log.warn("writing text blocks", .{}); + log.debug("writing text blocks", .{}); var it = self.blocks.iterator(); while (it.next()) |entry| { @@ -1103,8 +1109,8 @@ fn writeTextBlocks(self: *Zld) !void { const sect = seg.sections.items[match.sect]; const sect_type = sectionType(sect); - log.warn(" for section {s},{s}", .{ segmentName(sect), sectionName(sect) }); - log.warn(" {}", .{sect}); + log.debug(" for section {s},{s}", .{ segmentName(sect), sectionName(sect) }); + log.debug(" {}", .{sect}); var code = try self.allocator.alloc(u8, sect.size); defer self.allocator.free(code); @@ -1120,7 +1126,7 @@ fn writeTextBlocks(self: *Zld) !void { const aligned_base_off = mem.alignBackwardGeneric(u64, unaligned_base_off, block_alignment); const sym = self.locals.items[block.local_sym_index]; - log.warn(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ + log.debug(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ sym.name, aligned_base_off, aligned_base_off + block.size, diff --git a/src/link/MachO/reloc.zig b/src/link/MachO/reloc.zig index 7cf477a703..87ccd89873 100644 --- a/src/link/MachO/reloc.zig +++ b/src/link/MachO/reloc.zig @@ -491,9 +491,9 @@ pub const Relocation = struct { } }; 
- log.warn("relocating {}", .{self}); - log.warn(" | source_addr = 0x{x}", .{source_addr}); - log.warn(" | target_addr = 0x{x}", .{target_addr}); + log.debug("relocating {}", .{self}); + log.debug(" | source_addr = 0x{x}", .{source_addr}); + log.debug(" | target_addr = 0x{x}", .{target_addr}); switch (self.payload) { .unsigned => |unsigned| try unsigned.resolve(self, source_addr, target_addr), From 7aefea614f570f489366a7fbde1a98eaf2551fc6 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 10 Jul 2021 14:35:20 +0200 Subject: [PATCH 33/81] zld: fix allocating tentative defs --- src/link/MachO/Zld.zig | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 171a9625f1..4f423e5787 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -1603,6 +1603,17 @@ fn resolveSymbols(self: *Zld) !void { block.size = size; block.alignment = alignment; + // Update target section's metadata + // TODO should we update segment's size here too? + // How does it tie with incremental space allocs? 
+ const tseg = &self.load_commands.items[match.seg].Segment; + const tsect = &tseg.sections.items[match.sect]; + const new_alignment = math.max(tsect.@"align", block.alignment); + const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); + const new_size = mem.alignForwardGeneric(u64, tsect.size + block.size, new_alignment_pow_2); + tsect.size = new_size; + tsect.@"align" = new_alignment; + if (self.blocks.getPtr(match)) |last| { last.*.next = block; block.prev = last.*; From 714e0c47612e375ac7148bc177ac7bc543c80243 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 10 Jul 2021 21:48:06 +0200 Subject: [PATCH 34/81] zld: re-enable logging of TextBlocks --- src/link/MachO/Zld.zig | 51 ++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 4f423e5787..4ad3ce635c 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -171,52 +171,49 @@ pub const TextBlock = struct { } } - pub fn format(self: *const TextBlock, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "TextBlock {{\n", .{}); - try std.fmt.format(writer, " {}: {}\n", .{ self.local_sym_index, zld.locals.items[self.local_sym_index] }); + pub fn print_this(self: *const TextBlock, zld: *Zld) void { + log.warn("TextBlock", .{}); + log.warn(" {}: {}", .{ self.local_sym_index, zld.locals.items[self.local_sym_index] }); if (self.aliases) |aliases| { - try std.fmt.format(writer, " aliases:\n", .{}); + log.warn(" aliases:", .{}); for (aliases) |index| { - try std.fmt.format(writer, " {}: {}\n", .{ index, zld.locals.items[index] }); + log.warn(" {}: {}", .{ index, zld.locals.items[index] }); } } if (self.references.count() > 0) { - try std.fmt.format(writer, " references:\n", .{}); + log.warn(" references:", .{}); for (self.references.keys()) |index| { - try std.fmt.format(writer, " {}: {}\n", .{ index, 
zld.locals.items[index] }); + log.warn(" {}: {}", .{ index, zld.locals.items[index] }); } } if (self.contained) |contained| { - try std.fmt.format(writer, " contained symbols:\n", .{}); + log.warn(" contained symbols:", .{}); for (contained) |sym_at_off| { - try std.fmt.format(writer, " {}: {}\n", .{ + log.warn(" {}: {}\n", .{ sym_at_off.offset, zld.locals.items[sym_at_off.local_sym_index], }); } } - try std.fmt.format(writer, " code.len = {}\n", .{self.code.len}); + log.warn(" code.len = {}", .{self.code.len}); if (self.relocs.items.len > 0) { - try std.fmt.format(writer, " relocations:\n", .{}); + log.warn(" relocations:", .{}); for (self.relocs.items) |rel| { - try std.fmt.format(writer, " {}\n", .{rel}); + log.warn(" {}", .{rel}); } } if (self.rebases.items.len > 0) { - try std.fmt.format(writer, " rebases: {any}\n", .{self.rebases.items}); + log.warn(" rebases: {any}", .{self.rebases.items}); } - try std.fmt.format(writer, " size = {}\n", .{self.size}); - try std.fmt.format(writer, " align = {}\n", .{self.alignment}); - try std.fmt.format(writer, "}}", .{}); + log.warn(" size = {}", .{self.size}); + log.warn(" align = {}", .{self.alignment}); } pub fn print(self: *const TextBlock, zld: *Zld) void { if (self.prev) |prev| { prev.print(zld); } - log.warn("{}\n", .{self}); + self.print_this(zld); } }; @@ -323,15 +320,15 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg self.allocateLinkeditSegment(); try self.allocateTextBlocks(); - // var it = self.blocks.iterator(); - // while (it.next()) |entry| { - // const seg = self.load_commands.items[entry.key_ptr.seg].Segment; - // const sect = seg.sections.items[entry.key_ptr.sect]; + var it = self.blocks.iterator(); + while (it.next()) |entry| { + const seg = self.load_commands.items[entry.key_ptr.seg].Segment; + const sect = seg.sections.items[entry.key_ptr.sect]; - // log.warn("\n\n{s},{s} contents:", .{ segmentName(sect), sectionName(sect) }); - // log.warn(" {}", .{sect}); - // 
entry.value_ptr.*.print(self); - // } + log.warn("\n\n{s},{s} contents:", .{ segmentName(sect), sectionName(sect) }); + log.warn(" {}", .{sect}); + entry.value_ptr.*.print(self); + } try self.flush(); } From 95aeb09b9b36874645eae648324e21cb2f89337a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 10 Jul 2021 22:39:31 +0200 Subject: [PATCH 35/81] zld: populate sections from the top rather than from bottom --- src/link/MachO/Object.zig | 1 + src/link/MachO/Zld.zig | 50 ++++++++++++++++++++++++--------------- 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index b1febed39d..6a801845b3 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -502,6 +502,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { // Is there any padding between symbols within the section? const is_splittable = self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; + // const is_splittable = false; next: { if (is_splittable) blocks: { diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 4ad3ce635c..cd8dcd6f8a 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -1046,22 +1046,28 @@ fn allocateTextBlocks(self: *Zld) !void { const match = entry.key_ptr.*; var block: *TextBlock = entry.value_ptr.*; + // Find the first block + while (block.prev) |prev| { + block = prev; + } + const seg = self.load_commands.items[match.seg].Segment; const sect = seg.sections.items[match.sect]; - var base_addr: u64 = sect.addr + sect.size; - log.debug(" within section {s},{s}", .{ segmentName(sect), sectionName(sect) }); - log.debug(" {}", .{sect}); + var base_addr: u64 = sect.addr; + + log.warn(" within section {s},{s}", .{ segmentName(sect), sectionName(sect) }); + log.warn(" {}", .{sect}); while (true) { const block_alignment = try math.powi(u32, 2, block.alignment); - base_addr = mem.alignBackwardGeneric(u64, base_addr - block.size, block_alignment); + base_addr = 
mem.alignForwardGeneric(u64, base_addr, block_alignment); const sym = self.locals.items[block.local_sym_index]; assert(sym.payload == .regular); sym.payload.regular.address = base_addr; - log.debug(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ + log.warn(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ sym.name, base_addr, base_addr + block.size, @@ -1087,8 +1093,10 @@ fn allocateTextBlocks(self: *Zld) !void { } } - if (block.prev) |prev| { - block = prev; + base_addr += block.size; + + if (block.next) |next| { + block = next; } else break; } } @@ -1102,12 +1110,16 @@ fn writeTextBlocks(self: *Zld) !void { const match = entry.key_ptr.*; var block: *TextBlock = entry.value_ptr.*; + while (block.prev) |prev| { + block = prev; + } + const seg = self.load_commands.items[match.seg].Segment; const sect = seg.sections.items[match.sect]; const sect_type = sectionType(sect); - log.debug(" for section {s},{s}", .{ segmentName(sect), sectionName(sect) }); - log.debug(" {}", .{sect}); + log.warn(" for section {s},{s}", .{ segmentName(sect), sectionName(sect) }); + log.warn(" {}", .{sect}); var code = try self.allocator.alloc(u8, sect.size); defer self.allocator.free(code); @@ -1115,15 +1127,14 @@ fn writeTextBlocks(self: *Zld) !void { if (sect_type == macho.S_ZEROFILL or sect_type == macho.S_THREAD_LOCAL_ZEROFILL) { mem.set(u8, code, 0); } else { - var base_off: u64 = sect.size; + var base_off: u64 = 0; while (true) { const block_alignment = try math.powi(u32, 2, block.alignment); - const unaligned_base_off = base_off - block.size; - const aligned_base_off = mem.alignBackwardGeneric(u64, unaligned_base_off, block_alignment); + const aligned_base_off = mem.alignForwardGeneric(u64, base_off, block_alignment); const sym = self.locals.items[block.local_sym_index]; - log.debug(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ + log.warn(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ sym.name, aligned_base_off, aligned_base_off + block.size, @@ -1135,16 
+1146,17 @@ fn writeTextBlocks(self: *Zld) !void { mem.copy(u8, code[aligned_base_off..][0..block.size], block.code); // TODO NOP for machine code instead of just zeroing out - const padding_off = aligned_base_off + block.size; - const padding_len = unaligned_base_off - aligned_base_off; - mem.set(u8, code[padding_off..][0..padding_len], 0); + const padding_len = aligned_base_off - base_off; + mem.set(u8, code[base_off..][0..padding_len], 0); - base_off = aligned_base_off; + base_off = aligned_base_off + block.size; - if (block.prev) |prev| { - block = prev; + if (block.next) |next| { + block = next; } else break; } + + mem.set(u8, code[base_off..], 0); } try self.file.?.pwriteAll(code, sect.offset); From 9e051e365b689cb4e6ceae5910fb70fe6a7eb0f8 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 10 Jul 2021 23:59:36 +0200 Subject: [PATCH 36/81] zld: correctly estimate TextBlock's alignment with section's alignment serving as the maximum alignment that can be seen in this particular section. However, TextBlocks are still allowed to have at most that alignment. 
--- src/link/MachO/Object.zig | 10 ++++++++-- src/link/MachO/Zld.zig | 31 +++++++++++++++---------------- src/link/MachO/reloc.zig | 10 ++++++++-- 3 files changed, 31 insertions(+), 20 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 6a801845b3..c8c72e5ed4 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -413,6 +413,12 @@ const TextBlockParser = struct { const code = self.code[start_addr..end_addr]; const size = code.len; + const max_align = self.section.@"align"; + const actual_align = if (senior_nlist.nlist.n_value > 0) + math.min(@ctz(u64, senior_nlist.nlist.n_value), max_align) + else + max_align; + const alias_only_indices = if (aliases.items.len > 0) blk: { var out = std.ArrayList(u32).init(self.allocator); try out.ensureTotalCapacity(aliases.items.len); @@ -435,7 +441,7 @@ const TextBlockParser = struct { block.aliases = alias_only_indices; block.code = try self.allocator.dupe(u8, code); block.size = size; - block.alignment = self.section.@"align"; + block.alignment = actual_align; const relocs = filterRelocs(self.relocs, start_addr, end_addr); if (relocs.len > 0) { @@ -524,7 +530,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { const reg = &sym.payload.regular; if (reg.file) |file| { if (file != self) { - log.debug("deduping definition of {s} in {s}", .{ sym.name, self.name.? }); + log.warn("deduping definition of {s} in {s}", .{ sym.name, self.name.? 
}); block.deinit(); self.allocator.destroy(block); continue; diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index cd8dcd6f8a..017ebf3835 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -320,15 +320,15 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg self.allocateLinkeditSegment(); try self.allocateTextBlocks(); - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const seg = self.load_commands.items[entry.key_ptr.seg].Segment; - const sect = seg.sections.items[entry.key_ptr.sect]; + // var it = self.blocks.iterator(); + // while (it.next()) |entry| { + // const seg = self.load_commands.items[entry.key_ptr.seg].Segment; + // const sect = seg.sections.items[entry.key_ptr.sect]; - log.warn("\n\n{s},{s} contents:", .{ segmentName(sect), sectionName(sect) }); - log.warn(" {}", .{sect}); - entry.value_ptr.*.print(self); - } + // log.warn("\n\n{s},{s} contents:", .{ segmentName(sect), sectionName(sect) }); + // log.warn(" {}", .{sect}); + // entry.value_ptr.*.print(self); + // } try self.flush(); } @@ -1056,8 +1056,8 @@ fn allocateTextBlocks(self: *Zld) !void { var base_addr: u64 = sect.addr; - log.warn(" within section {s},{s}", .{ segmentName(sect), sectionName(sect) }); - log.warn(" {}", .{sect}); + log.debug(" within section {s},{s}", .{ segmentName(sect), sectionName(sect) }); + log.debug(" {}", .{sect}); while (true) { const block_alignment = try math.powi(u32, 2, block.alignment); @@ -1067,7 +1067,7 @@ fn allocateTextBlocks(self: *Zld) !void { assert(sym.payload == .regular); sym.payload.regular.address = base_addr; - log.warn(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ + log.debug(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ sym.name, base_addr, base_addr + block.size, @@ -1118,8 +1118,8 @@ fn writeTextBlocks(self: *Zld) !void { const sect = seg.sections.items[match.sect]; const sect_type = sectionType(sect); - log.warn(" for section {s},{s}", .{ 
segmentName(sect), sectionName(sect) }); - log.warn(" {}", .{sect}); + log.debug(" for section {s},{s}", .{ segmentName(sect), sectionName(sect) }); + log.debug(" {}", .{sect}); var code = try self.allocator.alloc(u8, sect.size); defer self.allocator.free(code); @@ -1134,7 +1134,7 @@ fn writeTextBlocks(self: *Zld) !void { const aligned_base_off = mem.alignForwardGeneric(u64, base_off, block_alignment); const sym = self.locals.items[block.local_sym_index]; - log.warn(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ + log.debug(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ sym.name, aligned_base_off, aligned_base_off + block.size, @@ -1433,7 +1433,7 @@ fn writeStubInStubHelper(self: *Zld, index: u32) !void { fn resolveSymbolsInObject(self: *Zld, object: *Object) !void { log.debug("resolving symbols in '{s}'", .{object.name}); - for (object.symtab.items) |sym, sym_id| { + for (object.symtab.items) |sym| { const sym_name = object.getString(sym.n_strx); if (Symbol.isStab(sym)) { @@ -2152,7 +2152,6 @@ fn writeRebaseInfoTable(self: *Zld) !void { if (match.seg == self.text_segment_cmd_index.?) 
continue; // __TEXT is non-writable const seg = self.load_commands.items[match.seg].Segment; - const sect = seg.sections.items[match.sect]; while (true) { const sym = self.locals.items[block.local_sym_index]; diff --git a/src/link/MachO/reloc.zig b/src/link/MachO/reloc.zig index 87ccd89873..1799769db2 100644 --- a/src/link/MachO/reloc.zig +++ b/src/link/MachO/reloc.zig @@ -110,6 +110,7 @@ pub const Relocation = struct { } pub fn format(self: Branch, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = self; _ = fmt; _ = options; try std.fmt.format(writer, "Branch {{}}", .{}); @@ -179,7 +180,7 @@ pub const Relocation = struct { load, }; - pub fn resolve(self: PageOff, base: Relocation, source_addr: u64, target_addr: u64) !void { + pub fn resolve(self: PageOff, base: Relocation, _: u64, target_addr: u64) !void { switch (self.kind) { .page => { const actual_target_addr = if (self.addend) |addend| target_addr + addend else target_addr; @@ -325,12 +326,13 @@ pub const Relocation = struct { }; pub const PointerToGot = struct { - pub fn resolve(self: PointerToGot, base: Relocation, source_addr: u64, target_addr: u64) !void { + pub fn resolve(_: PointerToGot, base: Relocation, source_addr: u64, target_addr: u64) !void { const result = try math.cast(i32, @intCast(i64, target_addr) - @intCast(i64, source_addr)); mem.writeIntLittle(u32, base.block.code[base.offset..][0..4], @bitCast(u32, result)); } pub fn format(self: PointerToGot, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = self; _ = fmt; _ = options; try std.fmt.format(writer, "PointerToGot {{}}", .{}); @@ -342,6 +344,10 @@ pub const Relocation = struct { correction: i4, pub fn resolve(self: Signed, base: Relocation, source_addr: u64, target_addr: u64) !void { + _ = self; + _ = base; + _ = source_addr; + _ = target_addr; // const target_addr = target_addr: { // if (signed.base.target == .section) { // const source_target = @intCast(i64, 
args.source_source_sect_addr.?) + @intCast(i64, signed.base.offset) + signed.addend + 4; From 570660bb4658823227d9cb227b1585eccff2af50 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 11 Jul 2021 10:04:13 +0200 Subject: [PATCH 37/81] zld: ___dso_handle is regular at 0x100000000 which points at the start of the __TEXT segment. Also, ensure C++ initializers and terminators are rebased. --- src/link/MachO/Object.zig | 2 +- src/link/MachO/Zld.zig | 10 ++++++++-- src/link/MachO/reloc.zig | 7 +++++-- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index c8c72e5ed4..de0e13c707 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -530,7 +530,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { const reg = &sym.payload.regular; if (reg.file) |file| { if (file != self) { - log.warn("deduping definition of {s} in {s}", .{ sym.name, self.name.? }); + log.debug("deduping definition of {s} in {s}", .{ sym.name, self.name.? }); block.deinit(); self.allocator.destroy(block); continue; diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 017ebf3835..c87216fe9c 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -1686,10 +1686,16 @@ fn resolveSymbols(self: *Zld) !void { // Fourth pass, handle synthetic symbols and flag any undefined references. 
if (self.globals.get("___dso_handle")) |symbol| { if (symbol.payload == .undef) { + const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; symbol.payload = .{ - .proxy = .{}, + .regular = .{ + .linkage = .translation_unit, + .address = seg.inner.vmaddr, + .weak_ref = true, + .local_sym_index = @intCast(u32, self.locals.items.len), + }, }; - try self.imports.append(self.allocator, symbol); + try self.locals.append(self.allocator, symbol); } } diff --git a/src/link/MachO/reloc.zig b/src/link/MachO/reloc.zig index 1799769db2..253cbdb2da 100644 --- a/src/link/MachO/reloc.zig +++ b/src/link/MachO/reloc.zig @@ -411,6 +411,8 @@ pub const Relocation = struct { }; pub fn resolve(self: Relocation, zld: *Zld) !void { + log.debug("relocating {}", .{self}); + const source_addr = blk: { const sym = zld.locals.items[self.block.local_sym_index]; break :blk sym.payload.regular.address + self.offset; @@ -497,7 +499,6 @@ pub const Relocation = struct { } }; - log.debug("relocating {}", .{self}); log.debug(" | source_addr = 0x{x}", .{source_addr}); log.debug(" | target_addr = 0x{x}", .{target_addr}); @@ -703,7 +704,9 @@ pub const Parser = struct { if (!is_right_segment) break :rebase false; if (sect_type != macho.S_LITERAL_POINTERS and - sect_type != macho.S_REGULAR) + sect_type != macho.S_REGULAR and + sect_type != macho.S_MOD_INIT_FUNC_POINTERS and + sect_type != macho.S_MOD_TERM_FUNC_POINTERS) { break :rebase false; } From 2a880897b0f7466604058422a7e2fc9f401a4284 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 11 Jul 2021 13:58:36 +0200 Subject: [PATCH 38/81] zld: add basic Signed reloc resolution and fix handling Unsigned for x86_64. 
--- src/link/MachO/Zld.zig | 4 ---- src/link/MachO/reloc.zig | 43 +++++++++++++--------------------------- 2 files changed, 14 insertions(+), 33 deletions(-) diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index c87216fe9c..83c857ee5e 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -1039,8 +1039,6 @@ fn allocateSegment(self: *Zld, index: u16, offset: u64) !void { } fn allocateTextBlocks(self: *Zld) !void { - log.debug("allocating text blocks", .{}); - var it = self.blocks.iterator(); while (it.next()) |entry| { const match = entry.key_ptr.*; @@ -1103,8 +1101,6 @@ fn allocateTextBlocks(self: *Zld) !void { } fn writeTextBlocks(self: *Zld) !void { - log.debug("writing text blocks", .{}); - var it = self.blocks.iterator(); while (it.next()) |entry| { const match = entry.key_ptr.*; diff --git a/src/link/MachO/reloc.zig b/src/link/MachO/reloc.zig index 253cbdb2da..c96d338c63 100644 --- a/src/link/MachO/reloc.zig +++ b/src/link/MachO/reloc.zig @@ -48,18 +48,11 @@ pub const Relocation = struct { /// => * is unreachable is_64bit: bool, - source_sect_addr: ?u64 = null, - pub fn resolve(self: Unsigned, base: Relocation, _: u64, target_addr: u64) !void { - const addend = if (self.source_sect_addr) |addr| - self.addend - @intCast(i64, addr) - else - self.addend; - const result = if (self.subtractor) |subtractor| - @intCast(i64, target_addr) - @intCast(i64, subtractor.payload.regular.address) + addend + @intCast(i64, target_addr) - @intCast(i64, subtractor.payload.regular.address) + self.addend else - @intCast(i64, target_addr) + addend; + @intCast(i64, target_addr) + self.addend; if (self.is_64bit) { mem.writeIntLittle(u64, base.block.code[base.offset..][0..8], @bitCast(u64, result)); @@ -344,10 +337,6 @@ pub const Relocation = struct { correction: i4, pub fn resolve(self: Signed, base: Relocation, source_addr: u64, target_addr: u64) !void { - _ = self; - _ = base; - _ = source_addr; - _ = target_addr; // const target_addr = target_addr: { // if 
(signed.base.target == .section) { // const source_target = @intCast(i64, args.source_source_sect_addr.?) + @intCast(i64, signed.base.offset) + signed.addend + 4; @@ -356,16 +345,12 @@ pub const Relocation = struct { // } // break :target_addr @intCast(i64, args.target_addr) + signed.addend; // }; - // const displacement = try math.cast( - // i32, - // target_addr - @intCast(i64, args.source_addr) - signed.correction - 4, - // ); - - // log.debug(" | addend 0x{x}", .{signed.addend}); - // log.debug(" | correction 0x{x}", .{signed.correction}); - // log.debug(" | displacement 0x{x}", .{displacement}); - - // mem.writeIntLittle(u32, signed.base.code[0..4], @bitCast(u32, displacement)); + const actual_target_addr = @intCast(i64, target_addr) + self.addend; + const displacement = try math.cast( + i32, + actual_target_addr - @intCast(i64, source_addr) - self.correction - 4, + ); + mem.writeIntLittle(u32, base.block.code[base.offset..][0..4], @bitCast(u32, displacement)); } pub fn format(self: Signed, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { @@ -759,21 +744,21 @@ pub const Parser = struct { 2 => false, else => unreachable, }; - const addend: i64 = if (is_64bit) + + var addend: i64 = if (is_64bit) mem.readIntLittle(i64, self.block.code[parsed.offset..][0..8]) else mem.readIntLittle(i32, self.block.code[parsed.offset..][0..4]); - const source_sect_addr = if (rel.r_extern == 0) blk: { - if (parsed.target.payload == .regular) break :blk parsed.target.payload.regular.address; - break :blk null; - } else null; + + if (rel.r_extern == 0) { + addend -= @intCast(i64, parsed.target.payload.regular.address); + } parsed.payload = .{ .unsigned = .{ .subtractor = self.subtractor, .is_64bit = is_64bit, .addend = addend, - .source_sect_addr = source_sect_addr, }, }; From b8fce705ad793aab62507d90c44a3fb7a548b90b Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 11 Jul 2021 21:25:04 +0200 Subject: [PATCH 39/81] zld: refactor nlist and reloc 
filtering logic --- src/link/MachO/Object.zig | 70 ++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 38 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index de0e13c707..208ff6d600 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -273,59 +273,56 @@ pub fn readLoadCommands(self: *Object, reader: anytype) !void { } } +fn findFirst(comptime T: type, haystack: []T, start: usize, predicate: anytype) usize { + if (!@hasDecl(@TypeOf(predicate), "predicate")) + @compileError("Predicate is required to define fn predicate(@This(), T) bool"); + + if (start == haystack.len) return start; + + var i = start; + while (i < haystack.len) : (i += 1) { + if (predicate.predicate(haystack[i])) break; + } + return i; +} + const NlistWithIndex = struct { nlist: macho.nlist_64, index: u32, - fn lessThan(_: void, lhs: @This(), rhs: @This()) bool { + fn lessThan(_: void, lhs: NlistWithIndex, rhs: NlistWithIndex) bool { return lhs.nlist.n_value < rhs.nlist.n_value; } - fn filterInSection(symbols: []@This(), sect_id: u8) []@This() { - var start: usize = 0; - var end: usize = symbols.len; + fn filterInSection(symbols: []NlistWithIndex, sect: macho.section_64) []NlistWithIndex { + const Predicate = struct { + addr: u64, - while (true) { - var change = false; - if (symbols[start].nlist.n_sect != sect_id) { - start += 1; - change = true; - } - if (symbols[end - 1].nlist.n_sect != sect_id) { - end -= 1; - change = true; + fn predicate(self: @This(), symbol: NlistWithIndex) bool { + return symbol.nlist.n_value >= self.addr; } + }; - if (start == end) break; - if (!change) break; - } + const start = findFirst(NlistWithIndex, symbols, 0, Predicate{ .addr = sect.addr }); + const end = findFirst(NlistWithIndex, symbols, start, Predicate{ .addr = sect.addr + sect.size }); return symbols[start..end]; } }; -fn filterRelocs(relocs: []macho.relocation_info, start: u64, end: u64) []macho.relocation_info { - if (relocs.len == 0) 
return relocs; +fn filterRelocs(relocs: []macho.relocation_info, start_addr: u64, end_addr: u64) []macho.relocation_info { + const Predicate = struct { + addr: u64, - var start_id: usize = 0; - var end_id: usize = relocs.len; - - while (true) { - var change = false; - if (relocs[start_id].r_address >= end) { - start_id += 1; - change = true; - } - if (relocs[end_id - 1].r_address < start) { - end_id -= 1; - change = true; + fn predicate(self: @This(), rel: macho.relocation_info) bool { + return rel.r_address < self.addr; } + }; - if (start_id == end_id) break; - if (!change) break; - } + const start = findFirst(macho.relocation_info, relocs, 0, Predicate{ .addr = end_addr }); + const end = findFirst(macho.relocation_info, relocs, start, Predicate{ .addr = start_addr }); - return relocs[start_id..end_id]; + return relocs[start..end]; } const TextBlockParser = struct { @@ -501,10 +498,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { const relocs = mem.bytesAsSlice(macho.relocation_info, raw_relocs); // Symbols within this section only. - const filtered_nlists = NlistWithIndex.filterInSection( - sorted_nlists.items, - sect_id + 1, - ); + const filtered_nlists = NlistWithIndex.filterInSection(sorted_nlists.items, sect); // Is there any padding between symbols within the section? const is_splittable = self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; From e3fe9a9df55e36f8ee402b8edb26267b5145ce08 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 11 Jul 2021 21:54:19 +0200 Subject: [PATCH 40/81] zld: fix parsing and resolving Signed relocs on x86_64-macos since this an x86_64 only relocation type. 
--- src/link/MachO/reloc.zig | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/link/MachO/reloc.zig b/src/link/MachO/reloc.zig index c96d338c63..7291919765 100644 --- a/src/link/MachO/reloc.zig +++ b/src/link/MachO/reloc.zig @@ -337,14 +337,6 @@ pub const Relocation = struct { correction: i4, pub fn resolve(self: Signed, base: Relocation, source_addr: u64, target_addr: u64) !void { - // const target_addr = target_addr: { - // if (signed.base.target == .section) { - // const source_target = @intCast(i64, args.source_source_sect_addr.?) + @intCast(i64, signed.base.offset) + signed.addend + 4; - // const source_disp = source_target - @intCast(i64, args.source_target_sect_addr.?); - // break :target_addr @intCast(i64, args.target_addr) + source_disp; - // } - // break :target_addr @intCast(i64, args.target_addr) + signed.addend; - // }; const actual_target_addr = @intCast(i64, target_addr) + self.addend; const displacement = try math.cast( i32, @@ -882,7 +874,14 @@ pub const Parser = struct { .X86_64_RELOC_SIGNED_4 => 4, else => unreachable, }; - const addend = mem.readIntLittle(i32, self.block.code[parsed.offset..][0..4]) + correction; + var addend = mem.readIntLittle(i32, self.block.code[parsed.offset..][0..4]) + correction; + + if (rel.r_extern == 0) { + const source_sym = self.zld.locals.items[self.block.local_sym_index].payload.regular; + const source_addr = source_sym.address + parsed.offset + @intCast(u32, addend) + 4; + const target_sym = parsed.target.payload.regular; + addend = try math.cast(i32, @intCast(i64, source_addr) - @intCast(i64, target_sym.address)); + } parsed.payload = .{ .signed = .{ From 496903c6a807389b3c4940370601ea338b9047f7 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 11 Jul 2021 23:34:35 +0200 Subject: [PATCH 41/81] zld: add DICE support mainly for x86_64-macos --- src/link/MachO/Object.zig | 55 +++++++++++++++++++++++++++++++++++++ src/link/MachO/Zld.zig | 57 
++++++++++++++++++++++++++++----------- 2 files changed, 96 insertions(+), 16 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 208ff6d600..2a200933b7 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -325,6 +325,21 @@ fn filterRelocs(relocs: []macho.relocation_info, start_addr: u64, end_addr: u64) return relocs[start..end]; } +fn filterDice(dices: []macho.data_in_code_entry, start_addr: u64, end_addr: u64) []macho.data_in_code_entry { + const Predicate = struct { + addr: u64, + + fn predicate(self: @This(), dice: macho.data_in_code_entry) bool { + return dice.offset >= self.addr; + } + }; + + const start = findFirst(macho.data_in_code_entry, dices, 0, Predicate{ .addr = start_addr }); + const end = findFirst(macho.data_in_code_entry, dices, start, Predicate{ .addr = end_addr }); + + return dices[start..end]; +} + const TextBlockParser = struct { allocator: *Allocator, section: macho.section_64, @@ -445,6 +460,23 @@ const TextBlockParser = struct { try self.object.parseRelocs(self.zld, relocs, block, start_addr); } + if (self.zld.has_dices) { + const dices = filterDice( + self.object.data_in_code_entries.items, + senior_nlist.nlist.n_value, + senior_nlist.nlist.n_value + size, + ); + try block.dices.ensureTotalCapacity(dices.len); + + for (dices) |dice| { + block.dices.appendAssumeCapacity(.{ + .offset = dice.offset - try math.cast(u32, senior_nlist.nlist.n_value), + .length = dice.length, + .kind = dice.kind, + }); + } + } + self.index += 1; return block; @@ -504,6 +536,16 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { const is_splittable = self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; // const is_splittable = false; + const has_dices: bool = blk: { + if (self.text_section_index) |index| { + if (index != id) break :blk false; + if (self.data_in_code_entries.items.len == 0) break :blk false; + break :blk true; + } + break :blk false; + }; + zld.has_dices = has_dices; + next: { 
if (is_splittable) blocks: { if (filtered_nlists.len == 0) break :blocks; @@ -593,6 +635,19 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { try self.parseRelocs(zld, relocs, block, 0); } + if (zld.has_dices) { + const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + sect.size); + try block.dices.ensureTotalCapacity(dices.len); + + for (dices) |dice| { + block.dices.appendAssumeCapacity(.{ + .offset = dice.offset - try math.cast(u32, sect.addr), + .length = dice.length, + .kind = dice.kind, + }); + } + } + // Since this is block gets a helper local temporary symbol that didn't exist // in the object file which encompasses the entire section, we need traverse // the filtered symbols and note which symbol is contained within so that diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 83c857ee5e..995269440f 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -114,6 +114,8 @@ stub_helper_stubs_start_off: ?u64 = null, blocks: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}, +has_dices: bool = false, + pub const Output = struct { tag: enum { exe, dylib }, path: []const u8, @@ -131,6 +133,7 @@ pub const TextBlock = struct { size: u64, alignment: u32, rebases: std.ArrayList(u64), + dices: std.ArrayList(macho.data_in_code_entry), next: ?*TextBlock = null, prev: ?*TextBlock = null, @@ -149,6 +152,7 @@ pub const TextBlock = struct { .size = undefined, .alignment = undefined, .rebases = std.ArrayList(u64).init(allocator), + .dices = std.ArrayList(macho.data_in_code_entry).init(allocator), }; } @@ -163,6 +167,7 @@ pub const TextBlock = struct { self.allocator.free(self.code); self.relocs.deinit(); self.rebases.deinit(); + self.dices.deinit(); } pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { @@ -205,6 +210,9 @@ pub const TextBlock = struct { if (self.rebases.items.len > 0) { log.warn(" rebases: {any}", .{self.rebases.items}); } + if (self.dices.items.len > 0) { + log.warn(" dices: {any}", 
.{self.dices.items}); + } log.warn(" size = {}", .{self.size}); log.warn(" align = {}", .{self.alignment}); } @@ -2071,8 +2079,7 @@ fn flush(self: *Zld) !void { try self.writeBindInfoTable(); try self.writeLazyBindInfoTable(); try self.writeExportInfo(); - // TODO DICE for x86_64 - // try self.writeDataInCode(); + try self.writeDices(); { const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; @@ -2606,7 +2613,9 @@ fn writeStringTable(self: *Zld) !void { } } -fn writeDataInCode(self: *Zld) !void { +fn writeDices(self: *Zld) !void { + if (!self.has_dices) return; + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; const dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].LinkeditData; const fileoff = seg.inner.fileoff + seg.inner.filesize; @@ -2614,24 +2623,40 @@ fn writeDataInCode(self: *Zld) !void { var buf = std.ArrayList(u8).init(self.allocator); defer buf.deinit(); + var block: *TextBlock = self.blocks.get(.{ + .seg = self.text_segment_cmd_index orelse return, + .sect = self.text_section_index orelse return, + }) orelse return; + + while (block.prev) |prev| { + block = prev; + } + const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; const text_sect = text_seg.sections.items[self.text_section_index.?]; - for (self.objects.items) |object| { - const source_sect = object.sections.items[object.text_section_index.?]; - const target_map = source_sect.target_map orelse continue; - try buf.ensureCapacity( - buf.items.len + object.data_in_code_entries.items.len * @sizeOf(macho.data_in_code_entry), - ); - for (object.data_in_code_entries.items) |dice| { - const new_dice: macho.data_in_code_entry = .{ - .offset = text_sect.offset + target_map.offset + dice.offset, - .length = dice.length, - .kind = dice.kind, - }; - buf.appendSliceAssumeCapacity(mem.asBytes(&new_dice)); + while (true) { + if (block.dices.items.len > 0) { + const sym = 
self.locals.items[block.local_sym_index]; + const reg = sym.payload.regular; + const base_off = try math.cast(u32, reg.address - text_sect.addr + text_sect.offset); + + try buf.ensureUnusedCapacity(block.dices.items.len * @sizeOf(macho.data_in_code_entry)); + for (block.dices.items) |dice| { + const rebased_dice = macho.data_in_code_entry{ + .offset = base_off + dice.offset, + .length = dice.length, + .kind = dice.kind, + }; + buf.appendSliceAssumeCapacity(mem.asBytes(&rebased_dice)); + } } + + if (block.next) |next| { + block = next; + } else break; } + const datasize = @intCast(u32, buf.items.len); dice_cmd.dataoff = @intCast(u32, fileoff); From da07251000c5247432017535e5cbcae05be6e00a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 11 Jul 2021 23:42:18 +0200 Subject: [PATCH 42/81] zld: make addend i64 in Signed reloc --- src/link/MachO/reloc.zig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/link/MachO/reloc.zig b/src/link/MachO/reloc.zig index 7291919765..2008d53e3f 100644 --- a/src/link/MachO/reloc.zig +++ b/src/link/MachO/reloc.zig @@ -333,7 +333,7 @@ pub const Relocation = struct { }; pub const Signed = struct { - addend: i32, + addend: i64, correction: i4, pub fn resolve(self: Signed, base: Relocation, source_addr: u64, target_addr: u64) !void { @@ -874,13 +874,13 @@ pub const Parser = struct { .X86_64_RELOC_SIGNED_4 => 4, else => unreachable, }; - var addend = mem.readIntLittle(i32, self.block.code[parsed.offset..][0..4]) + correction; + var addend: i64 = mem.readIntLittle(i32, self.block.code[parsed.offset..][0..4]) + correction; if (rel.r_extern == 0) { const source_sym = self.zld.locals.items[self.block.local_sym_index].payload.regular; const source_addr = source_sym.address + parsed.offset + @intCast(u32, addend) + 4; const target_sym = parsed.target.payload.regular; - addend = try math.cast(i32, @intCast(i64, source_addr) - @intCast(i64, target_sym.address)); + addend = @intCast(i64, source_addr) - @intCast(i64, 
target_sym.address); } parsed.payload = .{ From de30a704b134d17d61a36d41c058c7b4994cd7f2 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 12 Jul 2021 20:36:01 +0200 Subject: [PATCH 43/81] zld: map [section addr, first symbol) to a tracked TextBlock which applies exclusively to x86_64-macos. --- src/link/MachO/Object.zig | 84 ++++++++++++++++++++++++++++++++++++++- src/link/MachO/Symbol.zig | 1 + 2 files changed, 84 insertions(+), 1 deletion(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 2a200933b7..abd5a39a69 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -550,6 +550,86 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { if (is_splittable) blocks: { if (filtered_nlists.len == 0) break :blocks; + // If the first nlist does not match the start of the section, + // then we need encapsulate the memory range [section start, first symbol) + // as a temporary symbol and insert the matching TextBlock. + const first_nlist = filtered_nlists[0].nlist; + if (first_nlist.n_value > sect.addr) { + const symbol = self.sections_as_symbols.get(sect_id) orelse symbol: { + const name = try std.fmt.allocPrint(self.allocator, "l_{s}_{s}_{s}", .{ + self.name.?, + segmentName(sect), + sectionName(sect), + }); + defer self.allocator.free(name); + const symbol = try Symbol.new(self.allocator, name); + try self.sections_as_symbols.putNoClobber(self.allocator, sect_id, symbol); + break :symbol symbol; + }; + + const local_sym_index = @intCast(u32, zld.locals.items.len); + symbol.payload = .{ + .regular = .{ + .linkage = .translation_unit, + .address = sect.addr, + .segment_id = match.seg, + .section_id = match.sect, + .file = self, + .local_sym_index = local_sym_index, + }, + }; + try zld.locals.append(zld.allocator, symbol); + + const block_code = code[0 .. 
first_nlist.n_value - sect.addr]; + const block_size = block_code.len; + + const block = try self.allocator.create(TextBlock); + errdefer self.allocator.destroy(block); + + block.* = TextBlock.init(self.allocator); + block.local_sym_index = local_sym_index; + block.code = try self.allocator.dupe(u8, block_code); + block.size = block_size; + block.alignment = sect.@"align"; + + const block_relocs = filterRelocs(relocs, 0, block_size); + if (block_relocs.len > 0) { + try self.parseRelocs(zld, block_relocs, block, 0); + } + + if (zld.has_dices) { + const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + block_size); + try block.dices.ensureTotalCapacity(dices.len); + + for (dices) |dice| { + block.dices.appendAssumeCapacity(.{ + .offset = dice.offset - try math.cast(u32, sect.addr), + .length = dice.length, + .kind = dice.kind, + }); + } + } + + // Update target section's metadata + // TODO should we update segment's size here too? + // How does it tie with incremental space allocs? 
+ const tseg = &zld.load_commands.items[match.seg].Segment; + const tsect = &tseg.sections.items[match.sect]; + const new_alignment = math.max(tsect.@"align", block.alignment); + const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); + const new_size = mem.alignForwardGeneric(u64, tsect.size + block.size, new_alignment_pow_2); + tsect.size = new_size; + tsect.@"align" = new_alignment; + + if (zld.blocks.getPtr(match)) |last| { + last.*.next = block; + block.prev = last.*; + last.* = block; + } else { + try zld.blocks.putNoClobber(zld.allocator, match, block); + } + } + var parser = TextBlockParser{ .allocator = self.allocator, .section = sect, @@ -610,6 +690,8 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { try self.sections_as_symbols.putNoClobber(self.allocator, sect_id, symbol); break :symbol symbol; }; + + const local_sym_index = @intCast(u32, zld.locals.items.len); symbol.payload = .{ .regular = .{ .linkage = .translation_unit, @@ -617,9 +699,9 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { .segment_id = match.seg, .section_id = match.sect, .file = self, + .local_sym_index = local_sym_index, }, }; - const local_sym_index = @intCast(u32, zld.locals.items.len); try zld.locals.append(zld.allocator, symbol); const block = try self.allocator.create(TextBlock); diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index 8835bb9a0f..15d4c180dd 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -79,6 +79,7 @@ pub const Regular = struct { if (self.file) |file| { try std.fmt.format(writer, ".file = {s}, ", .{file.name.?}); } + try std.fmt.format(writer, ".local_sym_index = {}, ", .{self.local_sym_index}); try std.fmt.format(writer, "}}", .{}); } From e17f12dd643e9edd90abb66b183d2a59eddc248c Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 12 Jul 2021 23:56:36 +0200 Subject: [PATCH 44/81] zld: fix incorrectly worked out section size Also, add a solution to a degenerate case where on x86_64 a 
relocation refers to a cell in a section via section start address even though a symbol exists. In such case, make the section spawned symbol an alias of the actual symbol. --- src/link/MachO/Object.zig | 48 +++++++++++++++++++++++---------------- src/link/MachO/Zld.zig | 23 ++++++++----------- src/link/MachO/reloc.zig | 4 ++-- 3 files changed, 41 insertions(+), 34 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index abd5a39a69..b53a31af56 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -431,29 +431,26 @@ const TextBlockParser = struct { else max_align; - const alias_only_indices = if (aliases.items.len > 0) blk: { - var out = std.ArrayList(u32).init(self.allocator); - try out.ensureTotalCapacity(aliases.items.len); + const block = try self.allocator.create(TextBlock); + errdefer self.allocator.destroy(block); + + block.* = TextBlock.init(self.allocator); + block.local_sym_index = senior_nlist.index; + block.code = try self.allocator.dupe(u8, code); + block.size = size; + block.alignment = actual_align; + + if (aliases.items.len > 0) { + try block.aliases.ensureTotalCapacity(aliases.items.len); for (aliases.items) |alias| { - out.appendAssumeCapacity(alias.index); + block.aliases.appendAssumeCapacity(alias.index); const sym = self.zld.locals.items[alias.index]; const reg = &sym.payload.regular; reg.segment_id = self.match.seg; reg.section_id = self.match.sect; } - break :blk out.toOwnedSlice(); - } else null; - - const block = try self.allocator.create(TextBlock); - errdefer self.allocator.destroy(block); - - block.* = TextBlock.init(self.allocator); - block.local_sym_index = senior_nlist.index; - block.aliases = alias_only_indices; - block.code = try self.allocator.dupe(u8, code); - block.size = size; - block.alignment = actual_align; + } const relocs = filterRelocs(self.relocs, start_addr, end_addr); if (relocs.len > 0) { @@ -617,7 +614,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { const 
tsect = &tseg.sections.items[match.sect]; const new_alignment = math.max(tsect.@"align", block.alignment); const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); - const new_size = mem.alignForwardGeneric(u64, tsect.size + block.size, new_alignment_pow_2); + const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; tsect.size = new_size; tsect.@"align" = new_alignment; @@ -653,6 +650,19 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { } } + if (reg.address == sect.addr) { + if (self.sections_as_symbols.get(sect_id)) |alias| { + // Add alias. + const local_sym_index = @intCast(u32, zld.locals.items.len); + const reg_alias = &alias.payload.regular; + reg_alias.segment_id = match.seg; + reg_alias.section_id = match.sect; + reg_alias.local_sym_index = local_sym_index; + try block.aliases.append(local_sym_index); + try zld.locals.append(zld.allocator, alias); + } + } + // Update target section's metadata // TODO should we update segment's size here too? // How does it tie with incremental space allocs? 
@@ -660,7 +670,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { const tsect = &tseg.sections.items[match.sect]; const new_alignment = math.max(tsect.@"align", block.alignment); const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); - const new_size = mem.alignForwardGeneric(u64, tsect.size + block.size, new_alignment_pow_2); + const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; tsect.size = new_size; tsect.@"align" = new_alignment; @@ -764,7 +774,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { const tsect = &tseg.sections.items[match.sect]; const new_alignment = math.max(tsect.@"align", block.alignment); const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); - const new_size = mem.alignForwardGeneric(u64, tsect.size + block.size, new_alignment_pow_2); + const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; tsect.size = new_size; tsect.@"align" = new_alignment; diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 995269440f..40ab52f062 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -125,7 +125,7 @@ pub const Output = struct { pub const TextBlock = struct { allocator: *Allocator, local_sym_index: u32, - aliases: ?[]u32 = null, + aliases: std.ArrayList(u32), references: std.AutoArrayHashMap(u32, void), contained: ?[]SymbolAtOffset = null, code: []u8, @@ -146,6 +146,7 @@ pub const TextBlock = struct { return .{ .allocator = allocator, .local_sym_index = undefined, + .aliases = std.ArrayList(u32).init(allocator), .references = std.AutoArrayHashMap(u32, void).init(allocator), .code = undefined, .relocs = std.ArrayList(Relocation).init(allocator), @@ -157,9 +158,7 @@ pub const TextBlock = struct { } pub fn deinit(self: *TextBlock) void { - if (self.aliases) |aliases| { - self.allocator.free(aliases); - } + self.aliases.deinit(); self.references.deinit(); if (self.contained) |contained| { 
self.allocator.free(contained); @@ -179,9 +178,9 @@ pub const TextBlock = struct { pub fn print_this(self: *const TextBlock, zld: *Zld) void { log.warn("TextBlock", .{}); log.warn(" {}: {}", .{ self.local_sym_index, zld.locals.items[self.local_sym_index] }); - if (self.aliases) |aliases| { + if (self.aliases.items.len > 0) { log.warn(" aliases:", .{}); - for (aliases) |index| { + for (self.aliases.items) |index| { log.warn(" {}: {}", .{ index, zld.locals.items[index] }); } } @@ -1082,12 +1081,10 @@ fn allocateTextBlocks(self: *Zld) !void { }); // Update each alias (if any) - if (block.aliases) |aliases| { - for (aliases) |index| { - const alias_sym = self.locals.items[index]; - assert(alias_sym.payload == .regular); - alias_sym.payload.regular.address = base_addr; - } + for (block.aliases.items) |index| { + const alias_sym = self.locals.items[index]; + assert(alias_sym.payload == .regular); + alias_sym.payload.regular.address = base_addr; } // Update each symbol contained within the TextBlock @@ -1623,7 +1620,7 @@ fn resolveSymbols(self: *Zld) !void { const tsect = &tseg.sections.items[match.sect]; const new_alignment = math.max(tsect.@"align", block.alignment); const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); - const new_size = mem.alignForwardGeneric(u64, tsect.size + block.size, new_alignment_pow_2); + const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; tsect.size = new_size; tsect.@"align" = new_alignment; diff --git a/src/link/MachO/reloc.zig b/src/link/MachO/reloc.zig index 2008d53e3f..d92d047cd9 100644 --- a/src/link/MachO/reloc.zig +++ b/src/link/MachO/reloc.zig @@ -878,9 +878,9 @@ pub const Parser = struct { if (rel.r_extern == 0) { const source_sym = self.zld.locals.items[self.block.local_sym_index].payload.regular; - const source_addr = source_sym.address + parsed.offset + @intCast(u32, addend) + 4; + const source_addr = source_sym.address + parsed.offset + 4; const target_sym = 
parsed.target.payload.regular; - addend = @intCast(i64, source_addr) - @intCast(i64, target_sym.address); + addend = @intCast(i64, source_addr) + addend - @intCast(i64, target_sym.address); } parsed.payload = .{ From 398672eb30dce08bd3370cde7adeb503c64a4892 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 13 Jul 2021 18:42:17 +0200 Subject: [PATCH 45/81] zld: add temp basic handling of debugging stabs --- src/link/MachO/Object.zig | 137 ++++++++++++++++++----------------- src/link/MachO/Zld.zig | 148 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 214 insertions(+), 71 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index b53a31af56..49eb34c522 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -27,7 +27,6 @@ header: ?macho.mach_header_64 = null, file: ?fs.File = null, file_offset: ?u32 = null, name: ?[]const u8 = null, -mtime: ?u64 = null, load_commands: std.ArrayListUnmanaged(LoadCommand) = .{}, @@ -51,9 +50,17 @@ symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{}, strtab: std.ArrayListUnmanaged(u8) = .{}, data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, +// Debug info +debug_info: ?DebugInfo = null, +tu_name: ?[]const u8 = null, +tu_comp_dir: ?[]const u8 = null, +mtime: ?u64 = null, + symbols: std.ArrayListUnmanaged(*Symbol) = .{}, sections_as_symbols: std.AutoHashMapUnmanaged(u8, *Symbol) = .{}, +text_blocks: std.ArrayListUnmanaged(*TextBlock) = .{}, + const DebugInfo = struct { inner: dwarf.DwarfInfo, debug_info: []u8, @@ -160,6 +167,19 @@ pub fn deinit(self: *Object) void { self.strtab.deinit(self.allocator); self.symbols.deinit(self.allocator); self.sections_as_symbols.deinit(self.allocator); + self.text_blocks.deinit(self.allocator); + + if (self.debug_info) |*db| { + db.deinit(self.allocator); + } + + if (self.tu_name) |n| { + self.allocator.free(n); + } + + if (self.tu_comp_dir) |n| { + self.allocator.free(n); + } if (self.name) |n| { self.allocator.free(n); 
@@ -203,6 +223,7 @@ pub fn parse(self: *Object) !void { try self.readLoadCommands(reader); try self.parseSymtab(); try self.parseDataInCode(); + try self.parseDebugInfo(); } pub fn readLoadCommands(self: *Object, reader: anytype) !void { @@ -431,11 +452,27 @@ const TextBlockParser = struct { else max_align; + const stab: ?TextBlock.Stab = if (self.object.debug_info) |di| blk: { + // TODO there has to be a better to handle this. + for (di.inner.func_list.items) |func| { + if (func.pc_range) |range| { + if (senior_nlist.nlist.n_value >= range.start and senior_nlist.nlist.n_value < range.end) { + break :blk TextBlock.Stab{ + .function = range.end - range.start, + }; + } + } + } + if (self.zld.globals.contains(senior_sym.name)) break :blk .global; + break :blk .static; + } else null; + const block = try self.allocator.create(TextBlock); errdefer self.allocator.destroy(block); block.* = TextBlock.init(self.allocator); block.local_sym_index = senior_nlist.index; + block.stab = stab; block.code = try self.allocator.dupe(u8, code); block.size = size; block.alignment = actual_align; @@ -531,9 +568,11 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { // Is there any padding between symbols within the section? const is_splittable = self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; + // TODO is it perhaps worth skip parsing subsections in Debug mode and not worry about + // duplicates at all? Need some benchmarks! 
// const is_splittable = false; - const has_dices: bool = blk: { + zld.has_dices = blk: { if (self.text_section_index) |index| { if (index != id) break :blk false; if (self.data_in_code_entries.items.len == 0) break :blk false; @@ -541,7 +580,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { } break :blk false; }; - zld.has_dices = has_dices; + zld.has_stabs = zld.has_stabs or self.debug_info != null; next: { if (is_splittable) blocks: { @@ -625,6 +664,8 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { } else { try zld.blocks.putNoClobber(zld.allocator, match, block); } + + try self.text_blocks.append(self.allocator, block); } var parser = TextBlockParser{ @@ -681,6 +722,8 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { } else { try zld.blocks.putNoClobber(zld.allocator, match, block); } + + try self.text_blocks.append(self.allocator, block); } break :next; @@ -758,9 +801,25 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { reg.segment_id = match.seg; reg.section_id = match.sect; + const stab: ?TextBlock.Stab = if (self.debug_info) |di| blk: { + // TODO there has to be a better to handle this. 
+ for (di.inner.func_list.items) |func| { + if (func.pc_range) |range| { + if (reg.address >= range.start and reg.address < range.end) { + break :blk TextBlock.Stab{ + .function = range.end - range.start, + }; + } + } + } + if (zld.globals.contains(sym.name)) break :blk .global; + break :blk .static; + } else null; + contained.appendAssumeCapacity(.{ .local_sym_index = reg.local_sym_index, .offset = nlist_with_index.nlist.n_value - sect.addr, + .stab = stab, }); } @@ -785,6 +844,8 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { } else { try zld.blocks.putNoClobber(zld.allocator, match, block); } + + try self.text_blocks.append(self.allocator, block); } } } @@ -861,13 +922,12 @@ fn parseSymtab(self: *Object) !void { } pub fn parseDebugInfo(self: *Object) !void { + log.debug("parsing debug info in '{s}'", .{self.name.?}); + var debug_info = blk: { var di = try DebugInfo.parseFromObject(self.allocator, self); break :blk di orelse return; }; - defer debug_info.deinit(self.allocator); - - log.debug("parsing debug info in '{s}'", .{self.name.?}); // We assume there is only one CU. 
const compile_unit = debug_info.inner.findCompileUnit(0x0) catch |err| switch (err) { @@ -881,6 +941,10 @@ pub fn parseDebugInfo(self: *Object) !void { const name = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT_name); const comp_dir = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT_comp_dir); + self.debug_info = debug_info; + self.tu_name = try self.allocator.dupe(u8, name); + self.tu_comp_dir = try self.allocator.dupe(u8, comp_dir); + if (self.mtime == null) { self.mtime = mtime: { const file = self.file orelse break :mtime 0; @@ -888,67 +952,6 @@ pub fn parseDebugInfo(self: *Object) !void { break :mtime @intCast(u64, @divFloor(stat.mtime, 1_000_000_000)); }; } - - try self.stabs.ensureUnusedCapacity(self.allocator, self.symbols.items.len + 4); - - // Current dir - self.stabs.appendAssumeCapacity(try Symbol.Stab.new(self.allocator, comp_dir, .{ - .kind = .so, - .file = self, - })); - - // Artifact name - self.stabs.appendAssumeCapacity(try Symbol.Stab.new(self.allocator, name, .{ - .kind = .so, - .file = self, - })); - - // Path to object file with debug info - self.stabs.appendAssumeCapacity(try Symbol.Stab.new(self.allocator, self.name.?, .{ - .kind = .oso, - .file = self, - })); - - for (self.symbols.items) |sym| { - if (sym.cast(Symbol.Regular)) |reg| { - const size: u64 = blk: for (debug_info.inner.func_list.items) |func| { - if (func.pc_range) |range| { - if (reg.address >= range.start and reg.address < range.end) { - break :blk range.end - range.start; - } - } - } else 0; - - const stab = try Symbol.Stab.new(self.allocator, sym.name, .{ - .kind = kind: { - if (size > 0) break :kind .function; - switch (reg.linkage) { - .translation_unit => break :kind .static, - else => break :kind .global, - } - }, - .size = size, - .symbol = sym, - .file = self, - }); - self.stabs.appendAssumeCapacity(stab); - } else if (sym.cast(Symbol.Tentative)) |_| { - const stab = try Symbol.Stab.new(self.allocator, sym.name, .{ - .kind = .global, - 
.size = 0, - .symbol = sym, - .file = self, - }); - self.stabs.appendAssumeCapacity(stab); - } - } - - // Closing delimiter. - const delim_stab = try Symbol.Stab.new(self.allocator, "", .{ - .kind = .so, - .file = self, - }); - self.stabs.appendAssumeCapacity(delim_stab); } pub fn parseDataInCode(self: *Object) !void { diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 40ab52f062..71b5d5ad58 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -115,6 +115,7 @@ stub_helper_stubs_start_off: ?u64 = null, blocks: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}, has_dices: bool = false, +has_stabs: bool = false, pub const Output = struct { tag: enum { exe, dylib }, @@ -125,6 +126,7 @@ pub const Output = struct { pub const TextBlock = struct { allocator: *Allocator, local_sym_index: u32, + stab: ?Stab = null, aliases: std.ArrayList(u32), references: std.AutoArrayHashMap(u32, void), contained: ?[]SymbolAtOffset = null, @@ -140,6 +142,76 @@ pub const TextBlock = struct { pub const SymbolAtOffset = struct { local_sym_index: u32, offset: u64, + stab: ?Stab = null, + }; + + pub const Stab = union(enum) { + function: u64, + static, + global, + + pub fn asNlists(stab: Stab, local_sym_index: u32, zld: *Zld) ![]macho.nlist_64 { + var nlists = std.ArrayList(macho.nlist_64).init(zld.allocator); + defer nlists.deinit(); + + const sym = zld.locals.items[local_sym_index]; + const reg = sym.payload.regular; + + switch (stab) { + .function => |size| { + try nlists.ensureUnusedCapacity(4); + const section_id = reg.sectionId(zld); + nlists.appendAssumeCapacity(.{ + .n_strx = 0, + .n_type = macho.N_BNSYM, + .n_sect = section_id, + .n_desc = 0, + .n_value = reg.address, + }); + nlists.appendAssumeCapacity(.{ + .n_strx = try zld.strtab.getOrPut(sym.name), + .n_type = macho.N_FUN, + .n_sect = section_id, + .n_desc = 0, + .n_value = reg.address, + }); + nlists.appendAssumeCapacity(.{ + .n_strx = 0, + .n_type = macho.N_FUN, + .n_sect = 0, + .n_desc = 
0, + .n_value = size, + }); + nlists.appendAssumeCapacity(.{ + .n_strx = 0, + .n_type = macho.N_ENSYM, + .n_sect = section_id, + .n_desc = 0, + .n_value = size, + }); + }, + .global => { + try nlists.append(.{ + .n_strx = try zld.strtab.getOrPut(sym.name), + .n_type = macho.N_GSYM, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + }, + .static => { + try nlists.append(.{ + .n_strx = try zld.strtab.getOrPut(sym.name), + .n_type = macho.N_STSYM, + .n_sect = reg.sectionId(zld), + .n_desc = 0, + .n_value = reg.address, + }); + }, + } + + return nlists.toOwnedSlice(); + } }; pub fn init(allocator: *Allocator) TextBlock { @@ -178,6 +250,9 @@ pub const TextBlock = struct { pub fn print_this(self: *const TextBlock, zld: *Zld) void { log.warn("TextBlock", .{}); log.warn(" {}: {}", .{ self.local_sym_index, zld.locals.items[self.local_sym_index] }); + if (self.stab) |stab| { + log.warn(" stab: {}", .{stab}); + } if (self.aliases.items.len > 0) { log.warn(" aliases:", .{}); for (self.aliases.items) |index| { @@ -193,10 +268,18 @@ pub const TextBlock = struct { if (self.contained) |contained| { log.warn(" contained symbols:", .{}); for (contained) |sym_at_off| { - log.warn(" {}: {}\n", .{ - sym_at_off.offset, - zld.locals.items[sym_at_off.local_sym_index], - }); + if (sym_at_off.stab) |stab| { + log.warn(" {}: {}, stab: {}\n", .{ + sym_at_off.offset, + zld.locals.items[sym_at_off.local_sym_index], + stab, + }); + } else { + log.warn(" {}: {}\n", .{ + sym_at_off.offset, + zld.locals.items[sym_at_off.local_sym_index], + }); + } } } log.warn(" code.len = {}", .{self.code.len}); @@ -2487,8 +2570,10 @@ fn writeSymbolTable(self: *Zld) !void { for (self.locals.items) |symbol, i| { if (i == 0) continue; // skip null symbol if (symbol.isTemp()) continue; // TODO when merging codepaths, this should go into freelist + const reg = symbol.payload.regular; const nlist = try symbol.asNlist(self, &self.strtab); + if (reg.linkage == .translation_unit) { try locals.append(nlist); } else { @@ 
-2496,6 +2581,61 @@ fn writeSymbolTable(self: *Zld) !void { } } + if (self.has_stabs) { + for (self.objects.items) |object| { + if (object.debug_info == null) continue; + + // Open scope + try locals.ensureUnusedCapacity(4); + locals.appendAssumeCapacity(.{ + .n_strx = try self.strtab.getOrPut(object.tu_comp_dir.?), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + locals.appendAssumeCapacity(.{ + .n_strx = try self.strtab.getOrPut(object.tu_name.?), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + locals.appendAssumeCapacity(.{ + .n_strx = try self.strtab.getOrPut(object.name.?), + .n_type = macho.N_OSO, + .n_sect = 0, + .n_desc = 1, + .n_value = object.mtime orelse 0, + }); + + for (object.text_blocks.items) |block| { + if (block.stab) |stab| { + const nlists = try stab.asNlists(block.local_sym_index, self); + defer self.allocator.free(nlists); + try locals.appendSlice(nlists); + } else { + const contained = block.contained orelse continue; + for (contained) |sym_at_off| { + const stab = sym_at_off.stab orelse continue; + const nlists = try stab.asNlists(sym_at_off.local_sym_index, self); + defer self.allocator.free(nlists); + try locals.appendSlice(nlists); + } + } + } + + // Close scope + locals.appendAssumeCapacity(.{ + .n_strx = 0, + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + } + } + var undefs = std.ArrayList(macho.nlist_64).init(self.allocator); defer undefs.deinit(); var undef_dir = std.StringHashMap(u32).init(self.allocator); From e3575cdad44e63f598b557ba3142675197875906 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 13 Jul 2021 22:58:25 +0200 Subject: [PATCH 46/81] zld: decommision use_lld for MachO Invoke `linkAsArchive` directly in MachO backend when LLVM is available and we are asked to create a static lib. 
--- src/Compilation.zig | 16 +++++++++------- src/link.zig | 2 +- src/link/MachO.zig | 20 +++++++++++++------- src/main.zig | 6 ++++++ 4 files changed, 29 insertions(+), 15 deletions(-) diff --git a/src/Compilation.zig b/src/Compilation.zig index 625de58c63..5146e757fa 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -866,6 +866,10 @@ pub fn create(gpa: *Allocator, options: InitOptions) !*Compilation { // Make a decision on whether to use LLD or our own linker. const use_lld = options.use_lld orelse blk: { + if (options.target.isDarwin()) { + break :blk false; + } + if (!build_options.have_llvm) break :blk false; @@ -903,11 +907,9 @@ pub fn create(gpa: *Allocator, options: InitOptions) !*Compilation { break :blk false; }; - const darwin_can_use_system_sdk = - // comptime conditions - ((build_options.have_llvm and comptime std.Target.current.isDarwin()) and - // runtime conditions - (use_lld and std.builtin.os.tag == .macos and options.target.isDarwin())); + const darwin_can_use_system_sdk = comptime std.Target.current.isDarwin() and + std.builtin.os.tag == .macos and + options.target.isDarwin(); const sysroot = blk: { if (options.sysroot) |sysroot| { @@ -924,10 +926,10 @@ pub fn create(gpa: *Allocator, options: InitOptions) !*Compilation { const lto = blk: { if (options.want_lto) |explicit| { - if (!use_lld) + if (!use_lld and !options.target.isDarwin()) return error.LtoUnavailableWithoutLld; break :blk explicit; - } else if (!use_lld) { + } else if (!use_lld and !options.target.isDarwin()) { break :blk false; } else if (options.c_source_files.len == 0) { break :blk false; diff --git a/src/link.zig b/src/link.zig index 02d9afaf07..894ec66fff 100644 --- a/src/link.zig +++ b/src/link.zig @@ -515,7 +515,7 @@ pub const File = struct { } } - fn linkAsArchive(base: *File, comp: *Compilation) !void { + pub fn linkAsArchive(base: *File, comp: *Compilation) !void { const tracy = trace(@src()); defer tracy.end(); diff --git a/src/link/MachO.zig 
b/src/link/MachO.zig index 8095366c15..91c457d4fe 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -341,7 +341,6 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio assert(options.object_format == .macho); if (options.use_llvm) return error.LLVM_BackendIsTODO_ForMachO; // TODO - if (options.use_lld) return error.LLD_LinkingIsTODO_ForMachO; // TODO const file = try options.emit.?.directory.handle.createFile(sub_path, .{ .truncate = false, @@ -358,6 +357,10 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio self.base.file = file; + if (options.output_mode == .Lib and options.link_mode == .Static) { + return self; + } + if (!options.strip and options.module != null) { // Create dSYM bundle. const dir = options.module.?.zig_cache_artifact_directory; @@ -393,12 +396,6 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio .n_value = 0, }); - switch (options.output_mode) { - .Exe => {}, - .Obj => {}, - .Lib => return error.TODOImplementWritingLibFiles, - } - try self.populateMissingMetadata(); try self.writeLocalSymbol(0); @@ -428,6 +425,15 @@ pub fn createEmpty(gpa: *Allocator, options: link.Options) !*MachO { } pub fn flush(self: *MachO, comp: *Compilation) !void { + if (self.base.options.output_mode == .Lib and self.base.options.link_mode == .Static) { + if (build_options.have_llvm) { + return self.base.linkAsArchive(comp); + } else { + log.err("TODO: non-LLVM archiver for MachO object files", .{}); + return error.TODOImplementWritingStaticLibFiles; + } + } + if (build_options.have_llvm and self.base.options.use_lld) { return self.linkWithZld(comp); } else { diff --git a/src/main.zig b/src/main.zig index 2b961bb64c..9d515cc398 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1646,6 +1646,12 @@ fn buildOutputType( } } + if (use_lld) |opt| { + if (opt and cross_target.isDarwin()) { + fatal("-fLLD requested with Mach-O object format. 
Only the self-hosted linker is supported for this target.", .{}); + } + } + if (comptime std.Target.current.isDarwin()) { // If we want to link against frameworks, we need system headers. if (framework_dirs.items.len > 0 or frameworks.items.len > 0) From 9ca69c51e76525e1e753588375036ea711f72df0 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 13 Jul 2021 23:20:51 +0200 Subject: [PATCH 47/81] zld: error out if LTO is requested targeting Darwin --- src/Compilation.zig | 4 +++- src/main.zig | 8 +++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/Compilation.zig b/src/Compilation.zig index 5146e757fa..6256e2551f 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -929,7 +929,9 @@ pub fn create(gpa: *Allocator, options: InitOptions) !*Compilation { if (!use_lld and !options.target.isDarwin()) return error.LtoUnavailableWithoutLld; break :blk explicit; - } else if (!use_lld and !options.target.isDarwin()) { + } else if (!use_lld) { + // TODO zig ld LTO support + // See https://github.com/ziglang/zig/issues/8680 break :blk false; } else if (options.c_source_files.len == 0) { break :blk false; diff --git a/src/main.zig b/src/main.zig index 9d515cc398..ae39b9c167 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1648,7 +1648,13 @@ fn buildOutputType( if (use_lld) |opt| { if (opt and cross_target.isDarwin()) { - fatal("-fLLD requested with Mach-O object format. Only the self-hosted linker is supported for this target.", .{}); + fatal("LLD requested with Mach-O object format. Only the self-hosted linker is supported for this target.", .{}); + } + } + + if (want_lto) |opt| { + if (opt and cross_target.isDarwin()) { + fatal("LTO is not yet supported with the Mach-O object format. 
More details: https://github.com/ziglang/zig/issues/8680", .{}); } } From f87424ab6393c3208d96a4f078c71f745a37c84b Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 14 Jul 2021 13:14:41 +0200 Subject: [PATCH 48/81] zld: invoke traditional linker if has LLVM as a temp measure --- src/Compilation.zig | 7 ++++--- src/link/MachO.zig | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Compilation.zig b/src/Compilation.zig index 6256e2551f..ce6ab0fa76 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -907,9 +907,10 @@ pub fn create(gpa: *Allocator, options: InitOptions) !*Compilation { break :blk false; }; - const darwin_can_use_system_sdk = comptime std.Target.current.isDarwin() and - std.builtin.os.tag == .macos and - options.target.isDarwin(); + const darwin_can_use_system_sdk = blk: { + if (comptime !std.Target.current.isDarwin()) break :blk false; + break :blk std.builtin.os.tag == .macos and options.target.isDarwin(); + }; const sysroot = blk: { if (options.sysroot) |sysroot| { diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 91c457d4fe..c00aee6997 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -434,7 +434,7 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { } } - if (build_options.have_llvm and self.base.options.use_lld) { + if (build_options.have_llvm) { return self.linkWithZld(comp); } else { switch (self.base.options.effectiveOutputMode()) { From 0135b4665988530c0bd6b36ef7cb93ecaf999776 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 14 Jul 2021 22:13:21 +0200 Subject: [PATCH 49/81] zld: remove StringTable abstraction --- CMakeLists.txt | 1 - src/link/MachO.zig | 61 ++++++++++++++++++++++-------- src/link/MachO/DebugSymbols.zig | 4 +- src/link/MachO/StringTable.zig | 64 -------------------------------- src/link/MachO/Symbol.zig | 5 +-- src/link/MachO/Zld.zig | 66 ++++++++++++++++++++++++--------- 6 files changed, 98 insertions(+), 103 deletions(-) delete mode 100644 
src/link/MachO/StringTable.zig diff --git a/CMakeLists.txt b/CMakeLists.txt index 828b3ee73e..5722f55e48 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -581,7 +581,6 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/link/MachO/DebugSymbols.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Dylib.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Object.zig" - "${CMAKE_SOURCE_DIR}/src/link/MachO/StringTable.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Symbol.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Trie.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Zld.zig" diff --git a/src/link/MachO.zig b/src/link/MachO.zig index c00aee6997..847012110a 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -26,7 +26,6 @@ const target_util = @import("../target.zig"); const DebugSymbols = @import("MachO/DebugSymbols.zig"); const Trie = @import("MachO/Trie.zig"); const CodeSignature = @import("MachO/CodeSignature.zig"); -const StringTable = @import("MachO/StringTable.zig"); const Zld = @import("MachO/Zld.zig"); usingnamespace @import("MachO/commands.zig"); @@ -117,7 +116,8 @@ offset_table_free_list: std.ArrayListUnmanaged(u32) = .{}, stub_helper_stubs_start_off: ?u64 = null, -strtab: StringTable = undefined, +strtab: std.ArrayListUnmanaged(u8) = .{}, +strtab_cache: std.StringHashMapUnmanaged(u32) = .{}, /// Table of GOT entries. 
offset_table: std.ArrayListUnmanaged(GOTEntry) = .{}, @@ -418,7 +418,6 @@ pub fn createEmpty(gpa: *Allocator, options: link.Options) !*MachO { .file = null, }, .page_size = if (options.target.cpu.arch == .aarch64) 0x4000 else 0x1000, - .strtab = try StringTable.init(gpa), }; return self; @@ -985,7 +984,14 @@ pub fn deinit(self: *MachO) void { self.text_block_free_list.deinit(self.base.allocator); self.offset_table.deinit(self.base.allocator); self.offset_table_free_list.deinit(self.base.allocator); - self.strtab.deinit(); + { + var it = self.strtab_cache.keyIterator(); + while (it.next()) |key| { + self.base.allocator.free(key.*); + } + } + self.strtab_cache.deinit(self.base.allocator); + self.strtab.deinit(self.base.allocator); self.globals.deinit(self.base.allocator); self.globals_free_list.deinit(self.base.allocator); self.locals.deinit(self.base.allocator); @@ -1203,7 +1209,7 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { const new_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{mem.spanZ(decl.name)}); defer self.base.allocator.free(new_name); - symbol.n_strx = try self.strtab.getOrPut(new_name); + symbol.n_strx = try self.makeString(new_name); symbol.n_type = macho.N_SECT; symbol.n_sect = @intCast(u8, self.text_section_index.?) 
+ 1; symbol.n_desc = 0; @@ -1215,7 +1221,7 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { const decl_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{mem.spanZ(decl.name)}); defer self.base.allocator.free(decl_name); - const name_str_index = try self.strtab.getOrPut(decl_name); + const name_str_index = try self.makeString(decl_name); const addr = try self.allocateTextBlock(&decl.link.macho, code.len, required_alignment); log.debug("allocated text block for {s} at 0x{x}", .{ decl_name, addr }); @@ -1405,14 +1411,14 @@ pub fn updateDeclExports( if (exp.link.macho.sym_index) |i| { const sym = &self.globals.items[i]; sym.* = .{ - .n_strx = try self.strtab.getOrPut(exp_name), + .n_strx = sym.n_strx, .n_type = n_type, .n_sect = @intCast(u8, self.text_section_index.?) + 1, .n_desc = n_desc, .n_value = decl_sym.n_value, }; } else { - const name_str_index = try self.strtab.getOrPut(exp_name); + const name_str_index = try self.makeString(exp_name); const i = if (self.globals_free_list.popOrNull()) |i| i else blk: { _ = self.globals.addOneAssumeCapacity(); self.export_info_dirty = true; @@ -1788,7 +1794,8 @@ pub fn populateMissingMetadata(self: *MachO) !void { symtab.symoff = @intCast(u32, symtab_off); symtab.nsyms = @intCast(u32, self.base.options.symbol_count_hint); - const strtab_size = self.strtab.size(); + try self.strtab.append(self.base.allocator, 0); + const strtab_size = self.strtab.items.len; const strtab_off = self.findFreeSpaceLinkedit(strtab_size, 1, symtab_off); log.debug("found string table free space 0x{x} to 0x{x}", .{ strtab_off, strtab_off + strtab_size }); symtab.stroff = @intCast(u32, strtab_off); @@ -1930,7 +1937,7 @@ pub fn populateMissingMetadata(self: *MachO) !void { if (!self.nonlazy_imports.contains("dyld_stub_binder")) { const index = @intCast(u32, self.nonlazy_imports.count()); const name = try self.base.allocator.dupe(u8, "dyld_stub_binder"); - const offset = try 
self.strtab.getOrPut("dyld_stub_binder"); + const offset = try self.makeString("dyld_stub_binder"); try self.nonlazy_imports.putNoClobber(self.base.allocator, name, .{ .symbol = .{ .n_strx = offset, @@ -2063,7 +2070,7 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, pub fn addExternSymbol(self: *MachO, name: []const u8) !u32 { const index = @intCast(u32, self.lazy_imports.count()); - const offset = try self.strtab.getOrPut(name); + const offset = try self.makeString(name); const sym_name = try self.base.allocator.dupe(u8, name); const dylib_ordinal = 1; // TODO this is now hardcoded, since we only support libSystem. try self.lazy_imports.putNoClobber(self.base.allocator, sym_name, .{ @@ -2253,7 +2260,7 @@ fn writeOffsetTableEntry(self: *MachO, index: usize) !void { }, } }; - const sym_name = self.strtab.get(sym.n_strx) orelse unreachable; + const sym_name = self.getString(sym.n_strx) orelse unreachable; log.debug("writing offset table entry [ 0x{x} => 0x{x} ({s}) ]", .{ off, sym.n_value, sym_name }); try self.base.file.?.pwriteAll(mem.asBytes(&sym.n_value), off); } @@ -2751,7 +2758,7 @@ fn writeExportTrie(self: *MachO) !void { const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; for (self.globals.items) |symbol| { // TODO figure out if we should put all global symbols into the export trie - const name = self.strtab.get(symbol.n_strx) orelse unreachable; + const name = self.getString(symbol.n_strx) orelse unreachable; assert(symbol.n_value >= text_segment.inner.vmaddr); try trie.put(.{ .name = name, @@ -3032,7 +3039,7 @@ fn writeStringTable(self: *MachO) !void { const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; const allocated_size = self.allocatedSizeLinkedit(symtab.stroff); - const needed_size = mem.alignForwardGeneric(u64, self.strtab.size(), @alignOf(u64)); + const needed_size = mem.alignForwardGeneric(u64, self.strtab.items.len, @alignOf(u64)); if (needed_size > 
allocated_size or self.strtab_needs_relocation) { symtab.strsize = 0; @@ -3042,7 +3049,7 @@ fn writeStringTable(self: *MachO) !void { symtab.strsize = @intCast(u32, needed_size); log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); - try self.base.file.?.pwriteAll(self.strtab.asSlice(), symtab.stroff); + try self.base.file.?.pwriteAll(self.strtab.items, symtab.stroff); self.load_commands_dirty = true; self.strtab_dirty = false; } @@ -3173,3 +3180,27 @@ pub fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) { fn hasTlvDescriptors(_: *MachO) bool { return false; } + +pub fn makeString(self: *MachO, string: []const u8) !u32 { + if (self.strtab_cache.get(string)) |off| { + log.debug("reusing string '{s}' at offset 0x{x}", .{ string, off }); + return off; + } + + try self.strtab.ensureUnusedCapacity(self.base.allocator, string.len + 1); + const new_off = @intCast(u32, self.strtab.items.len); + + log.debug("writing new string '{s}' at offset 0x{x}", .{ string, new_off }); + + self.strtab.appendSliceAssumeCapacity(string); + self.strtab.appendAssumeCapacity(0); + + try self.strtab_cache.putNoClobber(self.base.allocator, try self.base.allocator.dupe(u8, string), new_off); + + return new_off; +} + +pub fn getString(self: *MachO, off: u32) ?[]const u8 { + assert(off < self.strtab.items.len); + return mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr + off)); +} diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index 38e13800a6..025959793e 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -814,7 +814,7 @@ fn writeStringTable(self: *DebugSymbols) !void { const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; const allocated_size = self.allocatedSizeLinkedit(symtab.stroff); - const needed_size = mem.alignForwardGeneric(u64, self.base.strtab.size(), @alignOf(u64)); + const needed_size = mem.alignForwardGeneric(u64, 
self.base.strtab.items.len, @alignOf(u64)); if (needed_size > allocated_size) { symtab.strsize = 0; @@ -823,7 +823,7 @@ fn writeStringTable(self: *DebugSymbols) !void { symtab.strsize = @intCast(u32, needed_size); log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); - try self.file.pwriteAll(self.base.strtab.asSlice(), symtab.stroff); + try self.file.pwriteAll(self.base.strtab.items, symtab.stroff); self.load_commands_dirty = true; self.strtab_dirty = false; } diff --git a/src/link/MachO/StringTable.zig b/src/link/MachO/StringTable.zig deleted file mode 100644 index 43770afdc1..0000000000 --- a/src/link/MachO/StringTable.zig +++ /dev/null @@ -1,64 +0,0 @@ -const StringTable = @This(); - -const std = @import("std"); -const log = std.log.scoped(.strtab); -const mem = std.mem; - -const Allocator = mem.Allocator; - -allocator: *Allocator, -buffer: std.ArrayListUnmanaged(u8) = .{}, -cache: std.StringHashMapUnmanaged(u32) = .{}, - -pub const Error = error{OutOfMemory}; - -pub fn init(allocator: *Allocator) Error!StringTable { - var strtab = StringTable{ - .allocator = allocator, - }; - try strtab.buffer.append(allocator, 0); - return strtab; -} - -pub fn deinit(self: *StringTable) void { - { - var it = self.cache.keyIterator(); - while (it.next()) |key| { - self.allocator.free(key.*); - } - } - self.cache.deinit(self.allocator); - self.buffer.deinit(self.allocator); -} - -pub fn getOrPut(self: *StringTable, string: []const u8) Error!u32 { - if (self.cache.get(string)) |off| { - log.debug("reusing string '{s}' at offset 0x{x}", .{ string, off }); - return off; - } - - try self.buffer.ensureUnusedCapacity(self.allocator, string.len + 1); - const new_off = @intCast(u32, self.buffer.items.len); - - log.debug("writing new string '{s}' at offset 0x{x}", .{ string, new_off }); - - self.buffer.appendSliceAssumeCapacity(string); - self.buffer.appendAssumeCapacity(0); - - try self.cache.putNoClobber(self.allocator, try 
self.allocator.dupe(u8, string), new_off); - - return new_off; -} - -pub fn get(self: StringTable, off: u32) ?[]const u8 { - if (off >= self.buffer.items.len) return null; - return mem.spanZ(@ptrCast([*:0]const u8, self.buffer.items.ptr + off)); -} - -pub fn asSlice(self: StringTable) []const u8 { - return self.buffer.items; -} - -pub fn size(self: StringTable) u64 { - return self.buffer.items.len; -} diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index 15d4c180dd..1c270e8510 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -9,7 +9,6 @@ const mem = std.mem; const Allocator = mem.Allocator; const Dylib = @import("Dylib.zig"); const Object = @import("Object.zig"); -const StringTable = @import("StringTable.zig"); const Zld = @import("Zld.zig"); /// Symbol name. Owned slice. @@ -226,8 +225,8 @@ pub fn needsTlvOffset(self: Symbol, zld: *Zld) bool { return sect_type == macho.S_THREAD_LOCAL_VARIABLES; } -pub fn asNlist(symbol: *Symbol, zld: *Zld, strtab: *StringTable) !macho.nlist_64 { - const n_strx = try strtab.getOrPut(symbol.name); +pub fn asNlist(symbol: *Symbol, zld: *Zld) !macho.nlist_64 { + const n_strx = try zld.makeString(symbol.name); const nlist = nlist: { switch (symbol.payload) { .regular => |regular| { diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 71b5d5ad58..ce08f6d82d 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -18,7 +18,6 @@ const CodeSignature = @import("CodeSignature.zig"); const Dylib = @import("Dylib.zig"); const Object = @import("Object.zig"); const Relocation = reloc.Relocation; -const StringTable = @import("StringTable.zig"); const Symbol = @import("Symbol.zig"); const Trie = @import("Trie.zig"); @@ -26,7 +25,6 @@ usingnamespace @import("commands.zig"); usingnamespace @import("bind.zig"); allocator: *Allocator, -strtab: StringTable, target: ?std.Target = null, page_size: ?u16 = null, @@ -114,6 +112,9 @@ stub_helper_stubs_start_off: ?u64 = null, blocks: 
std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}, +strtab: std.ArrayListUnmanaged(u8) = .{}, +strtab_cache: std.StringHashMapUnmanaged(u32) = .{}, + has_dices: bool = false, has_stabs: bool = false, @@ -169,7 +170,7 @@ pub const TextBlock = struct { .n_value = reg.address, }); nlists.appendAssumeCapacity(.{ - .n_strx = try zld.strtab.getOrPut(sym.name), + .n_strx = try zld.makeString(sym.name), .n_type = macho.N_FUN, .n_sect = section_id, .n_desc = 0, @@ -192,7 +193,7 @@ pub const TextBlock = struct { }, .global => { try nlists.append(.{ - .n_strx = try zld.strtab.getOrPut(sym.name), + .n_strx = try zld.makeString(sym.name), .n_type = macho.N_GSYM, .n_sect = 0, .n_desc = 0, @@ -201,7 +202,7 @@ pub const TextBlock = struct { }, .static => { try nlists.append(.{ - .n_strx = try zld.strtab.getOrPut(sym.name), + .n_strx = try zld.makeString(sym.name), .n_type = macho.N_STSYM, .n_sect = reg.sectionId(zld), .n_desc = 0, @@ -311,10 +312,7 @@ pub const TextBlock = struct { const DEFAULT_DYLD_PATH: [*:0]const u8 = "/usr/lib/dyld"; pub fn init(allocator: *Allocator) !Zld { - return Zld{ - .allocator = allocator, - .strtab = try StringTable.init(allocator), - }; + return Zld{ .allocator = allocator }; } pub fn deinit(self: *Zld) void { @@ -357,7 +355,15 @@ pub fn deinit(self: *Zld) void { self.locals.deinit(self.allocator); self.globals.deinit(self.allocator); - self.strtab.deinit(); + + { + var it = self.strtab_cache.keyIterator(); + while (it.next()) |key| { + self.allocator.free(key.*); + } + } + self.strtab_cache.deinit(self.allocator); + self.strtab.deinit(self.allocator); // TODO dealloc all blocks self.blocks.deinit(self.allocator); @@ -2572,7 +2578,7 @@ fn writeSymbolTable(self: *Zld) !void { if (symbol.isTemp()) continue; // TODO when merging codepaths, this should go into freelist const reg = symbol.payload.regular; - const nlist = try symbol.asNlist(self, &self.strtab); + const nlist = try symbol.asNlist(self); if (reg.linkage == .translation_unit) { try 
locals.append(nlist); @@ -2588,21 +2594,21 @@ fn writeSymbolTable(self: *Zld) !void { // Open scope try locals.ensureUnusedCapacity(4); locals.appendAssumeCapacity(.{ - .n_strx = try self.strtab.getOrPut(object.tu_comp_dir.?), + .n_strx = try self.makeString(object.tu_comp_dir.?), .n_type = macho.N_SO, .n_sect = 0, .n_desc = 0, .n_value = 0, }); locals.appendAssumeCapacity(.{ - .n_strx = try self.strtab.getOrPut(object.tu_name.?), + .n_strx = try self.makeString(object.tu_name.?), .n_type = macho.N_SO, .n_sect = 0, .n_desc = 0, .n_value = 0, }); locals.appendAssumeCapacity(.{ - .n_strx = try self.strtab.getOrPut(object.name.?), + .n_strx = try self.makeString(object.name.?), .n_type = macho.N_OSO, .n_sect = 0, .n_desc = 1, @@ -2642,7 +2648,7 @@ fn writeSymbolTable(self: *Zld) !void { defer undef_dir.deinit(); for (self.imports.items) |sym| { - const nlist = try sym.asNlist(self, &self.strtab); + const nlist = try sym.asNlist(self); const id = @intCast(u32, undefs.items.len); try undefs.append(nlist); try undef_dir.putNoClobber(sym.name, id); @@ -2737,14 +2743,14 @@ fn writeStringTable(self: *Zld) !void { const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; symtab.stroff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - symtab.strsize = @intCast(u32, mem.alignForwardGeneric(u64, self.strtab.size(), @alignOf(u64))); + symtab.strsize = @intCast(u32, mem.alignForwardGeneric(u64, self.strtab.items.len, @alignOf(u64))); seg.inner.filesize += symtab.strsize; log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); - try self.file.?.pwriteAll(self.strtab.asSlice(), symtab.stroff); + try self.file.?.pwriteAll(self.strtab.items, symtab.stroff); - if (symtab.strsize > self.strtab.size() and self.target.?.cpu.arch == .x86_64) { + if (symtab.strsize > self.strtab.items.len and self.target.?.cpu.arch == .x86_64) { // 
This is the last section, so we need to pad it out. try self.file.?.pwriteAll(&[_]u8{0}, seg.inner.fileoff + seg.inner.filesize - 1); } @@ -2910,3 +2916,27 @@ fn writeHeader(self: *Zld) !void { try self.file.?.pwriteAll(mem.asBytes(&header), 0); } + +pub fn makeString(self: *Zld, string: []const u8) !u32 { + if (self.strtab_cache.get(string)) |off| { + log.debug("reusing string '{s}' at offset 0x{x}", .{ string, off }); + return off; + } + + try self.strtab.ensureUnusedCapacity(self.allocator, string.len + 1); + const new_off = @intCast(u32, self.strtab.items.len); + + log.debug("writing new string '{s}' at offset 0x{x}", .{ string, new_off }); + + self.strtab.appendSliceAssumeCapacity(string); + self.strtab.appendAssumeCapacity(0); + + try self.strtab_cache.putNoClobber(self.allocator, try self.allocator.dupe(u8, string), new_off); + + return new_off; +} + +pub fn getString(self: *Zld, off: u32) ?[]const u8 { + assert(off < self.strtab.items.len); + return mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr + off)); +} From ec874a9b2bf24bb37f1e90558153bbf04ac5f22a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 15 Jul 2021 06:47:13 +0200 Subject: [PATCH 50/81] zld: move tracking binding for proxies into TextBlock which is the source of binding rather than its target. That is, we now track by source. --- src/link/MachO/Symbol.zig | 21 ++-------------- src/link/MachO/Zld.zig | 52 ++++++++++++++++++++++++++++----------- src/link/MachO/reloc.zig | 19 ++++---------- 3 files changed, 44 insertions(+), 48 deletions(-) diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index 1c270e8510..28aee6eeb0 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -121,20 +121,11 @@ pub const Tentative = struct { }; pub const Proxy = struct { - /// Dynamic binding info - spots within the final - /// executable where this proxy is referenced from. 
- bind_info: std.ArrayListUnmanaged(struct { - local_sym_index: u32, - offset: u32, - }) = .{}, - /// Dylib where to locate this symbol. /// null means self-reference. file: ?*Dylib = null, - pub fn deinit(proxy: *Proxy, allocator: *Allocator) void { - proxy.bind_info.deinit(allocator); - } + local_sym_index: u32 = 0, pub fn dylibOrdinal(proxy: Proxy) u16 { const dylib = proxy.file orelse return 0; @@ -145,13 +136,10 @@ pub const Proxy = struct { _ = fmt; _ = options; try std.fmt.format(writer, "Proxy {{ ", .{}); - if (self.bind_info.items.len > 0) { - // TODO - try std.fmt.format(writer, ".bind_info = {}, ", .{self.bind_info.items.len}); - } if (self.file) |file| { try std.fmt.format(writer, ".file = {s}, ", .{file.name.?}); } + try std.fmt.format(writer, ".local_sym_index = {d}, ", .{self.local_sym_index}); try std.fmt.format(writer, "}}", .{}); } }; @@ -284,11 +272,6 @@ pub fn asNlist(symbol: *Symbol, zld: *Zld) !macho.nlist_64 { pub fn deinit(symbol: *Symbol, allocator: *Allocator) void { allocator.free(symbol.name); - - switch (symbol.payload) { - .proxy => |*proxy| proxy.deinit(allocator), - else => {}, - } } pub fn isStab(sym: macho.nlist_64) bool { diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index ce08f6d82d..62e34cb6b0 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -136,6 +136,7 @@ pub const TextBlock = struct { size: u64, alignment: u32, rebases: std.ArrayList(u64), + bindings: std.ArrayList(SymbolAtOffset), dices: std.ArrayList(macho.data_in_code_entry), next: ?*TextBlock = null, prev: ?*TextBlock = null, @@ -226,6 +227,7 @@ pub const TextBlock = struct { .size = undefined, .alignment = undefined, .rebases = std.ArrayList(u64).init(allocator), + .bindings = std.ArrayList(SymbolAtOffset).init(allocator), .dices = std.ArrayList(macho.data_in_code_entry).init(allocator), }; } @@ -239,6 +241,7 @@ pub const TextBlock = struct { self.allocator.free(self.code); self.relocs.deinit(); self.rebases.deinit(); + 
self.bindings.deinit(); self.dices.deinit(); } @@ -293,6 +296,9 @@ pub const TextBlock = struct { if (self.rebases.items.len > 0) { log.warn(" rebases: {any}", .{self.rebases.items}); } + if (self.bindings.items.len > 0) { + log.warn(" bindings: {any}", .{self.bindings.items}); + } if (self.dices.items.len > 0) { log.warn(" dices: {any}", .{self.dices.items}); } @@ -1745,9 +1751,11 @@ fn resolveSymbols(self: *Zld) !void { if (!dylib.symbols.contains(symbol.name)) continue; try referenced.put(dylib, {}); + const index = @intCast(u32, self.imports.items.len); symbol.payload = .{ .proxy = .{ .file = dylib, + .local_sym_index = index, }, }; try self.imports.append(self.allocator, symbol); @@ -2341,23 +2349,37 @@ fn writeBindInfoTable(self: *Zld) !void { } } - for (self.globals.values()) |sym| { - if (sym.payload != .proxy) continue; + { + var it = self.blocks.iterator(); + while (it.next()) |entry| { + const match = entry.key_ptr.*; + var block: *TextBlock = entry.value_ptr.*; - const proxy = sym.payload.proxy; - for (proxy.bind_info.items) |info| { - const bind_sym = self.locals.items[info.local_sym_index]; - assert(bind_sym.payload == .regular); - const reg = bind_sym.payload.regular; - const base_address = self.load_commands.items[reg.segment_id].Segment.inner.vmaddr; - const offset = reg.address + info.offset - base_address; + if (match.seg == self.text_segment_cmd_index.?) 
continue; // __TEXT is non-writable - try pointers.append(.{ - .offset = offset, - .segment_id = reg.segment_id, - .dylib_ordinal = proxy.dylibOrdinal(), - .name = sym.name, - }); + const seg = self.load_commands.items[match.seg].Segment; + + while (true) { + const sym = self.locals.items[block.local_sym_index]; + assert(sym.payload == .regular); + const base_offset = sym.payload.regular.address - seg.inner.vmaddr; + + for (block.bindings.items) |binding| { + const bind_sym = self.imports.items[binding.local_sym_index]; + const proxy = bind_sym.payload.proxy; + + try pointers.append(.{ + .offset = binding.offset + base_offset, + .segment_id = match.seg, + .dylib_ordinal = proxy.dylibOrdinal(), + .name = bind_sym.name, + }); + } + + if (block.prev) |prev| { + block = prev; + } else break; + } } } diff --git a/src/link/MachO/reloc.zig b/src/link/MachO/reloc.zig index d92d047cd9..c8b176c9c2 100644 --- a/src/link/MachO/reloc.zig +++ b/src/link/MachO/reloc.zig @@ -449,22 +449,13 @@ pub const Relocation = struct { .proxy => |proxy| { if (mem.eql(u8, self.target.name, "__tlv_bootstrap")) { break :blk 0; // Dynamically bound by dyld. - // const segment = zld.load_commands.items[zld.data_segment_cmd_index.?].Segment; - // const tlv = segment.sections.items[zld.tlv_section_index.?]; - // break :blk tlv.addr; } const segment = zld.load_commands.items[zld.text_segment_cmd_index.?].Segment; const stubs = segment.sections.items[zld.stubs_section_index.?]; const stubs_index = self.target.stubs_index orelse { - if (proxy.bind_info.items.len > 0) { - break :blk 0; // Dynamically bound by dyld. - } - log.err("expected stubs index or dynamic bind address for symbol '{s}'", .{ - self.target.name, - }); - log.err(" this is an internal linker error", .{}); - return error.FailedToResolveRelocationTarget; + // TODO verify in TextBlock that the symbol is indeed dynamically bound. + break :blk 0; // Dynamically bound by dyld. 
}; break :blk stubs.addr + stubs_index * stubs.reserved2; }, @@ -647,9 +638,9 @@ pub const Parser = struct { } else if (out_rel.payload == .unsigned) { const sym = out_rel.target; switch (sym.payload) { - .proxy => { - try sym.payload.proxy.bind_info.append(self.zld.allocator, .{ - .local_sym_index = self.block.local_sym_index, + .proxy => |proxy| { + try self.block.bindings.append(.{ + .local_sym_index = proxy.local_sym_index, .offset = out_rel.offset, }); }, From f8678c48ff43879d043b626031f7a5c92303fdea Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 15 Jul 2021 07:41:59 +0200 Subject: [PATCH 51/81] zld: reuse string table for symbol names rather than manage allocs separately per symbol. --- src/link/MachO/Object.zig | 39 ++++++++----- src/link/MachO/Symbol.zig | 50 ++++------------- src/link/MachO/Zld.zig | 113 ++++++++++++++++++++------------------ src/link/MachO/reloc.zig | 21 ++++--- 4 files changed, 107 insertions(+), 116 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 49eb34c522..91be941256 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -389,7 +389,7 @@ const TextBlockParser = struct { return switch (rreg.linkage) { .global => true, .linkage_unit => lreg.linkage == .translation_unit, - else => lsym.isTemp(), + else => lsym.isTemp(context.zld), }; } @@ -417,7 +417,7 @@ const TextBlockParser = struct { const sym = self.object.symbols.items[nlist_with_index.index]; if (sym.payload != .regular) { log.err("expected a regular symbol, found {s}", .{sym.payload}); - log.err(" when remapping {s}", .{sym.name}); + log.err(" when remapping {s}", .{self.zld.getString(sym.strx)}); return error.SymbolIsNotRegular; } assert(sym.payload.regular.local_sym_index != 0); // This means the symbol has not been properly resolved. 
@@ -463,7 +463,7 @@ const TextBlockParser = struct { } } } - if (self.zld.globals.contains(senior_sym.name)) break :blk .global; + if (self.zld.globals.contains(self.zld.getString(senior_sym.strx))) break :blk .global; break :blk .static; } else null; @@ -598,7 +598,11 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { sectionName(sect), }); defer self.allocator.free(name); - const symbol = try Symbol.new(self.allocator, name); + const symbol = try zld.allocator.create(Symbol); + symbol.* = .{ + .strx = try zld.makeString(name), + .payload = .{ .undef = .{} }, + }; try self.sections_as_symbols.putNoClobber(self.allocator, sect_id, symbol); break :symbol symbol; }; @@ -684,7 +688,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { const reg = &sym.payload.regular; if (reg.file) |file| { if (file != self) { - log.debug("deduping definition of {s} in {s}", .{ sym.name, self.name.? }); + log.debug("deduping definition of {s} in {s}", .{ zld.getString(sym.strx), self.name.? 
}); block.deinit(); self.allocator.destroy(block); continue; @@ -739,7 +743,11 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { sectionName(sect), }); defer self.allocator.free(name); - const symbol = try Symbol.new(self.allocator, name); + const symbol = try zld.allocator.create(Symbol); + symbol.* = .{ + .strx = try zld.makeString(name), + .payload = .{ .undef = .{} }, + }; try self.sections_as_symbols.putNoClobber(self.allocator, sect_id, symbol); break :symbol symbol; }; @@ -812,7 +820,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { } } } - if (zld.globals.contains(sym.name)) break :blk .global; + if (zld.globals.contains(zld.getString(sym.strx))) break :blk .global; break :blk .static; } else null; @@ -870,7 +878,7 @@ fn parseRelocs( try parser.parse(); } -pub fn symbolFromReloc(self: *Object, rel: macho.relocation_info) !*Symbol { +pub fn symbolFromReloc(self: *Object, zld: *Zld, rel: macho.relocation_info) !*Symbol { const symbol = blk: { if (rel.r_extern == 1) { break :blk self.symbols.items[rel.r_symbolnum]; @@ -888,12 +896,15 @@ pub fn symbolFromReloc(self: *Object, rel: macho.relocation_info) !*Symbol { sectionName(sect), }); defer self.allocator.free(name); - const symbol = try Symbol.new(self.allocator, name); - symbol.payload = .{ - .regular = .{ - .linkage = .translation_unit, - .address = sect.addr, - .file = self, + const symbol = try zld.allocator.create(Symbol); + symbol.* = .{ + .strx = try zld.makeString(name), + .payload = .{ + .regular = .{ + .linkage = .translation_unit, + .address = sect.addr, + .file = self, + }, }, }; try self.sections_as_symbols.putNoClobber(self.allocator, sect_id, symbol); diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index 28aee6eeb0..37072b5618 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -11,8 +11,8 @@ const Dylib = @import("Dylib.zig"); const Object = @import("Object.zig"); const Zld = @import("Zld.zig"); -/// Symbol name. Owned slice. 
-name: []const u8, +/// Offset into the string table. +strx: u32, /// Index in GOT table for indirection. got_index: ?u32 = null, @@ -160,26 +160,11 @@ pub const Undefined = struct { } }; -/// Create new undefined symbol. -pub fn new(allocator: *Allocator, name: []const u8) !*Symbol { - const new_sym = try allocator.create(Symbol); - errdefer allocator.destroy(new_sym); - - new_sym.* = .{ - .name = try allocator.dupe(u8, name), - .payload = .{ - .undef = .{}, - }, - }; - - return new_sym; -} - pub fn format(self: Symbol, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { _ = fmt; _ = options; try std.fmt.format(writer, "Symbol {{", .{}); - try std.fmt.format(writer, ".name = {s}, ", .{self.name}); + try std.fmt.format(writer, ".strx = {d}, ", .{self.strx}); if (self.got_index) |got_index| { try std.fmt.format(writer, ".got_index = {}, ", .{got_index}); } @@ -190,11 +175,12 @@ pub fn format(self: Symbol, comptime fmt: []const u8, options: std.fmt.FormatOpt try std.fmt.format(writer, "}}", .{}); } -pub fn isTemp(symbol: Symbol) bool { +pub fn isTemp(symbol: Symbol, zld: *Zld) bool { + const sym_name = zld.getString(symbol.strx); switch (symbol.payload) { .regular => |regular| { if (regular.linkage == .translation_unit) { - return mem.startsWith(u8, symbol.name, "l") or mem.startsWith(u8, symbol.name, "L"); + return mem.startsWith(u8, sym_name, "l") or mem.startsWith(u8, sym_name, "L"); } }, else => {}, @@ -202,24 +188,12 @@ pub fn isTemp(symbol: Symbol) bool { return false; } -pub fn needsTlvOffset(self: Symbol, zld: *Zld) bool { - if (self.payload != .regular) return false; - - const reg = self.payload.regular; - const seg = zld.load_command.items[reg.segment_id].Segment; - const sect = seg.sections.items[reg.section_id]; - const sect_type = commands.sectionType(sect); - - return sect_type == macho.S_THREAD_LOCAL_VARIABLES; -} - pub fn asNlist(symbol: *Symbol, zld: *Zld) !macho.nlist_64 { - const n_strx = try 
zld.makeString(symbol.name); const nlist = nlist: { switch (symbol.payload) { .regular => |regular| { var nlist = macho.nlist_64{ - .n_strx = n_strx, + .n_strx = symbol.strx, .n_type = macho.N_SECT, .n_sect = regular.sectionId(zld), .n_desc = 0, @@ -239,7 +213,7 @@ pub fn asNlist(symbol: *Symbol, zld: *Zld) !macho.nlist_64 { .tentative => { // TODO break :nlist macho.nlist_64{ - .n_strx = n_strx, + .n_strx = symbol.strx, .n_type = macho.N_UNDF, .n_sect = 0, .n_desc = 0, @@ -248,7 +222,7 @@ pub fn asNlist(symbol: *Symbol, zld: *Zld) !macho.nlist_64 { }, .proxy => |proxy| { break :nlist macho.nlist_64{ - .n_strx = n_strx, + .n_strx = symbol.strx, .n_type = macho.N_UNDF | macho.N_EXT, .n_sect = 0, .n_desc = (proxy.dylibOrdinal() * macho.N_SYMBOL_RESOLVER) | macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY, @@ -258,7 +232,7 @@ pub fn asNlist(symbol: *Symbol, zld: *Zld) !macho.nlist_64 { .undef => { // TODO break :nlist macho.nlist_64{ - .n_strx = n_strx, + .n_strx = symbol.strx, .n_type = macho.N_UNDF, .n_sect = 0, .n_desc = 0, @@ -270,10 +244,6 @@ pub fn asNlist(symbol: *Symbol, zld: *Zld) !macho.nlist_64 { return nlist; } -pub fn deinit(symbol: *Symbol, allocator: *Allocator) void { - allocator.free(symbol.name); -} - pub fn isStab(sym: macho.nlist_64) bool { return (macho.N_STAB & sym.n_type) != 0; } diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 62e34cb6b0..3e7d825d3c 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -113,7 +113,6 @@ stub_helper_stubs_start_off: ?u64 = null, blocks: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}, strtab: std.ArrayListUnmanaged(u8) = .{}, -strtab_cache: std.StringHashMapUnmanaged(u32) = .{}, has_dices: bool = false, has_stabs: bool = false, @@ -171,7 +170,7 @@ pub const TextBlock = struct { .n_value = reg.address, }); nlists.appendAssumeCapacity(.{ - .n_strx = try zld.makeString(sym.name), + .n_strx = sym.strx, .n_type = macho.N_FUN, .n_sect = section_id, .n_desc = 0, @@ -194,7 +193,7 @@ pub 
const TextBlock = struct { }, .global => { try nlists.append(.{ - .n_strx = try zld.makeString(sym.name), + .n_strx = sym.strx, .n_type = macho.N_GSYM, .n_sect = 0, .n_desc = 0, @@ -203,7 +202,7 @@ pub const TextBlock = struct { }, .static => { try nlists.append(.{ - .n_strx = try zld.makeString(sym.name), + .n_strx = sym.strx, .n_type = macho.N_STSYM, .n_sect = reg.sectionId(zld), .n_desc = 0, @@ -349,26 +348,20 @@ pub fn deinit(self: *Zld) void { self.dylibs.deinit(self.allocator); for (self.imports.items) |sym| { - sym.deinit(self.allocator); self.allocator.destroy(sym); } self.imports.deinit(self.allocator); for (self.locals.items) |sym| { - sym.deinit(self.allocator); self.allocator.destroy(sym); } self.locals.deinit(self.allocator); + for (self.globals.keys()) |key| { + self.allocator.free(key); + } self.globals.deinit(self.allocator); - { - var it = self.strtab_cache.keyIterator(); - while (it.next()) |key| { - self.allocator.free(key.*); - } - } - self.strtab_cache.deinit(self.allocator); self.strtab.deinit(self.allocator); // TODO dealloc all blocks @@ -1168,7 +1161,7 @@ fn allocateTextBlocks(self: *Zld) !void { sym.payload.regular.address = base_addr; log.debug(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ - sym.name, + self.getString(sym.strx), base_addr, base_addr + block.size, block.size, @@ -1231,7 +1224,7 @@ fn writeTextBlocks(self: *Zld) !void { const sym = self.locals.items[block.local_sym_index]; log.debug(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ - sym.name, + self.getString(sym.strx), aligned_base_off, aligned_base_off + block.size, block.size, @@ -1552,14 +1545,17 @@ fn resolveSymbolsInObject(self: *Zld, object: *Object) !void { if (Symbol.isSect(sym) and !Symbol.isExt(sym)) { // Regular symbol local to translation unit - const symbol = try Symbol.new(self.allocator, sym_name); - symbol.payload = .{ - .regular = .{ - .linkage = .translation_unit, - .address = sym.n_value, - .weak_ref = Symbol.isWeakRef(sym), - .file = 
object, - .local_sym_index = @intCast(u32, self.locals.items.len), + const symbol = try self.allocator.create(Symbol); + symbol.* = .{ + .strx = try self.makeString(sym_name), + .payload = .{ + .regular = .{ + .linkage = .translation_unit, + .address = sym.n_value, + .weak_ref = Symbol.isWeakRef(sym), + .file = object, + .local_sym_index = @intCast(u32, self.locals.items.len), + }, }, }; try self.locals.append(self.allocator, symbol); @@ -1569,9 +1565,13 @@ fn resolveSymbolsInObject(self: *Zld, object: *Object) !void { const symbol = self.globals.get(sym_name) orelse symbol: { // Insert new global symbol. - const symbol = try Symbol.new(self.allocator, sym_name); - symbol.payload.undef.file = object; - try self.globals.putNoClobber(self.allocator, symbol.name, symbol); + const symbol = try self.allocator.create(Symbol); + symbol.* = .{ + .strx = try self.makeString(sym_name), + .payload = .{ .undef = .{ .file = object } }, + }; + const alloc_name = try self.allocator.dupe(u8, sym_name); + try self.globals.putNoClobber(self.allocator, alloc_name, symbol); break :symbol symbol; }; @@ -1628,7 +1628,8 @@ fn resolveSymbolsInObject(self: *Zld, object: *Object) !void { fn resolveSymbols(self: *Zld) !void { // TODO mimicking insertion of null symbol from incremental linker. // This will need to moved. - const null_sym = try Symbol.new(self.allocator, ""); + const null_sym = try self.allocator.create(Symbol); + null_sym.* = .{ .strx = 0, .payload = .{ .undef = .{} } }; try self.locals.append(self.allocator, null_sym); // First pass, resolve symbols in provided objects. @@ -1639,12 +1640,13 @@ fn resolveSymbols(self: *Zld) !void { // Second pass, resolve symbols in static libraries. var sym_it = self.globals.iterator(); while (sym_it.next()) |entry| { + const sym_name = entry.key_ptr.*; const symbol = entry.value_ptr.*; if (symbol.payload != .undef) continue; for (self.archives.items) |archive| { // Check if the entry exists in a static archive. 
- const offsets = archive.toc.get(symbol.name) orelse { + const offsets = archive.toc.get(sym_name) orelse { // No hit. continue; }; @@ -1734,21 +1736,27 @@ fn resolveSymbols(self: *Zld) !void { // Third pass, resolve symbols in dynamic libraries. { // Put dyld_stub_binder as an undefined special symbol. - const symbol = try Symbol.new(self.allocator, "dyld_stub_binder"); + const symbol = try self.allocator.create(Symbol); + symbol.* = .{ + .strx = try self.makeString("dyld_stub_binder"), + .payload = .{ .undef = .{} }, + }; const index = @intCast(u32, self.got_entries.items.len); symbol.got_index = index; try self.got_entries.append(self.allocator, symbol); - try self.globals.putNoClobber(self.allocator, symbol.name, symbol); + const alloc_name = try self.allocator.dupe(u8, "dyld_stub_binder"); + try self.globals.putNoClobber(self.allocator, alloc_name, symbol); } var referenced = std.AutoHashMap(*Dylib, void).init(self.allocator); defer referenced.deinit(); - loop: for (self.globals.values()) |symbol| { + loop: for (self.globals.keys()) |sym_name| { + const symbol = self.globals.get(sym_name).?; if (symbol.payload != .undef) continue; for (self.dylibs.items) |dylib| { - if (!dylib.symbols.contains(symbol.name)) continue; + if (!dylib.symbols.contains(sym_name)) continue; try referenced.put(dylib, {}); const index = @intCast(u32, self.imports.items.len); @@ -1798,10 +1806,11 @@ fn resolveSymbols(self: *Zld) !void { } var has_undefined = false; - for (self.globals.values()) |symbol| { + for (self.globals.keys()) |sym_name| { + const symbol = self.globals.get(sym_name).?; if (symbol.payload != .undef) continue; - log.err("undefined reference to symbol '{s}'", .{symbol.name}); + log.err("undefined reference to symbol '{s}'", .{sym_name}); if (symbol.payload.undef.file) |file| { log.err(" | referenced in {s}", .{file.name.?}); } @@ -2344,7 +2353,7 @@ fn writeBindInfoTable(self: *Zld) !void { .offset = base_offset + sym.got_index.? 
* @sizeOf(u64), .segment_id = segment_id, .dylib_ordinal = proxy.dylibOrdinal(), - .name = sym.name, + .name = self.getString(sym.strx), }); } } @@ -2372,7 +2381,7 @@ fn writeBindInfoTable(self: *Zld) !void { .offset = binding.offset + base_offset, .segment_id = match.seg, .dylib_ordinal = proxy.dylibOrdinal(), - .name = bind_sym.name, + .name = self.getString(bind_sym.strx), }); } @@ -2395,7 +2404,7 @@ fn writeBindInfoTable(self: *Zld) !void { .offset = base_offset, .segment_id = segment_id, .dylib_ordinal = proxy.dylibOrdinal(), - .name = sym.name, + .name = self.getString(sym.strx), }); } @@ -2435,7 +2444,7 @@ fn writeLazyBindInfoTable(self: *Zld) !void { .offset = base_offset + sym.stubs_index.? * @sizeOf(u64), .segment_id = segment_id, .dylib_ordinal = proxy.dylibOrdinal(), - .name = sym.name, + .name = self.getString(sym.strx), }); } } @@ -2547,7 +2556,7 @@ fn writeExportInfo(self: *Zld) !void { if (sym.payload != .regular) continue; const reg = sym.payload.regular; if (reg.linkage != .global) continue; - try sorted_globals.append(sym.name); + try sorted_globals.append(self.getString(sym.strx)); } std.sort.sort([]const u8, sorted_globals.items, {}, Sorter.lessThan); @@ -2556,10 +2565,10 @@ fn writeExportInfo(self: *Zld) !void { const sym = self.globals.get(sym_name) orelse unreachable; const reg = sym.payload.regular; - log.debug(" | putting '{s}' defined at 0x{x}", .{ sym.name, reg.address }); + log.debug(" | putting '{s}' defined at 0x{x}", .{ sym_name, reg.address }); try trie.put(.{ - .name = sym.name, + .name = sym_name, .vmaddr_offset = reg.address - base_address, .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, }); @@ -2597,7 +2606,7 @@ fn writeSymbolTable(self: *Zld) !void { for (self.locals.items) |symbol, i| { if (i == 0) continue; // skip null symbol - if (symbol.isTemp()) continue; // TODO when merging codepaths, this should go into freelist + if (symbol.isTemp(self)) continue; // TODO when merging codepaths, this should go into freelist 
const reg = symbol.payload.regular; const nlist = try symbol.asNlist(self); @@ -2673,7 +2682,7 @@ fn writeSymbolTable(self: *Zld) !void { const nlist = try sym.asNlist(self); const id = @intCast(u32, undefs.items.len); try undefs.append(nlist); - try undef_dir.putNoClobber(sym.name, id); + try undef_dir.putNoClobber(self.getString(sym.strx), id); } const nlocals = locals.items.len; @@ -2735,7 +2744,8 @@ fn writeSymbolTable(self: *Zld) !void { stubs.reserved1 = 0; for (self.stubs.items) |sym| { - const id = undef_dir.get(sym.name) orelse unreachable; + const sym_name = self.getString(sym.strx); + const id = undef_dir.get(sym_name) orelse unreachable; try writer.writeIntLittle(u32, dysymtab.iundefsym + id); } @@ -2743,7 +2753,8 @@ fn writeSymbolTable(self: *Zld) !void { for (self.got_entries.items) |sym| { switch (sym.payload) { .proxy => { - const id = undef_dir.get(sym.name) orelse unreachable; + const sym_name = self.getString(sym.strx); + const id = undef_dir.get(sym_name) orelse unreachable; try writer.writeIntLittle(u32, dysymtab.iundefsym + id); }, else => { @@ -2754,7 +2765,8 @@ fn writeSymbolTable(self: *Zld) !void { la_symbol_ptr.reserved1 = got.reserved1 + ngot_entries; for (self.stubs.items) |sym| { - const id = undef_dir.get(sym.name) orelse unreachable; + const sym_name = self.getString(sym.strx); + const id = undef_dir.get(sym_name) orelse unreachable; try writer.writeIntLittle(u32, dysymtab.iundefsym + id); } @@ -2940,11 +2952,6 @@ fn writeHeader(self: *Zld) !void { } pub fn makeString(self: *Zld, string: []const u8) !u32 { - if (self.strtab_cache.get(string)) |off| { - log.debug("reusing string '{s}' at offset 0x{x}", .{ string, off }); - return off; - } - try self.strtab.ensureUnusedCapacity(self.allocator, string.len + 1); const new_off = @intCast(u32, self.strtab.items.len); @@ -2953,12 +2960,10 @@ pub fn makeString(self: *Zld, string: []const u8) !u32 { self.strtab.appendSliceAssumeCapacity(string); self.strtab.appendAssumeCapacity(0); - try 
self.strtab_cache.putNoClobber(self.allocator, try self.allocator.dupe(u8, string), new_off); - return new_off; } -pub fn getString(self: *Zld, off: u32) ?[]const u8 { +pub fn getString(self: *Zld, off: u32) []const u8 { assert(off < self.strtab.items.len); return mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr + off)); } diff --git a/src/link/MachO/reloc.zig b/src/link/MachO/reloc.zig index c8b176c9c2..e1631081d1 100644 --- a/src/link/MachO/reloc.zig +++ b/src/link/MachO/reloc.zig @@ -407,7 +407,7 @@ pub const Relocation = struct { const dc_seg = zld.load_commands.items[zld.data_const_segment_cmd_index.?].Segment; const got = dc_seg.sections.items[zld.got_section_index.?]; const got_index = self.target.got_index orelse { - log.err("expected GOT entry for symbol '{s}'", .{self.target.name}); + log.err("expected GOT entry for symbol '{s}'", .{zld.getString(self.target.strx)}); log.err(" this is an internal linker error", .{}); return error.FailedToResolveRelocationTarget; }; @@ -446,8 +446,8 @@ pub const Relocation = struct { break :blk reg.address; }, - .proxy => |proxy| { - if (mem.eql(u8, self.target.name, "__tlv_bootstrap")) { + .proxy => { + if (mem.eql(u8, zld.getString(self.target.strx), "__tlv_bootstrap")) { break :blk 0; // Dynamically bound by dyld. 
} @@ -460,7 +460,9 @@ pub const Relocation = struct { break :blk stubs.addr + stubs_index * stubs.reserved2; }, else => { - log.err("failed to resolve symbol '{s}' as a relocation target", .{self.target.name}); + log.err("failed to resolve symbol '{s}' as a relocation target", .{ + zld.getString(self.target.strx), + }); log.err(" this is an internal linker error", .{}); return error.FailedToResolveRelocationTarget; }, @@ -634,7 +636,10 @@ pub const Parser = struct { out_rel.target.got_index = index; try self.zld.got_entries.append(self.zld.allocator, out_rel.target); - log.debug("adding GOT entry for symbol {s} at index {}", .{ out_rel.target.name, index }); + log.debug("adding GOT entry for symbol {s} at index {}", .{ + self.zld.getString(out_rel.target.strx), + index, + }); } else if (out_rel.payload == .unsigned) { const sym = out_rel.target; switch (sym.payload) { @@ -697,14 +702,14 @@ pub const Parser = struct { sym.stubs_index = index; try self.zld.stubs.append(self.zld.allocator, sym); - log.debug("adding stub entry for symbol {s} at index {}", .{ sym.name, index }); + log.debug("adding stub entry for symbol {s} at index {}", .{ self.zld.getString(sym.strx), index }); } } } fn parseBaseRelInfo(self: *Parser, rel: macho.relocation_info) !Relocation { const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); - const target = try self.object.symbolFromReloc(rel); + const target = try self.object.symbolFromReloc(self.zld, rel); return Relocation{ .offset = offset, .target = target, @@ -888,7 +893,7 @@ pub const Parser = struct { assert(rel.r_pcrel == 0); assert(self.subtractor == null); - self.subtractor = try self.object.symbolFromReloc(rel); + self.subtractor = try self.object.symbolFromReloc(self.zld, rel); } fn parseLoad(self: *Parser, rel: macho.relocation_info) !Relocation { From c47ce310716de806b3d95c328a9c2f9735221806 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 15 Jul 2021 18:25:10 +0200 Subject: [PATCH 52/81] zld: thin out 
Relocation by not storing *TextBlock this way we shave off 8 bytes per Relocation structure, and instead we can pass the *TextBlock as args to resolve function. --- src/link/MachO/Zld.zig | 86 +++++++++++- src/link/MachO/reloc.zig | 273 +++++++++++++-------------------------- 2 files changed, 178 insertions(+), 181 deletions(-) diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 3e7d825d3c..b68b851bb5 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -246,7 +246,91 @@ pub const TextBlock = struct { pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { for (self.relocs.items) |rel| { - try rel.resolve(zld); + log.debug("relocating {}", .{rel}); + + const source_addr = blk: { + const sym = zld.locals.items[self.local_sym_index]; + break :blk sym.payload.regular.address + rel.offset; + }; + const target_addr = blk: { + const is_via_got = switch (rel.payload) { + .pointer_to_got => true, + .page => |page| page.kind == .got, + .page_off => |page_off| page_off.kind == .got, + .load => |load| load.kind == .got, + else => false, + }; + + if (is_via_got) { + const dc_seg = zld.load_commands.items[zld.data_const_segment_cmd_index.?].Segment; + const got = dc_seg.sections.items[zld.got_section_index.?]; + const got_index = rel.target.got_index orelse { + log.err("expected GOT entry for symbol '{s}'", .{zld.getString(rel.target.strx)}); + log.err(" this is an internal linker error", .{}); + return error.FailedToResolveRelocationTarget; + }; + break :blk got.addr + got_index * @sizeOf(u64); + } + + switch (rel.target.payload) { + .regular => |reg| { + const is_tlv = is_tlv: { + const sym = zld.locals.items[self.local_sym_index]; + const seg = zld.load_commands.items[sym.payload.regular.segment_id].Segment; + const sect = seg.sections.items[sym.payload.regular.section_id]; + break :is_tlv sectionType(sect) == macho.S_THREAD_LOCAL_VARIABLES; + }; + if (is_tlv) { + // For TLV relocations, the value specified as a relocation is the displacement 
from the + // TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first + // defined TLV template init section in the following order: + // * wrt to __thread_data if defined, then + // * wrt to __thread_bss + const seg = zld.load_commands.items[zld.data_segment_cmd_index.?].Segment; + const base_address = inner: { + if (zld.tlv_data_section_index) |i| { + break :inner seg.sections.items[i].addr; + } else if (zld.tlv_bss_section_index) |i| { + break :inner seg.sections.items[i].addr; + } else { + log.err("threadlocal variables present but no initializer sections found", .{}); + log.err(" __thread_data not found", .{}); + log.err(" __thread_bss not found", .{}); + return error.FailedToResolveRelocationTarget; + } + }; + break :blk reg.address - base_address; + } + + break :blk reg.address; + }, + .proxy => { + if (mem.eql(u8, zld.getString(rel.target.strx), "__tlv_bootstrap")) { + break :blk 0; // Dynamically bound by dyld. + } + + const segment = zld.load_commands.items[zld.text_segment_cmd_index.?].Segment; + const stubs = segment.sections.items[zld.stubs_section_index.?]; + const stubs_index = rel.target.stubs_index orelse { + // TODO verify in TextBlock that the symbol is indeed dynamically bound. + break :blk 0; // Dynamically bound by dyld. 
+ }; + break :blk stubs.addr + stubs_index * stubs.reserved2; + }, + else => { + log.err("failed to resolve symbol '{s}' as a relocation target", .{ + zld.getString(rel.target.strx), + }); + log.err(" this is an internal linker error", .{}); + return error.FailedToResolveRelocationTarget; + }, + } + }; + + log.debug(" | source_addr = 0x{x}", .{source_addr}); + log.debug(" | target_addr = 0x{x}", .{target_addr}); + + try rel.resolve(self, source_addr, target_addr); } } diff --git a/src/link/MachO/reloc.zig b/src/link/MachO/reloc.zig index e1631081d1..1d7e6ac51a 100644 --- a/src/link/MachO/reloc.zig +++ b/src/link/MachO/reloc.zig @@ -20,9 +20,6 @@ pub const Relocation = struct { /// Note relocation size can be inferred by relocation's kind. offset: u32, - /// Parent block containing this relocation. - block: *TextBlock, - /// Target symbol: either a regular or a proxy. target: *Symbol, @@ -36,6 +33,13 @@ pub const Relocation = struct { load: Load, }, + const ResolveArgs = struct { + block: *TextBlock, + offset: u32, + source_addr: u64, + target_addr: u64, + }; + pub const Unsigned = struct { subtractor: ?*Symbol = null, @@ -48,16 +52,16 @@ pub const Relocation = struct { /// => * is unreachable is_64bit: bool, - pub fn resolve(self: Unsigned, base: Relocation, _: u64, target_addr: u64) !void { + pub fn resolve(self: Unsigned, args: ResolveArgs) !void { const result = if (self.subtractor) |subtractor| - @intCast(i64, target_addr) - @intCast(i64, subtractor.payload.regular.address) + self.addend + @intCast(i64, args.target_addr) - @intCast(i64, subtractor.payload.regular.address) + self.addend else - @intCast(i64, target_addr) + self.addend; + @intCast(i64, args.target_addr) + self.addend; if (self.is_64bit) { - mem.writeIntLittle(u64, base.block.code[base.offset..][0..8], @bitCast(u64, result)); + mem.writeIntLittle(u64, args.block.code[args.offset..][0..8], @bitCast(u64, result)); } else { - mem.writeIntLittle(u32, base.block.code[base.offset..][0..4], @truncate(u32, 
@bitCast(u64, result))); + mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @truncate(u32, @bitCast(u64, result))); } } @@ -78,25 +82,29 @@ pub const Relocation = struct { pub const Branch = struct { arch: Arch, - pub fn resolve(self: Branch, base: Relocation, source_addr: u64, target_addr: u64) !void { + pub fn resolve(self: Branch, args: ResolveArgs) !void { switch (self.arch) { .aarch64 => { - const displacement = try math.cast(i28, @intCast(i64, target_addr) - @intCast(i64, source_addr)); + const displacement = try math.cast( + i28, + @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr), + ); + const code = args.block.code[args.offset..][0..4]; var inst = aarch64.Instruction{ - .unconditional_branch_immediate = mem.bytesToValue( - meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.unconditional_branch_immediate, - ), - base.block.code[base.offset..][0..4], - ), + .unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.unconditional_branch_immediate, + ), code), }; inst.unconditional_branch_immediate.imm26 = @truncate(u26, @bitCast(u28, displacement >> 2)); - mem.writeIntLittle(u32, base.block.code[base.offset..][0..4], inst.toU32()); + mem.writeIntLittle(u32, code, inst.toU32()); }, .x86_64 => { - const displacement = try math.cast(i32, @intCast(i64, target_addr) - @intCast(i64, source_addr) - 4); - mem.writeIntLittle(u32, base.block.code[base.offset..][0..4], @bitCast(u32, displacement)); + const displacement = try math.cast( + i32, + @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4, + ); + mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @bitCast(u32, displacement)); }, else => return error.UnsupportedCpuArchitecture, } @@ -118,25 +126,23 @@ pub const Relocation = struct { }, addend: ?u32 = null, - pub fn resolve(self: Page, base: Relocation, source_addr: u64, target_addr: u64) !void { - const actual_target_addr = if (self.addend) 
|addend| target_addr + addend else target_addr; - const source_page = @intCast(i32, source_addr >> 12); - const target_page = @intCast(i32, actual_target_addr >> 12); + pub fn resolve(self: Page, args: ResolveArgs) !void { + const target_addr = if (self.addend) |addend| args.target_addr + addend else args.target_addr; + const source_page = @intCast(i32, args.source_addr >> 12); + const target_page = @intCast(i32, target_addr >> 12); const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); + const code = args.block.code[args.offset..][0..4]; var inst = aarch64.Instruction{ - .pc_relative_address = mem.bytesToValue( - meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.pc_relative_address, - ), - base.block.code[base.offset..][0..4], - ), + .pc_relative_address = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.pc_relative_address, + ), code), }; inst.pc_relative_address.immhi = @truncate(u19, pages >> 2); inst.pc_relative_address.immlo = @truncate(u2, pages); - mem.writeIntLittle(u32, base.block.code[base.offset..][0..4], inst.toU32()); + mem.writeIntLittle(u32, code, inst.toU32()); } pub fn format(self: Page, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { @@ -173,35 +179,31 @@ pub const Relocation = struct { load, }; - pub fn resolve(self: PageOff, base: Relocation, _: u64, target_addr: u64) !void { + pub fn resolve(self: PageOff, args: ResolveArgs) !void { + const code = args.block.code[args.offset..][0..4]; + switch (self.kind) { .page => { - const actual_target_addr = if (self.addend) |addend| target_addr + addend else target_addr; - const narrowed = @truncate(u12, actual_target_addr); + const target_addr = if (self.addend) |addend| args.target_addr + addend else args.target_addr; + const narrowed = @truncate(u12, target_addr); const op_kind = self.op_kind orelse unreachable; var inst: aarch64.Instruction = blk: { switch (op_kind) { .arithmetic => { break :blk .{ - 
.add_subtract_immediate = mem.bytesToValue( - meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.add_subtract_immediate, - ), - base.block.code[base.offset..][0..4], - ), + .add_subtract_immediate = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.add_subtract_immediate, + ), code), }; }, .load => { break :blk .{ - .load_store_register = mem.bytesToValue( - meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), - base.block.code[base.offset..][0..4], - ), + .load_store_register = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code), }; }, } @@ -226,22 +228,19 @@ pub const Relocation = struct { inst.load_store_register.offset = offset; } - mem.writeIntLittle(u32, base.block.code[base.offset..][0..4], inst.toU32()); + mem.writeIntLittle(u32, code, inst.toU32()); }, .got => { - const narrowed = @truncate(u12, target_addr); + const narrowed = @truncate(u12, args.target_addr); var inst: aarch64.Instruction = .{ - .load_store_register = mem.bytesToValue( - meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), - base.block.code[base.offset..][0..4], - ), + .load_store_register = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code), }; const offset = try math.divExact(u12, narrowed, 8); inst.load_store_register.offset = offset; - mem.writeIntLittle(u32, base.block.code[base.offset..][0..4], inst.toU32()); + mem.writeIntLittle(u32, code, inst.toU32()); }, .tlvp => { const RegInfo = struct { @@ -250,27 +249,21 @@ pub const Relocation = struct { size: u1, }; const reg_info: RegInfo = blk: { - if (isArithmeticOp(base.block.code[base.offset..][0..4])) { - const inst = mem.bytesToValue( - meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.add_subtract_immediate, - ), - base.block.code[base.offset..][0..4], - ); + if (isArithmeticOp(code)) { + const 
inst = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.add_subtract_immediate, + ), code); break :blk .{ .rd = inst.rd, .rn = inst.rn, .size = inst.sf, }; } else { - const inst = mem.bytesToValue( - meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), - base.block.code[base.offset..][0..4], - ); + const inst = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code); break :blk .{ .rd = inst.rt, .rn = inst.rn, @@ -278,7 +271,7 @@ pub const Relocation = struct { }; } }; - const narrowed = @truncate(u12, target_addr); + const narrowed = @truncate(u12, args.target_addr); var inst = aarch64.Instruction{ .add_subtract_immediate = .{ .rd = reg_info.rd, @@ -290,7 +283,7 @@ pub const Relocation = struct { .sf = reg_info.size, }, }; - mem.writeIntLittle(u32, base.block.code[base.offset..][0..4], inst.toU32()); + mem.writeIntLittle(u32, code, inst.toU32()); }, } } @@ -319,9 +312,9 @@ pub const Relocation = struct { }; pub const PointerToGot = struct { - pub fn resolve(_: PointerToGot, base: Relocation, source_addr: u64, target_addr: u64) !void { - const result = try math.cast(i32, @intCast(i64, target_addr) - @intCast(i64, source_addr)); - mem.writeIntLittle(u32, base.block.code[base.offset..][0..4], @bitCast(u32, result)); + pub fn resolve(_: PointerToGot, args: ResolveArgs) !void { + const result = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr)); + mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @bitCast(u32, result)); } pub fn format(self: PointerToGot, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { @@ -336,13 +329,13 @@ pub const Relocation = struct { addend: i64, correction: i4, - pub fn resolve(self: Signed, base: Relocation, source_addr: u64, target_addr: u64) !void { - const actual_target_addr = @intCast(i64, target_addr) + self.addend; + pub fn resolve(self: 
Signed, args: ResolveArgs) !void { + const target_addr = @intCast(i64, args.target_addr) + self.addend; const displacement = try math.cast( i32, - actual_target_addr - @intCast(i64, source_addr) - self.correction - 4, + target_addr - @intCast(i64, args.source_addr) - self.correction - 4, ); - mem.writeIntLittle(u32, base.block.code[base.offset..][0..4], @bitCast(u32, displacement)); + mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @bitCast(u32, displacement)); } pub fn format(self: Signed, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { @@ -362,17 +355,17 @@ pub const Relocation = struct { }, addend: ?i32 = null, - pub fn resolve(self: Load, base: Relocation, source_addr: u64, target_addr: u64) !void { + pub fn resolve(self: Load, args: ResolveArgs) !void { if (self.kind == .tlvp) { // We need to rewrite the opcode from movq to leaq. - base.block.code[base.offset - 2] = 0x8d; + args.block.code[args.offset - 2] = 0x8d; } const addend = if (self.addend) |addend| addend else 0; const displacement = try math.cast( i32, - @intCast(i64, target_addr) - @intCast(i64, source_addr) - 4 + addend, + @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4 + addend, ); - mem.writeIntLittle(u32, base.block.code[base.offset..][0..4], @bitCast(u32, displacement)); + mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @bitCast(u32, displacement)); } pub fn format(self: Load, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { @@ -387,106 +380,27 @@ pub const Relocation = struct { } }; - pub fn resolve(self: Relocation, zld: *Zld) !void { - log.debug("relocating {}", .{self}); - - const source_addr = blk: { - const sym = zld.locals.items[self.block.local_sym_index]; - break :blk sym.payload.regular.address + self.offset; + pub fn resolve(self: Relocation, block: *TextBlock, source_addr: u64, target_addr: u64) !void { + const args = ResolveArgs{ + .block = block, + .offset = 
self.offset, + .source_addr = source_addr, + .target_addr = target_addr, }; - const target_addr = blk: { - const is_via_got = switch (self.payload) { - .pointer_to_got => true, - .page => |page| page.kind == .got, - .page_off => |page_off| page_off.kind == .got, - .load => |load| load.kind == .got, - else => false, - }; - - if (is_via_got) { - const dc_seg = zld.load_commands.items[zld.data_const_segment_cmd_index.?].Segment; - const got = dc_seg.sections.items[zld.got_section_index.?]; - const got_index = self.target.got_index orelse { - log.err("expected GOT entry for symbol '{s}'", .{zld.getString(self.target.strx)}); - log.err(" this is an internal linker error", .{}); - return error.FailedToResolveRelocationTarget; - }; - break :blk got.addr + got_index * @sizeOf(u64); - } - - switch (self.target.payload) { - .regular => |reg| { - const is_tlv = is_tlv: { - const sym = zld.locals.items[self.block.local_sym_index]; - const seg = zld.load_commands.items[sym.payload.regular.segment_id].Segment; - const sect = seg.sections.items[sym.payload.regular.section_id]; - break :is_tlv commands.sectionType(sect) == macho.S_THREAD_LOCAL_VARIABLES; - }; - if (is_tlv) { - // For TLV relocations, the value specified as a relocation is the displacement from the - // TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first - // defined TLV template init section in the following order: - // * wrt to __thread_data if defined, then - // * wrt to __thread_bss - const seg = zld.load_commands.items[zld.data_segment_cmd_index.?].Segment; - const base_address = inner: { - if (zld.tlv_data_section_index) |i| { - break :inner seg.sections.items[i].addr; - } else if (zld.tlv_bss_section_index) |i| { - break :inner seg.sections.items[i].addr; - } else { - log.err("threadlocal variables present but no initializer sections found", .{}); - log.err(" __thread_data not found", .{}); - log.err(" __thread_bss not found", .{}); - return 
error.FailedToResolveRelocationTarget; - } - }; - break :blk reg.address - base_address; - } - - break :blk reg.address; - }, - .proxy => { - if (mem.eql(u8, zld.getString(self.target.strx), "__tlv_bootstrap")) { - break :blk 0; // Dynamically bound by dyld. - } - - const segment = zld.load_commands.items[zld.text_segment_cmd_index.?].Segment; - const stubs = segment.sections.items[zld.stubs_section_index.?]; - const stubs_index = self.target.stubs_index orelse { - // TODO verify in TextBlock that the symbol is indeed dynamically bound. - break :blk 0; // Dynamically bound by dyld. - }; - break :blk stubs.addr + stubs_index * stubs.reserved2; - }, - else => { - log.err("failed to resolve symbol '{s}' as a relocation target", .{ - zld.getString(self.target.strx), - }); - log.err(" this is an internal linker error", .{}); - return error.FailedToResolveRelocationTarget; - }, - } - }; - - log.debug(" | source_addr = 0x{x}", .{source_addr}); - log.debug(" | target_addr = 0x{x}", .{target_addr}); - switch (self.payload) { - .unsigned => |unsigned| try unsigned.resolve(self, source_addr, target_addr), - .branch => |branch| try branch.resolve(self, source_addr, target_addr), - .page => |page| try page.resolve(self, source_addr, target_addr), - .page_off => |page_off| try page_off.resolve(self, source_addr, target_addr), - .pointer_to_got => |pointer_to_got| try pointer_to_got.resolve(self, source_addr, target_addr), - .signed => |signed| try signed.resolve(self, source_addr, target_addr), - .load => |load| try load.resolve(self, source_addr, target_addr), + .unsigned => |unsigned| try unsigned.resolve(args), + .branch => |branch| try branch.resolve(args), + .page => |page| try page.resolve(args), + .page_off => |page_off| try page_off.resolve(args), + .pointer_to_got => |pointer_to_got| try pointer_to_got.resolve(args), + .signed => |signed| try signed.resolve(args), + .load => |load| try load.resolve(args), } } pub fn format(self: Relocation, comptime fmt: []const u8, 
options: std.fmt.FormatOptions, writer: anytype) !void { try std.fmt.format(writer, "Relocation {{ ", .{}); try std.fmt.format(writer, ".offset = {}, ", .{self.offset}); - try std.fmt.format(writer, ".block = {}", .{self.block.local_sym_index}); try std.fmt.format(writer, ".target = {}, ", .{self.target}); switch (self.payload) { @@ -713,7 +627,6 @@ pub const Parser = struct { return Relocation{ .offset = offset, .target = target, - .block = self.block, .payload = undefined, }; } From f519e781c6f952b3646cb646880ab460ad7a6cce Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 15 Jul 2021 18:48:41 +0200 Subject: [PATCH 53/81] zld: move TextBlock into standalone file which should make managing the logic of parsing and resolving relocs that much simpler to parse. --- CMakeLists.txt | 1 + src/link/MachO/Object.zig | 2 +- src/link/MachO/TextBlock.zig | 284 +++++++++++++++++++++++++++++++++++ src/link/MachO/Zld.zig | 277 +--------------------------------- src/link/MachO/reloc.zig | 2 +- 5 files changed, 288 insertions(+), 278 deletions(-) create mode 100644 src/link/MachO/TextBlock.zig diff --git a/CMakeLists.txt b/CMakeLists.txt index 5722f55e48..c807c2ddb6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -582,6 +582,7 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/link/MachO/Dylib.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Object.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Symbol.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/TextBlock.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Trie.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Zld.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/bind.zig" diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 91be941256..b52d9f6885 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -16,7 +16,7 @@ const Allocator = mem.Allocator; const Arch = std.Target.Cpu.Arch; const Relocation = reloc.Relocation; const Symbol = @import("Symbol.zig"); -const TextBlock = Zld.TextBlock; +const TextBlock = 
@import("TextBlock.zig"); const Zld = @import("Zld.zig"); usingnamespace @import("commands.zig"); diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig new file mode 100644 index 0000000000..93763ded18 --- /dev/null +++ b/src/link/MachO/TextBlock.zig @@ -0,0 +1,284 @@ +const TextBlock = @This(); + +const std = @import("std"); +const commands = @import("commands.zig"); +const log = std.log.scoped(.text_block); +const macho = std.macho; +const mem = std.mem; +const reloc = @import("reloc.zig"); + +const Allocator = mem.Allocator; +const Relocation = reloc.Relocation; +const Zld = @import("Zld.zig"); + +allocator: *Allocator, +local_sym_index: u32, +stab: ?Stab = null, +aliases: std.ArrayList(u32), +references: std.AutoArrayHashMap(u32, void), +contained: ?[]SymbolAtOffset = null, +code: []u8, +relocs: std.ArrayList(Relocation), +size: u64, +alignment: u32, +rebases: std.ArrayList(u64), +bindings: std.ArrayList(SymbolAtOffset), +dices: std.ArrayList(macho.data_in_code_entry), +next: ?*TextBlock = null, +prev: ?*TextBlock = null, + +pub const SymbolAtOffset = struct { + local_sym_index: u32, + offset: u64, + stab: ?Stab = null, +}; + +pub const Stab = union(enum) { + function: u64, + static, + global, + + pub fn asNlists(stab: Stab, local_sym_index: u32, zld: *Zld) ![]macho.nlist_64 { + var nlists = std.ArrayList(macho.nlist_64).init(zld.allocator); + defer nlists.deinit(); + + const sym = zld.locals.items[local_sym_index]; + const reg = sym.payload.regular; + + switch (stab) { + .function => |size| { + try nlists.ensureUnusedCapacity(4); + const section_id = reg.sectionId(zld); + nlists.appendAssumeCapacity(.{ + .n_strx = 0, + .n_type = macho.N_BNSYM, + .n_sect = section_id, + .n_desc = 0, + .n_value = reg.address, + }); + nlists.appendAssumeCapacity(.{ + .n_strx = sym.strx, + .n_type = macho.N_FUN, + .n_sect = section_id, + .n_desc = 0, + .n_value = reg.address, + }); + nlists.appendAssumeCapacity(.{ + .n_strx = 0, + .n_type = macho.N_FUN, + 
.n_sect = 0, + .n_desc = 0, + .n_value = size, + }); + nlists.appendAssumeCapacity(.{ + .n_strx = 0, + .n_type = macho.N_ENSYM, + .n_sect = section_id, + .n_desc = 0, + .n_value = size, + }); + }, + .global => { + try nlists.append(.{ + .n_strx = sym.strx, + .n_type = macho.N_GSYM, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + }, + .static => { + try nlists.append(.{ + .n_strx = sym.strx, + .n_type = macho.N_STSYM, + .n_sect = reg.sectionId(zld), + .n_desc = 0, + .n_value = reg.address, + }); + }, + } + + return nlists.toOwnedSlice(); + } +}; + +pub fn init(allocator: *Allocator) TextBlock { + return .{ + .allocator = allocator, + .local_sym_index = undefined, + .aliases = std.ArrayList(u32).init(allocator), + .references = std.AutoArrayHashMap(u32, void).init(allocator), + .code = undefined, + .relocs = std.ArrayList(Relocation).init(allocator), + .size = undefined, + .alignment = undefined, + .rebases = std.ArrayList(u64).init(allocator), + .bindings = std.ArrayList(SymbolAtOffset).init(allocator), + .dices = std.ArrayList(macho.data_in_code_entry).init(allocator), + }; +} + +pub fn deinit(self: *TextBlock) void { + self.aliases.deinit(); + self.references.deinit(); + if (self.contained) |contained| { + self.allocator.free(contained); + } + self.allocator.free(self.code); + self.relocs.deinit(); + self.rebases.deinit(); + self.bindings.deinit(); + self.dices.deinit(); +} + +pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { + for (self.relocs.items) |rel| { + log.debug("relocating {}", .{rel}); + + const source_addr = blk: { + const sym = zld.locals.items[self.local_sym_index]; + break :blk sym.payload.regular.address + rel.offset; + }; + const target_addr = blk: { + const is_via_got = switch (rel.payload) { + .pointer_to_got => true, + .page => |page| page.kind == .got, + .page_off => |page_off| page_off.kind == .got, + .load => |load| load.kind == .got, + else => false, + }; + + if (is_via_got) { + const dc_seg = 
zld.load_commands.items[zld.data_const_segment_cmd_index.?].Segment; + const got = dc_seg.sections.items[zld.got_section_index.?]; + const got_index = rel.target.got_index orelse { + log.err("expected GOT entry for symbol '{s}'", .{zld.getString(rel.target.strx)}); + log.err(" this is an internal linker error", .{}); + return error.FailedToResolveRelocationTarget; + }; + break :blk got.addr + got_index * @sizeOf(u64); + } + + switch (rel.target.payload) { + .regular => |reg| { + const is_tlv = is_tlv: { + const sym = zld.locals.items[self.local_sym_index]; + const seg = zld.load_commands.items[sym.payload.regular.segment_id].Segment; + const sect = seg.sections.items[sym.payload.regular.section_id]; + break :is_tlv commands.sectionType(sect) == macho.S_THREAD_LOCAL_VARIABLES; + }; + if (is_tlv) { + // For TLV relocations, the value specified as a relocation is the displacement from the + // TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first + // defined TLV template init section in the following order: + // * wrt to __thread_data if defined, then + // * wrt to __thread_bss + const seg = zld.load_commands.items[zld.data_segment_cmd_index.?].Segment; + const base_address = inner: { + if (zld.tlv_data_section_index) |i| { + break :inner seg.sections.items[i].addr; + } else if (zld.tlv_bss_section_index) |i| { + break :inner seg.sections.items[i].addr; + } else { + log.err("threadlocal variables present but no initializer sections found", .{}); + log.err(" __thread_data not found", .{}); + log.err(" __thread_bss not found", .{}); + return error.FailedToResolveRelocationTarget; + } + }; + break :blk reg.address - base_address; + } + + break :blk reg.address; + }, + .proxy => { + if (mem.eql(u8, zld.getString(rel.target.strx), "__tlv_bootstrap")) { + break :blk 0; // Dynamically bound by dyld. 
+ } + + const segment = zld.load_commands.items[zld.text_segment_cmd_index.?].Segment; + const stubs = segment.sections.items[zld.stubs_section_index.?]; + const stubs_index = rel.target.stubs_index orelse { + // TODO verify in TextBlock that the symbol is indeed dynamically bound. + break :blk 0; // Dynamically bound by dyld. + }; + break :blk stubs.addr + stubs_index * stubs.reserved2; + }, + else => { + log.err("failed to resolve symbol '{s}' as a relocation target", .{ + zld.getString(rel.target.strx), + }); + log.err(" this is an internal linker error", .{}); + return error.FailedToResolveRelocationTarget; + }, + } + }; + + log.debug(" | source_addr = 0x{x}", .{source_addr}); + log.debug(" | target_addr = 0x{x}", .{target_addr}); + + try rel.resolve(self, source_addr, target_addr); + } +} + +pub fn print_this(self: *const TextBlock, zld: *Zld) void { + log.warn("TextBlock", .{}); + log.warn(" {}: {}", .{ self.local_sym_index, zld.locals.items[self.local_sym_index] }); + if (self.stab) |stab| { + log.warn(" stab: {}", .{stab}); + } + if (self.aliases.items.len > 0) { + log.warn(" aliases:", .{}); + for (self.aliases.items) |index| { + log.warn(" {}: {}", .{ index, zld.locals.items[index] }); + } + } + if (self.references.count() > 0) { + log.warn(" references:", .{}); + for (self.references.keys()) |index| { + log.warn(" {}: {}", .{ index, zld.locals.items[index] }); + } + } + if (self.contained) |contained| { + log.warn(" contained symbols:", .{}); + for (contained) |sym_at_off| { + if (sym_at_off.stab) |stab| { + log.warn(" {}: {}, stab: {}\n", .{ + sym_at_off.offset, + zld.locals.items[sym_at_off.local_sym_index], + stab, + }); + } else { + log.warn(" {}: {}\n", .{ + sym_at_off.offset, + zld.locals.items[sym_at_off.local_sym_index], + }); + } + } + } + log.warn(" code.len = {}", .{self.code.len}); + if (self.relocs.items.len > 0) { + log.warn(" relocations:", .{}); + for (self.relocs.items) |rel| { + log.warn(" {}", .{rel}); + } + } + if 
(self.rebases.items.len > 0) { + log.warn(" rebases: {any}", .{self.rebases.items}); + } + if (self.bindings.items.len > 0) { + log.warn(" bindings: {any}", .{self.bindings.items}); + } + if (self.dices.items.len > 0) { + log.warn(" dices: {any}", .{self.dices.items}); + } + log.warn(" size = {}", .{self.size}); + log.warn(" align = {}", .{self.alignment}); +} + +pub fn print(self: *const TextBlock, zld: *Zld) void { + if (self.prev) |prev| { + prev.print(zld); + } + self.print_this(zld); +} diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index b68b851bb5..ed66652506 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -10,15 +10,14 @@ const macho = std.macho; const math = std.math; const log = std.log.scoped(.zld); const aarch64 = @import("../../codegen/aarch64.zig"); -const reloc = @import("reloc.zig"); const Allocator = mem.Allocator; const Archive = @import("Archive.zig"); const CodeSignature = @import("CodeSignature.zig"); const Dylib = @import("Dylib.zig"); const Object = @import("Object.zig"); -const Relocation = reloc.Relocation; const Symbol = @import("Symbol.zig"); +const TextBlock = @import("TextBlock.zig"); const Trie = @import("Trie.zig"); usingnamespace @import("commands.zig"); @@ -123,280 +122,6 @@ pub const Output = struct { install_name: ?[]const u8 = null, }; -pub const TextBlock = struct { - allocator: *Allocator, - local_sym_index: u32, - stab: ?Stab = null, - aliases: std.ArrayList(u32), - references: std.AutoArrayHashMap(u32, void), - contained: ?[]SymbolAtOffset = null, - code: []u8, - relocs: std.ArrayList(Relocation), - size: u64, - alignment: u32, - rebases: std.ArrayList(u64), - bindings: std.ArrayList(SymbolAtOffset), - dices: std.ArrayList(macho.data_in_code_entry), - next: ?*TextBlock = null, - prev: ?*TextBlock = null, - - pub const SymbolAtOffset = struct { - local_sym_index: u32, - offset: u64, - stab: ?Stab = null, - }; - - pub const Stab = union(enum) { - function: u64, - static, - global, - - pub fn 
asNlists(stab: Stab, local_sym_index: u32, zld: *Zld) ![]macho.nlist_64 { - var nlists = std.ArrayList(macho.nlist_64).init(zld.allocator); - defer nlists.deinit(); - - const sym = zld.locals.items[local_sym_index]; - const reg = sym.payload.regular; - - switch (stab) { - .function => |size| { - try nlists.ensureUnusedCapacity(4); - const section_id = reg.sectionId(zld); - nlists.appendAssumeCapacity(.{ - .n_strx = 0, - .n_type = macho.N_BNSYM, - .n_sect = section_id, - .n_desc = 0, - .n_value = reg.address, - }); - nlists.appendAssumeCapacity(.{ - .n_strx = sym.strx, - .n_type = macho.N_FUN, - .n_sect = section_id, - .n_desc = 0, - .n_value = reg.address, - }); - nlists.appendAssumeCapacity(.{ - .n_strx = 0, - .n_type = macho.N_FUN, - .n_sect = 0, - .n_desc = 0, - .n_value = size, - }); - nlists.appendAssumeCapacity(.{ - .n_strx = 0, - .n_type = macho.N_ENSYM, - .n_sect = section_id, - .n_desc = 0, - .n_value = size, - }); - }, - .global => { - try nlists.append(.{ - .n_strx = sym.strx, - .n_type = macho.N_GSYM, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - }, - .static => { - try nlists.append(.{ - .n_strx = sym.strx, - .n_type = macho.N_STSYM, - .n_sect = reg.sectionId(zld), - .n_desc = 0, - .n_value = reg.address, - }); - }, - } - - return nlists.toOwnedSlice(); - } - }; - - pub fn init(allocator: *Allocator) TextBlock { - return .{ - .allocator = allocator, - .local_sym_index = undefined, - .aliases = std.ArrayList(u32).init(allocator), - .references = std.AutoArrayHashMap(u32, void).init(allocator), - .code = undefined, - .relocs = std.ArrayList(Relocation).init(allocator), - .size = undefined, - .alignment = undefined, - .rebases = std.ArrayList(u64).init(allocator), - .bindings = std.ArrayList(SymbolAtOffset).init(allocator), - .dices = std.ArrayList(macho.data_in_code_entry).init(allocator), - }; - } - - pub fn deinit(self: *TextBlock) void { - self.aliases.deinit(); - self.references.deinit(); - if (self.contained) |contained| { - 
self.allocator.free(contained); - } - self.allocator.free(self.code); - self.relocs.deinit(); - self.rebases.deinit(); - self.bindings.deinit(); - self.dices.deinit(); - } - - pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { - for (self.relocs.items) |rel| { - log.debug("relocating {}", .{rel}); - - const source_addr = blk: { - const sym = zld.locals.items[self.local_sym_index]; - break :blk sym.payload.regular.address + rel.offset; - }; - const target_addr = blk: { - const is_via_got = switch (rel.payload) { - .pointer_to_got => true, - .page => |page| page.kind == .got, - .page_off => |page_off| page_off.kind == .got, - .load => |load| load.kind == .got, - else => false, - }; - - if (is_via_got) { - const dc_seg = zld.load_commands.items[zld.data_const_segment_cmd_index.?].Segment; - const got = dc_seg.sections.items[zld.got_section_index.?]; - const got_index = rel.target.got_index orelse { - log.err("expected GOT entry for symbol '{s}'", .{zld.getString(rel.target.strx)}); - log.err(" this is an internal linker error", .{}); - return error.FailedToResolveRelocationTarget; - }; - break :blk got.addr + got_index * @sizeOf(u64); - } - - switch (rel.target.payload) { - .regular => |reg| { - const is_tlv = is_tlv: { - const sym = zld.locals.items[self.local_sym_index]; - const seg = zld.load_commands.items[sym.payload.regular.segment_id].Segment; - const sect = seg.sections.items[sym.payload.regular.section_id]; - break :is_tlv sectionType(sect) == macho.S_THREAD_LOCAL_VARIABLES; - }; - if (is_tlv) { - // For TLV relocations, the value specified as a relocation is the displacement from the - // TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first - // defined TLV template init section in the following order: - // * wrt to __thread_data if defined, then - // * wrt to __thread_bss - const seg = zld.load_commands.items[zld.data_segment_cmd_index.?].Segment; - const base_address = inner: { - if (zld.tlv_data_section_index) 
|i| { - break :inner seg.sections.items[i].addr; - } else if (zld.tlv_bss_section_index) |i| { - break :inner seg.sections.items[i].addr; - } else { - log.err("threadlocal variables present but no initializer sections found", .{}); - log.err(" __thread_data not found", .{}); - log.err(" __thread_bss not found", .{}); - return error.FailedToResolveRelocationTarget; - } - }; - break :blk reg.address - base_address; - } - - break :blk reg.address; - }, - .proxy => { - if (mem.eql(u8, zld.getString(rel.target.strx), "__tlv_bootstrap")) { - break :blk 0; // Dynamically bound by dyld. - } - - const segment = zld.load_commands.items[zld.text_segment_cmd_index.?].Segment; - const stubs = segment.sections.items[zld.stubs_section_index.?]; - const stubs_index = rel.target.stubs_index orelse { - // TODO verify in TextBlock that the symbol is indeed dynamically bound. - break :blk 0; // Dynamically bound by dyld. - }; - break :blk stubs.addr + stubs_index * stubs.reserved2; - }, - else => { - log.err("failed to resolve symbol '{s}' as a relocation target", .{ - zld.getString(rel.target.strx), - }); - log.err(" this is an internal linker error", .{}); - return error.FailedToResolveRelocationTarget; - }, - } - }; - - log.debug(" | source_addr = 0x{x}", .{source_addr}); - log.debug(" | target_addr = 0x{x}", .{target_addr}); - - try rel.resolve(self, source_addr, target_addr); - } - } - - pub fn print_this(self: *const TextBlock, zld: *Zld) void { - log.warn("TextBlock", .{}); - log.warn(" {}: {}", .{ self.local_sym_index, zld.locals.items[self.local_sym_index] }); - if (self.stab) |stab| { - log.warn(" stab: {}", .{stab}); - } - if (self.aliases.items.len > 0) { - log.warn(" aliases:", .{}); - for (self.aliases.items) |index| { - log.warn(" {}: {}", .{ index, zld.locals.items[index] }); - } - } - if (self.references.count() > 0) { - log.warn(" references:", .{}); - for (self.references.keys()) |index| { - log.warn(" {}: {}", .{ index, zld.locals.items[index] }); - } - } - if 
(self.contained) |contained| { - log.warn(" contained symbols:", .{}); - for (contained) |sym_at_off| { - if (sym_at_off.stab) |stab| { - log.warn(" {}: {}, stab: {}\n", .{ - sym_at_off.offset, - zld.locals.items[sym_at_off.local_sym_index], - stab, - }); - } else { - log.warn(" {}: {}\n", .{ - sym_at_off.offset, - zld.locals.items[sym_at_off.local_sym_index], - }); - } - } - } - log.warn(" code.len = {}", .{self.code.len}); - if (self.relocs.items.len > 0) { - log.warn(" relocations:", .{}); - for (self.relocs.items) |rel| { - log.warn(" {}", .{rel}); - } - } - if (self.rebases.items.len > 0) { - log.warn(" rebases: {any}", .{self.rebases.items}); - } - if (self.bindings.items.len > 0) { - log.warn(" bindings: {any}", .{self.bindings.items}); - } - if (self.dices.items.len > 0) { - log.warn(" dices: {any}", .{self.dices.items}); - } - log.warn(" size = {}", .{self.size}); - log.warn(" align = {}", .{self.alignment}); - } - - pub fn print(self: *const TextBlock, zld: *Zld) void { - if (self.prev) |prev| { - prev.print(zld); - } - self.print_this(zld); - } -}; - /// Default path to dyld const DEFAULT_DYLD_PATH: [*:0]const u8 = "/usr/lib/dyld"; diff --git a/src/link/MachO/reloc.zig b/src/link/MachO/reloc.zig index 1d7e6ac51a..1d0c0466d6 100644 --- a/src/link/MachO/reloc.zig +++ b/src/link/MachO/reloc.zig @@ -12,7 +12,7 @@ const Allocator = mem.Allocator; const Arch = std.Target.Cpu.Arch; const Object = @import("Object.zig"); const Symbol = @import("Symbol.zig"); -const TextBlock = Zld.TextBlock; +const TextBlock = @import("TextBlock.zig"); const Zld = @import("Zld.zig"); pub const Relocation = struct { From 5a2bea29315158bc05fb4b09842bbb9ae0ddfada Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 16 Jul 2021 00:51:21 +0200 Subject: [PATCH 54/81] zld: draft symbol resolver on macho.nlist_64 only --- src/link/MachO/Object.zig | 5 - src/link/MachO/Zld.zig | 801 ++++++++++++++++++++++++++------------ 2 files changed, 545 insertions(+), 261 deletions(-) diff --git 
a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index b52d9f6885..e87a74e80c 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -56,9 +56,6 @@ tu_name: ?[]const u8 = null, tu_comp_dir: ?[]const u8 = null, mtime: ?u64 = null, -symbols: std.ArrayListUnmanaged(*Symbol) = .{}, -sections_as_symbols: std.AutoHashMapUnmanaged(u8, *Symbol) = .{}, - text_blocks: std.ArrayListUnmanaged(*TextBlock) = .{}, const DebugInfo = struct { @@ -165,8 +162,6 @@ pub fn deinit(self: *Object) void { self.data_in_code_entries.deinit(self.allocator); self.symtab.deinit(self.allocator); self.strtab.deinit(self.allocator); - self.symbols.deinit(self.allocator); - self.sections_as_symbols.deinit(self.allocator); self.text_blocks.deinit(self.allocator); if (self.debug_info) |*db| { diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index ed66652506..d6dd9f597c 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -16,7 +16,6 @@ const Archive = @import("Archive.zig"); const CodeSignature = @import("CodeSignature.zig"); const Dylib = @import("Dylib.zig"); const Object = @import("Object.zig"); -const Symbol = @import("Symbol.zig"); const TextBlock = @import("TextBlock.zig"); const Trie = @import("Trie.zig"); @@ -100,22 +99,55 @@ objc_selrefs_section_index: ?u16 = null, objc_classrefs_section_index: ?u16 = null, objc_data_section_index: ?u16 = null, -locals: std.ArrayListUnmanaged(*Symbol) = .{}, -imports: std.ArrayListUnmanaged(*Symbol) = .{}, -globals: std.StringArrayHashMapUnmanaged(*Symbol) = .{}, +locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, +globals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, +imports: std.ArrayListUnmanaged(macho.nlist_64) = .{}, +undefs: std.ArrayListUnmanaged(macho.nlist_64) = .{}, +tentatives: std.ArrayListUnmanaged(macho.nlist_64) = .{}, +symbol_resolver: std.StringArrayHashMapUnmanaged(SymbolWithLoc) = .{}, +object_mapping: std.AutoHashMapUnmanaged(u16, []u32) = .{}, -stubs: 
std.ArrayListUnmanaged(*Symbol) = .{}, -got_entries: std.ArrayListUnmanaged(*Symbol) = .{}, +strtab: std.ArrayListUnmanaged(u8) = .{}, + +// stubs: std.ArrayListUnmanaged(*Symbol) = .{}, +got_entries: std.ArrayListUnmanaged(GotEntry) = .{}, stub_helper_stubs_start_off: ?u64 = null, blocks: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}, -strtab: std.ArrayListUnmanaged(u8) = .{}, - has_dices: bool = false, has_stabs: bool = false, +const SymbolWithLoc = struct { + // Table where the symbol can be found. + where: enum { + global, + import, + undef, + tentative, + }, + where_index: u32, + local_sym_index: u32 = 0, + file: u16 = 0, +}; + +pub const GotEntry = struct { + /// GOT entry can either be a local pointer or an extern (nonlazy) import. + kind: enum { + local, + import, + }, + + /// Id to the macho.nlist_64 from the respective table: either locals or nonlazy imports. + /// TODO I'm more and more inclined to just manage a single, max two symbol tables + /// rather than 4 as we currently do, but I'll follow up in the future PR. + local_sym_index: u32, + + /// Index of this entry in the GOT. 
+ got_index: u32, +}; + pub const Output = struct { tag: enum { exe, dylib }, path: []const u8, @@ -130,7 +162,7 @@ pub fn init(allocator: *Allocator) !Zld { } pub fn deinit(self: *Zld) void { - self.stubs.deinit(self.allocator); + // self.stubs.deinit(self.allocator); self.got_entries.deinit(self.allocator); for (self.load_commands.items) |*lc| { @@ -156,20 +188,24 @@ pub fn deinit(self: *Zld) void { } self.dylibs.deinit(self.allocator); - for (self.imports.items) |sym| { - self.allocator.destroy(sym); - } - self.imports.deinit(self.allocator); - - for (self.locals.items) |sym| { - self.allocator.destroy(sym); - } self.locals.deinit(self.allocator); + self.globals.deinit(self.allocator); + self.imports.deinit(self.allocator); + self.undefs.deinit(self.allocator); + self.tentatives.deinit(self.allocator); - for (self.globals.keys()) |key| { + for (self.symbol_resolver.keys()) |key| { self.allocator.free(key); } - self.globals.deinit(self.allocator); + self.symbol_resolver.deinit(self.allocator); + + { + var it = self.object_mapping.valueIterator(); + while (it.next()) |value_ptr| { + self.allocator.free(value_ptr.*); + } + } + self.object_mapping.deinit(self.allocator); self.strtab.deinit(self.allocator); @@ -213,28 +249,73 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg try self.parseInputFiles(files, args.syslibroot); try self.parseLibs(args.libs, args.syslibroot); try self.resolveSymbols(); - try self.parseTextBlocks(); - try self.sortSections(); - try self.addRpaths(args.rpaths); - try self.addDataInCodeLC(); - try self.addCodeSignatureLC(); - try self.allocateTextSegment(); - try self.allocateDataConstSegment(); - try self.allocateDataSegment(); - self.allocateLinkeditSegment(); - try self.allocateTextBlocks(); - // var it = self.blocks.iterator(); - // while (it.next()) |entry| { - // const seg = self.load_commands.items[entry.key_ptr.seg].Segment; - // const sect = seg.sections.items[entry.key_ptr.sect]; + 
log.warn("locals", .{}); + for (self.locals.items) |sym| { + log.warn(" | {s}: {}", .{ self.getString(sym.n_strx), sym }); + } - // log.warn("\n\n{s},{s} contents:", .{ segmentName(sect), sectionName(sect) }); - // log.warn(" {}", .{sect}); - // entry.value_ptr.*.print(self); - // } + log.warn("globals", .{}); + for (self.globals.items) |sym| { + log.warn(" | {s}: {}", .{ self.getString(sym.n_strx), sym }); + } - try self.flush(); + log.warn("tentatives", .{}); + for (self.tentatives.items) |sym| { + log.warn(" | {s}: {}", .{ self.getString(sym.n_strx), sym }); + } + + log.warn("undefines", .{}); + for (self.undefs.items) |sym| { + log.warn(" | {s}: {}", .{ self.getString(sym.n_strx), sym }); + } + + log.warn("imports", .{}); + for (self.imports.items) |sym| { + log.warn(" | {s}: {}", .{ self.getString(sym.n_strx), sym }); + } + + log.warn("symbol resolver", .{}); + for (self.symbol_resolver.keys()) |key| { + log.warn(" | {s} => {}", .{ key, self.symbol_resolver.get(key).? }); + } + + log.warn("mappings", .{}); + for (self.objects.items) |object, id| { + const object_id = @intCast(u16, id); + log.warn(" in object {s}", .{object.name.?}); + for (object.symtab.items) |sym, sym_id| { + if (self.localSymIndex(object_id, @intCast(u32, sym_id))) |local_id| { + log.warn(" | {d} => {d}", .{ sym_id, local_id }); + } else { + log.warn(" | {d} no local mapping for {s}", .{ sym_id, object.getString(sym.n_strx) }); + } + } + } + + return error.TODO; + // try self.parseTextBlocks(); + // try self.sortSections(); + // try self.addRpaths(args.rpaths); + // try self.addDataInCodeLC(); + // try self.addCodeSignatureLC(); + // try self.allocateTextSegment(); + // try self.allocateDataConstSegment(); + // try self.allocateDataSegment(); + // self.allocateLinkeditSegment(); + // try self.allocateTextBlocks(); + + // // var it = self.blocks.iterator(); + // // while (it.next()) |entry| { + // // const seg = self.load_commands.items[entry.key_ptr.seg].Segment; + // // const sect = 
seg.sections.items[entry.key_ptr.sect]; + + // // log.warn("\n\n{s},{s} contents:", .{ segmentName(sect), sectionName(sect) }); + // // log.warn(" {}", .{sect}); + // // entry.value_ptr.*.print(self); + // // } + + // try self.flush(); } fn parseInputFiles(self: *Zld, files: []const []const u8, syslibroot: ?[]const u8) !void { @@ -1328,130 +1409,242 @@ fn writeStubInStubHelper(self: *Zld, index: u32) !void { try self.file.?.pwriteAll(code, stub_off); } -fn resolveSymbolsInObject(self: *Zld, object: *Object) !void { - log.debug("resolving symbols in '{s}'", .{object.name}); +fn resolveSymbolsInObject(self: *Zld, object_id: u16) !void { + const object = self.objects.items[object_id]; - for (object.symtab.items) |sym| { + log.warn("resolving symbols in '{s}'", .{object.name}); + + const mapping = try self.allocator.alloc(u32, object.symtab.items.len); + mem.set(u32, mapping, 0); + try self.object_mapping.putNoClobber(self.allocator, object_id, mapping); + + for (object.symtab.items) |sym, id| { + const sym_id = @intCast(u32, id); const sym_name = object.getString(sym.n_strx); - if (Symbol.isStab(sym)) { - log.err("unhandled symbol type: stab {s}", .{sym_name}); - log.err(" | first definition in {s}", .{object.name.?}); + if (symbolIsStab(sym)) { + log.err("unhandled symbol type: stab", .{}); + log.err(" symbol '{s}'", .{sym_name}); + log.err(" first definition in '{s}'", .{object.name.?}); return error.UnhandledSymbolType; } - if (Symbol.isIndr(sym)) { - log.err("unhandled symbol type: indirect {s}", .{sym_name}); - log.err(" | first definition in {s}", .{object.name.?}); + if (symbolIsIndr(sym)) { + log.err("unhandled symbol type: indirect", .{}); + log.err(" symbol '{s}'", .{sym_name}); + log.err(" first definition in '{s}'", .{object.name.?}); return error.UnhandledSymbolType; } - if (Symbol.isAbs(sym)) { - log.err("unhandled symbol type: absolute {s}", .{sym_name}); - log.err(" | first definition in {s}", .{object.name.?}); + if (symbolIsAbs(sym)) { + 
log.err("unhandled symbol type: absolute", .{}); + log.err(" symbol '{s}'", .{sym_name}); + log.err(" first definition in '{s}'", .{object.name.?}); return error.UnhandledSymbolType; } - if (Symbol.isSect(sym) and !Symbol.isExt(sym)) { - // Regular symbol local to translation unit - const symbol = try self.allocator.create(Symbol); - symbol.* = .{ - .strx = try self.makeString(sym_name), - .payload = .{ - .regular = .{ - .linkage = .translation_unit, - .address = sym.n_value, - .weak_ref = Symbol.isWeakRef(sym), - .file = object, - .local_sym_index = @intCast(u32, self.locals.items.len), - }, - }, - }; - try self.locals.append(self.allocator, symbol); - try object.symbols.append(self.allocator, symbol); - continue; - } - - const symbol = self.globals.get(sym_name) orelse symbol: { - // Insert new global symbol. - const symbol = try self.allocator.create(Symbol); - symbol.* = .{ - .strx = try self.makeString(sym_name), - .payload = .{ .undef = .{ .file = object } }, - }; - const alloc_name = try self.allocator.dupe(u8, sym_name); - try self.globals.putNoClobber(self.allocator, alloc_name, symbol); - break :symbol symbol; - }; - - if (Symbol.isSect(sym)) { - // Global symbol - const linkage: Symbol.Regular.Linkage = if (Symbol.isWeakDef(sym) or Symbol.isPext(sym)) - .linkage_unit - else - .global; - - const should_update = if (symbol.payload == .regular) blk: { - if (symbol.payload.regular.linkage == .global and linkage == .global) { - log.err("symbol '{s}' defined multiple times", .{sym_name}); - log.err(" | first definition in {s}", .{symbol.payload.regular.file.?.name.?}); - log.err(" | next definition in {s}", .{object.name.?}); - return error.MultipleSymbolDefinitions; + if (symbolIsSect(sym)) { + // Defined symbol regardless of scope lands in the locals symbol table. 
+ const n_strx = blk: { + if (self.symbol_resolver.get(sym_name)) |resolv| { + switch (resolv.where) { + .global => break :blk self.globals.items[resolv.where_index].n_strx, + .tentative => break :blk self.tentatives.items[resolv.where_index].n_strx, + .undef => break :blk self.undefs.items[resolv.where_index].n_strx, + .import => unreachable, + } } - break :blk symbol.payload.regular.linkage != .global; - } else true; + break :blk try self.makeString(sym_name); + }; + const local_sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(self.allocator, .{ + .n_strx = n_strx, + .n_type = macho.N_SECT, + .n_sect = 0, + .n_desc = 0, + .n_value = sym.n_value, + }); + mapping[sym_id] = local_sym_index; - if (should_update) { - symbol.payload = .{ - .regular = .{ - .linkage = linkage, - .address = sym.n_value, - .weak_ref = Symbol.isWeakRef(sym), - .file = object, - }, - }; - } - } else if (sym.n_value != 0) { - // Tentative definition - const should_update = switch (symbol.payload) { - .tentative => |tent| tent.size < sym.n_value, - .undef => true, - else => false, + // If the symbol's scope is not local aka translation unit, then we need work out + // if we should save the symbol as a global, or potentially flag the error. 
+ if (!symbolIsExt(sym)) continue; + + const local = self.locals.items[local_sym_index]; + const resolv = self.symbol_resolver.getPtr(sym_name) orelse { + const global_sym_index = @intCast(u32, self.globals.items.len); + try self.globals.append(self.allocator, .{ + .n_strx = n_strx, + .n_type = sym.n_type, + .n_sect = 0, + .n_desc = sym.n_desc, + .n_value = sym.n_value, + }); + try self.symbol_resolver.putNoClobber(self.allocator, try self.allocator.dupe(u8, sym_name), .{ + .where = .global, + .where_index = global_sym_index, + .local_sym_index = local_sym_index, + .file = object_id, + }); + continue; }; - if (should_update) { - symbol.payload = .{ - .tentative = .{ - .size = sym.n_value, - .alignment = (sym.n_desc >> 8) & 0x0f, - .file = object, - }, - }; - } - } + switch (resolv.where) { + .import => unreachable, + .global => { + const global = &self.globals.items[resolv.where_index]; - try object.symbols.append(self.allocator, symbol); + if (!(symbolIsWeakDef(sym) and symbolIsPext(sym)) and + !(symbolIsWeakDef(global.*) and symbolIsPext(global.*))) + { + log.err("symbol '{s}' defined multiple times", .{sym_name}); + log.err(" first definition in '{s}'", .{self.objects.items[resolv.file].name.?}); + log.err(" next definition in '{s}'", .{object.name.?}); + return error.MultipleSymbolDefinitions; + } + + if (symbolIsWeakDef(sym) or symbolIsPext(sym)) continue; // Current symbol is weak, so skip it. + + // Otherwise, update the resolver and the global symbol. 
+ global.n_type = sym.n_type; + resolv.local_sym_index = local_sym_index; + resolv.file = object_id; + + continue; + }, + .undef => { + const undef = &self.undefs.items[resolv.where_index]; + undef.* = .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + }, + .tentative => { + const tentative = &self.tentatives.items[resolv.where_index]; + tentative.* = .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + }, + } + + const global_sym_index = @intCast(u32, self.globals.items.len); + try self.globals.append(self.allocator, .{ + .n_strx = local.n_strx, + .n_type = sym.n_type, + .n_sect = 0, + .n_desc = sym.n_desc, + .n_value = sym.n_value, + }); + resolv.* = .{ + .where = .global, + .where_index = global_sym_index, + .local_sym_index = local_sym_index, + .file = object_id, + }; + } else if (symbolIsTentative(sym)) { + // Symbol is a tentative definition. + const resolv = self.symbol_resolver.getPtr(sym_name) orelse { + const tent_sym_index = @intCast(u32, self.tentatives.items.len); + try self.tentatives.append(self.allocator, .{ + .n_strx = try self.makeString(sym_name), + .n_type = sym.n_type, + .n_sect = 0, + .n_desc = sym.n_desc, + .n_value = sym.n_value, + }); + try self.symbol_resolver.putNoClobber(self.allocator, try self.allocator.dupe(u8, sym_name), .{ + .where = .tentative, + .where_index = tent_sym_index, + .file = object_id, + }); + continue; + }; + + switch (resolv.where) { + .import => unreachable, + .global => {}, + .undef => { + const undef = &self.undefs.items[resolv.where_index]; + const tent_sym_index = @intCast(u32, self.tentatives.items.len); + try self.tentatives.append(self.allocator, .{ + .n_strx = undef.n_strx, + .n_type = sym.n_type, + .n_sect = 0, + .n_desc = sym.n_desc, + .n_value = sym.n_value, + }); + resolv.* = .{ + .where = .tentative, + .where_index = tent_sym_index, + .file = object_id, + }; + undef.* = .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + 
.n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + }, + .tentative => { + const tentative = &self.tentatives.items[resolv.where_index]; + if (tentative.n_value >= sym.n_value) continue; + + tentative.n_desc = sym.n_desc; + tentative.n_value = sym.n_value; + resolv.file = object_id; + }, + } + } else { + // Symbol is undefined. + if (self.symbol_resolver.contains(sym_name)) continue; + + const undef_sym_index = @intCast(u32, self.undefs.items.len); + try self.undefs.append(self.allocator, .{ + .n_strx = try self.makeString(sym_name), + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + try self.symbol_resolver.putNoClobber(self.allocator, try self.allocator.dupe(u8, sym_name), .{ + .where = .undef, + .where_index = undef_sym_index, + .file = object_id, + }); + } } } fn resolveSymbols(self: *Zld) !void { // TODO mimicking insertion of null symbol from incremental linker. // This will need to moved. - const null_sym = try self.allocator.create(Symbol); - null_sym.* = .{ .strx = 0, .payload = .{ .undef = .{} } }; - try self.locals.append(self.allocator, null_sym); + try self.locals.append(self.allocator, .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + try self.strtab.append(self.allocator, 0); // First pass, resolve symbols in provided objects. - for (self.objects.items) |object| { - try self.resolveSymbolsInObject(object); + for (self.objects.items) |_, object_id| { + try self.resolveSymbolsInObject(@intCast(u16, object_id)); } // Second pass, resolve symbols in static libraries. - var sym_it = self.globals.iterator(); - while (sym_it.next()) |entry| { - const sym_name = entry.key_ptr.*; - const symbol = entry.value_ptr.*; - if (symbol.payload != .undef) continue; + loop: for (self.undefs.items) |sym| { + if (symbolIsNull(sym)) continue; + + const sym_name = self.getString(sym.n_strx); for (self.archives.items) |archive| { // Check if the entry exists in a static archive. 
@@ -1462,167 +1655,184 @@ fn resolveSymbols(self: *Zld) !void { assert(offsets.items.len > 0); const object = try archive.parseObject(offsets.items[0]); + const object_id = @intCast(u16, self.objects.items.len); try self.objects.append(self.allocator, object); - try self.resolveSymbolsInObject(object); + try self.resolveSymbolsInObject(object_id); - sym_it = self.globals.iterator(); - break; + continue :loop; } } - // Put any globally defined regular symbol as local. // Convert any tentative definition into a regular symbol and allocate // text blocks for each tentative defintion. - for (self.globals.values()) |symbol| { - switch (symbol.payload) { - .regular => |*reg| { - reg.local_sym_index = @intCast(u32, self.locals.items.len); - try self.locals.append(self.allocator, symbol); - }, - .tentative => |tent| { - const match: MatchingSection = blk: { - if (self.common_section_index == null) { - const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.common_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__common", .{ - .flags = macho.S_ZEROFILL, - }); - } - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.common_section_index.?, - }; - }; + for (self.tentatives.items) |sym| { + const sym_name = self.getString(sym.n_strx); + const match: MatchingSection = blk: { + if (self.common_section_index == null) { + const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + self.common_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.allocator, "__common", .{ + .flags = macho.S_ZEROFILL, + }); + } + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.common_section_index.?, + }; + }; - const size = tent.size; - const code = try self.allocator.alloc(u8, size); - mem.set(u8, code, 0); - const alignment = tent.alignment; - const local_sym_index = @intCast(u32, self.locals.items.len); + 
const size = sym.n_value; + const code = try self.allocator.alloc(u8, size); + mem.set(u8, code, 0); + const alignment = (sym.n_desc >> 8) & 0x0f; - symbol.payload = .{ - .regular = .{ - .linkage = .global, - .segment_id = self.data_segment_cmd_index.?, - .section_id = self.common_section_index.?, - .local_sym_index = local_sym_index, - }, - }; - try self.locals.append(self.allocator, symbol); + const resolv = self.symbol_resolver.getPtr(sym_name) orelse unreachable; + const local_sym_index = @intCast(u32, self.locals.items.len); + var nlist = macho.nlist_64{ + .n_strx = sym.n_strx, + .n_type = macho.N_SECT, + .n_sect = self.sectionId(match), + .n_desc = 0, + .n_value = 0, + }; + try self.locals.append(self.allocator, nlist); + const global_sym_index = @intCast(u32, self.globals.items.len); + nlist.n_type |= macho.N_EXT; + try self.globals.append(self.allocator, nlist); + resolv.* = .{ + .where = .global, + .where_index = global_sym_index, + .local_sym_index = local_sym_index, + }; - const block = try self.allocator.create(TextBlock); - errdefer self.allocator.destroy(block); + const block = try self.allocator.create(TextBlock); + errdefer self.allocator.destroy(block); - block.* = TextBlock.init(self.allocator); - block.local_sym_index = local_sym_index; - block.code = code; - block.size = size; - block.alignment = alignment; + block.* = TextBlock.init(self.allocator); + block.local_sym_index = local_sym_index; + block.code = code; + block.size = size; + block.alignment = alignment; - // Update target section's metadata - // TODO should we update segment's size here too? - // How does it tie with incremental space allocs? 
- const tseg = &self.load_commands.items[match.seg].Segment; - const tsect = &tseg.sections.items[match.sect]; - const new_alignment = math.max(tsect.@"align", block.alignment); - const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); - const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; - tsect.size = new_size; - tsect.@"align" = new_alignment; + // Update target section's metadata + // TODO should we update segment's size here too? + // How does it tie with incremental space allocs? + const tseg = &self.load_commands.items[match.seg].Segment; + const tsect = &tseg.sections.items[match.sect]; + const new_alignment = math.max(tsect.@"align", block.alignment); + const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); + const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; + tsect.size = new_size; + tsect.@"align" = new_alignment; - if (self.blocks.getPtr(match)) |last| { - last.*.next = block; - block.prev = last.*; - last.* = block; - } else { - try self.blocks.putNoClobber(self.allocator, match, block); - } - }, - else => {}, + if (self.blocks.getPtr(match)) |last| { + last.*.next = block; + block.prev = last.*; + last.* = block; + } else { + try self.blocks.putNoClobber(self.allocator, match, block); } } // Third pass, resolve symbols in dynamic libraries. { // Put dyld_stub_binder as an undefined special symbol. 
- const symbol = try self.allocator.create(Symbol); - symbol.* = .{ - .strx = try self.makeString("dyld_stub_binder"), - .payload = .{ .undef = .{} }, - }; - const index = @intCast(u32, self.got_entries.items.len); - symbol.got_index = index; - try self.got_entries.append(self.allocator, symbol); - const alloc_name = try self.allocator.dupe(u8, "dyld_stub_binder"); - try self.globals.putNoClobber(self.allocator, alloc_name, symbol); + const undef_sym_index = @intCast(u32, self.undefs.items.len); + try self.undefs.append(self.allocator, .{ + .n_strx = try self.makeString("dyld_stub_binder"), + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + try self.symbol_resolver.putNoClobber(self.allocator, try self.allocator.dupe(u8, "dyld_stub_binder"), .{ + .where = .undef, + .where_index = undef_sym_index, + }); } var referenced = std.AutoHashMap(*Dylib, void).init(self.allocator); defer referenced.deinit(); - loop: for (self.globals.keys()) |sym_name| { - const symbol = self.globals.get(sym_name).?; - if (symbol.payload != .undef) continue; + loop: for (self.undefs.items) |sym| { + if (symbolIsNull(sym)) continue; + const sym_name = self.getString(sym.n_strx); for (self.dylibs.items) |dylib| { if (!dylib.symbols.contains(sym_name)) continue; - try referenced.put(dylib, {}); - const index = @intCast(u32, self.imports.items.len); - symbol.payload = .{ - .proxy = .{ - .file = dylib, - .local_sym_index = index, - }, + if (!referenced.contains(dylib)) { + // Add LC_LOAD_DYLIB load command for each referenced dylib/stub. 
+ dylib.ordinal = self.next_dylib_ordinal; + const dylib_id = dylib.id orelse unreachable; + var dylib_cmd = try createLoadDylibCommand( + self.allocator, + dylib_id.name, + dylib_id.timestamp, + dylib_id.current_version, + dylib_id.compatibility_version, + ); + errdefer dylib_cmd.deinit(self.allocator); + try self.load_commands.append(self.allocator, .{ .Dylib = dylib_cmd }); + self.next_dylib_ordinal += 1; + try referenced.putNoClobber(dylib, {}); + } + + const resolv = self.symbol_resolver.getPtr(sym_name) orelse unreachable; + const undef = &self.undefs.items[resolv.where_index]; + const import_sym_index = @intCast(u32, self.imports.items.len); + try self.imports.append(self.allocator, .{ + .n_strx = undef.n_strx, + .n_type = macho.N_UNDF | macho.N_EXT, + .n_sect = 0, + .n_desc = (dylib.ordinal.? * macho.N_SYMBOL_RESOLVER) | macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY, + .n_value = 0, + }); + resolv.* = .{ + .where = .import, + .where_index = import_sym_index, }; - try self.imports.append(self.allocator, symbol); + undef.* = .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + continue :loop; } } - // Add LC_LOAD_DYLIB load command for each referenced dylib/stub. - var it = referenced.iterator(); - while (it.next()) |entry| { - const dylib = entry.key_ptr.*; - dylib.ordinal = self.next_dylib_ordinal; - const dylib_id = dylib.id orelse unreachable; - var dylib_cmd = try createLoadDylibCommand( - self.allocator, - dylib_id.name, - dylib_id.timestamp, - dylib_id.current_version, - dylib_id.compatibility_version, - ); - errdefer dylib_cmd.deinit(self.allocator); - try self.load_commands.append(self.allocator, .{ .Dylib = dylib_cmd }); - self.next_dylib_ordinal += 1; - } - // Fourth pass, handle synthetic symbols and flag any undefined references. 
- if (self.globals.get("___dso_handle")) |symbol| { - if (symbol.payload == .undef) { - const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - symbol.payload = .{ - .regular = .{ - .linkage = .translation_unit, - .address = seg.inner.vmaddr, - .weak_ref = true, - .local_sym_index = @intCast(u32, self.locals.items.len), - }, - }; - try self.locals.append(self.allocator, symbol); - } + if (self.symbol_resolver.getPtr("___dso_handle")) |resolv| blk: { + if (resolv.where != .undef) break :blk; + + const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const undef = &self.undefs.items[resolv.where_index]; + const global_sym_index = @intCast(u32, self.globals.items.len); + try self.globals.append(self.allocator, .{ + .n_strx = undef.n_strx, + .n_type = macho.N_PEXT | macho.N_EXT | macho.N_SECT, + .n_sect = 0, + .n_desc = macho.N_WEAK_DEF, + .n_value = seg.inner.vmaddr, + }); + resolv.* = .{ + .where = .global, + .where_index = global_sym_index, + }; } var has_undefined = false; - for (self.globals.keys()) |sym_name| { - const symbol = self.globals.get(sym_name).?; - if (symbol.payload != .undef) continue; + for (self.undefs.items) |sym| { + if (symbolIsNull(sym)) continue; + + const sym_name = self.getString(sym.n_strx); + const resolv = self.symbol_resolver.get(sym_name) orelse unreachable; log.err("undefined reference to symbol '{s}'", .{sym_name}); - if (symbol.payload.undef.file) |file| { - log.err(" | referenced in {s}", .{file.name.?}); - } + log.err(" first referenced in '{s}'", .{self.objects.items[resolv.file].name.?}); has_undefined = true; } @@ -2776,3 +2986,82 @@ pub fn getString(self: *Zld, off: u32) []const u8 { assert(off < self.strtab.items.len); return mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr + off)); } + +fn localSymIndex(self: Zld, object_id: u16, orig_id: u32) ?u32 { + const mapping = self.object_mapping.get(object_id) orelse return null; + const local_sym_index = mapping[orig_id]; + if 
(local_sym_index == 0) { + return null; + } + return local_sym_index; +} + +pub fn symbolIsStab(sym: macho.nlist_64) bool { + return (macho.N_STAB & sym.n_type) != 0; +} + +pub fn symbolIsPext(sym: macho.nlist_64) bool { + return (macho.N_PEXT & sym.n_type) != 0; +} + +pub fn symbolIsExt(sym: macho.nlist_64) bool { + return (macho.N_EXT & sym.n_type) != 0; +} + +pub fn symbolIsSect(sym: macho.nlist_64) bool { + const type_ = macho.N_TYPE & sym.n_type; + return type_ == macho.N_SECT; +} + +pub fn symbolIsUndf(sym: macho.nlist_64) bool { + const type_ = macho.N_TYPE & sym.n_type; + return type_ == macho.N_UNDF; +} + +pub fn symbolIsIndr(sym: macho.nlist_64) bool { + const type_ = macho.N_TYPE & sym.n_type; + return type_ == macho.N_INDR; +} + +pub fn symbolIsAbs(sym: macho.nlist_64) bool { + const type_ = macho.N_TYPE & sym.n_type; + return type_ == macho.N_ABS; +} + +pub fn symbolIsWeakDef(sym: macho.nlist_64) bool { + return (sym.n_desc & macho.N_WEAK_DEF) != 0; +} + +pub fn symbolIsWeakRef(sym: macho.nlist_64) bool { + return (sym.n_desc & macho.N_WEAK_REF) != 0; +} + +pub fn symbolIsTentative(sym: macho.nlist_64) bool { + if (!symbolIsUndf(sym)) return false; + return sym.n_value != 0; +} + +pub fn symbolIsNull(sym: macho.nlist_64) bool { + return sym.n_value == 0 and sym.n_desc == 0 and sym.n_type == 0 and sym.n_strx == 0 and sym.n_sect == 0; +} + +pub fn symbolIsTemp(self: Zld, sym: macho.nlist_64) bool { + if (!symbolIsSect(sym)) return false; + if (symbolIsExt(sym)) return false; + const sym_name = self.getString(sym.n_strx); + return mem.startsWith(u8, sym_name, "l") or mem.startsWith(u8, sym_name, "L"); +} + +pub fn sectionId(self: Zld, match: MatchingSection) u8 { + // TODO there might be a more generic way of doing this. 
+ var section: u8 = 0; + for (self.load_commands.items) |cmd, cmd_id| { + if (cmd != .Segment) break; + if (cmd_id == match.seg) { + section += @intCast(u8, match.sect) + 1; + break; + } + section += @intCast(u8, cmd.Segment.sections.items.len); + } + return section; +} From 54a403d4ff9e20daf1843725012ae44ec828a833 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 16 Jul 2021 17:18:53 +0200 Subject: [PATCH 55/81] zld: replace parsed reloc with a simple wrapper around macho.relocation_info --- CMakeLists.txt | 1 - src/link/MachO/Dylib.zig | 4 +- src/link/MachO/Object.zig | 489 ++++++++++++++++++----------------- src/link/MachO/Symbol.zig | 285 -------------------- src/link/MachO/TextBlock.zig | 64 ++--- src/link/MachO/Zld.zig | 74 ++---- 6 files changed, 320 insertions(+), 597 deletions(-) delete mode 100644 src/link/MachO/Symbol.zig diff --git a/CMakeLists.txt b/CMakeLists.txt index c807c2ddb6..ea3cd5107e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -581,7 +581,6 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/link/MachO/DebugSymbols.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Dylib.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Object.zig" - "${CMAKE_SOURCE_DIR}/src/link/MachO/Symbol.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/TextBlock.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Trie.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Zld.zig" diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index b751249ce4..ca71b7613c 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -12,8 +12,8 @@ const fat = @import("fat.zig"); const Allocator = mem.Allocator; const Arch = std.Target.Cpu.Arch; -const Symbol = @import("Symbol.zig"); const LibStub = @import("../tapi.zig").LibStub; +const Zld = @import("Zld.zig"); usingnamespace @import("commands.zig"); @@ -324,7 +324,7 @@ fn parseSymbols(self: *Dylib) !void { _ = try self.file.?.preadAll(strtab, symtab_cmd.stroff + self.library_offset); for (slice) |sym| { - const add_to_symtab = Symbol.isExt(sym) and 
(Symbol.isSect(sym) or Symbol.isIndr(sym)); + const add_to_symtab = Zld.symbolIsExt(sym) and (Zld.symbolIsSect(sym) or Zld.symbolIsIndr(sym)); if (!add_to_symtab) continue; diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index e87a74e80c..1c074a97c7 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -9,13 +9,10 @@ const log = std.log.scoped(.object); const macho = std.macho; const math = std.math; const mem = std.mem; -const reloc = @import("reloc.zig"); const sort = std.sort; const Allocator = mem.Allocator; const Arch = std.Target.Cpu.Arch; -const Relocation = reloc.Relocation; -const Symbol = @import("Symbol.zig"); const TextBlock = @import("TextBlock.zig"); const Zld = @import("Zld.zig"); @@ -57,6 +54,8 @@ tu_comp_dir: ?[]const u8 = null, mtime: ?u64 = null, text_blocks: std.ArrayListUnmanaged(*TextBlock) = .{}, +sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{}, +symbol_mapping: std.AutoHashMapUnmanaged(u32, u32) = .{}, const DebugInfo = struct { inner: dwarf.DwarfInfo, @@ -163,6 +162,8 @@ pub fn deinit(self: *Object) void { self.symtab.deinit(self.allocator); self.strtab.deinit(self.allocator); self.text_blocks.deinit(self.allocator); + self.sections_as_symbols.deinit(self.allocator); + self.symbol_mapping.deinit(self.allocator); if (self.debug_info) |*db| { db.deinit(self.allocator); @@ -372,20 +373,17 @@ const TextBlockParser = struct { } const SeniorityContext = struct { - zld: *Zld, + object: *Object, }; fn lessThanBySeniority(context: SeniorityContext, lhs: NlistWithIndex, rhs: NlistWithIndex) bool { - const lsym = context.zld.locals.items[lhs.index]; - const rsym = context.zld.locals.items[rhs.index]; - const lreg = lsym.payload.regular; - const rreg = rsym.payload.regular; - - return switch (rreg.linkage) { - .global => true, - .linkage_unit => lreg.linkage == .translation_unit, - else => lsym.isTemp(context.zld), - }; + if (!Zld.symbolIsExt(rhs.nlist)) { + return Zld.symbolIsTemp(lhs.nlist, 
context.object.getString(lhs.nlist.n_strx)); + } else if (Zld.symbolIsPext(rhs.nlist) or Zld.symbolIsWeakDef(rhs.nlist)) { + return !Zld.symbolIsExt(lhs.nlist); + } else { + return true; + } } pub fn next(self: *TextBlockParser) !?*TextBlock { @@ -409,6 +407,7 @@ const TextBlockParser = struct { } else null; for (aliases.items) |*nlist_with_index| { + nlist_with_index.index = self.symbol_mapping.get(nlist_with_index.index); const sym = self.object.symbols.items[nlist_with_index.index]; if (sym.payload != .regular) { log.err("expected a regular symbol, found {s}", .{sym.payload}); @@ -424,7 +423,7 @@ const TextBlockParser = struct { sort.sort( NlistWithIndex, aliases.items, - SeniorityContext{ .zld = self.zld }, + SeniorityContext{ .object = self.object }, @This().lessThanBySeniority, ); } @@ -515,7 +514,7 @@ const TextBlockParser = struct { pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; - log.debug("analysing {s}", .{self.name.?}); + log.warn("analysing {s}", .{self.name.?}); const dysymtab = self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; // We only care about defined symbols, so filter every other out. @@ -536,14 +535,14 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { for (seg.sections.items) |sect, id| { const sect_id = @intCast(u8, id); - log.debug("putting section '{s},{s}' as a TextBlock", .{ + log.warn("putting section '{s},{s}' as a TextBlock", .{ segmentName(sect), sectionName(sect), }); // Get matching segment/section in the final artifact. 
const match = (try zld.getMatchingSection(sect)) orelse { - log.debug("unhandled section", .{}); + log.warn("unhandled section", .{}); continue; }; @@ -577,200 +576,249 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { }; zld.has_stabs = zld.has_stabs or self.debug_info != null; - next: { - if (is_splittable) blocks: { - if (filtered_nlists.len == 0) break :blocks; + { + // next: { + // if (is_splittable) blocks: { + // if (filtered_nlists.len == 0) break :blocks; - // If the first nlist does not match the start of the section, - // then we need encapsulate the memory range [section start, first symbol) - // as a temporary symbol and insert the matching TextBlock. - const first_nlist = filtered_nlists[0].nlist; - if (first_nlist.n_value > sect.addr) { - const symbol = self.sections_as_symbols.get(sect_id) orelse symbol: { - const name = try std.fmt.allocPrint(self.allocator, "l_{s}_{s}_{s}", .{ - self.name.?, - segmentName(sect), - sectionName(sect), - }); - defer self.allocator.free(name); - const symbol = try zld.allocator.create(Symbol); - symbol.* = .{ - .strx = try zld.makeString(name), - .payload = .{ .undef = .{} }, - }; - try self.sections_as_symbols.putNoClobber(self.allocator, sect_id, symbol); - break :symbol symbol; - }; + // // If the first nlist does not match the start of the section, + // // then we need encapsulate the memory range [section start, first symbol) + // // as a temporary symbol and insert the matching TextBlock. 
+ // const first_nlist = filtered_nlists[0].nlist; + // if (first_nlist.n_value > sect.addr) { + // const symbol = self.sections_as_symbols.get(sect_id) orelse symbol: { + // const name = try std.fmt.allocPrint(self.allocator, "l_{s}_{s}_{s}", .{ + // self.name.?, + // segmentName(sect), + // sectionName(sect), + // }); + // defer self.allocator.free(name); + // const symbol = try zld.allocator.create(Symbol); + // symbol.* = .{ + // .strx = try zld.makeString(name), + // .payload = .{ .undef = .{} }, + // }; + // try self.sections_as_symbols.putNoClobber(self.allocator, sect_id, symbol); + // break :symbol symbol; + // }; - const local_sym_index = @intCast(u32, zld.locals.items.len); - symbol.payload = .{ - .regular = .{ - .linkage = .translation_unit, - .address = sect.addr, - .segment_id = match.seg, - .section_id = match.sect, - .file = self, - .local_sym_index = local_sym_index, - }, - }; - try zld.locals.append(zld.allocator, symbol); + // const local_sym_index = @intCast(u32, zld.locals.items.len); + // symbol.payload = .{ + // .regular = .{ + // .linkage = .translation_unit, + // .address = sect.addr, + // .segment_id = match.seg, + // .section_id = match.sect, + // .file = self, + // .local_sym_index = local_sym_index, + // }, + // }; + // try zld.locals.append(zld.allocator, symbol); - const block_code = code[0 .. first_nlist.n_value - sect.addr]; - const block_size = block_code.len; + // const block_code = code[0 .. 
first_nlist.n_value - sect.addr]; + // const block_size = block_code.len; - const block = try self.allocator.create(TextBlock); - errdefer self.allocator.destroy(block); + // const block = try self.allocator.create(TextBlock); + // errdefer self.allocator.destroy(block); - block.* = TextBlock.init(self.allocator); - block.local_sym_index = local_sym_index; - block.code = try self.allocator.dupe(u8, block_code); - block.size = block_size; - block.alignment = sect.@"align"; + // block.* = TextBlock.init(self.allocator); + // block.local_sym_index = local_sym_index; + // block.code = try self.allocator.dupe(u8, block_code); + // block.size = block_size; + // block.alignment = sect.@"align"; - const block_relocs = filterRelocs(relocs, 0, block_size); - if (block_relocs.len > 0) { - try self.parseRelocs(zld, block_relocs, block, 0); - } + // const block_relocs = filterRelocs(relocs, 0, block_size); + // if (block_relocs.len > 0) { + // try self.parseRelocs(zld, block_relocs, block, 0); + // } - if (zld.has_dices) { - const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + block_size); - try block.dices.ensureTotalCapacity(dices.len); + // if (zld.has_dices) { + // const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + block_size); + // try block.dices.ensureTotalCapacity(dices.len); - for (dices) |dice| { - block.dices.appendAssumeCapacity(.{ - .offset = dice.offset - try math.cast(u32, sect.addr), - .length = dice.length, - .kind = dice.kind, - }); - } - } + // for (dices) |dice| { + // block.dices.appendAssumeCapacity(.{ + // .offset = dice.offset - try math.cast(u32, sect.addr), + // .length = dice.length, + // .kind = dice.kind, + // }); + // } + // } - // Update target section's metadata - // TODO should we update segment's size here too? - // How does it tie with incremental space allocs? 
- const tseg = &zld.load_commands.items[match.seg].Segment; - const tsect = &tseg.sections.items[match.sect]; - const new_alignment = math.max(tsect.@"align", block.alignment); - const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); - const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; - tsect.size = new_size; - tsect.@"align" = new_alignment; + // // Update target section's metadata + // // TODO should we update segment's size here too? + // // How does it tie with incremental space allocs? + // const tseg = &zld.load_commands.items[match.seg].Segment; + // const tsect = &tseg.sections.items[match.sect]; + // const new_alignment = math.max(tsect.@"align", block.alignment); + // const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); + // const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; + // tsect.size = new_size; + // tsect.@"align" = new_alignment; - if (zld.blocks.getPtr(match)) |last| { - last.*.next = block; - block.prev = last.*; - last.* = block; - } else { - try zld.blocks.putNoClobber(zld.allocator, match, block); - } + // if (zld.blocks.getPtr(match)) |last| { + // last.*.next = block; + // block.prev = last.*; + // last.* = block; + // } else { + // try zld.blocks.putNoClobber(zld.allocator, match, block); + // } - try self.text_blocks.append(self.allocator, block); - } + // try self.text_blocks.append(self.allocator, block); + // } - var parser = TextBlockParser{ - .allocator = self.allocator, - .section = sect, - .code = code, - .relocs = relocs, - .object = self, - .zld = zld, - .nlists = filtered_nlists, - .match = match, - }; + // var parser = TextBlockParser{ + // .allocator = self.allocator, + // .section = sect, + // .code = code, + // .relocs = relocs, + // .object = self, + // .zld = zld, + // .nlists = filtered_nlists, + // .match = match, + // }; - while (try parser.next()) |block| { - const sym = zld.locals.items[block.local_sym_index]; 
- const reg = &sym.payload.regular; - if (reg.file) |file| { - if (file != self) { - log.debug("deduping definition of {s} in {s}", .{ zld.getString(sym.strx), self.name.? }); - block.deinit(); - self.allocator.destroy(block); - continue; - } - } + // while (try parser.next()) |block| { + // const sym = zld.locals.items[block.local_sym_index]; + // const reg = &sym.payload.regular; + // if (reg.file) |file| { + // if (file != self) { + // log.debug("deduping definition of {s} in {s}", .{ zld.getString(sym.strx), self.name.? }); + // block.deinit(); + // self.allocator.destroy(block); + // continue; + // } + // } - if (reg.address == sect.addr) { - if (self.sections_as_symbols.get(sect_id)) |alias| { - // Add alias. - const local_sym_index = @intCast(u32, zld.locals.items.len); - const reg_alias = &alias.payload.regular; - reg_alias.segment_id = match.seg; - reg_alias.section_id = match.sect; - reg_alias.local_sym_index = local_sym_index; - try block.aliases.append(local_sym_index); - try zld.locals.append(zld.allocator, alias); - } - } + // if (reg.address == sect.addr) { + // if (self.sections_as_symbols.get(sect_id)) |alias| { + // // Add alias. + // const local_sym_index = @intCast(u32, zld.locals.items.len); + // const reg_alias = &alias.payload.regular; + // reg_alias.segment_id = match.seg; + // reg_alias.section_id = match.sect; + // reg_alias.local_sym_index = local_sym_index; + // try block.aliases.append(local_sym_index); + // try zld.locals.append(zld.allocator, alias); + // } + // } - // Update target section's metadata - // TODO should we update segment's size here too? - // How does it tie with incremental space allocs? 
- const tseg = &zld.load_commands.items[match.seg].Segment; - const tsect = &tseg.sections.items[match.sect]; - const new_alignment = math.max(tsect.@"align", block.alignment); - const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); - const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; - tsect.size = new_size; - tsect.@"align" = new_alignment; + // // Update target section's metadata + // // TODO should we update segment's size here too? + // // How does it tie with incremental space allocs? + // const tseg = &zld.load_commands.items[match.seg].Segment; + // const tsect = &tseg.sections.items[match.sect]; + // const new_alignment = math.max(tsect.@"align", block.alignment); + // const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); + // const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; + // tsect.size = new_size; + // tsect.@"align" = new_alignment; - if (zld.blocks.getPtr(match)) |last| { - last.*.next = block; - block.prev = last.*; - last.* = block; - } else { - try zld.blocks.putNoClobber(zld.allocator, match, block); - } + // if (zld.blocks.getPtr(match)) |last| { + // last.*.next = block; + // block.prev = last.*; + // last.* = block; + // } else { + // try zld.blocks.putNoClobber(zld.allocator, match, block); + // } - try self.text_blocks.append(self.allocator, block); - } + // try self.text_blocks.append(self.allocator, block); + // } - break :next; - } + // break :next; + // } // Since there is no symbol to refer to this block, we create // a temp one, unless we already did that when working out the relocations // of other text blocks. 
- const symbol = self.sections_as_symbols.get(sect_id) orelse symbol: { - const name = try std.fmt.allocPrint(self.allocator, "l_{s}_{s}_{s}", .{ - self.name.?, - segmentName(sect), - sectionName(sect), - }); - defer self.allocator.free(name); - const symbol = try zld.allocator.create(Symbol); - symbol.* = .{ - .strx = try zld.makeString(name), - .payload = .{ .undef = .{} }, - }; - try self.sections_as_symbols.putNoClobber(self.allocator, sect_id, symbol); - break :symbol symbol; - }; - - const local_sym_index = @intCast(u32, zld.locals.items.len); - symbol.payload = .{ - .regular = .{ - .linkage = .translation_unit, - .address = sect.addr, - .segment_id = match.seg, - .section_id = match.sect, - .file = self, - .local_sym_index = local_sym_index, - }, - }; - try zld.locals.append(zld.allocator, symbol); + const block_local_sym_index = @intCast(u32, zld.locals.items.len); + const sym_name = try std.fmt.allocPrint(self.allocator, "l_{s}_{s}_{s}", .{ + self.name.?, + segmentName(sect), + sectionName(sect), + }); + defer self.allocator.free(sym_name); + try zld.locals.append(zld.allocator, .{ + .n_strx = try zld.makeString(sym_name), + .n_type = macho.N_SECT, + .n_sect = zld.sectionId(match), + .n_desc = 0, + .n_value = sect.addr, + }); + const block_local = &zld.locals.items[block_local_sym_index]; + block_local.n_sect = zld.sectionId(match); const block = try self.allocator.create(TextBlock); errdefer self.allocator.destroy(block); block.* = TextBlock.init(self.allocator); - block.local_sym_index = local_sym_index; + block.local_sym_index = block_local_sym_index; block.code = try self.allocator.dupe(u8, code); block.size = sect.size; block.alignment = sect.@"align"; - if (relocs.len > 0) { - try self.parseRelocs(zld, relocs, block, 0); + try block.relocs.ensureTotalCapacity(relocs.len); + for (relocs) |rel| { + const out_rel: TextBlock.Relocation = outer: { + if (rel.r_extern == 0) { + const rel_sect_id = @intCast(u16, rel.r_symbolnum - 1); + const sect_sym_index = 
self.sections_as_symbols.get(rel_sect_id) orelse blk: { + const sect_sym_index = @intCast(u32, zld.locals.items.len); + const sect_sym_name = try std.fmt.allocPrint(self.allocator, "l_{s}_{s}_{s}", .{ + self.name.?, + segmentName(sect), + sectionName(sect), + }); + defer self.allocator.free(sect_sym_name); + try zld.locals.append(zld.allocator, .{ + .n_strx = try zld.makeString(sect_sym_name), + .n_type = macho.N_SECT, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + try self.sections_as_symbols.putNoClobber(self.allocator, rel_sect_id, sect_sym_index); + break :blk sect_sym_index; + }; + break :outer .{ + .inner = rel, + .where = .local, + .where_index = sect_sym_index, + }; + } + + const rel_sym = self.symtab.items[rel.r_symbolnum]; + const rel_sym_name = self.getString(rel_sym.n_strx); + + if (Zld.symbolIsSect(rel_sym) and !Zld.symbolIsExt(rel_sym)) { + const where_index = self.symbol_mapping.get(rel.r_symbolnum) orelse unreachable; + break :outer .{ + .inner = rel, + .where = .local, + .where_index = where_index, + }; + } + + const resolv = zld.symbol_resolver.get(rel_sym_name) orelse unreachable; + switch (resolv.where) { + .global => { + break :outer .{ + .inner = rel, + .where = .local, + .where_index = resolv.local_sym_index, + }; + }, + .import => { + break :outer .{ + .inner = rel, + .where = .import, + .where_index = resolv.where_index, + }; + }, + else => unreachable, + } + }; + block.relocs.appendAssumeCapacity(out_rel); } if (zld.has_dices) { @@ -791,44 +839,41 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { // the filtered symbols and note which symbol is contained within so that // we can properly allocate addresses down the line. // While we're at it, we need to update segment,section mapping of each symbol too. 
- if (filtered_nlists.len > 0) { - var contained = std.ArrayList(TextBlock.SymbolAtOffset).init(self.allocator); - defer contained.deinit(); - try contained.ensureTotalCapacity(filtered_nlists.len); + var contained = std.ArrayList(TextBlock.SymbolAtOffset).init(self.allocator); + defer contained.deinit(); + try contained.ensureTotalCapacity(filtered_nlists.len); - for (filtered_nlists) |nlist_with_index| { - const sym = self.symbols.items[nlist_with_index.index]; - assert(sym.payload == .regular); - const reg = &sym.payload.regular; + for (filtered_nlists) |nlist_with_index| { + const nlist = nlist_with_index.nlist; + const local_sym_index = self.symbol_mapping.get(nlist_with_index.index) orelse unreachable; + const local = &zld.locals.items[local_sym_index]; + local.n_sect = zld.sectionId(match); - reg.segment_id = match.seg; - reg.section_id = match.sect; - - const stab: ?TextBlock.Stab = if (self.debug_info) |di| blk: { - // TODO there has to be a better to handle this. - for (di.inner.func_list.items) |func| { - if (func.pc_range) |range| { - if (reg.address >= range.start and reg.address < range.end) { - break :blk TextBlock.Stab{ - .function = range.end - range.start, - }; - } + const stab: ?TextBlock.Stab = if (self.debug_info) |di| blk: { + // TODO there has to be a better to handle this. 
+ for (di.inner.func_list.items) |func| { + if (func.pc_range) |range| { + if (nlist.n_value >= range.start and nlist.n_value < range.end) { + break :blk TextBlock.Stab{ + .function = range.end - range.start, + }; } } - if (zld.globals.contains(zld.getString(sym.strx))) break :blk .global; - break :blk .static; - } else null; + } + // TODO + // if (zld.globals.contains(zld.getString(sym.strx))) break :blk .global; + break :blk .static; + } else null; - contained.appendAssumeCapacity(.{ - .local_sym_index = reg.local_sym_index, - .offset = nlist_with_index.nlist.n_value - sect.addr, - .stab = stab, - }); - } - - block.contained = contained.toOwnedSlice(); + contained.appendAssumeCapacity(.{ + .local_sym_index = local_sym_index, + .offset = nlist.n_value - sect.addr, + .stab = stab, + }); } + block.contained = contained.toOwnedSlice(); + // Update target section's metadata // TODO should we update segment's size here too? // How does it tie with incremental space allocs? @@ -853,26 +898,6 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { } } -fn parseRelocs( - self: *Object, - zld: *Zld, - relocs: []const macho.relocation_info, - block: *TextBlock, - base_addr: u64, -) !void { - var it = reloc.RelocIterator{ - .buffer = relocs, - }; - var parser = reloc.Parser{ - .object = self, - .zld = zld, - .it = &it, - .block = block, - .base_addr = base_addr, - }; - try parser.parse(); -} - pub fn symbolFromReloc(self: *Object, zld: *Zld, rel: macho.relocation_info) !*Symbol { const symbol = blk: { if (rel.r_extern == 1) { diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig deleted file mode 100644 index 37072b5618..0000000000 --- a/src/link/MachO/Symbol.zig +++ /dev/null @@ -1,285 +0,0 @@ -const Symbol = @This(); - -const std = @import("std"); -const assert = std.debug.assert; -const commands = @import("commands.zig"); -const macho = std.macho; -const mem = std.mem; - -const Allocator = mem.Allocator; -const Dylib = @import("Dylib.zig"); -const Object 
= @import("Object.zig"); -const Zld = @import("Zld.zig"); - -/// Offset into the string table. -strx: u32, - -/// Index in GOT table for indirection. -got_index: ?u32 = null, - -/// Index in stubs table for late binding. -stubs_index: ?u32 = null, - -payload: union(enum) { - regular: Regular, - tentative: Tentative, - proxy: Proxy, - undef: Undefined, - - pub fn format(self: @This(), comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - return switch (self) { - .regular => |p| p.format(fmt, options, writer), - .tentative => |p| p.format(fmt, options, writer), - .proxy => |p| p.format(fmt, options, writer), - .undef => |p| p.format(fmt, options, writer), - }; - } -}, - -pub const Regular = struct { - /// Linkage type. - linkage: Linkage, - - /// Symbol address. - address: u64 = 0, - - /// Segment ID - segment_id: u16 = 0, - - /// Section ID - section_id: u16 = 0, - - /// Whether the symbol is a weak ref. - weak_ref: bool = false, - - /// Object file where to locate this symbol. - /// null means self-reference. 
- file: ?*Object = null, - - local_sym_index: u32 = 0, - - pub const Linkage = enum { - translation_unit, - linkage_unit, - global, - }; - - pub fn format(self: Regular, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "Regular {{ ", .{}); - try std.fmt.format(writer, ".linkage = {s}, ", .{self.linkage}); - try std.fmt.format(writer, ".address = 0x{x}, ", .{self.address}); - try std.fmt.format(writer, ".segment_id = {}, ", .{self.segment_id}); - try std.fmt.format(writer, ".section_id = {}, ", .{self.section_id}); - if (self.weak_ref) { - try std.fmt.format(writer, ".weak_ref, ", .{}); - } - if (self.file) |file| { - try std.fmt.format(writer, ".file = {s}, ", .{file.name.?}); - } - try std.fmt.format(writer, ".local_sym_index = {}, ", .{self.local_sym_index}); - try std.fmt.format(writer, "}}", .{}); - } - - pub fn sectionId(self: Regular, zld: *Zld) u8 { - // TODO there might be a more generic way of doing this. - var section: u8 = 0; - for (zld.load_commands.items) |cmd, cmd_id| { - if (cmd != .Segment) break; - if (cmd_id == self.segment_id) { - section += @intCast(u8, self.section_id) + 1; - break; - } - section += @intCast(u8, cmd.Segment.sections.items.len); - } - return section; - } -}; - -pub const Tentative = struct { - /// Symbol size. - size: u64, - - /// Symbol alignment as power of two. - alignment: u16, - - /// File where this symbol was referenced. 
- file: ?*Object = null, - - pub fn format(self: Tentative, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "Tentative {{ ", .{}); - try std.fmt.format(writer, ".size = 0x{x}, ", .{self.size}); - try std.fmt.format(writer, ".alignment = 0x{x}, ", .{self.alignment}); - if (self.file) |file| { - try std.fmt.format(writer, ".file = {s}, ", .{file.name.?}); - } - try std.fmt.format(writer, "}}", .{}); - } -}; - -pub const Proxy = struct { - /// Dylib where to locate this symbol. - /// null means self-reference. - file: ?*Dylib = null, - - local_sym_index: u32 = 0, - - pub fn dylibOrdinal(proxy: Proxy) u16 { - const dylib = proxy.file orelse return 0; - return dylib.ordinal.?; - } - - pub fn format(self: Proxy, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "Proxy {{ ", .{}); - if (self.file) |file| { - try std.fmt.format(writer, ".file = {s}, ", .{file.name.?}); - } - try std.fmt.format(writer, ".local_sym_index = {d}, ", .{self.local_sym_index}); - try std.fmt.format(writer, "}}", .{}); - } -}; - -pub const Undefined = struct { - /// File where this symbol was referenced. - /// null means synthetic, e.g., dyld_stub_binder. 
- file: ?*Object = null, - - pub fn format(self: Undefined, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "Undefined {{ ", .{}); - if (self.file) |file| { - try std.fmt.format(writer, ".file = {s}, ", .{file.name.?}); - } - try std.fmt.format(writer, "}}", .{}); - } -}; - -pub fn format(self: Symbol, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "Symbol {{", .{}); - try std.fmt.format(writer, ".strx = {d}, ", .{self.strx}); - if (self.got_index) |got_index| { - try std.fmt.format(writer, ".got_index = {}, ", .{got_index}); - } - if (self.stubs_index) |stubs_index| { - try std.fmt.format(writer, ".stubs_index = {}, ", .{stubs_index}); - } - try std.fmt.format(writer, "{}, ", .{self.payload}); - try std.fmt.format(writer, "}}", .{}); -} - -pub fn isTemp(symbol: Symbol, zld: *Zld) bool { - const sym_name = zld.getString(symbol.strx); - switch (symbol.payload) { - .regular => |regular| { - if (regular.linkage == .translation_unit) { - return mem.startsWith(u8, sym_name, "l") or mem.startsWith(u8, sym_name, "L"); - } - }, - else => {}, - } - return false; -} - -pub fn asNlist(symbol: *Symbol, zld: *Zld) !macho.nlist_64 { - const nlist = nlist: { - switch (symbol.payload) { - .regular => |regular| { - var nlist = macho.nlist_64{ - .n_strx = symbol.strx, - .n_type = macho.N_SECT, - .n_sect = regular.sectionId(zld), - .n_desc = 0, - .n_value = regular.address, - }; - - if (regular.linkage != .translation_unit) { - nlist.n_type |= macho.N_EXT; - } - if (regular.linkage == .linkage_unit) { - nlist.n_type |= macho.N_PEXT; - nlist.n_desc |= macho.N_WEAK_DEF; - } - - break :nlist nlist; - }, - .tentative => { - // TODO - break :nlist macho.nlist_64{ - .n_strx = symbol.strx, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - }, - .proxy => |proxy| { - break :nlist 
macho.nlist_64{ - .n_strx = symbol.strx, - .n_type = macho.N_UNDF | macho.N_EXT, - .n_sect = 0, - .n_desc = (proxy.dylibOrdinal() * macho.N_SYMBOL_RESOLVER) | macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY, - .n_value = 0, - }; - }, - .undef => { - // TODO - break :nlist macho.nlist_64{ - .n_strx = symbol.strx, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - }, - } - }; - return nlist; -} - -pub fn isStab(sym: macho.nlist_64) bool { - return (macho.N_STAB & sym.n_type) != 0; -} - -pub fn isPext(sym: macho.nlist_64) bool { - return (macho.N_PEXT & sym.n_type) != 0; -} - -pub fn isExt(sym: macho.nlist_64) bool { - return (macho.N_EXT & sym.n_type) != 0; -} - -pub fn isSect(sym: macho.nlist_64) bool { - const type_ = macho.N_TYPE & sym.n_type; - return type_ == macho.N_SECT; -} - -pub fn isUndf(sym: macho.nlist_64) bool { - const type_ = macho.N_TYPE & sym.n_type; - return type_ == macho.N_UNDF; -} - -pub fn isIndr(sym: macho.nlist_64) bool { - const type_ = macho.N_TYPE & sym.n_type; - return type_ == macho.N_INDR; -} - -pub fn isAbs(sym: macho.nlist_64) bool { - const type_ = macho.N_TYPE & sym.n_type; - return type_ == macho.N_ABS; -} - -pub fn isWeakDef(sym: macho.nlist_64) bool { - return (sym.n_desc & macho.N_WEAK_DEF) != 0; -} - -pub fn isWeakRef(sym: macho.nlist_64) bool { - return (sym.n_desc & macho.N_WEAK_REF) != 0; -} diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig index 93763ded18..04cb33855c 100644 --- a/src/link/MachO/TextBlock.zig +++ b/src/link/MachO/TextBlock.zig @@ -5,10 +5,9 @@ const commands = @import("commands.zig"); const log = std.log.scoped(.text_block); const macho = std.macho; const mem = std.mem; -const reloc = @import("reloc.zig"); const Allocator = mem.Allocator; -const Relocation = reloc.Relocation; +const Arch = std.Target.Cpu.Arch; const Zld = @import("Zld.zig"); allocator: *Allocator, @@ -102,6 +101,15 @@ pub const Stab = union(enum) { } }; +pub const Relocation = struct { + inner: 
macho.relocation_info, + where: enum { + local, + import, + }, + where_index: u32, +}; + pub fn init(allocator: *Allocator) TextBlock { return .{ .allocator = allocator, @@ -137,18 +145,10 @@ pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { const source_addr = blk: { const sym = zld.locals.items[self.local_sym_index]; - break :blk sym.payload.regular.address + rel.offset; + break :blk sym.n_value + rel.offset; }; const target_addr = blk: { - const is_via_got = switch (rel.payload) { - .pointer_to_got => true, - .page => |page| page.kind == .got, - .page_off => |page_off| page_off.kind == .got, - .load => |load| load.kind == .got, - else => false, - }; - - if (is_via_got) { + if (isGotIndirection(rel, zld.target.?.cpu.arch)) { const dc_seg = zld.load_commands.items[zld.data_const_segment_cmd_index.?].Segment; const got = dc_seg.sections.items[zld.got_section_index.?]; const got_index = rel.target.got_index orelse { @@ -228,31 +228,18 @@ pub fn print_this(self: *const TextBlock, zld: *Zld) void { log.warn(" stab: {}", .{stab}); } if (self.aliases.items.len > 0) { - log.warn(" aliases:", .{}); - for (self.aliases.items) |index| { - log.warn(" {}: {}", .{ index, zld.locals.items[index] }); - } + log.warn(" aliases: {any}", .{self.aliases.items}); } if (self.references.count() > 0) { - log.warn(" references:", .{}); - for (self.references.keys()) |index| { - log.warn(" {}: {}", .{ index, zld.locals.items[index] }); - } + log.warn(" references: {any}", .{self.references.keys()}); } if (self.contained) |contained| { log.warn(" contained symbols:", .{}); for (contained) |sym_at_off| { if (sym_at_off.stab) |stab| { - log.warn(" {}: {}, stab: {}\n", .{ - sym_at_off.offset, - zld.locals.items[sym_at_off.local_sym_index], - stab, - }); + log.warn(" {}: {}, stab: {}", .{ sym_at_off.offset, sym_at_off.local_sym_index, stab }); } else { - log.warn(" {}: {}\n", .{ - sym_at_off.offset, - zld.locals.items[sym_at_off.local_sym_index], - }); + log.warn(" {}: {}", .{ 
sym_at_off.offset, sym_at_off.local_sym_index }); } } } @@ -282,3 +269,22 @@ pub fn print(self: *const TextBlock, zld: *Zld) void { } self.print_this(zld); } + +fn isGotIndirection(rel: macho.relocation_info, arch: Arch) bool { + return switch (arch) { + .aarch64 => switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) { + .ARM64_RELOC_POINTER_TO_GOT, + .ARM64_RELOC_GOT_LOAD_PAGE21, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12, + => true, + else => false, + }, + .x86_64 => switch (@intToEnum(macho.reloc_type_x86_64, rel.r_type)) { + .X86_64_RELOC_GOT, + .X86_64_RELOC_GOT_LOAD, + => true, + else => false, + }, + else => unreachable, + }; +} diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index d6dd9f597c..862e6b5b0c 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -105,7 +105,6 @@ imports: std.ArrayListUnmanaged(macho.nlist_64) = .{}, undefs: std.ArrayListUnmanaged(macho.nlist_64) = .{}, tentatives: std.ArrayListUnmanaged(macho.nlist_64) = .{}, symbol_resolver: std.StringArrayHashMapUnmanaged(SymbolWithLoc) = .{}, -object_mapping: std.AutoHashMapUnmanaged(u16, []u32) = .{}, strtab: std.ArrayListUnmanaged(u8) = .{}, @@ -199,14 +198,6 @@ pub fn deinit(self: *Zld) void { } self.symbol_resolver.deinit(self.allocator); - { - var it = self.object_mapping.valueIterator(); - while (it.next()) |value_ptr| { - self.allocator.free(value_ptr.*); - } - } - self.object_mapping.deinit(self.allocator); - self.strtab.deinit(self.allocator); // TODO dealloc all blocks @@ -251,33 +242,33 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg try self.resolveSymbols(); log.warn("locals", .{}); - for (self.locals.items) |sym| { - log.warn(" | {s}: {}", .{ self.getString(sym.n_strx), sym }); + for (self.locals.items) |sym, id| { + log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); } log.warn("globals", .{}); - for (self.globals.items) |sym| { - log.warn(" | {s}: {}", .{ self.getString(sym.n_strx), sym }); + for 
(self.globals.items) |sym, id| { + log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); } log.warn("tentatives", .{}); - for (self.tentatives.items) |sym| { - log.warn(" | {s}: {}", .{ self.getString(sym.n_strx), sym }); + for (self.tentatives.items) |sym, id| { + log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); } log.warn("undefines", .{}); - for (self.undefs.items) |sym| { - log.warn(" | {s}: {}", .{ self.getString(sym.n_strx), sym }); + for (self.undefs.items) |sym, id| { + log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); } log.warn("imports", .{}); - for (self.imports.items) |sym| { - log.warn(" | {s}: {}", .{ self.getString(sym.n_strx), sym }); + for (self.imports.items) |sym, id| { + log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); } log.warn("symbol resolver", .{}); for (self.symbol_resolver.keys()) |key| { - log.warn(" | {s} => {}", .{ key, self.symbol_resolver.get(key).? }); + log.warn(" {s} => {}", .{ key, self.symbol_resolver.get(key).? 
}); } log.warn("mappings", .{}); @@ -285,7 +276,7 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg const object_id = @intCast(u16, id); log.warn(" in object {s}", .{object.name.?}); for (object.symtab.items) |sym, sym_id| { - if (self.localSymIndex(object_id, @intCast(u32, sym_id))) |local_id| { + if (object.symbol_mapping.get(@intCast(u32, sym_id))) |local_id| { log.warn(" | {d} => {d}", .{ sym_id, local_id }); } else { log.warn(" | {d} no local mapping for {s}", .{ sym_id, object.getString(sym.n_strx) }); @@ -293,8 +284,19 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg } } + try self.parseTextBlocks(); + + var it = self.blocks.iterator(); + while (it.next()) |entry| { + const seg = self.load_commands.items[entry.key_ptr.seg].Segment; + const sect = seg.sections.items[entry.key_ptr.sect]; + + log.warn("\n\n{s},{s} contents:", .{ segmentName(sect), sectionName(sect) }); + log.warn(" {}", .{sect}); + entry.value_ptr.*.print(self); + } + return error.TODO; - // try self.parseTextBlocks(); // try self.sortSections(); // try self.addRpaths(args.rpaths); // try self.addDataInCodeLC(); @@ -305,16 +307,6 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg // self.allocateLinkeditSegment(); // try self.allocateTextBlocks(); - // // var it = self.blocks.iterator(); - // // while (it.next()) |entry| { - // // const seg = self.load_commands.items[entry.key_ptr.seg].Segment; - // // const sect = seg.sections.items[entry.key_ptr.sect]; - - // // log.warn("\n\n{s},{s} contents:", .{ segmentName(sect), sectionName(sect) }); - // // log.warn(" {}", .{sect}); - // // entry.value_ptr.*.print(self); - // // } - // try self.flush(); } @@ -1414,10 +1406,6 @@ fn resolveSymbolsInObject(self: *Zld, object_id: u16) !void { log.warn("resolving symbols in '{s}'", .{object.name}); - const mapping = try self.allocator.alloc(u32, object.symtab.items.len); - mem.set(u32, mapping, 0); - try 
self.object_mapping.putNoClobber(self.allocator, object_id, mapping); - for (object.symtab.items) |sym, id| { const sym_id = @intCast(u32, id); const sym_name = object.getString(sym.n_strx); @@ -1464,7 +1452,7 @@ fn resolveSymbolsInObject(self: *Zld, object_id: u16) !void { .n_desc = 0, .n_value = sym.n_value, }); - mapping[sym_id] = local_sym_index; + try object.symbol_mapping.putNoClobber(self.allocator, sym_id, local_sym_index); // If the symbol's scope is not local aka translation unit, then we need work out // if we should save the symbol as a global, or potentially flag the error. @@ -2987,15 +2975,6 @@ pub fn getString(self: *Zld, off: u32) []const u8 { return mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr + off)); } -fn localSymIndex(self: Zld, object_id: u16, orig_id: u32) ?u32 { - const mapping = self.object_mapping.get(object_id) orelse return null; - const local_sym_index = mapping[orig_id]; - if (local_sym_index == 0) { - return null; - } - return local_sym_index; -} - pub fn symbolIsStab(sym: macho.nlist_64) bool { return (macho.N_STAB & sym.n_type) != 0; } @@ -3045,10 +3024,9 @@ pub fn symbolIsNull(sym: macho.nlist_64) bool { return sym.n_value == 0 and sym.n_desc == 0 and sym.n_type == 0 and sym.n_strx == 0 and sym.n_sect == 0; } -pub fn symbolIsTemp(self: Zld, sym: macho.nlist_64) bool { +pub fn symbolIsTemp(sym: macho.nlist_64, sym_name: []const u8) bool { if (!symbolIsSect(sym)) return false; if (symbolIsExt(sym)) return false; - const sym_name = self.getString(sym.n_strx); return mem.startsWith(u8, sym_name, "l") or mem.startsWith(u8, sym_name, "L"); } From 407745a5e91685d52189548620d112a4b34c8127 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 17 Jul 2021 00:00:18 +0200 Subject: [PATCH 56/81] zld: simplify and move Relocations into TextBlock It makes sense to have them as a dependent type since they only ever deal with TextBlocks. Simplify Relocations to rely on symbol indices and symbol resolver rather than pointers. 
--- CMakeLists.txt | 1 - src/link/MachO/Object.zig | 117 +---- src/link/MachO/TextBlock.zig | 905 ++++++++++++++++++++++++++++++++++- src/link/MachO/Zld.zig | 50 +- src/link/MachO/reloc.zig | 840 -------------------------------- 5 files changed, 937 insertions(+), 976 deletions(-) delete mode 100644 src/link/MachO/reloc.zig diff --git a/CMakeLists.txt b/CMakeLists.txt index ea3cd5107e..83352beea8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -586,7 +586,6 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/link/MachO/Zld.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/bind.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/commands.zig" - "${CMAKE_SOURCE_DIR}/src/link/MachO/reloc.zig" "${CMAKE_SOURCE_DIR}/src/link/Wasm.zig" "${CMAKE_SOURCE_DIR}/src/link/tapi.zig" "${CMAKE_SOURCE_DIR}/src/link/tapi/parse.zig" diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 1c074a97c7..031d71bd9d 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -290,19 +290,6 @@ pub fn readLoadCommands(self: *Object, reader: anytype) !void { } } -fn findFirst(comptime T: type, haystack: []T, start: usize, predicate: anytype) usize { - if (!@hasDecl(@TypeOf(predicate), "predicate")) - @compileError("Predicate is required to define fn predicate(@This(), T) bool"); - - if (start == haystack.len) return start; - - var i = start; - while (i < haystack.len) : (i += 1) { - if (predicate.predicate(haystack[i])) break; - } - return i; -} - const NlistWithIndex = struct { nlist: macho.nlist_64, index: u32, @@ -315,44 +302,29 @@ const NlistWithIndex = struct { const Predicate = struct { addr: u64, - fn predicate(self: @This(), symbol: NlistWithIndex) bool { + pub fn predicate(self: @This(), symbol: NlistWithIndex) bool { return symbol.nlist.n_value >= self.addr; } }; - const start = findFirst(NlistWithIndex, symbols, 0, Predicate{ .addr = sect.addr }); - const end = findFirst(NlistWithIndex, symbols, start, Predicate{ .addr = sect.addr + sect.size }); + const start = 
Zld.findFirst(NlistWithIndex, symbols, 0, Predicate{ .addr = sect.addr }); + const end = Zld.findFirst(NlistWithIndex, symbols, start, Predicate{ .addr = sect.addr + sect.size }); return symbols[start..end]; } }; -fn filterRelocs(relocs: []macho.relocation_info, start_addr: u64, end_addr: u64) []macho.relocation_info { - const Predicate = struct { - addr: u64, - - fn predicate(self: @This(), rel: macho.relocation_info) bool { - return rel.r_address < self.addr; - } - }; - - const start = findFirst(macho.relocation_info, relocs, 0, Predicate{ .addr = end_addr }); - const end = findFirst(macho.relocation_info, relocs, start, Predicate{ .addr = start_addr }); - - return relocs[start..end]; -} - fn filterDice(dices: []macho.data_in_code_entry, start_addr: u64, end_addr: u64) []macho.data_in_code_entry { const Predicate = struct { addr: u64, - fn predicate(self: @This(), dice: macho.data_in_code_entry) bool { + pub fn predicate(self: @This(), dice: macho.data_in_code_entry) bool { return dice.offset >= self.addr; } }; - const start = findFirst(macho.data_in_code_entry, dices, 0, Predicate{ .addr = start_addr }); - const end = findFirst(macho.data_in_code_entry, dices, start, Predicate{ .addr = end_addr }); + const start = Zld.findFirst(macho.data_in_code_entry, dices, 0, Predicate{ .addr = start_addr }); + const end = Zld.findFirst(macho.data_in_code_entry, dices, start, Predicate{ .addr = end_addr }); return dices[start..end]; } @@ -483,10 +455,10 @@ const TextBlockParser = struct { } } - const relocs = filterRelocs(self.relocs, start_addr, end_addr); - if (relocs.len > 0) { - try self.object.parseRelocs(self.zld, relocs, block, start_addr); - } + try block.parseRelocsFromObject(relocs, object, .{ + .base_addr = start_addr, + .zld = self.zld, + }); if (self.zld.has_dices) { const dices = filterDice( @@ -745,8 +717,6 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { .n_desc = 0, .n_value = sect.addr, }); - const block_local = 
&zld.locals.items[block_local_sym_index]; - block_local.n_sect = zld.sectionId(match); const block = try self.allocator.create(TextBlock); errdefer self.allocator.destroy(block); @@ -757,69 +727,10 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { block.size = sect.size; block.alignment = sect.@"align"; - try block.relocs.ensureTotalCapacity(relocs.len); - for (relocs) |rel| { - const out_rel: TextBlock.Relocation = outer: { - if (rel.r_extern == 0) { - const rel_sect_id = @intCast(u16, rel.r_symbolnum - 1); - const sect_sym_index = self.sections_as_symbols.get(rel_sect_id) orelse blk: { - const sect_sym_index = @intCast(u32, zld.locals.items.len); - const sect_sym_name = try std.fmt.allocPrint(self.allocator, "l_{s}_{s}_{s}", .{ - self.name.?, - segmentName(sect), - sectionName(sect), - }); - defer self.allocator.free(sect_sym_name); - try zld.locals.append(zld.allocator, .{ - .n_strx = try zld.makeString(sect_sym_name), - .n_type = macho.N_SECT, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - try self.sections_as_symbols.putNoClobber(self.allocator, rel_sect_id, sect_sym_index); - break :blk sect_sym_index; - }; - break :outer .{ - .inner = rel, - .where = .local, - .where_index = sect_sym_index, - }; - } - - const rel_sym = self.symtab.items[rel.r_symbolnum]; - const rel_sym_name = self.getString(rel_sym.n_strx); - - if (Zld.symbolIsSect(rel_sym) and !Zld.symbolIsExt(rel_sym)) { - const where_index = self.symbol_mapping.get(rel.r_symbolnum) orelse unreachable; - break :outer .{ - .inner = rel, - .where = .local, - .where_index = where_index, - }; - } - - const resolv = zld.symbol_resolver.get(rel_sym_name) orelse unreachable; - switch (resolv.where) { - .global => { - break :outer .{ - .inner = rel, - .where = .local, - .where_index = resolv.local_sym_index, - }; - }, - .import => { - break :outer .{ - .inner = rel, - .where = .import, - .where_index = resolv.where_index, - }; - }, - else => unreachable, - } - }; - 
block.relocs.appendAssumeCapacity(out_rel); - } + try block.parseRelocsFromObject(relocs, self, .{ + .base_addr = 0, + .zld = zld, + }); if (zld.has_dices) { const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + sect.size); diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig index 04cb33855c..20283dfc9d 100644 --- a/src/link/MachO/TextBlock.zig +++ b/src/link/MachO/TextBlock.zig @@ -1,6 +1,7 @@ const TextBlock = @This(); const std = @import("std"); +const assert = std.debug.assert; const commands = @import("commands.zig"); const log = std.log.scoped(.text_block); const macho = std.macho; @@ -8,6 +9,7 @@ const mem = std.mem; const Allocator = mem.Allocator; const Arch = std.Target.Cpu.Arch; +const Object = @import("Object.zig"); const Zld = @import("Zld.zig"); allocator: *Allocator, @@ -102,12 +104,396 @@ pub const Stab = union(enum) { }; pub const Relocation = struct { - inner: macho.relocation_info, + /// Offset within the `block`s code buffer. + /// Note relocation size can be inferred by relocation's kind. 
+ offset: u32, + where: enum { local, import, }, + where_index: u32, + + payload: union(enum) { + unsigned: Unsigned, + branch: Branch, + page: Page, + page_off: PageOff, + pointer_to_got: PointerToGot, + signed: Signed, + load: Load, + }, + + const ResolveArgs = struct { + block: *TextBlock, + offset: u32, + source_addr: u64, + target_addr: u64, + zld: *Zld, + }; + + pub const Unsigned = struct { + subtractor: ?u32, + + /// Addend embedded directly in the relocation slot + addend: i64, + + /// Extracted from r_length: + /// => 3 implies true + /// => 2 implies false + /// => * is unreachable + is_64bit: bool, + + pub fn resolve(self: Unsigned, args: ResolveArgs) !void { + const result = if (self.subtractor) |subtractor| + @intCast(i64, args.target_addr) - @intCast(i64, subtractor.payload.regular.address) + self.addend + else + @intCast(i64, args.target_addr) + self.addend; + + if (self.is_64bit) { + mem.writeIntLittle(u64, args.block.code[args.offset..][0..8], @bitCast(u64, result)); + } else { + mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @truncate(u32, @bitCast(u64, result))); + } + } + + pub fn format(self: Unsigned, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "Unsigned {{ ", .{}); + if (self.subtractor) |sub| { + try std.fmt.format(writer, ".subtractor = {}, ", .{sub}); + } + try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); + const length: usize = if (self.is_64bit) 8 else 4; + try std.fmt.format(writer, ".length = {}, ", .{length}); + try std.fmt.format(writer, "}}", .{}); + } + }; + + pub const Branch = struct { + arch: Arch, + + pub fn resolve(self: Branch, args: ResolveArgs) !void { + switch (self.arch) { + .aarch64 => { + const displacement = try math.cast( + i28, + @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr), + ); + const code = args.block.code[args.offset..][0..4]; + var inst = aarch64.Instruction{ + 
.unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.unconditional_branch_immediate, + ), code), + }; + inst.unconditional_branch_immediate.imm26 = @truncate(u26, @bitCast(u28, displacement >> 2)); + mem.writeIntLittle(u32, code, inst.toU32()); + }, + .x86_64 => { + const displacement = try math.cast( + i32, + @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4, + ); + mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @bitCast(u32, displacement)); + }, + else => return error.UnsupportedCpuArchitecture, + } + } + + pub fn format(self: Branch, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = self; + _ = fmt; + _ = options; + try std.fmt.format(writer, "Branch {{}}", .{}); + } + }; + + pub const Page = struct { + kind: enum { + page, + got, + tlvp, + }, + addend: u32 = 0, + + pub fn resolve(self: Page, args: ResolveArgs) !void { + const target_addr = args.target_addr + self.addend; + const source_page = @intCast(i32, args.source_addr >> 12); + const target_page = @intCast(i32, target_addr >> 12); + const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); + + const code = args.block.code[args.offset..][0..4]; + var inst = aarch64.Instruction{ + .pc_relative_address = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.pc_relative_address, + ), code), + }; + inst.pc_relative_address.immhi = @truncate(u19, pages >> 2); + inst.pc_relative_address.immlo = @truncate(u2, pages); + + mem.writeIntLittle(u32, code, inst.toU32()); + } + + pub fn format(self: Page, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "Page {{ ", .{}); + switch (self.kind) { + .page => {}, + .got => { + try std.fmt.format(writer, ".got, ", .{}); + }, + .tlvp => { + try std.fmt.format(writer, ".tlvp", .{}); + }, + } + try std.fmt.format(writer, 
".addend = {}, ", .{self.addend}); + try std.fmt.format(writer, "}}", .{}); + } + }; + + pub const PageOff = struct { + kind: enum { + page, + got, + tlvp, + }, + addend: u32 = 0, + op_kind: ?OpKind = null, + + pub const OpKind = enum { + arithmetic, + load, + }; + + pub fn resolve(self: PageOff, args: ResolveArgs) !void { + const code = args.block.code[args.offset..][0..4]; + + switch (self.kind) { + .page => { + const target_addr = args.target_addr + self.addend; + const narrowed = @truncate(u12, target_addr); + + const op_kind = self.op_kind orelse unreachable; + var inst: aarch64.Instruction = blk: { + switch (op_kind) { + .arithmetic => { + break :blk .{ + .add_subtract_immediate = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.add_subtract_immediate, + ), code), + }; + }, + .load => { + break :blk .{ + .load_store_register = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code), + }; + }, + } + }; + + if (op_kind == .arithmetic) { + inst.add_subtract_immediate.imm12 = narrowed; + } else { + const offset: u12 = blk: { + if (inst.load_store_register.size == 0) { + if (inst.load_store_register.v == 1) { + // 128-bit SIMD is scaled by 16. + break :blk try math.divExact(u12, narrowed, 16); + } + // Otherwise, 8-bit SIMD or ldrb. 
+ break :blk narrowed; + } else { + const denom: u4 = try math.powi(u4, 2, inst.load_store_register.size); + break :blk try math.divExact(u12, narrowed, denom); + } + }; + inst.load_store_register.offset = offset; + } + + mem.writeIntLittle(u32, code, inst.toU32()); + }, + .got => { + const narrowed = @truncate(u12, args.target_addr); + var inst: aarch64.Instruction = .{ + .load_store_register = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code), + }; + const offset = try math.divExact(u12, narrowed, 8); + inst.load_store_register.offset = offset; + mem.writeIntLittle(u32, code, inst.toU32()); + }, + .tlvp => { + const RegInfo = struct { + rd: u5, + rn: u5, + size: u1, + }; + const reg_info: RegInfo = blk: { + if (isArithmeticOp(code)) { + const inst = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.add_subtract_immediate, + ), code); + break :blk .{ + .rd = inst.rd, + .rn = inst.rn, + .size = inst.sf, + }; + } else { + const inst = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code); + break :blk .{ + .rd = inst.rt, + .rn = inst.rn, + .size = @truncate(u1, inst.size), + }; + } + }; + const narrowed = @truncate(u12, args.target_addr); + var inst = aarch64.Instruction{ + .add_subtract_immediate = .{ + .rd = reg_info.rd, + .rn = reg_info.rn, + .imm12 = narrowed, + .sh = 0, + .s = 0, + .op = 0, + .sf = reg_info.size, + }, + }; + mem.writeIntLittle(u32, code, inst.toU32()); + }, + } + } + + pub fn format(self: PageOff, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "PageOff {{ ", .{}); + switch (self.kind) { + .page => {}, + .got => { + try std.fmt.format(writer, ".got, ", .{}); + }, + .tlvp => { + try std.fmt.format(writer, ".tlvp, ", .{}); + }, + } + try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); + try 
std.fmt.format(writer, ".op_kind = {s}, ", .{self.op_kind});
+ try std.fmt.format(writer, "}}", .{});
+ }
+ };
+
+ pub const PointerToGot = struct {
+ pub fn resolve(_: PointerToGot, args: ResolveArgs) !void {
+ const result = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr));
+ mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @bitCast(u32, result));
+ }
+
+ pub fn format(self: PointerToGot, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
+ _ = self;
+ _ = fmt;
+ _ = options;
+ try std.fmt.format(writer, "PointerToGot {{}}", .{});
+ }
+ };
+
+ pub const Signed = struct {
+ addend: i64,
+ correction: i4,
+
+ pub fn resolve(self: Signed, args: ResolveArgs) !void {
+ const target_addr = @intCast(i64, args.target_addr) + self.addend;
+ const displacement = try math.cast(
+ i32,
+ target_addr - @intCast(i64, args.source_addr) - self.correction - 4,
+ );
+ mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @bitCast(u32, displacement));
+ }
+
+ pub fn format(self: Signed, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
+ _ = fmt;
+ _ = options;
+ try std.fmt.format(writer, "Signed {{ ", .{});
+ try std.fmt.format(writer, ".addend = {}, ", .{self.addend});
+ try std.fmt.format(writer, ".correction = {}, ", .{self.correction});
+ try std.fmt.format(writer, "}}", .{});
+ }
+ };
+
+ pub const Load = struct {
+ kind: enum {
+ got,
+ tlvp,
+ },
+ addend: i32 = 0,
+
+ pub fn resolve(self: Load, block: *TextBlock, offset: u32, args: ResolveArgs) !void {
+ if (self.kind == .tlvp) {
+ // We need to rewrite the opcode from movq to leaq.
+ block.code[offset - 2] = 0x8d;
+ }
+ const displacement = try math.cast(
+ i32,
+ @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4 + self.addend,
+ );
+ mem.writeIntLittle(u32, block.code[offset..][0..4], @bitCast(u32, displacement));
+ }
+
+ pub fn format(self: Load, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
+ _ = fmt;
+ _ = options;
+ try std.fmt.format(writer, "Load {{ ", .{});
+ try std.fmt.format(writer, "{s}, ", .{self.kind});
+ try std.fmt.format(writer, ".addend = {}, ", .{self.addend});
+ try std.fmt.format(writer, "}}", .{});
+ }
+ };
+
+ pub fn resolve(self: Relocation, block: *TextBlock, args: ResolveArgs) !void {
+ switch (self.payload) {
+ .unsigned => |unsigned| try unsigned.resolve(block, self.offset, args),
+ .branch => |branch| try branch.resolve(block, self.offset, args),
+ .page => |page| try page.resolve(block, self.offset, args),
+ .page_off => |page_off| try page_off.resolve(args),
+ .pointer_to_got => |pointer_to_got| try pointer_to_got.resolve(args),
+ .signed => |signed| try signed.resolve(args),
+ .load => |load| try load.resolve(block, self.offset, args),
+ }
+ }
+
+ pub fn format(self: Relocation, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
+ try std.fmt.format(writer, "Relocation {{ ", .{});
+ try std.fmt.format(writer, ".offset = {}, ", .{self.offset});
+ try std.fmt.format(writer, ".where = {}, ", .{self.where});
+ try std.fmt.format(writer, ".where_index = {d}, ", .{self.where_index});
+
+ switch (self.payload) {
+ .unsigned => |unsigned| try unsigned.format(fmt, options, writer),
+ .branch => |branch| try branch.format(fmt, options, writer),
+ .page => |page| try page.format(fmt, options, writer),
+ .page_off => |page_off| try page_off.format(fmt, options, writer),
+ .pointer_to_got => |pointer_to_got| try pointer_to_got.format(fmt, options, writer),
+ .signed => |signed|
try signed.format(fmt, options, writer), + .load => |load| try load.format(fmt, options, writer), + } + + try std.fmt.format(writer, "}}", .{}); + } }; pub fn init(allocator: *Allocator) TextBlock { @@ -139,6 +525,462 @@ pub fn deinit(self: *TextBlock) void { self.dices.deinit(); } +const RelocContext = struct { + base_addr: u64 = 0, + zld: *Zld, +}; + +fn initRelocFromObject(rel: macho.relocation_info, object: *Object, ctx: RelocContext) !Relocation { + var parsed_rel = Relocation{ + .offset = @intCast(u32, @intCast(u64, rel.r_address) - ctx.base_addr), + .where = undefined, + .where_index = undefined, + .payload = undefined, + }; + + if (rel.r_extern == 0) { + const sect_id = @intCast(u16, rel.r_symbolnum - 1); + + const local_sym_index = object.sections_as_symbols.get(sect_id) orelse blk: { + const seg = object.load_commands.items[object.segment_cmd_index.?].Segment; + const sect = seg.sections.items[sect_id]; + const local_sym_index = @intCast(u32, ctx.zld.locals.items.len); + const sym_name = try std.fmt.allocPrint(ctx.zld.allocator, "l_{s}_{s}_{s}", .{ + object.name.?, + commands.segmentName(sect), + commands.sectionName(sect), + }); + defer ctx.zld.allocator.free(sym_name); + + try ctx.zld.locals.append(ctx.zld.allocator, .{ + .n_strx = try ctx.zld.makeString(sym_name), + .n_type = macho.N_SECT, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + try object.sections_as_symbols.putNoClobber(object.allocator, sect_id, local_sym_index); + break :blk local_sym_index; + }; + + parsed_rel.where = .local; + parsed_rel.where_index = local_sym_index; + } else { + const sym = object.symtab.items[rel.r_symbolnum]; + const sym_name = object.getString(sym.n_strx); + + if (Zld.symbolIsSect(sym) and !Zld.symbolIsExt(sym)) { + const where_index = object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable; + parsed_rel.where = .local; + parsed_rel.where_index = where_index; + } else { + const resolv = ctx.zld.symbol_resolver.get(sym_name) orelse unreachable; + switch 
(resolv.where) { + .global => { + parsed_rel.where = .local; + parsed_rel.where_index = resolv.local_sym_index; + }, + .import => { + parsed_rel.where = .import; + parsed_rel.where_index = resolv.where_index; + }, + else => unreachable, + } + } + } + + return parsed_rel; +} + +pub fn parseRelocsFromObject( + self: *TextBlock, + relocs: []macho.relocation_info, + object: *Object, + ctx: RelocContext, +) !void { + const filtered_relocs = filterRelocs(relocs, ctx.base_addr, ctx.base_addr + self.size); + var it = RelocIterator{ + .buffer = filtered_relocs, + }; + + var addend: u32 = 0; + var subtractor: ?u32 = null; + + while (it.next()) |rel| { + if (isAddend(rel, object.arch.?)) { + // Addend is not a relocation with effect on the TextBlock, so + // parse it and carry on. + assert(addend == 0); // Oh no, addend was not reset! + addend = rel.r_symbolnum; + + // Verify ADDEND is followed by a load. + const next = @intToEnum(macho.reloc_type_arm64, it.peek().r_type); + switch (next) { + .ARM64_RELOC_PAGE21, .ARM64_RELOC_PAGEOFF12 => {}, + else => { + log.err("unexpected relocation type: expected PAGE21 or PAGEOFF12, found {s}", .{next}); + return error.UnexpectedRelocationType; + }, + } + continue; + } + + if (isSubtractor(rel, object.arch.?)) { + // Subtractor is not a relocation with effect on the TextBlock, so + // parse it and carry on. + assert(subtractor == null); // Oh no, subtractor was not reset! + assert(rel.r_extern == 1); + const sym = object.symtab.items[rel.r_symbolnum]; + const sym_name = object.getString(sym.n_strx); + + if (Zld.symbolIsSect(sym) and !Zld.symbolIsExt(sym)) { + const where_index = object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable; + subtractor = where_index; + } else { + const resolv = ctx.zld.symbol_resolver.get(sym_name) orelse unreachable; + assert(resolv.where == .global); + subtractor = resolv.local_sym_index; + } + + // Verify SUBTRACTOR is followed by UNSIGNED. + switch (object.arch.?) 
{ + .aarch64 => { + const next = @intToEnum(macho.reloc_type_arm64, it.peek().r_type); + if (next != .ARM64_RELOC_UNSIGNED) { + log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); + return error.UnexpectedRelocationType; + } + }, + .x86_64 => { + const next = @intToEnum(macho.reloc_type_x86_64, it.peek().r_type); + if (next != .X86_64_RELOC_UNSIGNED) { + log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); + return error.UnexpectedRelocationType; + } + }, + else => unreachable, + } + continue; + } + + var parsed_rel = try initRelocFromObject(rel, object, ctx); + + switch (object.arch.?) { + .aarch64 => { + const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); + switch (rel_type) { + .ARM64_RELOC_ADDEND => unreachable, + .ARM64_RELOC_SUBTRACTOR => unreachable, + .ARM64_RELOC_BRANCH26 => { + self.parseBranch(rel, &parsed_rel, ctx); + }, + .ARM64_RELOC_UNSIGNED => { + self.parseUnsigned(rel, &parsed_rel, subtractor, ctx); + subtractor = null; + }, + .ARM64_RELOC_PAGE21, + .ARM64_RELOC_GOT_LOAD_PAGE21, + .ARM64_RELOC_TLVP_LOAD_PAGE21, + => { + self.parsePage(rel, &parsed_rel, addend); + if (rel_type == .ARM64_RELOC_PAGE21) + addend = 0; + }, + .ARM64_RELOC_PAGEOFF12, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12, + .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, + => { + self.parsePageOff(rel, &parsed_rel, addend, ctx); + if (rel_type == .ARM64_RELOC_PAGEOFF12) + addend = 0; + }, + .ARM64_RELOC_POINTER_TO_GOT => { + self.parsePointerToGot(rel, &parsed_rel); + }, + } + }, + .x86_64 => { + switch (@intToEnum(macho.reloc_type_x86_64, rel.r_type)) { + .X86_64_RELOC_SUBTRACTOR => unreachable, + .X86_64_RELOC_BRANCH => { + self.parseBranch(rel, &parsed_rel, ctx); + }, + .X86_64_RELOC_UNSIGNED => { + self.parseUnsigned(rel, &parsed_rel, subtractor, ctx); + subtractor = null; + }, + .X86_64_RELOC_SIGNED, + .X86_64_RELOC_SIGNED_1, + .X86_64_RELOC_SIGNED_2, + .X86_64_RELOC_SIGNED_4, + => { + self.parseSigned(rel, &parsed_rel, ctx); + }, + 
.X86_64_RELOC_GOT_LOAD, + .X86_64_RELOC_GOT, + .X86_64_RELOC_TLV, + => { + self.parseLoad(rel, &parsed_rel); + }, + } + }, + else => unreachable, + } + + try self.relocs.append(parsed_rel); + + if (parsed_rel.where == .local) { + try self.references.put(parsed_rel.where_index, {}); + } + + const is_via_got = switch (parsed_rel.payload) { + .pointer_to_got => true, + .load => |load| load.kind == .got, + .page => |page| page.kind == .got, + .page_off => |page_off| page_off.kind == .got, + else => false, + }; + + if (is_via_got) blk: { + const key = Zld.GotIndirectionKey{ + .where = switch (parsed_rel.where) { + .local => .local, + .import => .import, + }, + .where_index = parsed_rel.where_index, + }; + if (ctx.zld.got_entries.contains(key)) break :blk; + + try ctx.zld.got_entries.putNoClobber(ctx.zld.allocator, key, {}); + } else if (parsed_rel.payload == .unsigned) { + switch (parsed_rel.where) { + .import => { + log.warn("WAT {s}", .{ctx.zld.getString(ctx.zld.imports.items[parsed_rel.where_index].n_strx)}); + try self.bindings.append(.{ + .local_sym_index = parsed_rel.where_index, + .offset = parsed_rel.offset, + }); + }, + .local => { + const source_sym = ctx.zld.locals.items[self.local_sym_index]; + const match = ctx.zld.unpackSectionId(source_sym.n_sect); + const seg = ctx.zld.load_commands.items[match.seg].Segment; + const sect = seg.sections.items[match.sect]; + const sect_type = commands.sectionType(sect); + + const should_rebase = rebase: { + if (!parsed_rel.payload.unsigned.is_64bit) break :rebase false; + + // TODO actually, a check similar to what dyld is doing, that is, verifying + // that the segment is writable should be enough here. 
+ const is_right_segment = blk: { + if (ctx.zld.data_segment_cmd_index) |idx| { + if (match.seg == idx) { + break :blk true; + } + } + if (ctx.zld.data_const_segment_cmd_index) |idx| { + if (match.seg == idx) { + break :blk true; + } + } + break :blk false; + }; + + if (!is_right_segment) break :rebase false; + if (sect_type != macho.S_LITERAL_POINTERS and + sect_type != macho.S_REGULAR and + sect_type != macho.S_MOD_INIT_FUNC_POINTERS and + sect_type != macho.S_MOD_TERM_FUNC_POINTERS) + { + break :rebase false; + } + + break :rebase true; + }; + + if (should_rebase) { + try self.rebases.append(parsed_rel.offset); + } + }, + } + } else if (parsed_rel.payload == .branch) blk: { + if (parsed_rel.where != .import) break :blk; + if (ctx.zld.stubs.contains(parsed_rel.where_index)) break :blk; + + try ctx.zld.stubs.putNoClobber(ctx.zld.allocator, parsed_rel.where_index, {}); + } + } +} + +fn isAddend(rel: macho.relocation_info, arch: Arch) bool { + if (arch != .aarch64) return false; + return @intToEnum(macho.reloc_type_arm64, rel.r_type) == .ARM64_RELOC_ADDEND; +} + +fn isSubtractor(rel: macho.relocation_info, arch: Arch) bool { + return switch (arch) { + .aarch64 => @intToEnum(macho.reloc_type_arm64, rel.r_type) == .ARM64_RELOC_SUBTRACTOR, + .x86_64 => @intToEnum(macho.reloc_type_x86_64, rel.r_type) == .X86_64_RELOC_SUBTRACTOR, + else => unreachable, + }; +} + +fn parseUnsigned( + self: TextBlock, + rel: macho.relocation_info, + out: *Relocation, + subtractor: ?u32, + ctx: RelocContext, +) void { + assert(rel.r_pcrel == 0); + + const is_64bit: bool = switch (rel.r_length) { + 3 => true, + 2 => false, + else => unreachable, + }; + + var addend: i64 = if (is_64bit) + mem.readIntLittle(i64, self.code[out.offset..][0..8]) + else + mem.readIntLittle(i32, self.code[out.offset..][0..4]); + + if (rel.r_extern == 0) { + assert(out.where == .local); + const target_sym = ctx.zld.locals.items[out.where_index]; + addend -= @intCast(i64, target_sym.n_value); + } + + out.payload = .{ 
+ .unsigned = .{ + .subtractor = subtractor, + .is_64bit = is_64bit, + .addend = addend, + }, + }; +} + +fn parseBranch(self: TextBlock, rel: macho.relocation_info, out: *Relocation, ctx: RelocContext) void { + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + out.payload = .{ + .branch = .{ + .arch = ctx.zld.target.?.cpu.arch, + }, + }; +} + +fn parsePage(self: TextBlock, rel: macho.relocation_info, out: *Relocation, addend: u32) void { + _ = self; + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + out.payload = .{ + .page = .{ + .kind = switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) { + .ARM64_RELOC_PAGE21 => .page, + .ARM64_RELOC_GOT_LOAD_PAGE21 => .got, + .ARM64_RELOC_TLVP_LOAD_PAGE21 => .tlvp, + else => unreachable, + }, + .addend = addend, + }, + }; +} + +fn parsePageOff(self: TextBlock, rel: macho.relocation_info, out: *Relocation, addend: u32, ctx: RelocContext) void { + assert(rel.r_pcrel == 0); + assert(rel.r_length == 2); + + const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); + const op_kind: ?Relocation.PageOff.OpKind = blk: { + if (rel_type != .ARM64_RELOC_PAGEOFF12) break :blk null; + const op_kind: Relocation.PageOff.OpKind = if (isArithmeticOp(self.code[out.offset..][0..4])) + .arithmetic + else + .load; + break :blk op_kind; + }; + + out.payload = .{ + .page_off = .{ + .kind = switch (rel_type) { + .ARM64_RELOC_PAGEOFF12 => .page, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => .got, + .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => .tlvp, + else => unreachable, + }, + .addend = addend, + .op_kind = op_kind, + }, + }; +} + +fn parsePointerToGot(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void { + _ = self; + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + out.payload = .{ + .pointer_to_got = .{}, + }; +} + +fn parseSigned(self: TextBlock, rel: macho.relocation_info, out: *Relocation, ctx: RelocContext) void { + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + const rel_type = 
@intToEnum(macho.reloc_type_x86_64, rel.r_type); + const correction: i4 = switch (rel_type) { + .X86_64_RELOC_SIGNED => 0, + .X86_64_RELOC_SIGNED_1 => 1, + .X86_64_RELOC_SIGNED_2 => 2, + .X86_64_RELOC_SIGNED_4 => 4, + else => unreachable, + }; + var addend: i64 = mem.readIntLittle(i32, self.code[out.offset..][0..4]) + correction; + + if (rel.r_extern == 0) { + const source_sym = ctx.zld.locals.items[self.local_sym_index]; + const target_sym = switch (out.where) { + .local => ctx.zld.locals.items[out.where_index], + .import => ctx.zld.imports.items[out.where_index], + }; + addend = @intCast(i64, source_sym.n_value + out.offset + 4) + addend - @intCast(i64, target_sym.n_value); + } + + out.payload = .{ + .signed = .{ + .correction = correction, + .addend = addend, + }, + }; +} + +fn parseLoad(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void { + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); + const addend: i32 = if (rel_type == .X86_64_RELOC_GOT) + mem.readIntLittle(i32, self.code[out.offset..][0..4]) + else + 0; + + out.payload = .{ + .load = .{ + .kind = switch (rel_type) { + .X86_64_RELOC_GOT_LOAD, .X86_64_RELOC_GOT => .got, + .X86_64_RELOC_TLV => .tlvp, + else => unreachable, + }, + .addend = addend, + }, + }; +} + pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { for (self.relocs.items) |rel| { log.debug("relocating {}", .{rel}); @@ -148,7 +990,15 @@ pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { break :blk sym.n_value + rel.offset; }; const target_addr = blk: { - if (isGotIndirection(rel, zld.target.?.cpu.arch)) { + const is_via_got = switch (rel.payload) { + .pointer_to_got => true, + .page => |page| page.kind == .got, + .page_off => |page_off| page_off.kind == .got, + .load => |load| load.kind == .got, + else => false, + }; + + if (is_via_got) { const dc_seg = zld.load_commands.items[zld.data_const_segment_cmd_index.?].Segment; const got = 
dc_seg.sections.items[zld.got_section_index.?]; const got_index = rel.target.got_index orelse { @@ -270,21 +1120,40 @@ pub fn print(self: *const TextBlock, zld: *Zld) void { self.print_this(zld); } -fn isGotIndirection(rel: macho.relocation_info, arch: Arch) bool { - return switch (arch) { - .aarch64 => switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) { - .ARM64_RELOC_POINTER_TO_GOT, - .ARM64_RELOC_GOT_LOAD_PAGE21, - .ARM64_RELOC_GOT_LOAD_PAGEOFF12, - => true, - else => false, - }, - .x86_64 => switch (@intToEnum(macho.reloc_type_x86_64, rel.r_type)) { - .X86_64_RELOC_GOT, - .X86_64_RELOC_GOT_LOAD, - => true, - else => false, - }, - else => unreachable, +const RelocIterator = struct { + buffer: []const macho.relocation_info, + index: i32 = -1, + + pub fn next(self: *RelocIterator) ?macho.relocation_info { + self.index += 1; + if (self.index < self.buffer.len) { + return self.buffer[@intCast(u32, self.index)]; + } + return null; + } + + pub fn peek(self: RelocIterator) macho.relocation_info { + assert(self.index + 1 < self.buffer.len); + return self.buffer[@intCast(u32, self.index + 1)]; + } +}; + +fn filterRelocs(relocs: []macho.relocation_info, start_addr: u64, end_addr: u64) []macho.relocation_info { + const Predicate = struct { + addr: u64, + + pub fn predicate(self: @This(), rel: macho.relocation_info) bool { + return rel.r_address < self.addr; + } }; + + const start = Zld.findFirst(macho.relocation_info, relocs, 0, Predicate{ .addr = end_addr }); + const end = Zld.findFirst(macho.relocation_info, relocs, start, Predicate{ .addr = start_addr }); + + return relocs[start..end]; +} + +inline fn isArithmeticOp(inst: *const [4]u8) bool { + const group_decode = @truncate(u5, inst[3]); + return ((group_decode >> 2) == 4); } diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 862e6b5b0c..67cd007ebf 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -108,8 +108,8 @@ symbol_resolver: std.StringArrayHashMapUnmanaged(SymbolWithLoc) = 
.{}, strtab: std.ArrayListUnmanaged(u8) = .{}, -// stubs: std.ArrayListUnmanaged(*Symbol) = .{}, -got_entries: std.ArrayListUnmanaged(GotEntry) = .{}, +stubs: std.AutoArrayHashMapUnmanaged(u32, void) = .{}, +got_entries: std.AutoArrayHashMapUnmanaged(GotIndirectionKey, void) = .{}, stub_helper_stubs_start_off: ?u64 = null, @@ -131,20 +131,12 @@ const SymbolWithLoc = struct { file: u16 = 0, }; -pub const GotEntry = struct { - /// GOT entry can either be a local pointer or an extern (nonlazy) import. - kind: enum { +pub const GotIndirectionKey = struct { + where: enum { local, import, }, - - /// Id to the macho.nlist_64 from the respective table: either locals or nonlazy imports. - /// TODO I'm more and more inclined to just manage a single, max two symbol tables - /// rather than 4 as we currently do, but I'll follow up in the future PR. - local_sym_index: u32, - - /// Index of this entry in the GOT. - got_index: u32, + where_index: u32, }; pub const Output = struct { @@ -161,7 +153,7 @@ pub fn init(allocator: *Allocator) !Zld { } pub fn deinit(self: *Zld) void { - // self.stubs.deinit(self.allocator); + self.stubs.deinit(self.allocator); self.got_entries.deinit(self.allocator); for (self.load_commands.items) |*lc| { @@ -3043,3 +3035,33 @@ pub fn sectionId(self: Zld, match: MatchingSection) u8 { } return section; } + +pub fn unpackSectionId(self: Zld, section_id: u8) MatchingSection { + var match: MatchingSection = undefined; + var section: u8 = 0; + outer: for (self.load_commands.items) |cmd, cmd_id| { + assert(cmd == .Segment); + for (cmd.Segment.sections.items) |_, sect_id| { + section += 1; + if (section_id == section) { + match.seg = @intCast(u16, cmd_id); + match.sect = @intCast(u16, sect_id); + break :outer; + } + } + } + return match; +} + +pub fn findFirst(comptime T: type, haystack: []T, start: usize, predicate: anytype) usize { + if (!@hasDecl(@TypeOf(predicate), "predicate")) + @compileError("Predicate is required to define fn predicate(@This(), T) 
bool"); + + if (start == haystack.len) return start; + + var i = start; + while (i < haystack.len) : (i += 1) { + if (predicate.predicate(haystack[i])) break; + } + return i; +} diff --git a/src/link/MachO/reloc.zig b/src/link/MachO/reloc.zig deleted file mode 100644 index 1d0c0466d6..0000000000 --- a/src/link/MachO/reloc.zig +++ /dev/null @@ -1,840 +0,0 @@ -const std = @import("std"); -const aarch64 = @import("../../codegen/aarch64.zig"); -const assert = std.debug.assert; -const commands = @import("commands.zig"); -const log = std.log.scoped(.reloc); -const macho = std.macho; -const math = std.math; -const mem = std.mem; -const meta = std.meta; - -const Allocator = mem.Allocator; -const Arch = std.Target.Cpu.Arch; -const Object = @import("Object.zig"); -const Symbol = @import("Symbol.zig"); -const TextBlock = @import("TextBlock.zig"); -const Zld = @import("Zld.zig"); - -pub const Relocation = struct { - /// Offset within the `block`s code buffer. - /// Note relocation size can be inferred by relocation's kind. - offset: u32, - - /// Target symbol: either a regular or a proxy. 
- target: *Symbol, - - payload: union(enum) { - unsigned: Unsigned, - branch: Branch, - page: Page, - page_off: PageOff, - pointer_to_got: PointerToGot, - signed: Signed, - load: Load, - }, - - const ResolveArgs = struct { - block: *TextBlock, - offset: u32, - source_addr: u64, - target_addr: u64, - }; - - pub const Unsigned = struct { - subtractor: ?*Symbol = null, - - /// Addend embedded directly in the relocation slot - addend: i64, - - /// Extracted from r_length: - /// => 3 implies true - /// => 2 implies false - /// => * is unreachable - is_64bit: bool, - - pub fn resolve(self: Unsigned, args: ResolveArgs) !void { - const result = if (self.subtractor) |subtractor| - @intCast(i64, args.target_addr) - @intCast(i64, subtractor.payload.regular.address) + self.addend - else - @intCast(i64, args.target_addr) + self.addend; - - if (self.is_64bit) { - mem.writeIntLittle(u64, args.block.code[args.offset..][0..8], @bitCast(u64, result)); - } else { - mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @truncate(u32, @bitCast(u64, result))); - } - } - - pub fn format(self: Unsigned, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "Unsigned {{ ", .{}); - if (self.subtractor) |sub| { - try std.fmt.format(writer, ".subtractor = {}, ", .{sub}); - } - try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); - const length: usize = if (self.is_64bit) 8 else 4; - try std.fmt.format(writer, ".length = {}, ", .{length}); - try std.fmt.format(writer, "}}", .{}); - } - }; - - pub const Branch = struct { - arch: Arch, - - pub fn resolve(self: Branch, args: ResolveArgs) !void { - switch (self.arch) { - .aarch64 => { - const displacement = try math.cast( - i28, - @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr), - ); - const code = args.block.code[args.offset..][0..4]; - var inst = aarch64.Instruction{ - .unconditional_branch_immediate = 
mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.unconditional_branch_immediate, - ), code), - }; - inst.unconditional_branch_immediate.imm26 = @truncate(u26, @bitCast(u28, displacement >> 2)); - mem.writeIntLittle(u32, code, inst.toU32()); - }, - .x86_64 => { - const displacement = try math.cast( - i32, - @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4, - ); - mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @bitCast(u32, displacement)); - }, - else => return error.UnsupportedCpuArchitecture, - } - } - - pub fn format(self: Branch, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = self; - _ = fmt; - _ = options; - try std.fmt.format(writer, "Branch {{}}", .{}); - } - }; - - pub const Page = struct { - kind: enum { - page, - got, - tlvp, - }, - addend: ?u32 = null, - - pub fn resolve(self: Page, args: ResolveArgs) !void { - const target_addr = if (self.addend) |addend| args.target_addr + addend else args.target_addr; - const source_page = @intCast(i32, args.source_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); - - const code = args.block.code[args.offset..][0..4]; - var inst = aarch64.Instruction{ - .pc_relative_address = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.pc_relative_address, - ), code), - }; - inst.pc_relative_address.immhi = @truncate(u19, pages >> 2); - inst.pc_relative_address.immlo = @truncate(u2, pages); - - mem.writeIntLittle(u32, code, inst.toU32()); - } - - pub fn format(self: Page, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "Page {{ ", .{}); - switch (self.kind) { - .page => {}, - .got => { - try std.fmt.format(writer, ".got, ", .{}); - }, - .tlvp => { - try std.fmt.format(writer, ".tlvp", .{}); - }, - } - if (self.addend) 
|add| { - try std.fmt.format(writer, ".addend = {}, ", .{add}); - } - try std.fmt.format(writer, "}}", .{}); - } - }; - - pub const PageOff = struct { - kind: enum { - page, - got, - tlvp, - }, - addend: ?u32 = null, - op_kind: ?OpKind = null, - - pub const OpKind = enum { - arithmetic, - load, - }; - - pub fn resolve(self: PageOff, args: ResolveArgs) !void { - const code = args.block.code[args.offset..][0..4]; - - switch (self.kind) { - .page => { - const target_addr = if (self.addend) |addend| args.target_addr + addend else args.target_addr; - const narrowed = @truncate(u12, target_addr); - - const op_kind = self.op_kind orelse unreachable; - var inst: aarch64.Instruction = blk: { - switch (op_kind) { - .arithmetic => { - break :blk .{ - .add_subtract_immediate = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.add_subtract_immediate, - ), code), - }; - }, - .load => { - break :blk .{ - .load_store_register = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), code), - }; - }, - } - }; - - if (op_kind == .arithmetic) { - inst.add_subtract_immediate.imm12 = narrowed; - } else { - const offset: u12 = blk: { - if (inst.load_store_register.size == 0) { - if (inst.load_store_register.v == 1) { - // 128-bit SIMD is scaled by 16. - break :blk try math.divExact(u12, narrowed, 16); - } - // Otherwise, 8-bit SIMD or ldrb. 
- break :blk narrowed; - } else { - const denom: u4 = try math.powi(u4, 2, inst.load_store_register.size); - break :blk try math.divExact(u12, narrowed, denom); - } - }; - inst.load_store_register.offset = offset; - } - - mem.writeIntLittle(u32, code, inst.toU32()); - }, - .got => { - const narrowed = @truncate(u12, args.target_addr); - var inst: aarch64.Instruction = .{ - .load_store_register = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), code), - }; - const offset = try math.divExact(u12, narrowed, 8); - inst.load_store_register.offset = offset; - mem.writeIntLittle(u32, code, inst.toU32()); - }, - .tlvp => { - const RegInfo = struct { - rd: u5, - rn: u5, - size: u1, - }; - const reg_info: RegInfo = blk: { - if (isArithmeticOp(code)) { - const inst = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.add_subtract_immediate, - ), code); - break :blk .{ - .rd = inst.rd, - .rn = inst.rn, - .size = inst.sf, - }; - } else { - const inst = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), code); - break :blk .{ - .rd = inst.rt, - .rn = inst.rn, - .size = @truncate(u1, inst.size), - }; - } - }; - const narrowed = @truncate(u12, args.target_addr); - var inst = aarch64.Instruction{ - .add_subtract_immediate = .{ - .rd = reg_info.rd, - .rn = reg_info.rn, - .imm12 = narrowed, - .sh = 0, - .s = 0, - .op = 0, - .sf = reg_info.size, - }, - }; - mem.writeIntLittle(u32, code, inst.toU32()); - }, - } - } - - pub fn format(self: PageOff, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "PageOff {{ ", .{}); - switch (self.kind) { - .page => {}, - .got => { - try std.fmt.format(writer, ".got, ", .{}); - }, - .tlvp => { - try std.fmt.format(writer, ".tlvp, ", .{}); - }, - } - if (self.addend) |add| { - try std.fmt.format(writer, ".addend = {}, ", .{add}); 
- } - if (self.op_kind) |op| { - try std.fmt.format(writer, ".op_kind = {s}, ", .{op}); - } - try std.fmt.format(writer, "}}", .{}); - } - }; - - pub const PointerToGot = struct { - pub fn resolve(_: PointerToGot, args: ResolveArgs) !void { - const result = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr)); - mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @bitCast(u32, result)); - } - - pub fn format(self: PointerToGot, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = self; - _ = fmt; - _ = options; - try std.fmt.format(writer, "PointerToGot {{}}", .{}); - } - }; - - pub const Signed = struct { - addend: i64, - correction: i4, - - pub fn resolve(self: Signed, args: ResolveArgs) !void { - const target_addr = @intCast(i64, args.target_addr) + self.addend; - const displacement = try math.cast( - i32, - target_addr - @intCast(i64, args.source_addr) - self.correction - 4, - ); - mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @bitCast(u32, displacement)); - } - - pub fn format(self: Signed, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "Signed {{ ", .{}); - try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); - try std.fmt.format(writer, ".correction = {}, ", .{self.correction}); - try std.fmt.format(writer, "}}", .{}); - } - }; - - pub const Load = struct { - kind: enum { - got, - tlvp, - }, - addend: ?i32 = null, - - pub fn resolve(self: Load, args: ResolveArgs) !void { - if (self.kind == .tlvp) { - // We need to rewrite the opcode from movq to leaq. 
- args.block.code[args.offset - 2] = 0x8d; - } - const addend = if (self.addend) |addend| addend else 0; - const displacement = try math.cast( - i32, - @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4 + addend, - ); - mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @bitCast(u32, displacement)); - } - - pub fn format(self: Load, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "Load {{ ", .{}); - try std.fmt.format(writer, "{s}, ", .{self.kind}); - if (self.addend) |addend| { - try std.fmt.format(writer, ".addend = {}, ", .{addend}); - } - try std.fmt.format(writer, "}}", .{}); - } - }; - - pub fn resolve(self: Relocation, block: *TextBlock, source_addr: u64, target_addr: u64) !void { - const args = ResolveArgs{ - .block = block, - .offset = self.offset, - .source_addr = source_addr, - .target_addr = target_addr, - }; - switch (self.payload) { - .unsigned => |unsigned| try unsigned.resolve(args), - .branch => |branch| try branch.resolve(args), - .page => |page| try page.resolve(args), - .page_off => |page_off| try page_off.resolve(args), - .pointer_to_got => |pointer_to_got| try pointer_to_got.resolve(args), - .signed => |signed| try signed.resolve(args), - .load => |load| try load.resolve(args), - } - } - - pub fn format(self: Relocation, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - try std.fmt.format(writer, "Relocation {{ ", .{}); - try std.fmt.format(writer, ".offset = {}, ", .{self.offset}); - try std.fmt.format(writer, ".target = {}, ", .{self.target}); - - switch (self.payload) { - .unsigned => |unsigned| try unsigned.format(fmt, options, writer), - .branch => |branch| try branch.format(fmt, options, writer), - .page => |page| try page.format(fmt, options, writer), - .page_off => |page_off| try page_off.format(fmt, options, writer), - .pointer_to_got => |pointer_to_got| try 
pointer_to_got.format(fmt, options, writer), - .signed => |signed| try signed.format(fmt, options, writer), - .load => |load| try load.format(fmt, options, writer), - } - - try std.fmt.format(writer, "}}", .{}); - } -}; - -pub const RelocIterator = struct { - buffer: []const macho.relocation_info, - index: i32 = -1, - - pub fn next(self: *RelocIterator) ?macho.relocation_info { - self.index += 1; - if (self.index < self.buffer.len) { - return self.buffer[@intCast(u32, self.index)]; - } - return null; - } - - pub fn peek(self: RelocIterator) macho.relocation_info { - assert(self.index + 1 < self.buffer.len); - return self.buffer[@intCast(u32, self.index + 1)]; - } -}; - -pub const Parser = struct { - object: *Object, - zld: *Zld, - it: *RelocIterator, - block: *TextBlock, - - /// Base address of the parsed text block in the source section. - base_addr: u64, - - /// Used only when targeting aarch64 - addend: ?u32 = null, - - /// Parsed subtractor symbol from _RELOC_SUBTRACTOR reloc type. - subtractor: ?*Symbol = null, - - pub fn parse(self: *Parser) !void { - while (self.it.next()) |rel| { - const out_rel = blk: { - switch (self.object.arch.?) { - .aarch64 => { - const out_rel = switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) { - .ARM64_RELOC_BRANCH26 => try self.parseBranch(rel), - .ARM64_RELOC_SUBTRACTOR => { - // Subtractor is not a relocation with effect on the TextBlock, so - // parse it and carry on. - try self.parseSubtractor(rel); - - // Verify SUBTRACTOR is followed by UNSIGNED. - const next = @intToEnum(macho.reloc_type_arm64, self.it.peek().r_type); - if (next != .ARM64_RELOC_UNSIGNED) { - log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); - return error.UnexpectedRelocationType; - } - continue; - }, - .ARM64_RELOC_UNSIGNED => try self.parseUnsigned(rel), - .ARM64_RELOC_ADDEND => { - // Addend is not a relocation with effect on the TextBlock, so - // parse it and carry on. 
- try self.parseAddend(rel); - - // Verify ADDEND is followed by a load. - const next = @intToEnum(macho.reloc_type_arm64, self.it.peek().r_type); - switch (next) { - .ARM64_RELOC_PAGE21, .ARM64_RELOC_PAGEOFF12 => {}, - else => { - log.err("unexpected relocation type: expected PAGE21 or PAGEOFF12, found {s}", .{next}); - return error.UnexpectedRelocationType; - }, - } - continue; - }, - .ARM64_RELOC_PAGE21, - .ARM64_RELOC_GOT_LOAD_PAGE21, - .ARM64_RELOC_TLVP_LOAD_PAGE21, - => try self.parsePage(rel), - .ARM64_RELOC_PAGEOFF12, - .ARM64_RELOC_GOT_LOAD_PAGEOFF12, - .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, - => try self.parsePageOff(rel), - .ARM64_RELOC_POINTER_TO_GOT => try self.parsePointerToGot(rel), - }; - break :blk out_rel; - }, - .x86_64 => { - const out_rel = switch (@intToEnum(macho.reloc_type_x86_64, rel.r_type)) { - .X86_64_RELOC_BRANCH => try self.parseBranch(rel), - .X86_64_RELOC_SUBTRACTOR => { - // Subtractor is not a relocation with effect on the TextBlock, so - // parse it and carry on. - try self.parseSubtractor(rel); - - // Verify SUBTRACTOR is followed by UNSIGNED. 
- const next = @intToEnum(macho.reloc_type_x86_64, self.it.peek().r_type); - if (next != .X86_64_RELOC_UNSIGNED) { - log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); - return error.UnexpectedRelocationType; - } - continue; - }, - .X86_64_RELOC_UNSIGNED => try self.parseUnsigned(rel), - .X86_64_RELOC_SIGNED, - .X86_64_RELOC_SIGNED_1, - .X86_64_RELOC_SIGNED_2, - .X86_64_RELOC_SIGNED_4, - => try self.parseSigned(rel), - .X86_64_RELOC_GOT_LOAD, - .X86_64_RELOC_GOT, - .X86_64_RELOC_TLV, - => try self.parseLoad(rel), - }; - break :blk out_rel; - }, - else => unreachable, - } - }; - try self.block.relocs.append(out_rel); - - if (out_rel.target.payload == .regular) { - try self.block.references.put(out_rel.target.payload.regular.local_sym_index, {}); - } - - const is_via_got = switch (out_rel.payload) { - .pointer_to_got => true, - .load => |load| load.kind == .got, - .page => |page| page.kind == .got, - .page_off => |page_off| page_off.kind == .got, - else => false, - }; - - if (is_via_got and out_rel.target.got_index == null) { - const index = @intCast(u32, self.zld.got_entries.items.len); - out_rel.target.got_index = index; - try self.zld.got_entries.append(self.zld.allocator, out_rel.target); - - log.debug("adding GOT entry for symbol {s} at index {}", .{ - self.zld.getString(out_rel.target.strx), - index, - }); - } else if (out_rel.payload == .unsigned) { - const sym = out_rel.target; - switch (sym.payload) { - .proxy => |proxy| { - try self.block.bindings.append(.{ - .local_sym_index = proxy.local_sym_index, - .offset = out_rel.offset, - }); - }, - else => { - const source_sym = self.zld.locals.items[self.block.local_sym_index]; - const source_reg = &source_sym.payload.regular; - const seg = self.zld.load_commands.items[source_reg.segment_id].Segment; - const sect = seg.sections.items[source_reg.section_id]; - const sect_type = commands.sectionType(sect); - - const should_rebase = rebase: { - if (!out_rel.payload.unsigned.is_64bit) break 
:rebase false; - - // TODO actually, a check similar to what dyld is doing, that is, verifying - // that the segment is writable should be enough here. - const is_right_segment = blk: { - if (self.zld.data_segment_cmd_index) |idx| { - if (source_reg.segment_id == idx) { - break :blk true; - } - } - if (self.zld.data_const_segment_cmd_index) |idx| { - if (source_reg.segment_id == idx) { - break :blk true; - } - } - break :blk false; - }; - - if (!is_right_segment) break :rebase false; - if (sect_type != macho.S_LITERAL_POINTERS and - sect_type != macho.S_REGULAR and - sect_type != macho.S_MOD_INIT_FUNC_POINTERS and - sect_type != macho.S_MOD_TERM_FUNC_POINTERS) - { - break :rebase false; - } - - break :rebase true; - }; - - if (should_rebase) { - try self.block.rebases.append(out_rel.offset); - } - }, - } - } else if (out_rel.payload == .branch) blk: { - const sym = out_rel.target; - - if (sym.stubs_index != null) break :blk; - if (sym.payload != .proxy) break :blk; - - const index = @intCast(u32, self.zld.stubs.items.len); - sym.stubs_index = index; - try self.zld.stubs.append(self.zld.allocator, sym); - - log.debug("adding stub entry for symbol {s} at index {}", .{ self.zld.getString(sym.strx), index }); - } - } - } - - fn parseBaseRelInfo(self: *Parser, rel: macho.relocation_info) !Relocation { - const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); - const target = try self.object.symbolFromReloc(self.zld, rel); - return Relocation{ - .offset = offset, - .target = target, - .payload = undefined, - }; - } - - fn parseUnsigned(self: *Parser, rel: macho.relocation_info) !Relocation { - defer { - // Reset parser's subtractor state - self.subtractor = null; - } - - assert(rel.r_pcrel == 0); - - var parsed = try self.parseBaseRelInfo(rel); - const is_64bit: bool = switch (rel.r_length) { - 3 => true, - 2 => false, - else => unreachable, - }; - - var addend: i64 = if (is_64bit) - mem.readIntLittle(i64, self.block.code[parsed.offset..][0..8]) - 
else - mem.readIntLittle(i32, self.block.code[parsed.offset..][0..4]); - - if (rel.r_extern == 0) { - addend -= @intCast(i64, parsed.target.payload.regular.address); - } - - parsed.payload = .{ - .unsigned = .{ - .subtractor = self.subtractor, - .is_64bit = is_64bit, - .addend = addend, - }, - }; - - return parsed; - } - - fn parseBranch(self: *Parser, rel: macho.relocation_info) !Relocation { - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - var parsed = try self.parseBaseRelInfo(rel); - parsed.payload = .{ - .branch = .{ - .arch = self.object.arch.?, - }, - }; - return parsed; - } - - fn parsePage(self: *Parser, rel: macho.relocation_info) !Relocation { - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - - defer if (rel_type == .ARM64_RELOC_PAGE21) { - // Reset parser's addend state - self.addend = null; - }; - - const addend = if (rel_type == .ARM64_RELOC_PAGE21) - self.addend - else - null; - - var parsed = try self.parseBaseRelInfo(rel); - parsed.payload = .{ - .page = .{ - .kind = switch (rel_type) { - .ARM64_RELOC_PAGE21 => .page, - .ARM64_RELOC_GOT_LOAD_PAGE21 => .got, - .ARM64_RELOC_TLVP_LOAD_PAGE21 => .tlvp, - else => unreachable, - }, - .addend = addend, - }, - }; - return parsed; - } - - fn parsePageOff(self: *Parser, rel: macho.relocation_info) !Relocation { - assert(rel.r_pcrel == 0); - assert(rel.r_length == 2); - - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - - defer if (rel_type == .ARM64_RELOC_PAGEOFF12) { - // Reset parser's addend state - self.addend = null; - }; - - const addend = if (rel_type == .ARM64_RELOC_PAGEOFF12) - self.addend - else - null; - - var parsed = try self.parseBaseRelInfo(rel); - const op_kind: ?Relocation.PageOff.OpKind = blk: { - if (rel_type != .ARM64_RELOC_PAGEOFF12) break :blk null; - const op_kind: Relocation.PageOff.OpKind = if (isArithmeticOp(self.block.code[parsed.offset..][0..4])) - .arithmetic - else - 
.load; - break :blk op_kind; - }; - - parsed.payload = .{ - .page_off = .{ - .kind = switch (rel_type) { - .ARM64_RELOC_PAGEOFF12 => .page, - .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => .got, - .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => .tlvp, - else => unreachable, - }, - .addend = addend, - .op_kind = op_kind, - }, - }; - return parsed; - } - - fn parsePointerToGot(self: *Parser, rel: macho.relocation_info) !Relocation { - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - var parsed = try self.parseBaseRelInfo(rel); - parsed.payload = .{ - .pointer_to_got = .{}, - }; - return parsed; - } - - fn parseAddend(self: *Parser, rel: macho.relocation_info) !void { - assert(rel.r_pcrel == 0); - assert(rel.r_extern == 0); - assert(self.addend == null); - - self.addend = rel.r_symbolnum; - } - - fn parseSigned(self: *Parser, rel: macho.relocation_info) !Relocation { - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - var parsed = try self.parseBaseRelInfo(rel); - const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - const correction: i4 = switch (rel_type) { - .X86_64_RELOC_SIGNED => 0, - .X86_64_RELOC_SIGNED_1 => 1, - .X86_64_RELOC_SIGNED_2 => 2, - .X86_64_RELOC_SIGNED_4 => 4, - else => unreachable, - }; - var addend: i64 = mem.readIntLittle(i32, self.block.code[parsed.offset..][0..4]) + correction; - - if (rel.r_extern == 0) { - const source_sym = self.zld.locals.items[self.block.local_sym_index].payload.regular; - const source_addr = source_sym.address + parsed.offset + 4; - const target_sym = parsed.target.payload.regular; - addend = @intCast(i64, source_addr) + addend - @intCast(i64, target_sym.address); - } - - parsed.payload = .{ - .signed = .{ - .correction = correction, - .addend = addend, - }, - }; - - return parsed; - } - - fn parseSubtractor(self: *Parser, rel: macho.relocation_info) !void { - assert(rel.r_pcrel == 0); - assert(self.subtractor == null); - - self.subtractor = try self.object.symbolFromReloc(self.zld, rel); - } - - fn 
parseLoad(self: *Parser, rel: macho.relocation_info) !Relocation { - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - var parsed = try self.parseBaseRelInfo(rel); - const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - const addend = if (rel_type == .X86_64_RELOC_GOT) - mem.readIntLittle(i32, self.block.code[parsed.offset..][0..4]) - else - null; - - parsed.payload = .{ - .load = .{ - .kind = switch (rel_type) { - .X86_64_RELOC_GOT_LOAD, .X86_64_RELOC_GOT => .got, - .X86_64_RELOC_TLV => .tlvp, - else => unreachable, - }, - .addend = addend, - }, - }; - return parsed; - } -}; - -inline fn isArithmeticOp(inst: *const [4]u8) bool { - const group_decode = @truncate(u5, inst[3]); - return ((group_decode >> 2) == 4); -} From db8020ac0d40caec099ef987c5c43ba637f87c97 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 17 Jul 2021 11:01:10 +0200 Subject: [PATCH 57/81] zld: adjust resolving relocs logic to the new approach --- src/link/MachO/TextBlock.zig | 90 +++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 42 deletions(-) diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig index 20283dfc9d..a76263954f 100644 --- a/src/link/MachO/TextBlock.zig +++ b/src/link/MachO/TextBlock.zig @@ -146,10 +146,14 @@ pub const Relocation = struct { is_64bit: bool, pub fn resolve(self: Unsigned, args: ResolveArgs) !void { - const result = if (self.subtractor) |subtractor| - @intCast(i64, args.target_addr) - @intCast(i64, subtractor.payload.regular.address) + self.addend - else - @intCast(i64, args.target_addr) + self.addend; + const result = blk: { + if (self.subtractor) |subtractor| { + const sym = args.zld.locals.items[subtractor]; + break :blk @intCast(i64, args.target_addr) - @intCast(i64, sym.n_value) + self.addend; + } else { + break :blk @intCast(i64, args.target_addr) + self.addend; + } + }; if (self.is_64bit) { mem.writeIntLittle(u64, args.block.code[args.offset..][0..8], @bitCast(u64, result)); @@ -422,7 +426,7 @@ 
pub const Relocation = struct { i32, target_addr - @intCast(i64, args.source_addr) - self.correction - 4, ); - mem.writeIntLittle(u32, block.code[offset..][0..4], @bitCast(u32, displacement)); + mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @bitCast(u32, displacement)); } pub fn format(self: Signed, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { @@ -442,16 +446,16 @@ pub const Relocation = struct { }, addend: i32 = 0, - pub fn resolve(self: Load, block: *TextBlock, offset: u32, args: ResolveArgs) !void { + pub fn resolve(self: Load, args: ResolveArgs) !void { if (self.kind == .tlvp) { // We need to rewrite the opcode from movq to leaq. - block.code[offset - 2] = 0x8d; + args.block.code[args.offset - 2] = 0x8d; } const displacement = try math.cast( i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4 + self.addend, ); - mem.writeIntLittle(u32, block.code[offset..][0..4], @bitCast(u32, displacement)); + mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @bitCast(u32, displacement)); } pub fn format(self: Load, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { @@ -464,15 +468,15 @@ pub const Relocation = struct { } }; - pub fn resolve(self: Relocation, block: *TextBlock, args: ResolveArgs) !void { + pub fn resolve(self: Relocation, args: ResolveArgs) !void { switch (self.payload) { - .unsigned => |unsigned| try unsigned.resolve(block, self.offset, args), - .branch => |branch| try branch.resolve(block, self.offset, args), - .page => |page| try page.resolve(block, self.offset, args), - .page_off => |page_off| try page_off.resolve(block, self.offset, args), - .pointer_to_got => |pointer_to_got| try pointer_to_got.resolve(block, self.offset, args), - .signed => |signed| try signed.resolve(block, self.offset, args), - .load => |load| try load.resolve(block, self.offset, args), + .unsigned => |unsigned| try unsigned.resolve(args), + .branch => |branch| try 
branch.resolve(args), + .page => |page| try page.resolve(args), + .page_off => |page_off| try page_off.resolve(args), + .pointer_to_got => |pointer_to_got| try pointer_to_got.resolve(args), + .signed => |signed| try signed.resolve(args), + .load => |load| try load.resolve(args), } } @@ -983,7 +987,7 @@ fn parseLoad(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { for (self.relocs.items) |rel| { - log.debug("relocating {}", .{rel}); + log.warn("relocating {}", .{rel}); const source_addr = blk: { const sym = zld.locals.items[self.local_sym_index]; @@ -1001,20 +1005,29 @@ pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { if (is_via_got) { const dc_seg = zld.load_commands.items[zld.data_const_segment_cmd_index.?].Segment; const got = dc_seg.sections.items[zld.got_section_index.?]; - const got_index = rel.target.got_index orelse { - log.err("expected GOT entry for symbol '{s}'", .{zld.getString(rel.target.strx)}); + const got_index = zld.got_entries.getIndex(.{ + .where = rel.where, + .where_index = rel.where_index, + }) orelse { + const sym = switch (rel.where) { + .local => zld.locals.items[rel.where_index], + .import => zld.imports.items[rel.where_index], + }; + log.err("expected GOT entry for symbol '{s}'", .{zld.getString(sym.n_strx)}); log.err(" this is an internal linker error", .{}); return error.FailedToResolveRelocationTarget; }; break :blk got.addr + got_index * @sizeOf(u64); } - switch (rel.target.payload) { - .regular => |reg| { + switch (rel.where) { + .local => { + const sym = zld.locals.items[rel.where_index]; const is_tlv = is_tlv: { const sym = zld.locals.items[self.local_sym_index]; - const seg = zld.load_commands.items[sym.payload.regular.segment_id].Segment; - const sect = seg.sections.items[sym.payload.regular.section_id]; + const match = zld.unpackSectionId(sym.n_sect); + const seg = zld.load_commands.items[match.seg].Segment; + const sect = 
seg.sections.items[match.sect]; break :is_tlv commands.sectionType(sect) == macho.S_THREAD_LOCAL_VARIABLES; }; if (is_tlv) { @@ -1036,36 +1049,29 @@ pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { return error.FailedToResolveRelocationTarget; } }; - break :blk reg.address - base_address; + break :blk sym.n_value - base_address; } - break :blk reg.address; + break :blk sym.n_value; }, - .proxy => { - if (mem.eql(u8, zld.getString(rel.target.strx), "__tlv_bootstrap")) { - break :blk 0; // Dynamically bound by dyld. - } - - const segment = zld.load_commands.items[zld.text_segment_cmd_index.?].Segment; - const stubs = segment.sections.items[zld.stubs_section_index.?]; - const stubs_index = rel.target.stubs_index orelse { + .import => { + // TODO I think this will be autohandled by self.bindings. + // if (mem.eql(u8, zld.getString(rel.target.strx), "__tlv_bootstrap")) { + // break :blk 0; // Dynamically bound by dyld. + // } + const stubs_index = zld.stubs.getIndex(rel.where_index) orelse { // TODO verify in TextBlock that the symbol is indeed dynamically bound. break :blk 0; // Dynamically bound by dyld. 
}; + const segment = zld.load_commands.items[zld.text_segment_cmd_index.?].Segment; + const stubs = segment.sections.items[zld.stubs_section_index.?]; break :blk stubs.addr + stubs_index * stubs.reserved2; }, - else => { - log.err("failed to resolve symbol '{s}' as a relocation target", .{ - zld.getString(rel.target.strx), - }); - log.err(" this is an internal linker error", .{}); - return error.FailedToResolveRelocationTarget; - }, } }; - log.debug(" | source_addr = 0x{x}", .{source_addr}); - log.debug(" | target_addr = 0x{x}", .{target_addr}); + log.warn(" | source_addr = 0x{x}", .{source_addr}); + log.warn(" | target_addr = 0x{x}", .{target_addr}); try rel.resolve(self, source_addr, target_addr); } From 71384a383e0ec6d4c3f6e16571c36a9c44d1645c Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 17 Jul 2021 11:29:40 +0200 Subject: [PATCH 58/81] zld: correctly set n_sect for sections as symbols --- src/link/MachO/Object.zig | 21 +++++++++++++-------- src/link/MachO/TextBlock.zig | 6 +++--- src/link/MachO/Zld.zig | 5 ++--- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 031d71bd9d..4e0ddc7455 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -703,20 +703,25 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { // Since there is no symbol to refer to this block, we create // a temp one, unless we already did that when working out the relocations // of other text blocks. 
- const block_local_sym_index = @intCast(u32, zld.locals.items.len); const sym_name = try std.fmt.allocPrint(self.allocator, "l_{s}_{s}_{s}", .{ self.name.?, segmentName(sect), sectionName(sect), }); defer self.allocator.free(sym_name); - try zld.locals.append(zld.allocator, .{ - .n_strx = try zld.makeString(sym_name), - .n_type = macho.N_SECT, - .n_sect = zld.sectionId(match), - .n_desc = 0, - .n_value = sect.addr, - }); + + const block_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { + const block_local_sym_index = @intCast(u32, zld.locals.items.len); + try zld.locals.append(zld.allocator, .{ + .n_strx = try zld.makeString(sym_name), + .n_type = macho.N_SECT, + .n_sect = zld.sectionId(match), + .n_desc = 0, + .n_value = sect.addr, + }); + try self.sections_as_symbols.putNoClobber(self.allocator, sect_id, block_local_sym_index); + break :blk block_local_sym_index; + }; const block = try self.allocator.create(TextBlock); errdefer self.allocator.destroy(block); diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig index a76263954f..7b595b370c 100644 --- a/src/link/MachO/TextBlock.zig +++ b/src/link/MachO/TextBlock.zig @@ -548,6 +548,7 @@ fn initRelocFromObject(rel: macho.relocation_info, object: *Object, ctx: RelocCo const local_sym_index = object.sections_as_symbols.get(sect_id) orelse blk: { const seg = object.load_commands.items[object.segment_cmd_index.?].Segment; const sect = seg.sections.items[sect_id]; + const match = (try ctx.zld.getMatchingSection(sect)) orelse unreachable; const local_sym_index = @intCast(u32, ctx.zld.locals.items.len); const sym_name = try std.fmt.allocPrint(ctx.zld.allocator, "l_{s}_{s}_{s}", .{ object.name.?, @@ -559,9 +560,9 @@ fn initRelocFromObject(rel: macho.relocation_info, object: *Object, ctx: RelocCo try ctx.zld.locals.append(ctx.zld.allocator, .{ .n_strx = try ctx.zld.makeString(sym_name), .n_type = macho.N_SECT, - .n_sect = 0, + .n_sect = ctx.zld.sectionId(match), .n_desc = 0, - .n_value 
= 0, + .n_value = sect.addr, }); try object.sections_as_symbols.putNoClobber(object.allocator, sect_id, local_sym_index); break :blk local_sym_index; @@ -759,7 +760,6 @@ pub fn parseRelocsFromObject( } else if (parsed_rel.payload == .unsigned) { switch (parsed_rel.where) { .import => { - log.warn("WAT {s}", .{ctx.zld.getString(ctx.zld.imports.items[parsed_rel.where_index].n_strx)}); try self.bindings.append(.{ .local_sym_index = parsed_rel.where_index, .offset = parsed_rel.offset, diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 67cd007ebf..614efe35b1 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -232,6 +232,8 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg try self.parseInputFiles(files, args.syslibroot); try self.parseLibs(args.libs, args.syslibroot); try self.resolveSymbols(); + try self.parseTextBlocks(); + // try self.sortSections(); log.warn("locals", .{}); for (self.locals.items) |sym, id| { @@ -276,8 +278,6 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg } } - try self.parseTextBlocks(); - var it = self.blocks.iterator(); while (it.next()) |entry| { const seg = self.load_commands.items[entry.key_ptr.seg].Segment; @@ -289,7 +289,6 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg } return error.TODO; - // try self.sortSections(); // try self.addRpaths(args.rpaths); // try self.addDataInCodeLC(); // try self.addCodeSignatureLC(); From d81783375c2555b2e2cb6a028ea648179031bc93 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 17 Jul 2021 15:13:16 +0200 Subject: [PATCH 59/81] zld: allocate TextBlocks and symbols --- src/link/MachO/Zld.zig | 68 +++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 614efe35b1..9b0e3a5e43 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -233,7 +233,15 @@ pub fn 
link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg try self.parseLibs(args.libs, args.syslibroot); try self.resolveSymbols(); try self.parseTextBlocks(); - // try self.sortSections(); + try self.sortSections(); + try self.addRpaths(args.rpaths); + try self.addDataInCodeLC(); + try self.addCodeSignatureLC(); + try self.allocateTextSegment(); + try self.allocateDataConstSegment(); + try self.allocateDataSegment(); + self.allocateLinkeditSegment(); + try self.allocateTextBlocks(); log.warn("locals", .{}); for (self.locals.items) |sym, id| { @@ -289,14 +297,6 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg } return error.TODO; - // try self.addRpaths(args.rpaths); - // try self.addDataInCodeLC(); - // try self.addCodeSignatureLC(); - // try self.allocateTextSegment(); - // try self.allocateDataConstSegment(); - // try self.allocateDataSegment(); - // self.allocateLinkeditSegment(); - // try self.allocateTextBlocks(); // try self.flush(); } @@ -883,23 +883,11 @@ fn sortSections(self: *Zld) !void { self.blocks.deinit(self.allocator); self.blocks = transient; } - - for (self.locals.items) |sym, i| { - if (i == 0) continue; // skip the null symbol - assert(sym.payload == .regular); - const reg = &sym.payload.regular; - reg.section_id = if (reg.segment_id == self.text_segment_cmd_index.?) - text_index_mapping.get(reg.section_id).? - else if (reg.segment_id == self.data_const_segment_cmd_index.?) - data_const_index_mapping.get(reg.section_id).? 
- else - data_index_mapping.get(reg.section_id).?; - } } fn allocateTextSegment(self: *Zld) !void { const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const nstubs = @intCast(u32, self.stubs.items.len); + const nstubs = @intCast(u32, self.stubs.count()); const base_vmaddr = self.load_commands.items[self.pagezero_segment_cmd_index.?].Segment.inner.vmsize; seg.inner.fileoff = 0; @@ -950,7 +938,7 @@ fn allocateTextSegment(self: *Zld) !void { fn allocateDataConstSegment(self: *Zld) !void { const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const nentries = @intCast(u32, self.got_entries.items.len); + const nentries = @intCast(u32, self.got_entries.count()); const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; seg.inner.fileoff = text_seg.inner.fileoff + text_seg.inner.filesize; @@ -965,7 +953,7 @@ fn allocateDataConstSegment(self: *Zld) !void { fn allocateDataSegment(self: *Zld) !void { const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const nstubs = @intCast(u32, self.stubs.items.len); + const nstubs = @intCast(u32, self.stubs.count()); const data_const_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; seg.inner.fileoff = data_const_seg.inner.fileoff + data_const_seg.inner.filesize; @@ -1021,6 +1009,7 @@ fn allocateTextBlocks(self: *Zld) !void { const sect = seg.sections.items[match.sect]; var base_addr: u64 = sect.addr; + const n_sect = self.sectionId(match); log.debug(" within section {s},{s}", .{ segmentName(sect), sectionName(sect) }); log.debug(" {}", .{sect}); @@ -1029,12 +1018,12 @@ fn allocateTextBlocks(self: *Zld) !void { const block_alignment = try math.powi(u32, 2, block.alignment); base_addr = mem.alignForwardGeneric(u64, base_addr, block_alignment); - const sym = self.locals.items[block.local_sym_index]; - assert(sym.payload == .regular); - sym.payload.regular.address = base_addr; + const sym = 
&self.locals.items[block.local_sym_index]; + sym.n_value = base_addr; + sym.n_sect = n_sect; log.debug(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ - self.getString(sym.strx), + self.getString(sym.n_strx), base_addr, base_addr + block.size, block.size, @@ -1043,17 +1032,17 @@ fn allocateTextBlocks(self: *Zld) !void { // Update each alias (if any) for (block.aliases.items) |index| { - const alias_sym = self.locals.items[index]; - assert(alias_sym.payload == .regular); - alias_sym.payload.regular.address = base_addr; + const alias_sym = &self.locals.items[index]; + alias_sym.n_value = base_addr; + alias_sym.n_sect = n_sect; } // Update each symbol contained within the TextBlock if (block.contained) |contained| { for (contained) |sym_at_off| { - const contained_sym = self.locals.items[sym_at_off.local_sym_index]; - assert(contained_sym.payload == .regular); - contained_sym.payload.regular.address = base_addr + sym_at_off.offset; + const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; + contained_sym.n_value = base_addr + sym_at_off.offset; + contained_sym.n_sect = n_sect; } } @@ -1064,6 +1053,17 @@ fn allocateTextBlocks(self: *Zld) !void { } else break; } } + + // Update globals + for (self.symbol_resolver.values()) |resolv| { + if (resolv.where != .global) continue; + + assert(resolv.local_sym_index != 0); + const local_sym = self.locals.items[resolv.local_sym_index]; + const sym = &self.globals.items[resolv.where_index]; + sym.n_value = local_sym.n_value; + sym.n_sect = local_sym.n_sect; + } } fn writeTextBlocks(self: *Zld) !void { From 97914d93a9733307309d5660ee5ce044f56a98cc Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 17 Jul 2021 17:30:16 +0200 Subject: [PATCH 60/81] zld: fixup flush function --- src/link/MachO/TextBlock.zig | 43 +++--- src/link/MachO/Zld.zig | 248 +++++++++++++++-------------------- 2 files changed, 134 insertions(+), 157 deletions(-) diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig index 
7b595b370c..f9060f5167 100644 --- a/src/link/MachO/TextBlock.zig +++ b/src/link/MachO/TextBlock.zig @@ -1,11 +1,14 @@ const TextBlock = @This(); const std = @import("std"); +const aarch64 = @import("../../codegen/aarch64.zig"); const assert = std.debug.assert; const commands = @import("commands.zig"); const log = std.log.scoped(.text_block); const macho = std.macho; +const math = std.math; const mem = std.mem; +const meta = std.meta; const Allocator = mem.Allocator; const Arch = std.Target.Cpu.Arch; @@ -44,25 +47,22 @@ pub const Stab = union(enum) { defer nlists.deinit(); const sym = zld.locals.items[local_sym_index]; - const reg = sym.payload.regular; - switch (stab) { .function => |size| { try nlists.ensureUnusedCapacity(4); - const section_id = reg.sectionId(zld); nlists.appendAssumeCapacity(.{ .n_strx = 0, .n_type = macho.N_BNSYM, - .n_sect = section_id, + .n_sect = sym.n_sect, .n_desc = 0, - .n_value = reg.address, + .n_value = sym.n_value, }); nlists.appendAssumeCapacity(.{ - .n_strx = sym.strx, + .n_strx = sym.n_strx, .n_type = macho.N_FUN, - .n_sect = section_id, + .n_sect = sym.n_sect, .n_desc = 0, - .n_value = reg.address, + .n_value = sym.n_value, }); nlists.appendAssumeCapacity(.{ .n_strx = 0, @@ -74,14 +74,14 @@ pub const Stab = union(enum) { nlists.appendAssumeCapacity(.{ .n_strx = 0, .n_type = macho.N_ENSYM, - .n_sect = section_id, + .n_sect = sym.n_sect, .n_desc = 0, .n_value = size, }); }, .global => { try nlists.append(.{ - .n_strx = sym.strx, + .n_strx = sym.n_strx, .n_type = macho.N_GSYM, .n_sect = 0, .n_desc = 0, @@ -90,11 +90,11 @@ pub const Stab = union(enum) { }, .static => { try nlists.append(.{ - .n_strx = sym.strx, + .n_strx = sym.n_strx, .n_type = macho.N_STSYM, - .n_sect = reg.sectionId(zld), + .n_sect = sym.n_sect, .n_desc = 0, - .n_value = reg.address, + .n_value = sym.n_value, }); }, } @@ -1006,7 +1006,10 @@ pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { const dc_seg = 
zld.load_commands.items[zld.data_const_segment_cmd_index.?].Segment; const got = dc_seg.sections.items[zld.got_section_index.?]; const got_index = zld.got_entries.getIndex(.{ - .where = rel.where, + .where = switch (rel.where) { + .local => .local, + .import => .import, + }, .where_index = rel.where_index, }) orelse { const sym = switch (rel.where) { @@ -1024,8 +1027,8 @@ pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { .local => { const sym = zld.locals.items[rel.where_index]; const is_tlv = is_tlv: { - const sym = zld.locals.items[self.local_sym_index]; - const match = zld.unpackSectionId(sym.n_sect); + const source_sym = zld.locals.items[self.local_sym_index]; + const match = zld.unpackSectionId(source_sym.n_sect); const seg = zld.load_commands.items[match.seg].Segment; const sect = seg.sections.items[match.sect]; break :is_tlv commands.sectionType(sect) == macho.S_THREAD_LOCAL_VARIABLES; @@ -1073,7 +1076,13 @@ pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { log.warn(" | source_addr = 0x{x}", .{source_addr}); log.warn(" | target_addr = 0x{x}", .{target_addr}); - try rel.resolve(self, source_addr, target_addr); + try rel.resolve(.{ + .block = self, + .offset = rel.offset, + .source_addr = source_addr, + .target_addr = target_addr, + .zld = zld, + }); } } diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 9b0e3a5e43..7f4c1471dd 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -233,6 +233,16 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg try self.parseLibs(args.libs, args.syslibroot); try self.resolveSymbols(); try self.parseTextBlocks(); + + { + // Add dyld_stub_binder as the final GOT entry. 
+ const resolv = self.symbol_resolver.get("dyld_stub_binder") orelse unreachable; + try self.got_entries.putNoClobber(self.allocator, .{ + .where = .import, + .where_index = resolv.where_index, + }, {}); + } + try self.sortSections(); try self.addRpaths(args.rpaths); try self.addDataInCodeLC(); @@ -296,9 +306,7 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg entry.value_ptr.*.print(self); } - return error.TODO; - - // try self.flush(); + try self.flush(); } fn parseInputFiles(self: *Zld, files: []const []const u8, syslibroot: ?[]const u8) !void { @@ -1097,7 +1105,7 @@ fn writeTextBlocks(self: *Zld) !void { const sym = self.locals.items[block.local_sym_index]; log.debug(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ - self.getString(sym.strx), + self.getString(sym.n_strx), aligned_base_off, aligned_base_off + block.size, block.size, @@ -1154,8 +1162,12 @@ fn writeStubHelperCommon(self: *Zld) !void { code[9] = 0xff; code[10] = 0x25; { - const dyld_stub_binder = self.globals.get("dyld_stub_binder").?; - const addr = (got.addr + dyld_stub_binder.got_index.? 
* @sizeOf(u64)); + const resolv = self.symbol_resolver.get("dyld_stub_binder") orelse unreachable; + const got_index = self.got_entries.getIndex(.{ + .where = .import, + .where_index = resolv.where_index, + }) orelse unreachable; + const addr = got.addr + got_index * @sizeOf(u64); const displacement = try math.cast(u32, addr - stub_helper.addr - code_size); mem.writeIntLittle(u32, code[11..], displacement); } @@ -1198,9 +1210,13 @@ fn writeStubHelperCommon(self: *Zld) !void { code[10] = 0xbf; code[11] = 0xa9; binder_blk_outer: { - const dyld_stub_binder = self.globals.get("dyld_stub_binder").?; + const resolv = self.symbol_resolver.get("dyld_stub_binder") orelse unreachable; + const got_index = self.got_entries.getIndex(.{ + .where = .import, + .where_index = resolv.where_index, + }) orelse unreachable; const this_addr = stub_helper.addr + 3 * @sizeOf(u32); - const target_addr = (got.addr + dyld_stub_binder.got_index.? * @sizeOf(u64)); + const target_addr = got.addr + got_index * @sizeOf(u64); binder_blk: { const displacement = math.divExact(u64, target_addr - this_addr, 4) catch break :binder_blk; const literal = math.cast(u18, displacement) catch break :binder_blk; @@ -1251,12 +1267,11 @@ fn writeStubHelperCommon(self: *Zld) !void { } }; - for (self.stubs.items) |sym| { + for (self.stubs.keys()) |key| { // TODO weak bound pointers - const index = sym.stubs_index orelse unreachable; - try self.writeLazySymbolPointer(index); - try self.writeStub(index); - try self.writeStubInStubHelper(index); + try self.writeLazySymbolPointer(key); + try self.writeStub(key); + try self.writeStubInStubHelper(key); } } @@ -1764,7 +1779,7 @@ fn resolveSymbols(self: *Zld) !void { .n_strx = undef.n_strx, .n_type = macho.N_UNDF | macho.N_EXT, .n_sect = 0, - .n_desc = (dylib.ordinal.? 
* macho.N_SYMBOL_RESOLVER) | macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY, + .n_desc = packDylibOrdinal(dylib.ordinal.?), .n_value = 0, }); resolv.* = .{ @@ -2218,16 +2233,16 @@ fn writeGotEntries(self: *Zld) !void { const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; const sect = seg.sections.items[self.got_section_index.?]; - var buffer = try self.allocator.alloc(u8, self.got_entries.items.len * @sizeOf(u64)); + var buffer = try self.allocator.alloc(u8, self.got_entries.count() * @sizeOf(u64)); defer self.allocator.free(buffer); var stream = std.io.fixedBufferStream(buffer); var writer = stream.writer(); - for (self.got_entries.items) |sym| { - const address: u64 = switch (sym.payload) { - .regular => |reg| reg.address, - else => 0, + for (self.got_entries.keys()) |key| { + const address: u64 = switch (key.where) { + .local => self.locals.items[key.where_index].n_value, + .import => 0, }; try writer.writeIntLittle(u64, address); } @@ -2243,9 +2258,14 @@ fn setEntryPoint(self: *Zld) !void { // TODO we should respect the -entry flag passed in by the user to set a custom // entrypoint. For now, assume default of `_main`. 
const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const sym = self.globals.get("_main") orelse return error.MissingMainEntrypoint; + const resolv = self.symbol_resolver.get("_main") orelse { + log.err("'_main' export not found", .{}); + return error.MissingMainEntrypoint; + }; + assert(resolv.where == .global); + const sym = self.globals.items[resolv.where_index]; const ec = &self.load_commands.items[self.main_cmd_index.?].Main; - ec.entryoff = @intCast(u32, sym.payload.regular.address - seg.inner.vmaddr); + ec.entryoff = @intCast(u32, sym.n_value - seg.inner.vmaddr); ec.stacksize = self.stack_size; } @@ -2265,8 +2285,7 @@ fn writeRebaseInfoTable(self: *Zld) !void { while (true) { const sym = self.locals.items[block.local_sym_index]; - assert(sym.payload == .regular); - const base_offset = sym.payload.regular.address - seg.inner.vmaddr; + const base_offset = sym.n_value - seg.inner.vmaddr; for (block.rebases.items) |offset| { try pointers.append(.{ @@ -2288,11 +2307,11 @@ fn writeRebaseInfoTable(self: *Zld) !void { const base_offset = sect.addr - seg.inner.vmaddr; const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); - for (self.got_entries.items) |sym| { - if (sym.payload == .proxy) continue; + for (self.got_entries.keys()) |key, i| { + if (key.where == .import) continue; try pointers.append(.{ - .offset = base_offset + sym.got_index.? * @sizeOf(u64), + .offset = base_offset + i * @sizeOf(u64), .segment_id = segment_id, }); } @@ -2304,10 +2323,10 @@ fn writeRebaseInfoTable(self: *Zld) !void { const base_offset = sect.addr - seg.inner.vmaddr; const segment_id = @intCast(u16, self.data_segment_cmd_index.?); - try pointers.ensureCapacity(pointers.items.len + self.stubs.items.len); - for (self.stubs.items) |sym| { + try pointers.ensureCapacity(pointers.items.len + self.stubs.count()); + for (self.stubs.keys()) |_, i| { pointers.appendAssumeCapacity(.{ - .offset = base_offset + sym.stubs_index.? 
* @sizeOf(u64), + .offset = base_offset + i * @sizeOf(u64), .segment_id = segment_id, }); } @@ -2343,15 +2362,15 @@ fn writeBindInfoTable(self: *Zld) !void { const base_offset = sect.addr - seg.inner.vmaddr; const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); - for (self.got_entries.items) |sym| { - if (sym.payload != .proxy) continue; + for (self.got_entries.keys()) |key, i| { + if (key.where == .local) continue; - const proxy = sym.payload.proxy; + const sym = self.imports.items[key.where_index]; try pointers.append(.{ - .offset = base_offset + sym.got_index.? * @sizeOf(u64), + .offset = base_offset + i * @sizeOf(u64), .segment_id = segment_id, - .dylib_ordinal = proxy.dylibOrdinal(), - .name = self.getString(sym.strx), + .dylib_ordinal = unpackDylibOrdinal(sym.n_desc), + .name = self.getString(sym.n_strx), }); } } @@ -2368,18 +2387,15 @@ fn writeBindInfoTable(self: *Zld) !void { while (true) { const sym = self.locals.items[block.local_sym_index]; - assert(sym.payload == .regular); - const base_offset = sym.payload.regular.address - seg.inner.vmaddr; + const base_offset = sym.n_value - seg.inner.vmaddr; for (block.bindings.items) |binding| { const bind_sym = self.imports.items[binding.local_sym_index]; - const proxy = bind_sym.payload.proxy; - try pointers.append(.{ .offset = binding.offset + base_offset, .segment_id = match.seg, - .dylib_ordinal = proxy.dylibOrdinal(), - .name = self.getString(bind_sym.strx), + .dylib_ordinal = unpackDylibOrdinal(bind_sym.n_desc), + .name = self.getString(bind_sym.n_strx), }); } @@ -2390,21 +2406,21 @@ fn writeBindInfoTable(self: *Zld) !void { } } - if (self.tlv_section_index) |idx| { - const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_segment_cmd_index.?); + // if (self.tlv_section_index) |idx| { + // const seg = 
self.load_commands.items[self.data_segment_cmd_index.?].Segment; + // const sect = seg.sections.items[idx]; + // const base_offset = sect.addr - seg.inner.vmaddr; + // const segment_id = @intCast(u16, self.data_segment_cmd_index.?); - const sym = self.globals.get("__tlv_bootstrap") orelse unreachable; - const proxy = sym.payload.proxy; - try pointers.append(.{ - .offset = base_offset, - .segment_id = segment_id, - .dylib_ordinal = proxy.dylibOrdinal(), - .name = self.getString(sym.strx), - }); - } + // const sym = self.globals.get("__tlv_bootstrap") orelse unreachable; + // const proxy = sym.payload.proxy; + // try pointers.append(.{ + // .offset = base_offset, + // .segment_id = segment_id, + // .dylib_ordinal = proxy.dylibOrdinal(), + // .name = self.getString(sym.strx), + // }); + // } const size = try bindInfoSize(pointers.items); var buffer = try self.allocator.alloc(u8, @intCast(usize, size)); @@ -2434,15 +2450,15 @@ fn writeLazyBindInfoTable(self: *Zld) !void { const base_offset = sect.addr - seg.inner.vmaddr; const segment_id = @intCast(u16, self.data_segment_cmd_index.?); - try pointers.ensureCapacity(self.stubs.items.len); + try pointers.ensureCapacity(self.stubs.count()); - for (self.stubs.items) |sym| { - const proxy = sym.payload.proxy; + for (self.stubs.keys()) |key, i| { + const sym = self.imports.items[key]; pointers.appendAssumeCapacity(.{ - .offset = base_offset + sym.stubs_index.? 
* @sizeOf(u64), + .offset = base_offset + i * @sizeOf(u64), .segment_id = segment_id, - .dylib_ordinal = proxy.dylibOrdinal(), - .name = self.getString(sym.strx), + .dylib_ordinal = unpackDylibOrdinal(sym.n_desc), + .name = self.getString(sym.n_strx), }); } } @@ -2510,7 +2526,7 @@ fn populateLazyBindOffsetsInStubHelper(self: *Zld, buffer: []const u8) !void { else => {}, } } - assert(self.stubs.items.len <= offsets.items.len); + assert(self.stubs.count() <= offsets.items.len); const stub_size: u4 = switch (self.target.?.cpu.arch) { .x86_64 => 10, @@ -2523,8 +2539,7 @@ fn populateLazyBindOffsetsInStubHelper(self: *Zld, buffer: []const u8) !void { else => unreachable, }; var buf: [@sizeOf(u32)]u8 = undefined; - for (self.stubs.items) |sym| { - const index = sym.stubs_index orelse unreachable; + for (self.stubs.keys()) |_, index| { const placeholder_off = self.stub_helper_stubs_start_off.? + index * stub_size + off; mem.writeIntLittle(u32, &buf, offsets.items[index]); try self.file.?.pwriteAll(&buf, placeholder_off); @@ -2541,33 +2556,13 @@ fn writeExportInfo(self: *Zld) !void { // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. 
log.debug("writing export trie", .{}); - const Sorter = struct { - fn lessThan(_: void, a: []const u8, b: []const u8) bool { - return mem.lessThan(u8, a, b); - } - }; - - var sorted_globals = std.ArrayList([]const u8).init(self.allocator); - defer sorted_globals.deinit(); - - for (self.globals.values()) |sym| { - if (sym.payload != .regular) continue; - const reg = sym.payload.regular; - if (reg.linkage != .global) continue; - try sorted_globals.append(self.getString(sym.strx)); - } - - std.sort.sort([]const u8, sorted_globals.items, {}, Sorter.lessThan); - - for (sorted_globals.items) |sym_name| { - const sym = self.globals.get(sym_name) orelse unreachable; - const reg = sym.payload.regular; - - log.debug(" | putting '{s}' defined at 0x{x}", .{ sym_name, reg.address }); + for (self.globals.items) |sym| { + const sym_name = self.getString(sym.n_strx); + log.debug(" | putting '{s}' defined at 0x{x}", .{ sym_name, sym.n_value }); try trie.put(.{ .name = sym_name, - .vmaddr_offset = reg.address - base_address, + .vmaddr_offset = sym.n_value - base_address, .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, }); } @@ -2598,23 +2593,7 @@ fn writeSymbolTable(self: *Zld) !void { var locals = std.ArrayList(macho.nlist_64).init(self.allocator); defer locals.deinit(); - - var exports = std.ArrayList(macho.nlist_64).init(self.allocator); - defer exports.deinit(); - - for (self.locals.items) |symbol, i| { - if (i == 0) continue; // skip null symbol - if (symbol.isTemp(self)) continue; // TODO when merging codepaths, this should go into freelist - - const reg = symbol.payload.regular; - const nlist = try symbol.asNlist(self); - - if (reg.linkage == .translation_unit) { - try locals.append(nlist); - } else { - try exports.append(nlist); - } - } + try locals.appendSlice(self.locals.items); if (self.has_stabs) { for (self.objects.items) |object| { @@ -2671,21 +2650,9 @@ fn writeSymbolTable(self: *Zld) !void { } } - var undefs = 
std.ArrayList(macho.nlist_64).init(self.allocator); - defer undefs.deinit(); - var undef_dir = std.StringHashMap(u32).init(self.allocator); - defer undef_dir.deinit(); - - for (self.imports.items) |sym| { - const nlist = try sym.asNlist(self); - const id = @intCast(u32, undefs.items.len); - try undefs.append(nlist); - try undef_dir.putNoClobber(self.getString(sym.strx), id); - } - const nlocals = locals.items.len; - const nexports = exports.items.len; - const nundefs = undefs.items.len; + const nexports = self.globals.items.len; + const nundefs = self.imports.items.len; const locals_off = symtab.symoff + symtab.nsyms * @sizeOf(macho.nlist_64); const locals_size = nlocals * @sizeOf(macho.nlist_64); @@ -2695,12 +2662,12 @@ fn writeSymbolTable(self: *Zld) !void { const exports_off = locals_off + locals_size; const exports_size = nexports * @sizeOf(macho.nlist_64); log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off }); - try self.file.?.pwriteAll(mem.sliceAsBytes(exports.items), exports_off); + try self.file.?.pwriteAll(mem.sliceAsBytes(self.globals.items), exports_off); const undefs_off = exports_off + exports_size; const undefs_size = nundefs * @sizeOf(macho.nlist_64); log.debug("writing undefined symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off }); - try self.file.?.pwriteAll(mem.sliceAsBytes(undefs.items), undefs_off); + try self.file.?.pwriteAll(mem.sliceAsBytes(self.imports.items), undefs_off); symtab.nsyms += @intCast(u32, nlocals + nexports + nundefs); seg.inner.filesize += locals_size + exports_size + undefs_size; @@ -2720,8 +2687,8 @@ fn writeSymbolTable(self: *Zld) !void { const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; const la_symbol_ptr = &data_segment.sections.items[self.la_symbol_ptr_section_index.?]; - const nstubs = @intCast(u32, self.stubs.items.len); - const ngot_entries = @intCast(u32, self.got_entries.items.len); + const nstubs = 
@intCast(u32, self.stubs.count()); + const ngot_entries = @intCast(u32, self.got_entries.count()); dysymtab.indirectsymoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); dysymtab.nindirectsyms = nstubs * 2 + ngot_entries; @@ -2741,31 +2708,25 @@ fn writeSymbolTable(self: *Zld) !void { var writer = stream.writer(); stubs.reserved1 = 0; - for (self.stubs.items) |sym| { - const sym_name = self.getString(sym.strx); - const id = undef_dir.get(sym_name) orelse unreachable; - try writer.writeIntLittle(u32, dysymtab.iundefsym + id); + for (self.stubs.keys()) |key| { + try writer.writeIntLittle(u32, dysymtab.iundefsym + key); } got.reserved1 = nstubs; - for (self.got_entries.items) |sym| { - switch (sym.payload) { - .proxy => { - const sym_name = self.getString(sym.strx); - const id = undef_dir.get(sym_name) orelse unreachable; - try writer.writeIntLittle(u32, dysymtab.iundefsym + id); + for (self.got_entries.keys()) |key| { + switch (key.where) { + .import => { + try writer.writeIntLittle(u32, dysymtab.iundefsym + key.where_index); }, - else => { + .local => { try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); }, } } la_symbol_ptr.reserved1 = got.reserved1 + ngot_entries; - for (self.stubs.items) |sym| { - const sym_name = self.getString(sym.strx); - const id = undef_dir.get(sym_name) orelse unreachable; - try writer.writeIntLittle(u32, dysymtab.iundefsym + id); + for (self.stubs.keys()) |key| { + try writer.writeIntLittle(u32, dysymtab.iundefsym + key); } try self.file.?.pwriteAll(buf, dysymtab.indirectsymoff); @@ -2813,8 +2774,7 @@ fn writeDices(self: *Zld) !void { while (true) { if (block.dices.items.len > 0) { const sym = self.locals.items[block.local_sym_index]; - const reg = sym.payload.regular; - const base_off = try math.cast(u32, reg.address - text_sect.addr + text_sect.offset); + const base_off = try math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset); try buf.ensureUnusedCapacity(block.dices.items.len * 
@sizeOf(macho.data_in_code_entry)); for (block.dices.items) |dice| { @@ -3052,6 +3012,14 @@ pub fn unpackSectionId(self: Zld, section_id: u8) MatchingSection { return match; } +fn packDylibOrdinal(ordinal: u16) u16 { + return ordinal * macho.N_SYMBOL_RESOLVER; +} + +fn unpackDylibOrdinal(pack: u16) u16 { + return @divExact(pack, macho.N_SYMBOL_RESOLVER); +} + pub fn findFirst(comptime T: type, haystack: []T, start: usize, predicate: anytype) usize { if (!@hasDecl(@TypeOf(predicate), "predicate")) @compileError("Predicate is required to define fn predicate(@This(), T) bool"); From fccac48a55f91abfa6f04dc7274639a9faf5ab53 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 17 Jul 2021 18:19:32 +0200 Subject: [PATCH 61/81] zld: fix committing stub info into final binary --- src/link/MachO/Zld.zig | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 7f4c1471dd..2d87df2594 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -1267,11 +1267,12 @@ fn writeStubHelperCommon(self: *Zld) !void { } }; - for (self.stubs.keys()) |key| { + for (self.stubs.keys()) |_, i| { + const index = @intCast(u32, i); // TODO weak bound pointers - try self.writeLazySymbolPointer(key); - try self.writeStub(key); - try self.writeStubInStubHelper(key); + try self.writeLazySymbolPointer(index); + try self.writeStub(index); + try self.writeStubInStubHelper(index); } } From 9f20a51555169dfcc531b06390001d3dbd78094d Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 17 Jul 2021 18:33:47 +0200 Subject: [PATCH 62/81] zld: demote logging back to debug from warn --- src/link/MachO/Object.zig | 8 ++-- src/link/MachO/TextBlock.zig | 11 +++-- src/link/MachO/Zld.zig | 90 ++++++++++++++++++------------------ 3 files changed, 55 insertions(+), 54 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 4e0ddc7455..f8e88673a1 100644 --- a/src/link/MachO/Object.zig +++ 
b/src/link/MachO/Object.zig @@ -486,7 +486,7 @@ const TextBlockParser = struct { pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; - log.warn("analysing {s}", .{self.name.?}); + log.debug("analysing {s}", .{self.name.?}); const dysymtab = self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; // We only care about defined symbols, so filter every other out. @@ -507,14 +507,14 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { for (seg.sections.items) |sect, id| { const sect_id = @intCast(u8, id); - log.warn("putting section '{s},{s}' as a TextBlock", .{ + log.debug("putting section '{s},{s}' as a TextBlock", .{ segmentName(sect), sectionName(sect), }); // Get matching segment/section in the final artifact. const match = (try zld.getMatchingSection(sect)) orelse { - log.warn("unhandled section", .{}); + log.debug("unhandled section", .{}); continue; }; @@ -533,7 +533,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { const filtered_nlists = NlistWithIndex.filterInSection(sorted_nlists.items, sect); // Is there any padding between symbols within the section? - const is_splittable = self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; + // const is_splittable = self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; // TODO is it perhaps worth skip parsing subsections in Debug mode and not worry about // duplicates at all? Need some benchmarks! 
// const is_splittable = false; diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig index f9060f5167..9991fb642a 100644 --- a/src/link/MachO/TextBlock.zig +++ b/src/link/MachO/TextBlock.zig @@ -695,7 +695,7 @@ pub fn parseRelocsFromObject( .ARM64_RELOC_GOT_LOAD_PAGEOFF12, .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, => { - self.parsePageOff(rel, &parsed_rel, addend, ctx); + self.parsePageOff(rel, &parsed_rel, addend); if (rel_type == .ARM64_RELOC_PAGEOFF12) addend = 0; }, @@ -866,6 +866,7 @@ fn parseUnsigned( } fn parseBranch(self: TextBlock, rel: macho.relocation_info, out: *Relocation, ctx: RelocContext) void { + _ = self; assert(rel.r_pcrel == 1); assert(rel.r_length == 2); @@ -894,7 +895,7 @@ fn parsePage(self: TextBlock, rel: macho.relocation_info, out: *Relocation, adde }; } -fn parsePageOff(self: TextBlock, rel: macho.relocation_info, out: *Relocation, addend: u32, ctx: RelocContext) void { +fn parsePageOff(self: TextBlock, rel: macho.relocation_info, out: *Relocation, addend: u32) void { assert(rel.r_pcrel == 0); assert(rel.r_length == 2); @@ -987,7 +988,7 @@ fn parseLoad(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { for (self.relocs.items) |rel| { - log.warn("relocating {}", .{rel}); + log.debug("relocating {}", .{rel}); const source_addr = blk: { const sym = zld.locals.items[self.local_sym_index]; @@ -1073,8 +1074,8 @@ pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { } }; - log.warn(" | source_addr = 0x{x}", .{source_addr}); - log.warn(" | target_addr = 0x{x}", .{target_addr}); + log.debug(" | source_addr = 0x{x}", .{source_addr}); + log.debug(" | target_addr = 0x{x}", .{target_addr}); try rel.resolve(.{ .block = self, diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 2d87df2594..633be212cd 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -253,58 +253,58 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, 
args: LinkArg self.allocateLinkeditSegment(); try self.allocateTextBlocks(); - log.warn("locals", .{}); - for (self.locals.items) |sym, id| { - log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); - } + // log.warn("locals", .{}); + // for (self.locals.items) |sym, id| { + // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); + // } - log.warn("globals", .{}); - for (self.globals.items) |sym, id| { - log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); - } + // log.warn("globals", .{}); + // for (self.globals.items) |sym, id| { + // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); + // } - log.warn("tentatives", .{}); - for (self.tentatives.items) |sym, id| { - log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); - } + // log.warn("tentatives", .{}); + // for (self.tentatives.items) |sym, id| { + // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); + // } - log.warn("undefines", .{}); - for (self.undefs.items) |sym, id| { - log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); - } + // log.warn("undefines", .{}); + // for (self.undefs.items) |sym, id| { + // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); + // } - log.warn("imports", .{}); - for (self.imports.items) |sym, id| { - log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); - } + // log.warn("imports", .{}); + // for (self.imports.items) |sym, id| { + // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); + // } - log.warn("symbol resolver", .{}); - for (self.symbol_resolver.keys()) |key| { - log.warn(" {s} => {}", .{ key, self.symbol_resolver.get(key).? }); - } + // log.warn("symbol resolver", .{}); + // for (self.symbol_resolver.keys()) |key| { + // log.warn(" {s} => {}", .{ key, self.symbol_resolver.get(key).? 
}); + // } - log.warn("mappings", .{}); - for (self.objects.items) |object, id| { - const object_id = @intCast(u16, id); - log.warn(" in object {s}", .{object.name.?}); - for (object.symtab.items) |sym, sym_id| { - if (object.symbol_mapping.get(@intCast(u32, sym_id))) |local_id| { - log.warn(" | {d} => {d}", .{ sym_id, local_id }); - } else { - log.warn(" | {d} no local mapping for {s}", .{ sym_id, object.getString(sym.n_strx) }); - } - } - } + // log.warn("mappings", .{}); + // for (self.objects.items) |object, id| { + // const object_id = @intCast(u16, id); + // log.warn(" in object {s}", .{object.name.?}); + // for (object.symtab.items) |sym, sym_id| { + // if (object.symbol_mapping.get(@intCast(u32, sym_id))) |local_id| { + // log.warn(" | {d} => {d}", .{ sym_id, local_id }); + // } else { + // log.warn(" | {d} no local mapping for {s}", .{ sym_id, object.getString(sym.n_strx) }); + // } + // } + // } - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const seg = self.load_commands.items[entry.key_ptr.seg].Segment; - const sect = seg.sections.items[entry.key_ptr.sect]; + // var it = self.blocks.iterator(); + // while (it.next()) |entry| { + // const seg = self.load_commands.items[entry.key_ptr.seg].Segment; + // const sect = seg.sections.items[entry.key_ptr.sect]; - log.warn("\n\n{s},{s} contents:", .{ segmentName(sect), sectionName(sect) }); - log.warn(" {}", .{sect}); - entry.value_ptr.*.print(self); - } + // log.warn("\n\n{s},{s} contents:", .{ segmentName(sect), sectionName(sect) }); + // log.warn(" {}", .{sect}); + // entry.value_ptr.*.print(self); + // } try self.flush(); } @@ -1411,7 +1411,7 @@ fn writeStubInStubHelper(self: *Zld, index: u32) !void { fn resolveSymbolsInObject(self: *Zld, object_id: u16) !void { const object = self.objects.items[object_id]; - log.warn("resolving symbols in '{s}'", .{object.name}); + log.debug("resolving symbols in '{s}'", .{object.name}); for (object.symtab.items) |sym, id| { const sym_id = @intCast(u32, 
id); From d8c4838c7da9efef07fcf5b8e709bb2a65cd2209 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 17 Jul 2021 18:43:28 +0200 Subject: [PATCH 63/81] zld: fix incorrect global symbol collision check --- src/link/MachO/Zld.zig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 633be212cd..4d58de25c8 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -1489,8 +1489,8 @@ fn resolveSymbolsInObject(self: *Zld, object_id: u16) !void { .global => { const global = &self.globals.items[resolv.where_index]; - if (!(symbolIsWeakDef(sym) and symbolIsPext(sym)) and - !(symbolIsWeakDef(global.*) and symbolIsPext(global.*))) + if (!(symbolIsWeakDef(sym) or symbolIsPext(sym)) and + !(symbolIsWeakDef(global.*) or symbolIsPext(global.*))) { log.err("symbol '{s}' defined multiple times", .{sym_name}); log.err(" first definition in '{s}'", .{self.objects.items[resolv.file].name.?}); From a095263462ebfc82f25bb421aae3992d9f77e980 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 17 Jul 2021 23:21:02 +0200 Subject: [PATCH 64/81] zld: more fixes to do with symbol resolution namely, fixes proper symbol resolution when scanning and including objects from static archives, and properly discards any null symbols when a tentative definition was substituted by a defined, global symbol. --- src/link/MachO/Zld.zig | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 4d58de25c8..1bfd922c6f 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -1636,7 +1636,11 @@ fn resolveSymbols(self: *Zld) !void { } // Second pass, resolve symbols in static libraries.
- loop: for (self.undefs.items) |sym| { + var next_sym: usize = 0; + loop: while (true) : (next_sym += 1) { + if (next_sym == self.undefs.items.len) break; + + const sym = self.undefs.items[next_sym]; if (symbolIsNull(sym)) continue; const sym_name = self.getString(sym.n_strx); @@ -1661,6 +1665,8 @@ fn resolveSymbols(self: *Zld) !void { // Convert any tentative definition into a regular symbol and allocate // text blocks for each tentative defintion. for (self.tentatives.items) |sym| { + if (symbolIsNull(sym)) continue; + const sym_name = self.getString(sym.n_strx); const match: MatchingSection = blk: { if (self.common_section_index == null) { @@ -1813,6 +1819,13 @@ fn resolveSymbols(self: *Zld) !void { .n_desc = macho.N_WEAK_DEF, .n_value = seg.inner.vmaddr, }); + undef.* = .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; resolv.* = .{ .where = .global, .where_index = global_sym_index, From 5aa9c0b4ab8ca00b0da3ce695924218984d11f11 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 17 Jul 2021 23:50:45 +0200 Subject: [PATCH 65/81] zld: allocate empty TextBlock for synthetic ___dso_handle --- src/link/MachO/Zld.zig | 46 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 1bfd922c6f..d8edc54b73 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -1809,16 +1809,25 @@ fn resolveSymbols(self: *Zld) !void { if (self.symbol_resolver.getPtr("___dso_handle")) |resolv| blk: { if (resolv.where != .undef) break :blk; - const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; const undef = &self.undefs.items[resolv.where_index]; - const global_sym_index = @intCast(u32, self.globals.items.len); - try self.globals.append(self.allocator, .{ + const match: MatchingSection = .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }; + const local_sym_index = 
@intCast(u32, self.locals.items.len); + var nlist = macho.nlist_64{ .n_strx = undef.n_strx, - .n_type = macho.N_PEXT | macho.N_EXT | macho.N_SECT, - .n_sect = 0, - .n_desc = macho.N_WEAK_DEF, - .n_value = seg.inner.vmaddr, - }); + .n_type = macho.N_SECT, + .n_sect = self.sectionId(match), + .n_desc = 0, + .n_value = 0, + }; + try self.locals.append(self.allocator, nlist); + const global_sym_index = @intCast(u32, self.globals.items.len); + nlist.n_type |= macho.N_EXT; + nlist.n_desc = macho.N_WEAK_DEF; + try self.globals.append(self.allocator, nlist); + undef.* = .{ .n_strx = 0, .n_type = macho.N_UNDF, @@ -1829,7 +1838,28 @@ fn resolveSymbols(self: *Zld) !void { resolv.* = .{ .where = .global, .where_index = global_sym_index, + .local_sym_index = local_sym_index, }; + + // We create an empty atom for this symbol. + // TODO perhaps we should special-case special symbols? Create a separate + // linked list of atoms? + const block = try self.allocator.create(TextBlock); + errdefer self.allocator.destroy(block); + + block.* = TextBlock.init(self.allocator); + block.local_sym_index = local_sym_index; + block.code = try self.allocator.alloc(u8, 0); + block.size = 0; + block.alignment = 0; + + if (self.blocks.getPtr(match)) |last| { + last.*.next = block; + block.prev = last.*; + last.* = block; + } else { + try self.blocks.putNoClobber(self.allocator, match, block); + } } var has_undefined = false; From 2828cd2983446b116dc8d543ca5e209e361f39d0 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 18 Jul 2021 15:05:52 +0200 Subject: [PATCH 66/81] zld: migrate symbol mgmt to incremental backend --- src/codegen.zig | 23 +- src/link/MachO.zig | 431 +++++++++++++++++------------------ src/link/MachO/TextBlock.zig | 4 - src/link/MachO/Zld.zig | 20 +- 4 files changed, 222 insertions(+), 256 deletions(-) diff --git a/src/codegen.zig b/src/codegen.zig index 6050fe0ed8..ec75cbadc6 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -2500,7 +2500,11 @@ fn Function(comptime arch: 
std.Target.Cpu.Arch) type { const got_addr = blk: { const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment; const got = seg.sections.items[macho_file.got_section_index.?]; - break :blk got.addr + func.owner_decl.link.macho.offset_table_index * @sizeOf(u64); + const got_index = macho_file.got_entries_map.get(.{ + .where = .local, + .where_index = func.owner_decl.link.macho.local_sym_index, + }) orelse unreachable; + break :blk got.addr + got_index * @sizeOf(u64); }; log.debug("got_addr = 0x{x}", .{got_addr}); switch (arch) { @@ -2521,11 +2525,10 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const decl = func_payload.data; const decl_name = try std.fmt.allocPrint(self.bin_file.allocator, "_{s}", .{decl.name}); defer self.bin_file.allocator.free(decl_name); - const already_defined = macho_file.lazy_imports.contains(decl_name); - const symbol: u32 = if (macho_file.lazy_imports.getIndex(decl_name)) |index| - @intCast(u32, index) - else - try macho_file.addExternSymbol(decl_name); + const already_defined = macho_file.symbol_resolver.contains(decl_name); + const resolv = macho_file.symbol_resolver.get(decl_name) orelse blk: { + break :blk try macho_file.addExternFn(decl_name); + }; const start = self.code.items.len; const len: usize = blk: { switch (arch) { @@ -2544,7 +2547,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } }; try macho_file.stub_fixups.append(self.bin_file.allocator, .{ - .symbol = symbol, + .symbol = resolv.where_index, .already_defined = already_defined, .start = start, .len = len, @@ -4351,7 +4354,11 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const got_addr = blk: { const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment; const got = seg.sections.items[macho_file.got_section_index.?]; - break :blk got.addr + decl.link.macho.offset_table_index * ptr_bytes; + const got_index = macho_file.got_entries_map.get(.{ + .where = .local, + 
.where_index = decl.link.macho.local_sym_index, + }) orelse unreachable; + break :blk got.addr + got_index * ptr_bytes; }; return MCValue{ .memory = got_addr }; } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 847012110a..efd6e60da9 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -100,31 +100,29 @@ data_section_index: ?u16 = null, /// The absolute address of the entry point. entry_addr: ?u64 = null, -/// Table of all local symbols -/// Internally references string table for names (which are optional). locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -/// Table of all global symbols globals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -/// Table of all extern nonlazy symbols, indexed by name. -nonlazy_imports: std.StringArrayHashMapUnmanaged(Import) = .{}, -/// Table of all extern lazy symbols, indexed by name. -lazy_imports: std.StringArrayHashMapUnmanaged(Import) = .{}, +imports: std.ArrayListUnmanaged(macho.nlist_64) = .{}, +symbol_resolver: std.StringArrayHashMapUnmanaged(SymbolWithLoc) = .{}, locals_free_list: std.ArrayListUnmanaged(u32) = .{}, globals_free_list: std.ArrayListUnmanaged(u32) = .{}, -offset_table_free_list: std.ArrayListUnmanaged(u32) = .{}, stub_helper_stubs_start_off: ?u64 = null, strtab: std.ArrayListUnmanaged(u8) = .{}, -strtab_cache: std.StringHashMapUnmanaged(u32) = .{}, -/// Table of GOT entries. 
-offset_table: std.ArrayListUnmanaged(GOTEntry) = .{}, +got_entries: std.ArrayListUnmanaged(GotIndirectionKey) = .{}, +got_entries_map: std.AutoHashMapUnmanaged(GotIndirectionKey, u32) = .{}, + +got_entries_free_list: std.ArrayListUnmanaged(u32) = .{}, + +stubs: std.ArrayListUnmanaged(u32) = .{}, +stubs_map: std.AutoHashMapUnmanaged(u32, u32) = .{}, error_flags: File.ErrorFlags = File.ErrorFlags{}, -offset_table_count_dirty: bool = false, +got_entries_count_dirty: bool = false, load_commands_dirty: bool = false, rebase_info_dirty: bool = false, binding_info_dirty: bool = false, @@ -170,31 +168,25 @@ pie_fixups: std.ArrayListUnmanaged(PIEFixup) = .{}, /// rather than sitting in the global scope. stub_fixups: std.ArrayListUnmanaged(StubFixup) = .{}, -pub const GOTEntry = struct { - /// GOT entry can either be a local pointer or an extern (nonlazy) import. - kind: enum { - Local, - Extern, +const SymbolWithLoc = struct { + // Table where the symbol can be found. + where: enum { + global, + import, + undef, + tentative, }, - - /// Id to the macho.nlist_64 from the respective table: either locals or nonlazy imports. - /// TODO I'm more and more inclined to just manage a single, max two symbol tables - /// rather than 4 as we currently do, but I'll follow up in the future PR. - symbol: u32, - - /// Index of this entry in the GOT. - index: u32, + where_index: u32, + local_sym_index: u32 = 0, + file: u16 = 0, }; -pub const Import = struct { - /// MachO symbol table entry. - symbol: macho.nlist_64, - - /// Id of the dynamic library where the specified entries can be found. - dylib_ordinal: i64, - - /// Index of this import within the import list. - index: u32, +pub const GotIndirectionKey = struct { + where: enum { + local, + import, + }, + where_index: u32, }; pub const PIEFixup = struct { @@ -253,9 +245,6 @@ pub const TextBlock = struct { /// If this field is 0, it means the codegen size = 0 and there is no symbol or /// offset table entry. 
local_sym_index: u32, - /// Index into offset table - /// This field is undefined for symbols with size = 0. - offset_table_index: u32, /// Size of this text block /// Unlike in Elf, we need to store the size of this symbol as part of /// the TextBlock since macho.nlist_64 lacks this information. @@ -275,7 +264,6 @@ pub const TextBlock = struct { pub const empty = TextBlock{ .local_sym_index = 0, - .offset_table_index = undefined, .size = 0, .prev = null, .next = null, @@ -433,7 +421,7 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { } } - if (build_options.have_llvm) { + if (build_options.is_stage1) { return self.linkWithZld(comp); } else { switch (self.base.options.effectiveOutputMode()) { @@ -500,7 +488,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { self.error_flags.no_entry_point_found = false; } - assert(!self.offset_table_count_dirty); + assert(!self.got_entries_count_dirty); assert(!self.load_commands_dirty); assert(!self.rebase_info_dirty); assert(!self.binding_info_dirty); @@ -971,31 +959,27 @@ pub fn deinit(self: *MachO) void { if (self.d_sym) |*ds| { ds.deinit(self.base.allocator); } - for (self.lazy_imports.keys()) |*key| { - self.base.allocator.free(key.*); - } - self.lazy_imports.deinit(self.base.allocator); - for (self.nonlazy_imports.keys()) |*key| { - self.base.allocator.free(key.*); - } - self.nonlazy_imports.deinit(self.base.allocator); + self.pie_fixups.deinit(self.base.allocator); self.stub_fixups.deinit(self.base.allocator); self.text_block_free_list.deinit(self.base.allocator); - self.offset_table.deinit(self.base.allocator); - self.offset_table_free_list.deinit(self.base.allocator); - { - var it = self.strtab_cache.keyIterator(); - while (it.next()) |key| { - self.base.allocator.free(key.*); - } - } - self.strtab_cache.deinit(self.base.allocator); + self.got_entries.deinit(self.base.allocator); + self.got_entries_map.deinit(self.base.allocator); + self.got_entries_free_list.deinit(self.base.allocator); + 
self.stubs.deinit(self.base.allocator); + self.stubs_map.deinit(self.base.allocator); self.strtab.deinit(self.base.allocator); + self.imports.deinit(self.base.allocator); self.globals.deinit(self.base.allocator); self.globals_free_list.deinit(self.base.allocator); self.locals.deinit(self.base.allocator); self.locals_free_list.deinit(self.base.allocator); + + for (self.symbol_resolver.keys()) |key| { + self.base.allocator.free(key); + } + self.symbol_resolver.deinit(self.base.allocator); + for (self.load_commands.items) |*lc| { lc.deinit(self.base.allocator); } @@ -1086,8 +1070,8 @@ fn growTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, alig pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void { if (decl.link.macho.local_sym_index != 0) return; - try self.locals.ensureCapacity(self.base.allocator, self.locals.items.len + 1); - try self.offset_table.ensureCapacity(self.base.allocator, self.offset_table.items.len + 1); + try self.locals.ensureUnusedCapacity(self.base.allocator, 1); + try self.got_entries.ensureUnusedCapacity(self.base.allocator, 1); if (self.locals_free_list.popOrNull()) |i| { log.debug("reusing symbol index {d} for {s}", .{ i, decl.name }); @@ -1098,16 +1082,19 @@ pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void { _ = self.locals.addOneAssumeCapacity(); } - if (self.offset_table_free_list.popOrNull()) |i| { - log.debug("reusing offset table entry index {d} for {s}", .{ i, decl.name }); - decl.link.macho.offset_table_index = i; - } else { - log.debug("allocating offset table entry index {d} for {s}", .{ self.offset_table.items.len, decl.name }); - decl.link.macho.offset_table_index = @intCast(u32, self.offset_table.items.len); - _ = self.offset_table.addOneAssumeCapacity(); - self.offset_table_count_dirty = true; - self.rebase_info_dirty = true; - } + const got_index: u32 = blk: { + if (self.got_entries_free_list.popOrNull()) |i| { + log.debug("reusing GOT entry index {d} for {s}", .{ i, decl.name 
}); + break :blk i; + } else { + const got_index = @intCast(u32, self.got_entries.items.len); + log.debug("allocating GOT entry index {d} for {s}", .{ got_index, decl.name }); + _ = self.got_entries.addOneAssumeCapacity(); + self.got_entries_count_dirty = true; + self.rebase_info_dirty = true; + break :blk got_index; + } + }; self.locals.items[decl.link.macho.local_sym_index] = .{ .n_strx = 0, @@ -1116,11 +1103,12 @@ pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void { .n_desc = 0, .n_value = 0, }; - self.offset_table.items[decl.link.macho.offset_table_index] = .{ - .kind = .Local, - .symbol = decl.link.macho.local_sym_index, - .index = decl.link.macho.offset_table_index, + const got_entry = GotIndirectionKey{ + .where = .local, + .where_index = decl.link.macho.local_sym_index, }; + self.got_entries.items[got_index] = got_entry; + try self.got_entries_map.putNoClobber(self.base.allocator, got_entry, got_index); } pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { @@ -1191,13 +1179,12 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { log.debug("growing {s} and moving from 0x{x} to 0x{x}", .{ decl.name, symbol.n_value, vaddr }); if (vaddr != symbol.n_value) { - log.debug(" (writing new offset table entry)", .{}); - self.offset_table.items[decl.link.macho.offset_table_index] = .{ - .kind = .Local, - .symbol = decl.link.macho.local_sym_index, - .index = decl.link.macho.offset_table_index, - }; - try self.writeOffsetTableEntry(decl.link.macho.offset_table_index); + log.debug(" (writing new GOT entry)", .{}); + const got_index = self.got_entries_map.get(.{ + .where = .local, + .where_index = decl.link.macho.local_sym_index, + }) orelse unreachable; + try self.writeGotEntry(got_index); } symbol.n_value = vaddr; @@ -1235,16 +1222,17 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { .n_desc = 0, .n_value = addr, }; - 
self.offset_table.items[decl.link.macho.offset_table_index] = .{ - .kind = .Local, - .symbol = decl.link.macho.local_sym_index, - .index = decl.link.macho.offset_table_index, - }; try self.writeLocalSymbol(decl.link.macho.local_sym_index); + if (self.d_sym) |*ds| try ds.writeLocalSymbol(decl.link.macho.local_sym_index); - try self.writeOffsetTableEntry(decl.link.macho.offset_table_index); + + const got_index = self.got_entries_map.get(.{ + .where = .local, + .where_index = decl.link.macho.local_sym_index, + }) orelse unreachable; + try self.writeGotEntry(got_index); } // Calculate displacements to target addr (if any). @@ -1291,7 +1279,8 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; const stubs = text_segment.sections.items[self.stubs_section_index.?]; for (self.stub_fixups.items) |fixup| { - const stub_addr = stubs.addr + fixup.symbol * stubs.reserved2; + const stubs_index = self.stubs_map.get(fixup.symbol) orelse unreachable; + const stub_addr = stubs.addr + stubs_index * stubs.reserved2; const text_addr = symbol.n_value + fixup.start; switch (self.base.options.target.cpu.arch) { .x86_64 => { @@ -1309,9 +1298,9 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { else => unreachable, // unsupported target architecture } if (!fixup.already_defined) { - try self.writeStub(fixup.symbol); - try self.writeStubInStubHelper(fixup.symbol); - try self.writeLazySymbolPointer(fixup.symbol); + try self.writeStub(stubs_index); + try self.writeStubInStubHelper(stubs_index); + try self.writeLazySymbolPointer(stubs_index); self.rebase_info_dirty = true; self.lazy_binding_info_dirty = true; @@ -1448,10 +1437,16 @@ pub fn freeDecl(self: *MachO, decl: *Module.Decl) void { self.freeTextBlock(&decl.link.macho); if (decl.link.macho.local_sym_index != 0) { self.locals_free_list.append(self.base.allocator, decl.link.macho.local_sym_index) 
catch {}; - self.offset_table_free_list.append(self.base.allocator, decl.link.macho.offset_table_index) catch {}; + + const got_key = GotIndirectionKey{ + .where = .local, + .where_index = decl.link.macho.local_sym_index, + }; + const got_index = self.got_entries_map.get(got_key) orelse unreachable; + _ = self.got_entries_map.remove(got_key); + self.got_entries_free_list.append(self.base.allocator, got_index) catch {}; self.locals.items[decl.link.macho.local_sym_index].n_type = 0; - decl.link.macho.local_sym_index = 0; } if (self.d_sym) |*ds| { @@ -1506,8 +1501,8 @@ pub fn populateMissingMetadata(self: *MachO) !void { const initprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE; const program_code_size_hint = self.base.options.program_code_size_hint; - const offset_table_size_hint = @sizeOf(u64) * self.base.options.symbol_count_hint; - const ideal_size = self.header_pad + program_code_size_hint + 3 * offset_table_size_hint; + const got_size_hint = @sizeOf(u64) * self.base.options.symbol_count_hint; + const ideal_size = self.header_pad + program_code_size_hint + 3 * got_size_hint; const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size); log.debug("found __TEXT segment free space 0x{x} to 0x{x}", .{ 0, needed_size }); @@ -1934,28 +1929,28 @@ pub fn populateMissingMetadata(self: *MachO) !void { }); self.load_commands_dirty = true; } - if (!self.nonlazy_imports.contains("dyld_stub_binder")) { - const index = @intCast(u32, self.nonlazy_imports.count()); + if (!self.symbol_resolver.contains("dyld_stub_binder")) { + const import_sym_index = @intCast(u32, self.imports.items.len); + try self.imports.append(self.base.allocator, .{ + .n_strx = try self.makeString("dyld_stub_binder"), + .n_type = macho.N_UNDF | macho.N_EXT, + .n_sect = 0, + .n_desc = packDylibOrdinal(1), + .n_value = 0, + }); const name = try self.base.allocator.dupe(u8, "dyld_stub_binder"); - const offset = try self.makeString("dyld_stub_binder"); - try 
self.nonlazy_imports.putNoClobber(self.base.allocator, name, .{ - .symbol = .{ - .n_strx = offset, - .n_type = std.macho.N_UNDF | std.macho.N_EXT, - .n_sect = 0, - .n_desc = std.macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY | std.macho.N_SYMBOL_RESOLVER, - .n_value = 0, - }, - .dylib_ordinal = 1, // TODO this is currently hardcoded. - .index = index, + try self.symbol_resolver.putNoClobber(self.base.allocator, name, .{ + .where = .import, + .where_index = import_sym_index, }); - const off_index = @intCast(u32, self.offset_table.items.len); - try self.offset_table.append(self.base.allocator, .{ - .kind = .Extern, - .symbol = index, - .index = off_index, - }); - try self.writeOffsetTableEntry(off_index); + const got_key = GotIndirectionKey{ + .where = .import, + .where_index = import_sym_index, + }; + const got_index = @intCast(u32, self.got_entries.items.len); + try self.got_entries.append(self.base.allocator, got_key); + try self.got_entries_map.putNoClobber(self.base.allocator, got_key, got_index); + try self.writeGotEntry(got_index); self.binding_info_dirty = true; } if (self.stub_helper_stubs_start_off == null) { @@ -2068,24 +2063,25 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, return vaddr; } -pub fn addExternSymbol(self: *MachO, name: []const u8) !u32 { - const index = @intCast(u32, self.lazy_imports.count()); - const offset = try self.makeString(name); - const sym_name = try self.base.allocator.dupe(u8, name); - const dylib_ordinal = 1; // TODO this is now hardcoded, since we only support libSystem. 
- try self.lazy_imports.putNoClobber(self.base.allocator, sym_name, .{ - .symbol = .{ - .n_strx = offset, - .n_type = macho.N_UNDF | macho.N_EXT, - .n_sect = 0, - .n_desc = macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY | macho.N_SYMBOL_RESOLVER, - .n_value = 0, - }, - .dylib_ordinal = dylib_ordinal, - .index = index, +pub fn addExternFn(self: *MachO, name: []const u8) !SymbolWithLoc { + log.debug("adding new extern function '{s}' with dylib ordinal 1", .{name}); + const import_sym_index = @intCast(u32, self.imports.items.len); + try self.imports.append(self.base.allocator, .{ + .n_strx = try self.makeString(name), + .n_type = macho.N_UNDF | macho.N_EXT, + .n_sect = 0, + .n_desc = packDylibOrdinal(1), + .n_value = 0, }); - log.debug("adding new extern symbol '{s}' with dylib ordinal '{}'", .{ name, dylib_ordinal }); - return index; + const resolv = .{ + .where = .import, + .where_index = import_sym_index, + }; + try self.symbol_resolver.putNoClobber(self.base.allocator, try self.base.allocator.dupe(u8, name), resolv); + const stubs_index = @intCast(u32, self.stubs.items.len); + try self.stubs.append(self.base.allocator, import_sym_index); + try self.stubs_map.putNoClobber(self.base.allocator, import_sym_index, stubs_index); + return resolv; } const NextSegmentAddressAndOffset = struct { @@ -2239,29 +2235,26 @@ fn findFreeSpaceLinkedit(self: *MachO, object_size: u64, min_alignment: u16, sta return st; } -fn writeOffsetTableEntry(self: *MachO, index: usize) !void { +fn writeGotEntry(self: *MachO, index: usize) !void { const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; const sect = &seg.sections.items[self.got_section_index.?]; const off = sect.offset + @sizeOf(u64) * index; - if (self.offset_table_count_dirty) { + if (self.got_entries_count_dirty) { // TODO relocate. 
- self.offset_table_count_dirty = false; + self.got_entries_count_dirty = false; } - const got_entry = self.offset_table.items[index]; - const sym = blk: { - switch (got_entry.kind) { - .Local => { - break :blk self.locals.items[got_entry.symbol]; - }, - .Extern => { - break :blk self.nonlazy_imports.values()[got_entry.symbol].symbol; - }, - } + const got_entry = self.got_entries.items[index]; + const sym = switch (got_entry.where) { + .local => self.locals.items[got_entry.where_index], + .import => self.imports.items[got_entry.where_index], }; - const sym_name = self.getString(sym.n_strx) orelse unreachable; - log.debug("writing offset table entry [ 0x{x} => 0x{x} ({s}) ]", .{ off, sym.n_value, sym_name }); + log.debug("writing offset table entry [ 0x{x} => 0x{x} ({s}) ]", .{ + off, + sym.n_value, + self.getString(sym.n_strx), + }); try self.base.file.?.pwriteAll(mem.asBytes(&sym.n_value), off); } @@ -2539,7 +2532,7 @@ fn relocateSymbolTable(self: *MachO) !void { const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; const nlocals = self.locals.items.len; const nglobals = self.globals.items.len; - const nundefs = self.lazy_imports.count() + self.nonlazy_imports.count(); + const nundefs = self.imports.items.len; const nsyms = nlocals + nglobals + nundefs; if (symtab.nsyms < nsyms) { @@ -2584,17 +2577,7 @@ fn writeAllGlobalAndUndefSymbols(self: *MachO) !void { const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; const nlocals = self.locals.items.len; const nglobals = self.globals.items.len; - - const nundefs = self.lazy_imports.count() + self.nonlazy_imports.count(); - var undefs = std.ArrayList(macho.nlist_64).init(self.base.allocator); - defer undefs.deinit(); - try undefs.ensureCapacity(nundefs); - for (self.lazy_imports.values()) |*value| { - undefs.appendAssumeCapacity(value.symbol); - } - for (self.nonlazy_imports.values()) |*value| { - undefs.appendAssumeCapacity(value.symbol); - } + const nundefs = 
self.imports.items.len; const locals_off = symtab.symoff; const locals_size = nlocals * @sizeOf(macho.nlist_64); @@ -2607,7 +2590,7 @@ fn writeAllGlobalAndUndefSymbols(self: *MachO) !void { const undefs_off = globals_off + globals_size; const undefs_size = nundefs * @sizeOf(macho.nlist_64); log.debug("writing extern symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off }); - try self.base.file.?.pwriteAll(mem.sliceAsBytes(undefs.items), undefs_off); + try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.imports.items), undefs_off); // Update dynamic symbol table. const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; @@ -2633,10 +2616,10 @@ fn writeIndirectSymbolTable(self: *MachO) !void { const la_symbol_ptr = &data_segment.sections.items[self.la_symbol_ptr_section_index.?]; const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; - const lazy_count = self.lazy_imports.count(); - const got_entries = self.offset_table.items; + const nstubs = @intCast(u32, self.stubs.items.len); + const ngot_entries = @intCast(u32, self.got_entries.items.len); const allocated_size = self.allocatedSizeLinkedit(dysymtab.indirectsymoff); - const nindirectsyms = @intCast(u32, lazy_count * 2 + got_entries.len); + const nindirectsyms = nstubs * 2 + ngot_entries; const needed_size = @intCast(u32, nindirectsyms * @sizeOf(u32)); if (needed_size > allocated_size) { @@ -2655,35 +2638,25 @@ fn writeIndirectSymbolTable(self: *MachO) !void { var writer = stream.writer(); stubs.reserved1 = 0; - { - var i: usize = 0; - while (i < lazy_count) : (i += 1) { - const symtab_idx = @intCast(u32, dysymtab.iundefsym + i); - try writer.writeIntLittle(u32, symtab_idx); - } + for (self.stubs.items) |id| { + try writer.writeIntLittle(u32, dysymtab.iundefsym + id); } - const base_id = @intCast(u32, lazy_count); - got.reserved1 = base_id; - for (got_entries) |entry| { - switch (entry.kind) { - .Local => { + got.reserved1 = nstubs; + for 
(self.got_entries.items) |entry| { + switch (entry.where) { + .import => { + try writer.writeIntLittle(u32, dysymtab.iundefsym + entry.where_index); + }, + .local => { try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); }, - .Extern => { - const symtab_idx = @intCast(u32, dysymtab.iundefsym + entry.index + base_id); - try writer.writeIntLittle(u32, symtab_idx); - }, } } - la_symbol_ptr.reserved1 = got.reserved1 + @intCast(u32, got_entries.len); - { - var i: usize = 0; - while (i < lazy_count) : (i += 1) { - const symtab_idx = @intCast(u32, dysymtab.iundefsym + i); - try writer.writeIntLittle(u32, symtab_idx); - } + la_symbol_ptr.reserved1 = got.reserved1 + ngot_entries; + for (self.stubs.items) |id| { + try writer.writeIntLittle(u32, dysymtab.iundefsym + id); } try self.base.file.?.pwriteAll(buf, dysymtab.indirectsymoff); @@ -2756,13 +2729,18 @@ fn writeExportTrie(self: *MachO) !void { defer trie.deinit(); const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - for (self.globals.items) |symbol| { - // TODO figure out if we should put all global symbols into the export trie - const name = self.getString(symbol.n_strx) orelse unreachable; - assert(symbol.n_value >= text_segment.inner.vmaddr); + const base_address = text_segment.inner.vmaddr; + + // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. 
+ log.debug("writing export trie", .{}); + + for (self.globals.items) |sym| { + const sym_name = self.getString(sym.n_strx); + log.debug(" | putting '{s}' defined at 0x{x}", .{ sym_name, sym.n_value }); + try trie.put(.{ - .name = name, - .vmaddr_offset = symbol.n_value - text_segment.inner.vmaddr, + .name = sym_name, + .vmaddr_offset = sym.n_value - base_address, .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, }); } @@ -2804,27 +2782,28 @@ fn writeRebaseInfoTable(self: *MachO) !void { const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; const sect = seg.sections.items[idx]; const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = self.data_const_segment_cmd_index.?; + const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); + + for (self.got_entries.items) |entry, i| { + if (entry.where == .import) continue; - for (self.offset_table.items) |entry| { - if (entry.kind == .Extern) continue; try pointers.append(.{ - .offset = base_offset + entry.index * @sizeOf(u64), + .offset = base_offset + i * @sizeOf(u64), .segment_id = segment_id, }); } } if (self.la_symbol_ptr_section_index) |idx| { - try pointers.ensureCapacity(pointers.items.len + self.lazy_imports.count()); const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; const sect = seg.sections.items[idx]; const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = self.data_segment_cmd_index.?; + const segment_id = @intCast(u16, self.data_segment_cmd_index.?); - for (self.lazy_imports.values()) |*value| { + try pointers.ensureUnusedCapacity(self.stubs.items.len); + for (self.stubs.items) |_, i| { pointers.appendAssumeCapacity(.{ - .offset = base_offset + value.index * @sizeOf(u64), + .offset = base_offset + i * @sizeOf(u64), .segment_id = segment_id, }); } @@ -2872,15 +2851,15 @@ fn writeBindingInfoTable(self: *MachO) !void { const base_offset = sect.addr - seg.inner.vmaddr; const segment_id = @intCast(u16, 
self.data_const_segment_cmd_index.?); - for (self.offset_table.items) |entry| { - if (entry.kind == .Local) continue; - const import_key = self.nonlazy_imports.keys()[entry.symbol]; - const import_ordinal = self.nonlazy_imports.values()[entry.symbol].dylib_ordinal; + for (self.got_entries.items) |entry, i| { + if (entry.where == .local) continue; + + const sym = self.imports.items[entry.where_index]; try pointers.append(.{ - .offset = base_offset + entry.index * @sizeOf(u64), + .offset = base_offset + i * @sizeOf(u64), .segment_id = segment_id, - .dylib_ordinal = import_ordinal, - .name = import_key, + .dylib_ordinal = unpackDylibOrdinal(sym.n_desc), + .name = self.getString(sym.n_strx), }); } } @@ -2920,21 +2899,20 @@ fn writeLazyBindingInfoTable(self: *MachO) !void { defer pointers.deinit(); if (self.la_symbol_ptr_section_index) |idx| { - try pointers.ensureCapacity(self.lazy_imports.count()); const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; const sect = seg.sections.items[idx]; const base_offset = sect.addr - seg.inner.vmaddr; const segment_id = @intCast(u16, self.data_segment_cmd_index.?); - const slice = self.lazy_imports.entries.slice(); - const keys = slice.items(.key); - const values = slice.items(.value); - for (keys) |*key, i| { + try pointers.ensureUnusedCapacity(self.stubs.items.len); + + for (self.stubs.items) |import_id, i| { + const sym = self.imports.items[import_id]; pointers.appendAssumeCapacity(.{ - .offset = base_offset + values[i].index * @sizeOf(u64), + .offset = base_offset + i * @sizeOf(u64), .segment_id = segment_id, - .dylib_ordinal = values[i].dylib_ordinal, - .name = key.*, + .dylib_ordinal = unpackDylibOrdinal(sym.n_desc), + .name = self.getString(sym.n_strx), }); } } @@ -2966,7 +2944,7 @@ fn writeLazyBindingInfoTable(self: *MachO) !void { } fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { - if (self.lazy_imports.count() == 0) return; + if (self.stubs.items.len == 0) return; 
var stream = std.io.fixedBufferStream(buffer); var reader = stream.reader(); @@ -3011,7 +2989,7 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { else => {}, } } - assert(self.lazy_imports.count() <= offsets.items.len); + assert(self.stubs.items.len <= offsets.items.len); const stub_size: u4 = switch (self.base.options.target.cpu.arch) { .x86_64 => 10, @@ -3024,9 +3002,9 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { else => unreachable, }; var buf: [@sizeOf(u32)]u8 = undefined; - for (offsets.items[0..self.lazy_imports.count()]) |offset, i| { - const placeholder_off = self.stub_helper_stubs_start_off.? + i * stub_size + off; - mem.writeIntLittle(u32, &buf, offset); + for (self.stubs.items) |_, index| { + const placeholder_off = self.stub_helper_stubs_start_off.? + index * stub_size + off; + mem.writeIntLittle(u32, &buf, offsets.items[index]); try self.base.file.?.pwriteAll(&buf, placeholder_off); } } @@ -3182,11 +3160,6 @@ fn hasTlvDescriptors(_: *MachO) bool { } pub fn makeString(self: *MachO, string: []const u8) !u32 { - if (self.strtab_cache.get(string)) |off| { - log.debug("reusing string '{s}' at offset 0x{x}", .{ string, off }); - return off; - } - try self.strtab.ensureUnusedCapacity(self.base.allocator, string.len + 1); const new_off = @intCast(u32, self.strtab.items.len); @@ -3195,12 +3168,18 @@ pub fn makeString(self: *MachO, string: []const u8) !u32 { self.strtab.appendSliceAssumeCapacity(string); self.strtab.appendAssumeCapacity(0); - try self.strtab_cache.putNoClobber(self.base.allocator, try self.base.allocator.dupe(u8, string), new_off); - return new_off; } -pub fn getString(self: *MachO, off: u32) ?[]const u8 { +pub fn getString(self: *MachO, off: u32) []const u8 { assert(off < self.strtab.items.len); return mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr + off)); } + +fn packDylibOrdinal(ordinal: u16) u16 { + return ordinal * macho.N_SYMBOL_RESOLVER; +} + +fn 
unpackDylibOrdinal(pack: u16) u16 { + return @divExact(pack, macho.N_SYMBOL_RESOLVER); +} diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig index 9991fb642a..ad2d4c11cf 100644 --- a/src/link/MachO/TextBlock.zig +++ b/src/link/MachO/TextBlock.zig @@ -1059,10 +1059,6 @@ pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { break :blk sym.n_value; }, .import => { - // TODO I think this will be autohandled by self.bindings. - // if (mem.eql(u8, zld.getString(rel.target.strx), "__tlv_bootstrap")) { - // break :blk 0; // Dynamically bound by dyld. - // } const stubs_index = zld.stubs.getIndex(rel.where_index) orelse { // TODO verify in TextBlock that the symbol is indeed dynamically bound. break :blk 0; // Dynamically bound by dyld. diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index d8edc54b73..41edbb5988 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -2367,7 +2367,7 @@ fn writeRebaseInfoTable(self: *Zld) !void { const base_offset = sect.addr - seg.inner.vmaddr; const segment_id = @intCast(u16, self.data_segment_cmd_index.?); - try pointers.ensureCapacity(pointers.items.len + self.stubs.count()); + try pointers.ensureUnusedCapacity(self.stubs.count()); for (self.stubs.keys()) |_, i| { pointers.appendAssumeCapacity(.{ .offset = base_offset + i * @sizeOf(u64), @@ -2450,22 +2450,6 @@ fn writeBindInfoTable(self: *Zld) !void { } } - // if (self.tlv_section_index) |idx| { - // const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - // const sect = seg.sections.items[idx]; - // const base_offset = sect.addr - seg.inner.vmaddr; - // const segment_id = @intCast(u16, self.data_segment_cmd_index.?); - - // const sym = self.globals.get("__tlv_bootstrap") orelse unreachable; - // const proxy = sym.payload.proxy; - // try pointers.append(.{ - // .offset = base_offset, - // .segment_id = segment_id, - // .dylib_ordinal = proxy.dylibOrdinal(), - // .name = self.getString(sym.strx), - // }); - // } - 
const size = try bindInfoSize(pointers.items); var buffer = try self.allocator.alloc(u8, @intCast(usize, size)); defer self.allocator.free(buffer); @@ -2494,7 +2478,7 @@ fn writeLazyBindInfoTable(self: *Zld) !void { const base_offset = sect.addr - seg.inner.vmaddr; const segment_id = @intCast(u16, self.data_segment_cmd_index.?); - try pointers.ensureCapacity(self.stubs.count()); + try pointers.ensureUnusedCapacity(self.stubs.count()); for (self.stubs.keys()) |key, i| { const sym = self.imports.items[key]; From e0b53ad3c99b8f38d2fdba7b9aa6bf3e638dbeb9 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 18 Jul 2021 15:39:01 +0200 Subject: [PATCH 67/81] macho: clean up imports --- src/link/MachO.zig | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index efd6e60da9..ace95297d2 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1,35 +1,36 @@ const MachO = @This(); const std = @import("std"); -const Allocator = std.mem.Allocator; +const build_options = @import("build_options"); const assert = std.debug.assert; const fmt = std.fmt; const fs = std.fs; const log = std.log.scoped(.link); const macho = std.macho; -const codegen = @import("../codegen.zig"); -const aarch64 = @import("../codegen/aarch64.zig"); const math = std.math; const mem = std.mem; const meta = std.meta; +const aarch64 = @import("../codegen/aarch64.zig"); const bind = @import("MachO/bind.zig"); -const trace = @import("../tracy.zig").trace; -const build_options = @import("build_options"); -const Module = @import("../Module.zig"); -const Compilation = @import("../Compilation.zig"); +const codegen = @import("../codegen.zig"); +const commands = @import("MachO/commands.zig"); const link = @import("../link.zig"); -const File = link.File; -const Cache = @import("../Cache.zig"); const target_util = @import("../target.zig"); +const trace = @import("../tracy.zig").trace; -const DebugSymbols = 
@import("MachO/DebugSymbols.zig"); -const Trie = @import("MachO/Trie.zig"); +const Allocator = mem.Allocator; +const Cache = @import("../Cache.zig"); const CodeSignature = @import("MachO/CodeSignature.zig"); +const Compilation = @import("../Compilation.zig"); +const DebugSymbols = @import("MachO/DebugSymbols.zig"); +const LoadCommand = commands.LoadCommand; +const Module = @import("../Module.zig"); +const File = link.File; +const Trie = @import("MachO/Trie.zig"); +const SegmentCommand = commands.SegmentCommand; const Zld = @import("MachO/Zld.zig"); -usingnamespace @import("MachO/commands.zig"); - pub const base_tag: File.Tag = File.Tag.macho; base: File, @@ -1841,7 +1842,7 @@ pub fn populateMissingMetadata(self: *MachO) !void { @sizeOf(macho.dylinker_command) + mem.lenZ(DEFAULT_DYLD_PATH), @sizeOf(u64), )); - var dylinker_cmd = emptyGenericCommandWithData(macho.dylinker_command{ + var dylinker_cmd = commands.emptyGenericCommandWithData(macho.dylinker_command{ .cmd = macho.LC_LOAD_DYLINKER, .cmdsize = cmdsize, .name = @sizeOf(macho.dylinker_command), @@ -1855,7 +1856,7 @@ pub fn populateMissingMetadata(self: *MachO) !void { if (self.libsystem_cmd_index == null) { self.libsystem_cmd_index = @intCast(u16, self.load_commands.items.len); - var dylib_cmd = try createLoadDylibCommand(self.base.allocator, mem.spanZ(LIB_SYSTEM_PATH), 2, 0, 0); + var dylib_cmd = try commands.createLoadDylibCommand(self.base.allocator, mem.spanZ(LIB_SYSTEM_PATH), 2, 0, 0); errdefer dylib_cmd.deinit(self.base.allocator); try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd }); @@ -3105,7 +3106,7 @@ fn writeLoadCommands(self: *MachO) !void { /// Writes Mach-O file header. 
fn writeHeader(self: *MachO) !void { - var header = emptyHeader(.{ + var header = commands.emptyHeader(.{ .flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE | macho.MH_TWOLEVEL, }); From f6d13e9d6f7a4b9161f369544501ecbb447c1658 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 18 Jul 2021 17:48:00 +0200 Subject: [PATCH 68/81] zld: move contents of Zld into MachO module --- CMakeLists.txt | 1 - src/link/MachO.zig | 2602 +++++++++++++++++++++++++++-- src/link/MachO/Dylib.zig | 4 +- src/link/MachO/Object.zig | 86 +- src/link/MachO/TextBlock.zig | 264 +-- src/link/MachO/Zld.zig | 3062 ---------------------------------- 6 files changed, 2649 insertions(+), 3370 deletions(-) delete mode 100644 src/link/MachO/Zld.zig diff --git a/CMakeLists.txt b/CMakeLists.txt index 83352beea8..661535e2d6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -583,7 +583,6 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/link/MachO/Object.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/TextBlock.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Trie.zig" - "${CMAKE_SOURCE_DIR}/src/link/MachO/Zld.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/bind.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/commands.zig" "${CMAKE_SOURCE_DIR}/src/link/Wasm.zig" diff --git a/src/link/MachO.zig b/src/link/MachO.zig index ace95297d2..66a1ee4e04 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -20,16 +20,19 @@ const target_util = @import("../target.zig"); const trace = @import("../tracy.zig").trace; const Allocator = mem.Allocator; +const Archive = @import("MachO/Archive.zig"); const Cache = @import("../Cache.zig"); const CodeSignature = @import("MachO/CodeSignature.zig"); const Compilation = @import("../Compilation.zig"); const DebugSymbols = @import("MachO/DebugSymbols.zig"); +const Dylib = @import("MachO/Dylib.zig"); +const Object = @import("MachO/Object.zig"); const LoadCommand = commands.LoadCommand; const Module = @import("../Module.zig"); const File = link.File; +pub const TextBlock = 
@import("MachO/TextBlock.zig"); const Trie = @import("MachO/Trie.zig"); const SegmentCommand = commands.SegmentCommand; -const Zld = @import("MachO/Zld.zig"); pub const base_tag: File.Tag = File.Tag.macho; @@ -47,63 +50,83 @@ page_size: u16, /// potential future extensions. header_pad: u16 = 0x1000, -/// Table of all load commands -load_commands: std.ArrayListUnmanaged(LoadCommand) = .{}, -/// __PAGEZERO segment -pagezero_segment_cmd_index: ?u16 = null, -/// __TEXT segment -text_segment_cmd_index: ?u16 = null, -/// __DATA_CONST segment -data_const_segment_cmd_index: ?u16 = null, -/// __DATA segment -data_segment_cmd_index: ?u16 = null, -/// __LINKEDIT segment -linkedit_segment_cmd_index: ?u16 = null, -/// Dyld info -dyld_info_cmd_index: ?u16 = null, -/// Symbol table -symtab_cmd_index: ?u16 = null, -/// Dynamic symbol table -dysymtab_cmd_index: ?u16 = null, -/// Path to dyld linker -dylinker_cmd_index: ?u16 = null, -/// Path to libSystem -libsystem_cmd_index: ?u16 = null, -/// Data-in-code section of __LINKEDIT segment -data_in_code_cmd_index: ?u16 = null, -/// Address to entry point function -function_starts_cmd_index: ?u16 = null, -/// Main/entry point -/// Specifies offset wrt __TEXT segment start address to the main entry point -/// of the binary. -main_cmd_index: ?u16 = null, -/// Minimum OS version -version_min_cmd_index: ?u16 = null, -/// Source version -source_version_cmd_index: ?u16 = null, -/// UUID load command -uuid_cmd_index: ?u16 = null, -/// Code signature -code_signature_cmd_index: ?u16 = null, - -/// Index into __TEXT,__text section. -text_section_index: ?u16 = null, -/// Index into __TEXT,__stubs section. -stubs_section_index: ?u16 = null, -/// Index into __TEXT,__stub_helper section. -stub_helper_section_index: ?u16 = null, -/// Index into __DATA_CONST,__got section. -got_section_index: ?u16 = null, -/// Index into __DATA,__la_symbol_ptr section. -la_symbol_ptr_section_index: ?u16 = null, -/// Index into __DATA,__data section. 
-data_section_index: ?u16 = null, /// The absolute address of the entry point. entry_addr: ?u64 = null, +objects: std.ArrayListUnmanaged(*Object) = .{}, +archives: std.ArrayListUnmanaged(*Archive) = .{}, +dylibs: std.ArrayListUnmanaged(*Dylib) = .{}, + +next_dylib_ordinal: u16 = 1, + +load_commands: std.ArrayListUnmanaged(LoadCommand) = .{}, + +pagezero_segment_cmd_index: ?u16 = null, +text_segment_cmd_index: ?u16 = null, +data_const_segment_cmd_index: ?u16 = null, +data_segment_cmd_index: ?u16 = null, +linkedit_segment_cmd_index: ?u16 = null, +dyld_info_cmd_index: ?u16 = null, +symtab_cmd_index: ?u16 = null, +dysymtab_cmd_index: ?u16 = null, +dylinker_cmd_index: ?u16 = null, +data_in_code_cmd_index: ?u16 = null, +function_starts_cmd_index: ?u16 = null, +main_cmd_index: ?u16 = null, +dylib_id_cmd_index: ?u16 = null, +version_min_cmd_index: ?u16 = null, +source_version_cmd_index: ?u16 = null, +uuid_cmd_index: ?u16 = null, +code_signature_cmd_index: ?u16 = null, +/// Path to libSystem +/// TODO this is obsolete, remove it. 
+libsystem_cmd_index: ?u16 = null, + +// __TEXT segment sections +text_section_index: ?u16 = null, +stubs_section_index: ?u16 = null, +stub_helper_section_index: ?u16 = null, +text_const_section_index: ?u16 = null, +cstring_section_index: ?u16 = null, +ustring_section_index: ?u16 = null, +gcc_except_tab_section_index: ?u16 = null, +unwind_info_section_index: ?u16 = null, +eh_frame_section_index: ?u16 = null, + +objc_methlist_section_index: ?u16 = null, +objc_methname_section_index: ?u16 = null, +objc_methtype_section_index: ?u16 = null, +objc_classname_section_index: ?u16 = null, + +// __DATA_CONST segment sections +got_section_index: ?u16 = null, +mod_init_func_section_index: ?u16 = null, +mod_term_func_section_index: ?u16 = null, +data_const_section_index: ?u16 = null, + +objc_cfstring_section_index: ?u16 = null, +objc_classlist_section_index: ?u16 = null, +objc_imageinfo_section_index: ?u16 = null, + +// __DATA segment sections +tlv_section_index: ?u16 = null, +tlv_data_section_index: ?u16 = null, +tlv_bss_section_index: ?u16 = null, +la_symbol_ptr_section_index: ?u16 = null, +data_section_index: ?u16 = null, +bss_section_index: ?u16 = null, +common_section_index: ?u16 = null, + +objc_const_section_index: ?u16 = null, +objc_selrefs_section_index: ?u16 = null, +objc_classrefs_section_index: ?u16 = null, +objc_data_section_index: ?u16 = null, + locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, globals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, imports: std.ArrayListUnmanaged(macho.nlist_64) = .{}, +undefs: std.ArrayListUnmanaged(macho.nlist_64) = .{}, +tentatives: std.ArrayListUnmanaged(macho.nlist_64) = .{}, symbol_resolver: std.StringArrayHashMapUnmanaged(SymbolWithLoc) = .{}, locals_free_list: std.ArrayListUnmanaged(u32) = .{}, @@ -133,6 +156,9 @@ export_info_dirty: bool = false, strtab_dirty: bool = false, strtab_needs_relocation: bool = false, +has_dices: bool = false, +has_stabs: bool = false, + /// A list of text blocks that have surplus capacity. 
This list can have false /// positives, as functions grow and shrink over time, only sometimes being added /// or removed from the freelist. @@ -153,6 +179,8 @@ text_block_free_list: std.ArrayListUnmanaged(*TextBlock) = .{}, /// Pointer to the last allocated text block last_text_block: ?*TextBlock = null, +blocks: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}, + /// A list of all PIE fixups required for this run of the linker. /// Warning, this is currently NOT thread-safe. See the TODO below. /// TODO Move this list inside `updateDecl` where it should be allocated @@ -236,71 +264,7 @@ const LIB_SYSTEM_PATH: [*:0]const u8 = DEFAULT_LIB_SEARCH_PATH ++ "/libSystem.B. /// it as a possible place to put new symbols, it must have enough room for this many bytes /// (plus extra for reserved capacity). const minimum_text_block_size = 64; -const min_text_capacity = padToIdeal(minimum_text_block_size); - -pub const TextBlock = struct { - /// Each decl always gets a local symbol with the fully qualified name. - /// The vaddr and size are found here directly. - /// The file offset is found by computing the vaddr offset from the section vaddr - /// the symbol references, and adding that to the file offset of the section. - /// If this field is 0, it means the codegen size = 0 and there is no symbol or - /// offset table entry. - local_sym_index: u32, - /// Size of this text block - /// Unlike in Elf, we need to store the size of this symbol as part of - /// the TextBlock since macho.nlist_64 lacks this information. - size: u64, - /// Points to the previous and next neighbours - prev: ?*TextBlock, - next: ?*TextBlock, - - /// Previous/next linked list pointers. - /// This is the linked list node for this Decl's corresponding .debug_info tag. - dbg_info_prev: ?*TextBlock, - dbg_info_next: ?*TextBlock, - /// Offset into .debug_info pointing to the tag for this Decl. - dbg_info_off: u32, - /// Size of the .debug_info tag for this Decl, not including padding. 
- dbg_info_len: u32, - - pub const empty = TextBlock{ - .local_sym_index = 0, - .size = 0, - .prev = null, - .next = null, - .dbg_info_prev = null, - .dbg_info_next = null, - .dbg_info_off = undefined, - .dbg_info_len = undefined, - }; - - /// Returns how much room there is to grow in virtual address space. - /// File offset relocation happens transparently, so it is not included in - /// this calculation. - fn capacity(self: TextBlock, macho_file: MachO) u64 { - const self_sym = macho_file.locals.items[self.local_sym_index]; - if (self.next) |next| { - const next_sym = macho_file.locals.items[next.local_sym_index]; - return next_sym.n_value - self_sym.n_value; - } else { - // We are the last block. - // The capacity is limited only by virtual address space. - return std.math.maxInt(u64) - self_sym.n_value; - } - } - - fn freeListEligible(self: TextBlock, macho_file: MachO) bool { - // No need to keep a free list node for the last block. - const next = self.next orelse return false; - const self_sym = macho_file.locals.items[self.local_sym_index]; - const next_sym = macho_file.locals.items[next.local_sym_index]; - const cap = next_sym.n_value - self_sym.n_value; - const ideal_cap = padToIdeal(self.size); - if (cap <= ideal_cap) return false; - const surplus = cap - ideal_cap; - return surplus >= min_text_capacity; - } -}; +pub const min_text_capacity = padToIdeal(minimum_text_block_size); pub const Export = struct { sym_index: ?u32 = null, @@ -452,9 +416,9 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { self.load_commands_dirty = true; } try self.writeRebaseInfoTable(); - try self.writeBindingInfoTable(); - try self.writeLazyBindingInfoTable(); - try self.writeExportTrie(); + try self.writeBindInfoTable(); + try self.writeLazyBindInfoTable(); + try self.writeExportInfo(); try self.writeAllGlobalAndUndefSymbols(); try self.writeIndirectSymbolTable(); try self.writeStringTable(); @@ -718,14 +682,6 @@ fn linkWithZld(self: *MachO, comp: *Compilation) 
!void { try fs.cwd().copyFile(the_object_path, fs.cwd(), full_out_path, .{}); } } else { - var zld = try Zld.init(self.base.allocator); - defer { - zld.closeFiles(); - zld.deinit(); - } - zld.target = target; - zld.stack_size = stack_size; - // Positional arguments to the linker such as object files and static archives. var positionals = std.ArrayList([]const u8).init(arena); @@ -868,23 +824,6 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { rpaths.appendAssumeCapacity(key.*); } - const output: Zld.Output = output: { - if (is_dyn_lib) { - const install_name = try std.fmt.allocPrint(arena, "@rpath/{s}", .{ - self.base.options.emit.?.sub_path, - }); - break :output .{ - .tag = .dylib, - .path = full_out_path, - .install_name = install_name, - }; - } - break :output .{ - .tag = .exe, - .path = full_out_path, - }; - }; - if (self.base.options.verbose_link) { var argv = std.ArrayList([]const u8).init(arena); @@ -898,8 +837,11 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { if (is_dyn_lib) { try argv.append("-dylib"); + const install_name = try std.fmt.allocPrint(arena, "@rpath/{s}", .{ + self.base.options.emit.?.sub_path, + }); try argv.append("-install_name"); - try argv.append(output.install_name.?); + try argv.append(install_name); } if (self.base.options.sysroot) |syslibroot| { @@ -915,7 +857,7 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { try argv.appendSlice(positionals.items); try argv.append("-o"); - try argv.append(output.path); + try argv.append(full_out_path); if (native_libsystem_available) { try argv.append("-lSystem"); @@ -933,11 +875,99 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { Compilation.dump_argv(argv.items); } - try zld.link(positionals.items, output, .{ - .syslibroot = self.base.options.sysroot, - .libs = libs.items, - .rpaths = rpaths.items, + self.base.file = try fs.cwd().createFile(full_out_path, .{ + .truncate = true, + .read = true, + .mode = if (std.Target.current.os.tag == .windows) 0 else 
0o777, }); + self.page_size = switch (self.base.options.target.cpu.arch) { + .aarch64 => 0x4000, + .x86_64 => 0x1000, + else => unreachable, + }; + + try self.populateMetadata(); + try self.parseInputFiles(positionals.items, self.base.options.sysroot); + try self.parseLibs(libs.items, self.base.options.sysroot); + try self.resolveSymbols(); + try self.parseTextBlocks(); + + { + // Add dyld_stub_binder as the final GOT entry. + const resolv = self.symbol_resolver.get("dyld_stub_binder") orelse unreachable; + const got_index = @intCast(u32, self.got_entries.items.len); + const got_entry = GotIndirectionKey{ + .where = .import, + .where_index = resolv.where_index, + }; + try self.got_entries.append(self.base.allocator, got_entry); + try self.got_entries_map.putNoClobber(self.base.allocator, got_entry, got_index); + } + + try self.sortSections(); + try self.addRpaths(rpaths.items); + try self.addDataInCodeLC(); + try self.addCodeSignatureLC(); + try self.allocateTextSegment(); + try self.allocateDataConstSegment(); + try self.allocateDataSegment(); + self.allocateLinkeditSegment(); + try self.allocateTextBlocks(); + + // log.warn("locals", .{}); + // for (self.locals.items) |sym, id| { + // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); + // } + + // log.warn("globals", .{}); + // for (self.globals.items) |sym, id| { + // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); + // } + + // log.warn("tentatives", .{}); + // for (self.tentatives.items) |sym, id| { + // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); + // } + + // log.warn("undefines", .{}); + // for (self.undefs.items) |sym, id| { + // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); + // } + + // log.warn("imports", .{}); + // for (self.imports.items) |sym, id| { + // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); + // } + + // log.warn("symbol resolver", .{}); + // for (self.symbol_resolver.keys()) |key| 
{ + // log.warn(" {s} => {}", .{ key, self.symbol_resolver.get(key).? }); + // } + + // log.warn("mappings", .{}); + // for (self.objects.items) |object, id| { + // const object_id = @intCast(u16, id); + // log.warn(" in object {s}", .{object.name.?}); + // for (object.symtab.items) |sym, sym_id| { + // if (object.symbol_mapping.get(@intCast(u32, sym_id))) |local_id| { + // log.warn(" | {d} => {d}", .{ sym_id, local_id }); + // } else { + // log.warn(" | {d} no local mapping for {s}", .{ sym_id, object.getString(sym.n_strx) }); + // } + // } + // } + + // var it = self.blocks.iterator(); + // while (it.next()) |entry| { + // const seg = self.load_commands.items[entry.key_ptr.seg].Segment; + // const sect = seg.sections.items[entry.key_ptr.sect]; + + // log.warn("\n\n{s},{s} contents:", .{ segmentName(sect), sectionName(sect) }); + // log.warn(" {}", .{sect}); + // entry.value_ptr.*.print(self); + // } + + try self.flushZld(); } if (!self.base.options.disable_lld_caching) { @@ -956,6 +986,2020 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { } } +fn parseInputFiles(self: *MachO, files: []const []const u8, syslibroot: ?[]const u8) !void { + const arch = self.base.options.target.cpu.arch; + for (files) |file_name| { + const full_path = full_path: { + var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; + const path = try std.fs.realpath(file_name, &buffer); + break :full_path try self.base.allocator.dupe(u8, path); + }; + + if (try Object.createAndParseFromPath(self.base.allocator, arch, full_path)) |object| { + try self.objects.append(self.base.allocator, object); + continue; + } + + if (try Archive.createAndParseFromPath(self.base.allocator, arch, full_path)) |archive| { + try self.archives.append(self.base.allocator, archive); + continue; + } + + if (try Dylib.createAndParseFromPath(self.base.allocator, arch, full_path, .{ + .syslibroot = syslibroot, + })) |dylibs| { + defer self.base.allocator.free(dylibs); + try 
self.dylibs.appendSlice(self.base.allocator, dylibs); + continue; + } + + log.warn("unknown filetype for positional input file: '{s}'", .{file_name}); + } +} + +fn parseLibs(self: *MachO, libs: []const []const u8, syslibroot: ?[]const u8) !void { + const arch = self.base.options.target.cpu.arch; + for (libs) |lib| { + if (try Dylib.createAndParseFromPath(self.base.allocator, arch, lib, .{ + .syslibroot = syslibroot, + })) |dylibs| { + defer self.base.allocator.free(dylibs); + try self.dylibs.appendSlice(self.base.allocator, dylibs); + continue; + } + + if (try Archive.createAndParseFromPath(self.base.allocator, arch, lib)) |archive| { + try self.archives.append(self.base.allocator, archive); + continue; + } + + log.warn("unknown filetype for a library: '{s}'", .{lib}); + } +} + +pub const MatchingSection = struct { + seg: u16, + sect: u16, +}; + +pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSection { + const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const segname = commands.segmentName(sect); + const sectname = commands.sectionName(sect); + + const res: ?MatchingSection = blk: { + switch (commands.sectionType(sect)) { + macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => { + if (self.text_const_section_index == null) { + self.text_const_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__const", .{}); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_const_section_index.?, + }; + }, + macho.S_CSTRING_LITERALS => { + if (mem.eql(u8, sectname, "__objc_methname")) { + // TODO it seems the common values within the sections in objects are deduplicated/merged + // on merging the sections' contents. 
+ if (self.objc_methname_section_index == null) { + self.objc_methname_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__objc_methname", .{ + .flags = macho.S_CSTRING_LITERALS, + }); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.objc_methname_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__objc_methtype")) { + if (self.objc_methtype_section_index == null) { + self.objc_methtype_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__objc_methtype", .{ + .flags = macho.S_CSTRING_LITERALS, + }); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.objc_methtype_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__objc_classname")) { + if (self.objc_classname_section_index == null) { + self.objc_classname_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__objc_classname", .{}); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.objc_classname_section_index.?, + }; + } + + if (self.cstring_section_index == null) { + self.cstring_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__cstring", .{ + .flags = macho.S_CSTRING_LITERALS, + }); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.cstring_section_index.?, + }; + }, + macho.S_LITERAL_POINTERS => { + if (mem.eql(u8, segname, "__DATA") and mem.eql(u8, sectname, "__objc_selrefs")) { + if (self.objc_selrefs_section_index == null) { + self.objc_selrefs_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__objc_selrefs", .{ + .flags = macho.S_LITERAL_POINTERS, + }); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.objc_selrefs_section_index.?, + }; + } + + // TODO investigate + break :blk 
null; + }, + macho.S_MOD_INIT_FUNC_POINTERS => { + if (self.mod_init_func_section_index == null) { + self.mod_init_func_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__mod_init_func", .{ + .flags = macho.S_MOD_INIT_FUNC_POINTERS, + }); + } + + break :blk .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.mod_init_func_section_index.?, + }; + }, + macho.S_MOD_TERM_FUNC_POINTERS => { + if (self.mod_term_func_section_index == null) { + self.mod_term_func_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__mod_term_func", .{ + .flags = macho.S_MOD_TERM_FUNC_POINTERS, + }); + } + + break :blk .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.mod_term_func_section_index.?, + }; + }, + macho.S_ZEROFILL => { + if (mem.eql(u8, sectname, "__common")) { + if (self.common_section_index == null) { + self.common_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__common", .{ + .flags = macho.S_ZEROFILL, + }); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.common_section_index.?, + }; + } else { + if (self.bss_section_index == null) { + self.bss_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__bss", .{ + .flags = macho.S_ZEROFILL, + }); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.bss_section_index.?, + }; + } + }, + macho.S_THREAD_LOCAL_VARIABLES => { + if (self.tlv_section_index == null) { + self.tlv_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__thread_vars", .{ + .flags = macho.S_THREAD_LOCAL_VARIABLES, + }); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.tlv_section_index.?, + }; + }, + macho.S_THREAD_LOCAL_REGULAR => { + if 
(self.tlv_data_section_index == null) { + self.tlv_data_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__thread_data", .{ + .flags = macho.S_THREAD_LOCAL_REGULAR, + }); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.tlv_data_section_index.?, + }; + }, + macho.S_THREAD_LOCAL_ZEROFILL => { + if (self.tlv_bss_section_index == null) { + self.tlv_bss_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__thread_bss", .{ + .flags = macho.S_THREAD_LOCAL_ZEROFILL, + }); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.tlv_bss_section_index.?, + }; + }, + macho.S_COALESCED => { + if (mem.eql(u8, "__TEXT", segname) and mem.eql(u8, "__eh_frame", sectname)) { + // TODO I believe __eh_frame is currently part of __unwind_info section + // in the latest ld64 output. + if (self.eh_frame_section_index == null) { + self.eh_frame_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__eh_frame", .{}); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.eh_frame_section_index.?, + }; + } + + // TODO audit this: is this the right mapping? 
+ if (self.data_const_section_index == null) { + self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__const", .{}); + } + + break :blk .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.data_const_section_index.?, + }; + }, + macho.S_REGULAR => { + if (commands.sectionIsCode(sect)) { + if (self.text_section_index == null) { + self.text_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__text", .{ + .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + }); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }; + } + if (commands.sectionIsDebug(sect)) { + // TODO debug attributes + if (mem.eql(u8, "__LD", segname) and mem.eql(u8, "__compact_unwind", sectname)) { + log.debug("TODO compact unwind section: type 0x{x}, name '{s},{s}'", .{ + sect.flags, segname, sectname, + }); + } + break :blk null; + } + + if (mem.eql(u8, segname, "__TEXT")) { + if (mem.eql(u8, sectname, "__ustring")) { + if (self.ustring_section_index == null) { + self.ustring_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__ustring", .{}); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.ustring_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__gcc_except_tab")) { + if (self.gcc_except_tab_section_index == null) { + self.gcc_except_tab_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__gcc_except_tab", .{}); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.gcc_except_tab_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__objc_methlist")) { + if (self.objc_methlist_section_index == null) { + self.objc_methlist_section_index = @intCast(u16, text_seg.sections.items.len); + 
try text_seg.addSection(self.base.allocator, "__objc_methlist", .{}); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.objc_methlist_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__rodata") or + mem.eql(u8, sectname, "__typelink") or + mem.eql(u8, sectname, "__itablink") or + mem.eql(u8, sectname, "__gosymtab") or + mem.eql(u8, sectname, "__gopclntab")) + { + if (self.data_const_section_index == null) { + self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__const", .{}); + } + + break :blk .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.data_const_section_index.?, + }; + } else { + if (self.text_const_section_index == null) { + self.text_const_section_index = @intCast(u16, text_seg.sections.items.len); + try text_seg.addSection(self.base.allocator, "__const", .{}); + } + + break :blk .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_const_section_index.?, + }; + } + } + + if (mem.eql(u8, segname, "__DATA_CONST")) { + if (self.data_const_section_index == null) { + self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__const", .{}); + } + + break :blk .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.data_const_section_index.?, + }; + } + + if (mem.eql(u8, segname, "__DATA")) { + if (mem.eql(u8, sectname, "__const")) { + if (self.data_const_section_index == null) { + self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__const", .{}); + } + + break :blk .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.data_const_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__cfstring")) { + if (self.objc_cfstring_section_index == null) { + self.objc_cfstring_section_index = @intCast(u16, data_const_seg.sections.items.len); + try 
data_const_seg.addSection(self.base.allocator, "__cfstring", .{}); + } + + break :blk .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.objc_cfstring_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__objc_classlist")) { + if (self.objc_classlist_section_index == null) { + self.objc_classlist_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__objc_classlist", .{}); + } + + break :blk .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.objc_classlist_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__objc_imageinfo")) { + if (self.objc_imageinfo_section_index == null) { + self.objc_imageinfo_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__objc_imageinfo", .{}); + } + + break :blk .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.objc_imageinfo_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__objc_const")) { + if (self.objc_const_section_index == null) { + self.objc_const_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__objc_const", .{}); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.objc_const_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__objc_classrefs")) { + if (self.objc_classrefs_section_index == null) { + self.objc_classrefs_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__objc_classrefs", .{}); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.objc_classrefs_section_index.?, + }; + } else if (mem.eql(u8, sectname, "__objc_data")) { + if (self.objc_data_section_index == null) { + self.objc_data_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__objc_data", .{}); + } + + break :blk .{ + .seg = 
self.data_segment_cmd_index.?, + .sect = self.objc_data_section_index.?, + }; + } else { + if (self.data_section_index == null) { + self.data_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__data", .{}); + } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.data_section_index.?, + }; + } + } + + if (mem.eql(u8, "__LLVM", segname) and mem.eql(u8, "__asm", sectname)) { + log.debug("TODO LLVM asm section: type 0x{x}, name '{s},{s}'", .{ + sect.flags, segname, sectname, + }); + } + + break :blk null; + }, + else => break :blk null, + } + }; + + return res; +} + +fn sortSections(self: *MachO) !void { + var text_index_mapping = std.AutoHashMap(u16, u16).init(self.base.allocator); + defer text_index_mapping.deinit(); + var data_const_index_mapping = std.AutoHashMap(u16, u16).init(self.base.allocator); + defer data_const_index_mapping.deinit(); + var data_index_mapping = std.AutoHashMap(u16, u16).init(self.base.allocator); + defer data_index_mapping.deinit(); + + { + // __TEXT segment + const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + var sections = seg.sections.toOwnedSlice(self.base.allocator); + defer self.base.allocator.free(sections); + try seg.sections.ensureCapacity(self.base.allocator, sections.len); + + const indices = &[_]*?u16{ + &self.text_section_index, + &self.stubs_section_index, + &self.stub_helper_section_index, + &self.gcc_except_tab_section_index, + &self.cstring_section_index, + &self.ustring_section_index, + &self.text_const_section_index, + &self.objc_methname_section_index, + &self.objc_methtype_section_index, + &self.objc_classname_section_index, + &self.eh_frame_section_index, + }; + for (indices) |maybe_index| { + const new_index: u16 = if (maybe_index.*) |index| blk: { + const idx = @intCast(u16, seg.sections.items.len); + seg.sections.appendAssumeCapacity(sections[index]); + try text_index_mapping.putNoClobber(index, idx); + break 
:blk idx; + } else continue; + maybe_index.* = new_index; + } + } + + { + // __DATA_CONST segment + const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + var sections = seg.sections.toOwnedSlice(self.base.allocator); + defer self.base.allocator.free(sections); + try seg.sections.ensureCapacity(self.base.allocator, sections.len); + + const indices = &[_]*?u16{ + &self.got_section_index, + &self.mod_init_func_section_index, + &self.mod_term_func_section_index, + &self.data_const_section_index, + &self.objc_cfstring_section_index, + &self.objc_classlist_section_index, + &self.objc_imageinfo_section_index, + }; + for (indices) |maybe_index| { + const new_index: u16 = if (maybe_index.*) |index| blk: { + const idx = @intCast(u16, seg.sections.items.len); + seg.sections.appendAssumeCapacity(sections[index]); + try data_const_index_mapping.putNoClobber(index, idx); + break :blk idx; + } else continue; + maybe_index.* = new_index; + } + } + + { + // __DATA segment + const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + var sections = seg.sections.toOwnedSlice(self.base.allocator); + defer self.base.allocator.free(sections); + try seg.sections.ensureCapacity(self.base.allocator, sections.len); + + // __DATA segment + const indices = &[_]*?u16{ + &self.la_symbol_ptr_section_index, + &self.objc_const_section_index, + &self.objc_selrefs_section_index, + &self.objc_classrefs_section_index, + &self.objc_data_section_index, + &self.data_section_index, + &self.tlv_section_index, + &self.tlv_data_section_index, + &self.tlv_bss_section_index, + &self.bss_section_index, + &self.common_section_index, + }; + for (indices) |maybe_index| { + const new_index: u16 = if (maybe_index.*) |index| blk: { + const idx = @intCast(u16, seg.sections.items.len); + seg.sections.appendAssumeCapacity(sections[index]); + try data_index_mapping.putNoClobber(index, idx); + break :blk idx; + } else continue; + maybe_index.* = new_index; + } + } + + { 
+ var transient: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}; + try transient.ensureCapacity(self.base.allocator, self.blocks.count()); + + var it = self.blocks.iterator(); + while (it.next()) |entry| { + const old = entry.key_ptr.*; + const sect = if (old.seg == self.text_segment_cmd_index.?) + text_index_mapping.get(old.sect).? + else if (old.seg == self.data_const_segment_cmd_index.?) + data_const_index_mapping.get(old.sect).? + else + data_index_mapping.get(old.sect).?; + transient.putAssumeCapacityNoClobber(.{ + .seg = old.seg, + .sect = sect, + }, entry.value_ptr.*); + } + + self.blocks.clearAndFree(self.base.allocator); + self.blocks.deinit(self.base.allocator); + self.blocks = transient; + } +} + +fn allocateTextSegment(self: *MachO) !void { + const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const nstubs = @intCast(u32, self.stubs.items.len); + + const base_vmaddr = self.load_commands.items[self.pagezero_segment_cmd_index.?].Segment.inner.vmsize; + seg.inner.fileoff = 0; + seg.inner.vmaddr = base_vmaddr; + + // Set stubs and stub_helper sizes + const stubs = &seg.sections.items[self.stubs_section_index.?]; + const stub_helper = &seg.sections.items[self.stub_helper_section_index.?]; + stubs.size += nstubs * stubs.reserved2; + + const stub_size: u4 = switch (self.base.options.target.cpu.arch) { + .x86_64 => 10, + .aarch64 => 3 * @sizeOf(u32), + else => unreachable, + }; + stub_helper.size += nstubs * stub_size; + + var sizeofcmds: u64 = 0; + for (self.load_commands.items) |lc| { + sizeofcmds += lc.cmdsize(); + } + + try self.allocateSegment(self.text_segment_cmd_index.?, @sizeOf(macho.mach_header_64) + sizeofcmds); + + // Shift all sections to the back to minimize jump size between __TEXT and __DATA segments. 
+ var min_alignment: u32 = 0; + for (seg.sections.items) |sect| { + const alignment = try math.powi(u32, 2, sect.@"align"); + min_alignment = math.max(min_alignment, alignment); + } + + assert(min_alignment > 0); + const last_sect_idx = seg.sections.items.len - 1; + const last_sect = seg.sections.items[last_sect_idx]; + const shift: u32 = blk: { + const diff = seg.inner.filesize - last_sect.offset - last_sect.size; + const factor = @divTrunc(diff, min_alignment); + break :blk @intCast(u32, factor * min_alignment); + }; + + if (shift > 0) { + for (seg.sections.items) |*sect| { + sect.offset += shift; + sect.addr += shift; + } + } +} + +fn allocateDataConstSegment(self: *MachO) !void { + const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const nentries = @intCast(u32, self.got_entries.items.len); + + const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + seg.inner.fileoff = text_seg.inner.fileoff + text_seg.inner.filesize; + seg.inner.vmaddr = text_seg.inner.vmaddr + text_seg.inner.vmsize; + + // Set got size + const got = &seg.sections.items[self.got_section_index.?]; + got.size += nentries * @sizeOf(u64); + + try self.allocateSegment(self.data_const_segment_cmd_index.?, 0); +} + +fn allocateDataSegment(self: *MachO) !void { + const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const nstubs = @intCast(u32, self.stubs.items.len); + + const data_const_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + seg.inner.fileoff = data_const_seg.inner.fileoff + data_const_seg.inner.filesize; + seg.inner.vmaddr = data_const_seg.inner.vmaddr + data_const_seg.inner.vmsize; + + // Set la_symbol_ptr and data size + const la_symbol_ptr = &seg.sections.items[self.la_symbol_ptr_section_index.?]; + const data = &seg.sections.items[self.data_section_index.?]; + la_symbol_ptr.size += nstubs * @sizeOf(u64); + data.size += @sizeOf(u64); // We need at least 8bytes for 
address of dyld_stub_binder + + try self.allocateSegment(self.data_segment_cmd_index.?, 0); +} + +fn allocateLinkeditSegment(self: *MachO) void { + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const data_seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; + seg.inner.fileoff = data_seg.inner.fileoff + data_seg.inner.filesize; + seg.inner.vmaddr = data_seg.inner.vmaddr + data_seg.inner.vmsize; +} + +fn allocateSegment(self: *MachO, index: u16, offset: u64) !void { + const seg = &self.load_commands.items[index].Segment; + + // Allocate the sections according to their alignment at the beginning of the segment. + var start: u64 = offset; + for (seg.sections.items) |*sect| { + const alignment = try math.powi(u32, 2, sect.@"align"); + const start_aligned = mem.alignForwardGeneric(u64, start, alignment); + const end_aligned = mem.alignForwardGeneric(u64, start_aligned + sect.size, alignment); + sect.offset = @intCast(u32, seg.inner.fileoff + start_aligned); + sect.addr = seg.inner.vmaddr + start_aligned; + start = end_aligned; + } + + const seg_size_aligned = mem.alignForwardGeneric(u64, start, self.page_size); + seg.inner.filesize = seg_size_aligned; + seg.inner.vmsize = seg_size_aligned; +} + +fn allocateTextBlocks(self: *MachO) !void { + var it = self.blocks.iterator(); + while (it.next()) |entry| { + const match = entry.key_ptr.*; + var block: *TextBlock = entry.value_ptr.*; + + // Find the first block + while (block.prev) |prev| { + block = prev; + } + + const seg = self.load_commands.items[match.seg].Segment; + const sect = seg.sections.items[match.sect]; + + var base_addr: u64 = sect.addr; + const n_sect = self.sectionId(match); + + log.debug(" within section {s},{s}", .{ commands.segmentName(sect), commands.sectionName(sect) }); + log.debug(" {}", .{sect}); + + while (true) { + const block_alignment = try math.powi(u32, 2, block.alignment); + base_addr = mem.alignForwardGeneric(u64, base_addr, 
block_alignment); + + const sym = &self.locals.items[block.local_sym_index]; + sym.n_value = base_addr; + sym.n_sect = n_sect; + + log.debug(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ + self.getString(sym.n_strx), + base_addr, + base_addr + block.size, + block.size, + block.alignment, + }); + + // Update each alias (if any) + for (block.aliases.items) |index| { + const alias_sym = &self.locals.items[index]; + alias_sym.n_value = base_addr; + alias_sym.n_sect = n_sect; + } + + // Update each symbol contained within the TextBlock + for (block.contained.items) |sym_at_off| { + const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; + contained_sym.n_value = base_addr + sym_at_off.offset; + contained_sym.n_sect = n_sect; + } + + base_addr += block.size; + + if (block.next) |next| { + block = next; + } else break; + } + } + + // Update globals + for (self.symbol_resolver.values()) |resolv| { + if (resolv.where != .global) continue; + + assert(resolv.local_sym_index != 0); + const local_sym = self.locals.items[resolv.local_sym_index]; + const sym = &self.globals.items[resolv.where_index]; + sym.n_value = local_sym.n_value; + sym.n_sect = local_sym.n_sect; + } +} + +fn writeTextBlocks(self: *MachO) !void { + var it = self.blocks.iterator(); + while (it.next()) |entry| { + const match = entry.key_ptr.*; + var block: *TextBlock = entry.value_ptr.*; + + while (block.prev) |prev| { + block = prev; + } + + const seg = self.load_commands.items[match.seg].Segment; + const sect = seg.sections.items[match.sect]; + const sect_type = commands.sectionType(sect); + + log.debug(" for section {s},{s}", .{ commands.segmentName(sect), commands.sectionName(sect) }); + log.debug(" {}", .{sect}); + + var code = try self.base.allocator.alloc(u8, sect.size); + defer self.base.allocator.free(code); + + if (sect_type == macho.S_ZEROFILL or sect_type == macho.S_THREAD_LOCAL_ZEROFILL) { + mem.set(u8, code, 0); + } else { + var base_off: u64 = 0; + + while (true) { + const 
block_alignment = try math.powi(u32, 2, block.alignment); + const aligned_base_off = mem.alignForwardGeneric(u64, base_off, block_alignment); + + const sym = self.locals.items[block.local_sym_index]; + log.debug(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ + self.getString(sym.n_strx), + aligned_base_off, + aligned_base_off + block.size, + block.size, + block.alignment, + }); + + try block.resolveRelocs(self); + mem.copy(u8, code[aligned_base_off..][0..block.size], block.code); + + // TODO NOP for machine code instead of just zeroing out + const padding_len = aligned_base_off - base_off; + mem.set(u8, code[base_off..][0..padding_len], 0); + + base_off = aligned_base_off + block.size; + + if (block.next) |next| { + block = next; + } else break; + } + + mem.set(u8, code[base_off..], 0); + } + + try self.base.file.?.pwriteAll(code, sect.offset); + } +} + +fn writeStubHelperCommon(self: *MachO) !void { + const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const stub_helper = &text_segment.sections.items[self.stub_helper_section_index.?]; + const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const got = &data_const_segment.sections.items[self.got_section_index.?]; + const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const data = &data_segment.sections.items[self.data_section_index.?]; + + self.stub_helper_stubs_start_off = blk: { + switch (self.base.options.target.cpu.arch) { + .x86_64 => { + const code_size = 15; + var code: [code_size]u8 = undefined; + // lea %r11, [rip + disp] + code[0] = 0x4c; + code[1] = 0x8d; + code[2] = 0x1d; + { + const target_addr = data.addr + data.size - @sizeOf(u64); + const displacement = try math.cast(u32, target_addr - stub_helper.addr - 7); + mem.writeIntLittle(u32, code[3..7], displacement); + } + // push %r11 + code[7] = 0x41; + code[8] = 0x53; + // jmp [rip + disp] + code[9] = 0xff; + code[10] = 0x25; + 
{ + const resolv = self.symbol_resolver.get("dyld_stub_binder") orelse unreachable; + const got_index = self.got_entries_map.get(.{ + .where = .import, + .where_index = resolv.where_index, + }) orelse unreachable; + const addr = got.addr + got_index * @sizeOf(u64); + const displacement = try math.cast(u32, addr - stub_helper.addr - code_size); + mem.writeIntLittle(u32, code[11..], displacement); + } + try self.base.file.?.pwriteAll(&code, stub_helper.offset); + break :blk stub_helper.offset + code_size; + }, + .aarch64 => { + var code: [6 * @sizeOf(u32)]u8 = undefined; + data_blk_outer: { + const this_addr = stub_helper.addr; + const target_addr = data.addr + data.size - @sizeOf(u64); + data_blk: { + const displacement = math.cast(i21, target_addr - this_addr) catch break :data_blk; + // adr x17, disp + mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adr(.x17, displacement).toU32()); + // nop + mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32()); + break :data_blk_outer; + } + data_blk: { + const new_this_addr = this_addr + @sizeOf(u32); + const displacement = math.cast(i21, target_addr - new_this_addr) catch break :data_blk; + // nop + mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32()); + // adr x17, disp + mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.adr(.x17, displacement).toU32()); + break :data_blk_outer; + } + // Jump is too big, replace adr with adrp and add. + const this_page = @intCast(i32, this_addr >> 12); + const target_page = @intCast(i32, target_addr >> 12); + const pages = @intCast(i21, target_page - this_page); + mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x17, pages).toU32()); + const narrowed = @truncate(u12, target_addr); + mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.add(.x17, .x17, narrowed, false).toU32()); + } + // stp x16, x17, [sp, #-16]! 
+ code[8] = 0xf0; + code[9] = 0x47; + code[10] = 0xbf; + code[11] = 0xa9; + binder_blk_outer: { + const resolv = self.symbol_resolver.get("dyld_stub_binder") orelse unreachable; + const got_index = self.got_entries_map.get(.{ + .where = .import, + .where_index = resolv.where_index, + }) orelse unreachable; + const this_addr = stub_helper.addr + 3 * @sizeOf(u32); + const target_addr = got.addr + got_index * @sizeOf(u64); + binder_blk: { + const displacement = math.divExact(u64, target_addr - this_addr, 4) catch break :binder_blk; + const literal = math.cast(u18, displacement) catch break :binder_blk; + // ldr x16, label + mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.ldr(.x16, .{ + .literal = literal, + }).toU32()); + // nop + mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.nop().toU32()); + break :binder_blk_outer; + } + binder_blk: { + const new_this_addr = this_addr + @sizeOf(u32); + const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch break :binder_blk; + const literal = math.cast(u18, displacement) catch break :binder_blk; + // Pad with nop to please division. + // nop + mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.nop().toU32()); + // ldr x16, label + mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ + .literal = literal, + }).toU32()); + break :binder_blk_outer; + } + // Use adrp followed by ldr(immediate). 
+ const this_page = @intCast(i32, this_addr >> 12); + const target_page = @intCast(i32, target_addr >> 12); + const pages = @intCast(i21, target_page - this_page); + mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.adrp(.x16, pages).toU32()); + const narrowed = @truncate(u12, target_addr); + const offset = try math.divExact(u12, narrowed, 8); + mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ + .register = .{ + .rn = .x16, + .offset = aarch64.Instruction.LoadStoreOffset.imm(offset), + }, + }).toU32()); + } + // br x16 + code[20] = 0x00; + code[21] = 0x02; + code[22] = 0x1f; + code[23] = 0xd6; + try self.base.file.?.pwriteAll(&code, stub_helper.offset); + break :blk stub_helper.offset + 6 * @sizeOf(u32); + }, + else => unreachable, + } + }; + + for (self.stubs.items) |_, i| { + const index = @intCast(u32, i); + // TODO weak bound pointers + try self.writeLazySymbolPointer(index); + try self.writeStub(index); + try self.writeStubInStubHelper(index); + } +} + +fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void { + const object = self.objects.items[object_id]; + + log.debug("resolving symbols in '{s}'", .{object.name}); + + for (object.symtab.items) |sym, id| { + const sym_id = @intCast(u32, id); + const sym_name = object.getString(sym.n_strx); + + if (symbolIsStab(sym)) { + log.err("unhandled symbol type: stab", .{}); + log.err(" symbol '{s}'", .{sym_name}); + log.err(" first definition in '{s}'", .{object.name.?}); + return error.UnhandledSymbolType; + } + + if (symbolIsIndr(sym)) { + log.err("unhandled symbol type: indirect", .{}); + log.err(" symbol '{s}'", .{sym_name}); + log.err(" first definition in '{s}'", .{object.name.?}); + return error.UnhandledSymbolType; + } + + if (symbolIsAbs(sym)) { + log.err("unhandled symbol type: absolute", .{}); + log.err(" symbol '{s}'", .{sym_name}); + log.err(" first definition in '{s}'", .{object.name.?}); + return error.UnhandledSymbolType; + } + + if (symbolIsSect(sym)) { + // Defined 
symbol regardless of scope lands in the locals symbol table. + const n_strx = blk: { + if (self.symbol_resolver.get(sym_name)) |resolv| { + switch (resolv.where) { + .global => break :blk self.globals.items[resolv.where_index].n_strx, + .tentative => break :blk self.tentatives.items[resolv.where_index].n_strx, + .undef => break :blk self.undefs.items[resolv.where_index].n_strx, + .import => unreachable, + } + } + break :blk try self.makeString(sym_name); + }; + const local_sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(self.base.allocator, .{ + .n_strx = n_strx, + .n_type = macho.N_SECT, + .n_sect = 0, + .n_desc = 0, + .n_value = sym.n_value, + }); + try object.symbol_mapping.putNoClobber(self.base.allocator, sym_id, local_sym_index); + + // If the symbol's scope is not local aka translation unit, then we need work out + // if we should save the symbol as a global, or potentially flag the error. + if (!symbolIsExt(sym)) continue; + + const local = self.locals.items[local_sym_index]; + const resolv = self.symbol_resolver.getPtr(sym_name) orelse { + const global_sym_index = @intCast(u32, self.globals.items.len); + try self.globals.append(self.base.allocator, .{ + .n_strx = n_strx, + .n_type = sym.n_type, + .n_sect = 0, + .n_desc = sym.n_desc, + .n_value = sym.n_value, + }); + try self.symbol_resolver.putNoClobber(self.base.allocator, try self.base.allocator.dupe(u8, sym_name), .{ + .where = .global, + .where_index = global_sym_index, + .local_sym_index = local_sym_index, + .file = object_id, + }); + continue; + }; + + switch (resolv.where) { + .import => unreachable, + .global => { + const global = &self.globals.items[resolv.where_index]; + + if (!(symbolIsWeakDef(sym) or symbolIsPext(sym)) and + !(symbolIsWeakDef(global.*) or symbolIsPext(global.*))) + { + log.err("symbol '{s}' defined multiple times", .{sym_name}); + log.err(" first definition in '{s}'", .{self.objects.items[resolv.file].name.?}); + log.err(" next definition in '{s}'", 
.{object.name.?}); + return error.MultipleSymbolDefinitions; + } + + if (symbolIsWeakDef(sym) or symbolIsPext(sym)) continue; // Current symbol is weak, so skip it. + + // Otherwise, update the resolver and the global symbol. + global.n_type = sym.n_type; + resolv.local_sym_index = local_sym_index; + resolv.file = object_id; + + continue; + }, + .undef => { + const undef = &self.undefs.items[resolv.where_index]; + undef.* = .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + }, + .tentative => { + const tentative = &self.tentatives.items[resolv.where_index]; + tentative.* = .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + }, + } + + const global_sym_index = @intCast(u32, self.globals.items.len); + try self.globals.append(self.base.allocator, .{ + .n_strx = local.n_strx, + .n_type = sym.n_type, + .n_sect = 0, + .n_desc = sym.n_desc, + .n_value = sym.n_value, + }); + resolv.* = .{ + .where = .global, + .where_index = global_sym_index, + .local_sym_index = local_sym_index, + .file = object_id, + }; + } else if (symbolIsTentative(sym)) { + // Symbol is a tentative definition. 
+ const resolv = self.symbol_resolver.getPtr(sym_name) orelse { + const tent_sym_index = @intCast(u32, self.tentatives.items.len); + try self.tentatives.append(self.base.allocator, .{ + .n_strx = try self.makeString(sym_name), + .n_type = sym.n_type, + .n_sect = 0, + .n_desc = sym.n_desc, + .n_value = sym.n_value, + }); + try self.symbol_resolver.putNoClobber(self.base.allocator, try self.base.allocator.dupe(u8, sym_name), .{ + .where = .tentative, + .where_index = tent_sym_index, + .file = object_id, + }); + continue; + }; + + switch (resolv.where) { + .import => unreachable, + .global => {}, + .undef => { + const undef = &self.undefs.items[resolv.where_index]; + const tent_sym_index = @intCast(u32, self.tentatives.items.len); + try self.tentatives.append(self.base.allocator, .{ + .n_strx = undef.n_strx, + .n_type = sym.n_type, + .n_sect = 0, + .n_desc = sym.n_desc, + .n_value = sym.n_value, + }); + resolv.* = .{ + .where = .tentative, + .where_index = tent_sym_index, + .file = object_id, + }; + undef.* = .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + }, + .tentative => { + const tentative = &self.tentatives.items[resolv.where_index]; + if (tentative.n_value >= sym.n_value) continue; + + tentative.n_desc = sym.n_desc; + tentative.n_value = sym.n_value; + resolv.file = object_id; + }, + } + } else { + // Symbol is undefined. 
+ if (self.symbol_resolver.contains(sym_name)) continue; + + const undef_sym_index = @intCast(u32, self.undefs.items.len); + try self.undefs.append(self.base.allocator, .{ + .n_strx = try self.makeString(sym_name), + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + try self.symbol_resolver.putNoClobber(self.base.allocator, try self.base.allocator.dupe(u8, sym_name), .{ + .where = .undef, + .where_index = undef_sym_index, + .file = object_id, + }); + } + } +} + +fn resolveSymbols(self: *MachO) !void { + // TODO mimicking insertion of null symbol from incremental linker. + // This will need to moved. + try self.locals.append(self.base.allocator, .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + try self.strtab.append(self.base.allocator, 0); + + // First pass, resolve symbols in provided objects. + for (self.objects.items) |_, object_id| { + try self.resolveSymbolsInObject(@intCast(u16, object_id)); + } + + // Second pass, resolve symbols in static libraries. + var next_sym: usize = 0; + loop: while (true) : (next_sym += 1) { + if (next_sym == self.undefs.items.len) break; + + const sym = self.undefs.items[next_sym]; + if (symbolIsNull(sym)) continue; + + const sym_name = self.getString(sym.n_strx); + + for (self.archives.items) |archive| { + // Check if the entry exists in a static archive. + const offsets = archive.toc.get(sym_name) orelse { + // No hit. + continue; + }; + assert(offsets.items.len > 0); + + const object = try archive.parseObject(offsets.items[0]); + const object_id = @intCast(u16, self.objects.items.len); + try self.objects.append(self.base.allocator, object); + try self.resolveSymbolsInObject(object_id); + + continue :loop; + } + } + + // Convert any tentative definition into a regular symbol and allocate + // text blocks for each tentative defintion. 
+ for (self.tentatives.items) |sym| { + if (symbolIsNull(sym)) continue; + + const sym_name = self.getString(sym.n_strx); + const match: MatchingSection = blk: { + if (self.common_section_index == null) { + const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + self.common_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__common", .{ + .flags = macho.S_ZEROFILL, + }); + } + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.common_section_index.?, + }; + }; + + const size = sym.n_value; + const code = try self.base.allocator.alloc(u8, size); + mem.set(u8, code, 0); + const alignment = (sym.n_desc >> 8) & 0x0f; + + const resolv = self.symbol_resolver.getPtr(sym_name) orelse unreachable; + const local_sym_index = @intCast(u32, self.locals.items.len); + var nlist = macho.nlist_64{ + .n_strx = sym.n_strx, + .n_type = macho.N_SECT, + .n_sect = self.sectionId(match), + .n_desc = 0, + .n_value = 0, + }; + try self.locals.append(self.base.allocator, nlist); + const global_sym_index = @intCast(u32, self.globals.items.len); + nlist.n_type |= macho.N_EXT; + try self.globals.append(self.base.allocator, nlist); + resolv.* = .{ + .where = .global, + .where_index = global_sym_index, + .local_sym_index = local_sym_index, + }; + + const block = try self.base.allocator.create(TextBlock); + errdefer self.base.allocator.destroy(block); + + block.* = TextBlock.empty; + block.local_sym_index = local_sym_index; + block.code = code; + block.size = size; + block.alignment = alignment; + + // Update target section's metadata + // TODO should we update segment's size here too? + // How does it tie with incremental space allocs? 
+ const tseg = &self.load_commands.items[match.seg].Segment; + const tsect = &tseg.sections.items[match.sect]; + const new_alignment = math.max(tsect.@"align", block.alignment); + const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); + const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; + tsect.size = new_size; + tsect.@"align" = new_alignment; + + if (self.blocks.getPtr(match)) |last| { + last.*.next = block; + block.prev = last.*; + last.* = block; + } else { + try self.blocks.putNoClobber(self.base.allocator, match, block); + } + } + + // Third pass, resolve symbols in dynamic libraries. + { + // Put dyld_stub_binder as an undefined special symbol. + const undef_sym_index = @intCast(u32, self.undefs.items.len); + try self.undefs.append(self.base.allocator, .{ + .n_strx = try self.makeString("dyld_stub_binder"), + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + try self.symbol_resolver.putNoClobber(self.base.allocator, try self.base.allocator.dupe(u8, "dyld_stub_binder"), .{ + .where = .undef, + .where_index = undef_sym_index, + }); + } + + var referenced = std.AutoHashMap(*Dylib, void).init(self.base.allocator); + defer referenced.deinit(); + + loop: for (self.undefs.items) |sym| { + if (symbolIsNull(sym)) continue; + + const sym_name = self.getString(sym.n_strx); + for (self.dylibs.items) |dylib| { + if (!dylib.symbols.contains(sym_name)) continue; + + if (!referenced.contains(dylib)) { + // Add LC_LOAD_DYLIB load command for each referenced dylib/stub. 
+ dylib.ordinal = self.next_dylib_ordinal; + const dylib_id = dylib.id orelse unreachable; + var dylib_cmd = try commands.createLoadDylibCommand( + self.base.allocator, + dylib_id.name, + dylib_id.timestamp, + dylib_id.current_version, + dylib_id.compatibility_version, + ); + errdefer dylib_cmd.deinit(self.base.allocator); + try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd }); + self.next_dylib_ordinal += 1; + try referenced.putNoClobber(dylib, {}); + } + + const resolv = self.symbol_resolver.getPtr(sym_name) orelse unreachable; + const undef = &self.undefs.items[resolv.where_index]; + const import_sym_index = @intCast(u32, self.imports.items.len); + try self.imports.append(self.base.allocator, .{ + .n_strx = undef.n_strx, + .n_type = macho.N_UNDF | macho.N_EXT, + .n_sect = 0, + .n_desc = packDylibOrdinal(dylib.ordinal.?), + .n_value = 0, + }); + resolv.* = .{ + .where = .import, + .where_index = import_sym_index, + }; + undef.* = .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + + continue :loop; + } + } + + // Fourth pass, handle synthetic symbols and flag any undefined references. 
+ if (self.symbol_resolver.getPtr("___dso_handle")) |resolv| blk: { + if (resolv.where != .undef) break :blk; + + const undef = &self.undefs.items[resolv.where_index]; + const match: MatchingSection = .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }; + const local_sym_index = @intCast(u32, self.locals.items.len); + var nlist = macho.nlist_64{ + .n_strx = undef.n_strx, + .n_type = macho.N_SECT, + .n_sect = self.sectionId(match), + .n_desc = 0, + .n_value = 0, + }; + try self.locals.append(self.base.allocator, nlist); + const global_sym_index = @intCast(u32, self.globals.items.len); + nlist.n_type |= macho.N_EXT; + nlist.n_desc = macho.N_WEAK_DEF; + try self.globals.append(self.base.allocator, nlist); + + undef.* = .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + resolv.* = .{ + .where = .global, + .where_index = global_sym_index, + .local_sym_index = local_sym_index, + }; + + // We create an empty atom for this symbol. + // TODO perhaps we should special-case special symbols? Create a separate + // linked list of atoms? 
+ const block = try self.base.allocator.create(TextBlock); + errdefer self.base.allocator.destroy(block); + + block.* = TextBlock.empty; + block.local_sym_index = local_sym_index; + block.code = try self.base.allocator.alloc(u8, 0); + block.size = 0; + block.alignment = 0; + + if (self.blocks.getPtr(match)) |last| { + last.*.next = block; + block.prev = last.*; + last.* = block; + } else { + try self.blocks.putNoClobber(self.base.allocator, match, block); + } + } + + var has_undefined = false; + for (self.undefs.items) |sym| { + if (symbolIsNull(sym)) continue; + + const sym_name = self.getString(sym.n_strx); + const resolv = self.symbol_resolver.get(sym_name) orelse unreachable; + + log.err("undefined reference to symbol '{s}'", .{sym_name}); + log.err(" first referenced in '{s}'", .{self.objects.items[resolv.file].name.?}); + has_undefined = true; + } + + if (has_undefined) return error.UndefinedSymbolReference; +} + +fn parseTextBlocks(self: *MachO) !void { + for (self.objects.items) |object| { + try object.parseTextBlocks(self); + } +} + +fn populateMetadata(self: *MachO) !void { + if (self.pagezero_segment_cmd_index == null) { + self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .Segment = SegmentCommand.empty("__PAGEZERO", .{ + .vmsize = 0x100000000, // size always set to 4GB + }), + }); + } + + if (self.text_segment_cmd_index == null) { + self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .Segment = SegmentCommand.empty("__TEXT", .{ + .vmaddr = 0x100000000, // always starts at 4GB + .maxprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE, + .initprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE, + }), + }); + } + + if (self.text_section_index == null) { + const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + self.text_section_index = @intCast(u16, 
text_seg.sections.items.len); + const alignment: u2 = switch (self.base.options.target.cpu.arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, // unhandled architecture type + }; + try text_seg.addSection(self.base.allocator, "__text", .{ + .@"align" = alignment, + .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + }); + } + + if (self.stubs_section_index == null) { + const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + self.stubs_section_index = @intCast(u16, text_seg.sections.items.len); + const alignment: u2 = switch (self.base.options.target.cpu.arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, // unhandled architecture type + }; + const stub_size: u4 = switch (self.base.options.target.cpu.arch) { + .x86_64 => 6, + .aarch64 => 3 * @sizeOf(u32), + else => unreachable, // unhandled architecture type + }; + try text_seg.addSection(self.base.allocator, "__stubs", .{ + .@"align" = alignment, + .flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + .reserved2 = stub_size, + }); + } + + if (self.stub_helper_section_index == null) { + const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + self.stub_helper_section_index = @intCast(u16, text_seg.sections.items.len); + const alignment: u2 = switch (self.base.options.target.cpu.arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, // unhandled architecture type + }; + const stub_helper_size: u6 = switch (self.base.options.target.cpu.arch) { + .x86_64 => 15, + .aarch64 => 6 * @sizeOf(u32), + else => unreachable, + }; + try text_seg.addSection(self.base.allocator, "__stub_helper", .{ + .size = stub_helper_size, + .@"align" = alignment, + .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + }); + } + + if (self.data_const_segment_cmd_index == null) { + self.data_const_segment_cmd_index = @intCast(u16, 
self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .Segment = SegmentCommand.empty("__DATA_CONST", .{ + .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, + .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, + }), + }); + } + + if (self.got_section_index == null) { + const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + self.got_section_index = @intCast(u16, data_const_seg.sections.items.len); + try data_const_seg.addSection(self.base.allocator, "__got", .{ + .@"align" = 3, // 2^3 = @sizeOf(u64) + .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, + }); + } + + if (self.data_segment_cmd_index == null) { + self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .Segment = SegmentCommand.empty("__DATA", .{ + .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, + .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, + }), + }); + } + + if (self.la_symbol_ptr_section_index == null) { + const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + self.la_symbol_ptr_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__la_symbol_ptr", .{ + .@"align" = 3, // 2^3 = @sizeOf(u64) + .flags = macho.S_LAZY_SYMBOL_POINTERS, + }); + } + + if (self.data_section_index == null) { + const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + self.data_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__data", .{ + .@"align" = 3, // 2^3 = @sizeOf(u64) + }); + } + + if (self.linkedit_segment_cmd_index == null) { + self.linkedit_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .Segment = SegmentCommand.empty("__LINKEDIT", .{ + .maxprot = macho.VM_PROT_READ, + .initprot = macho.VM_PROT_READ, + }), + }); 
+ } + + if (self.dyld_info_cmd_index == null) { + self.dyld_info_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .DyldInfoOnly = .{ + .cmd = macho.LC_DYLD_INFO_ONLY, + .cmdsize = @sizeOf(macho.dyld_info_command), + .rebase_off = 0, + .rebase_size = 0, + .bind_off = 0, + .bind_size = 0, + .weak_bind_off = 0, + .weak_bind_size = 0, + .lazy_bind_off = 0, + .lazy_bind_size = 0, + .export_off = 0, + .export_size = 0, + }, + }); + } + + if (self.symtab_cmd_index == null) { + self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .Symtab = .{ + .cmd = macho.LC_SYMTAB, + .cmdsize = @sizeOf(macho.symtab_command), + .symoff = 0, + .nsyms = 0, + .stroff = 0, + .strsize = 0, + }, + }); + } + + if (self.dysymtab_cmd_index == null) { + self.dysymtab_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .Dysymtab = .{ + .cmd = macho.LC_DYSYMTAB, + .cmdsize = @sizeOf(macho.dysymtab_command), + .ilocalsym = 0, + .nlocalsym = 0, + .iextdefsym = 0, + .nextdefsym = 0, + .iundefsym = 0, + .nundefsym = 0, + .tocoff = 0, + .ntoc = 0, + .modtaboff = 0, + .nmodtab = 0, + .extrefsymoff = 0, + .nextrefsyms = 0, + .indirectsymoff = 0, + .nindirectsyms = 0, + .extreloff = 0, + .nextrel = 0, + .locreloff = 0, + .nlocrel = 0, + }, + }); + } + + if (self.dylinker_cmd_index == null) { + self.dylinker_cmd_index = @intCast(u16, self.load_commands.items.len); + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.dylinker_command) + mem.lenZ(DEFAULT_DYLD_PATH), + @sizeOf(u64), + )); + var dylinker_cmd = commands.emptyGenericCommandWithData(macho.dylinker_command{ + .cmd = macho.LC_LOAD_DYLINKER, + .cmdsize = cmdsize, + .name = @sizeOf(macho.dylinker_command), + }); + dylinker_cmd.data = try self.base.allocator.alloc(u8, cmdsize - dylinker_cmd.inner.name); + mem.set(u8, 
dylinker_cmd.data, 0); + mem.copy(u8, dylinker_cmd.data, mem.spanZ(DEFAULT_DYLD_PATH)); + try self.load_commands.append(self.base.allocator, .{ .Dylinker = dylinker_cmd }); + } + + if (self.main_cmd_index == null and self.base.options.output_mode == .Exe) { + self.main_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .Main = .{ + .cmd = macho.LC_MAIN, + .cmdsize = @sizeOf(macho.entry_point_command), + .entryoff = 0x0, + .stacksize = 0, + }, + }); + } + + if (self.dylib_id_cmd_index == null and self.base.options.output_mode == .Lib) { + self.dylib_id_cmd_index = @intCast(u16, self.load_commands.items.len); + const install_name = try std.fmt.allocPrint(self.base.allocator, "@rpath/{s}", .{ + self.base.options.emit.?.sub_path, + }); + defer self.base.allocator.free(install_name); + var dylib_cmd = try commands.createLoadDylibCommand( + self.base.allocator, + install_name, + 2, + 0x10000, // TODO forward user-provided versions + 0x10000, + ); + errdefer dylib_cmd.deinit(self.base.allocator); + dylib_cmd.inner.cmd = macho.LC_ID_DYLIB; + try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd }); + } + + if (self.version_min_cmd_index == null) { + self.version_min_cmd_index = @intCast(u16, self.load_commands.items.len); + const cmd: u32 = switch (self.base.options.target.os.tag) { + .macos => macho.LC_VERSION_MIN_MACOSX, + .ios => macho.LC_VERSION_MIN_IPHONEOS, + .tvos => macho.LC_VERSION_MIN_TVOS, + .watchos => macho.LC_VERSION_MIN_WATCHOS, + else => unreachable, // wrong OS + }; + const ver = self.base.options.target.os.version_range.semver.min; + const version = ver.major << 16 | ver.minor << 8 | ver.patch; + try self.load_commands.append(self.base.allocator, .{ + .VersionMin = .{ + .cmd = cmd, + .cmdsize = @sizeOf(macho.version_min_command), + .version = version, + .sdk = version, + }, + }); + } + + if (self.source_version_cmd_index == null) { + self.source_version_cmd_index = 
@intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .SourceVersion = .{ + .cmd = macho.LC_SOURCE_VERSION, + .cmdsize = @sizeOf(macho.source_version_command), + .version = 0x0, + }, + }); + } + + if (self.uuid_cmd_index == null) { + self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len); + var uuid_cmd: macho.uuid_command = .{ + .cmd = macho.LC_UUID, + .cmdsize = @sizeOf(macho.uuid_command), + .uuid = undefined, + }; + std.crypto.random.bytes(&uuid_cmd.uuid); + try self.load_commands.append(self.base.allocator, .{ .Uuid = uuid_cmd }); + } +} + +fn addDataInCodeLC(self: *MachO) !void { + if (self.data_in_code_cmd_index == null) { + self.data_in_code_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .LinkeditData = .{ + .cmd = macho.LC_DATA_IN_CODE, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = 0, + .datasize = 0, + }, + }); + } +} + +fn addCodeSignatureLC(self: *MachO) !void { + if (self.code_signature_cmd_index == null and self.base.options.target.cpu.arch == .aarch64) { + self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .LinkeditData = .{ + .cmd = macho.LC_CODE_SIGNATURE, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = 0, + .datasize = 0, + }, + }); + } +} + +fn addRpaths(self: *MachO, rpaths: []const []const u8) !void { + for (rpaths) |rpath| { + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.rpath_command) + rpath.len + 1, + @sizeOf(u64), + )); + var rpath_cmd = commands.emptyGenericCommandWithData(macho.rpath_command{ + .cmd = macho.LC_RPATH, + .cmdsize = cmdsize, + .path = @sizeOf(macho.rpath_command), + }); + rpath_cmd.data = try self.base.allocator.alloc(u8, cmdsize - rpath_cmd.inner.path); + mem.set(u8, rpath_cmd.data, 0); + mem.copy(u8, rpath_cmd.data, rpath); + try 
self.load_commands.append(self.base.allocator, .{ .Rpath = rpath_cmd }); + } +} + +fn flushZld(self: *MachO) !void { + try self.writeTextBlocks(); + try self.writeStubHelperCommon(); + + if (self.common_section_index) |index| { + const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = &seg.sections.items[index]; + sect.offset = 0; + } + + if (self.bss_section_index) |index| { + const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = &seg.sections.items[index]; + sect.offset = 0; + } + + if (self.tlv_bss_section_index) |index| { + const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = &seg.sections.items[index]; + sect.offset = 0; + } + + try self.writeGotEntries(); + try self.setEntryPoint(); + try self.writeRebaseInfoTable(); + try self.writeBindInfoTable(); + try self.writeLazyBindInfoTable(); + try self.writeExportInfo(); + try self.writeDices(); + + { + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; + symtab.symoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + } + + try self.writeSymbolTable(); + try self.writeStringTable(); + + { + // Seal __LINKEDIT size + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size); + } + + if (self.base.options.target.cpu.arch == .aarch64) { + try self.writeCodeSignaturePadding(); + } + + try self.writeLoadCommands(); + try self.writeHeader(); + + if (self.base.options.target.cpu.arch == .aarch64) { + try self.writeCodeSignature(); + } + + // if (comptime std.Target.current.isDarwin() and std.Target.current.cpu.arch == .aarch64) { + // const out_path = self.output.?.path; + // try fs.cwd().copyFile(out_path, fs.cwd(), out_path, .{}); + // } +} + +fn writeGotEntries(self: *MachO) !void { 
+ const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const sect = seg.sections.items[self.got_section_index.?]; + + var buffer = try self.base.allocator.alloc(u8, self.got_entries.items.len * @sizeOf(u64)); + defer self.base.allocator.free(buffer); + + var stream = std.io.fixedBufferStream(buffer); + var writer = stream.writer(); + + for (self.got_entries.items) |key| { + const address: u64 = switch (key.where) { + .local => self.locals.items[key.where_index].n_value, + .import => 0, + }; + try writer.writeIntLittle(u64, address); + } + + log.debug("writing GOT pointers at 0x{x} to 0x{x}", .{ sect.offset, sect.offset + buffer.len }); + + try self.base.file.?.pwriteAll(buffer, sect.offset); +} + +fn setEntryPoint(self: *MachO) !void { + if (self.base.options.output_mode != .Exe) return; + + // TODO we should respect the -entry flag passed in by the user to set a custom + // entrypoint. For now, assume default of `_main`. + const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const resolv = self.symbol_resolver.get("_main") orelse { + log.err("'_main' export not found", .{}); + return error.MissingMainEntrypoint; + }; + assert(resolv.where == .global); + const sym = self.globals.items[resolv.where_index]; + const ec = &self.load_commands.items[self.main_cmd_index.?].Main; + ec.entryoff = @intCast(u32, sym.n_value - seg.inner.vmaddr); + ec.stacksize = self.base.options.stack_size_override orelse 0; +} + +fn writeSymbolTable(self: *MachO) !void { + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; + + var locals = std.ArrayList(macho.nlist_64).init(self.base.allocator); + defer locals.deinit(); + try locals.appendSlice(self.locals.items); + + if (self.has_stabs) { + for (self.objects.items) |object| { + if (object.debug_info == null) continue; + + // Open scope + try locals.ensureUnusedCapacity(4); + 
locals.appendAssumeCapacity(.{ + .n_strx = try self.makeString(object.tu_comp_dir.?), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + locals.appendAssumeCapacity(.{ + .n_strx = try self.makeString(object.tu_name.?), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + locals.appendAssumeCapacity(.{ + .n_strx = try self.makeString(object.name.?), + .n_type = macho.N_OSO, + .n_sect = 0, + .n_desc = 1, + .n_value = object.mtime orelse 0, + }); + + for (object.text_blocks.items) |block| { + if (block.stab) |stab| { + const nlists = try stab.asNlists(block.local_sym_index, self); + defer self.base.allocator.free(nlists); + try locals.appendSlice(nlists); + } else { + for (block.contained.items) |sym_at_off| { + const stab = sym_at_off.stab orelse continue; + const nlists = try stab.asNlists(sym_at_off.local_sym_index, self); + defer self.base.allocator.free(nlists); + try locals.appendSlice(nlists); + } + } + } + + // Close scope + locals.appendAssumeCapacity(.{ + .n_strx = 0, + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + } + } + + const nlocals = locals.items.len; + const nexports = self.globals.items.len; + const nundefs = self.imports.items.len; + + const locals_off = symtab.symoff + symtab.nsyms * @sizeOf(macho.nlist_64); + const locals_size = nlocals * @sizeOf(macho.nlist_64); + log.debug("writing local symbols from 0x{x} to 0x{x}", .{ locals_off, locals_size + locals_off }); + try self.base.file.?.pwriteAll(mem.sliceAsBytes(locals.items), locals_off); + + const exports_off = locals_off + locals_size; + const exports_size = nexports * @sizeOf(macho.nlist_64); + log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off }); + try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.globals.items), exports_off); + + const undefs_off = exports_off + exports_size; + const undefs_size = nundefs * @sizeOf(macho.nlist_64); + log.debug("writing 
undefined symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off }); + try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.imports.items), undefs_off); + + symtab.nsyms += @intCast(u32, nlocals + nexports + nundefs); + seg.inner.filesize += locals_size + exports_size + undefs_size; + + // Update dynamic symbol table. + const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; + dysymtab.nlocalsym += @intCast(u32, nlocals); + dysymtab.iextdefsym = dysymtab.nlocalsym; + dysymtab.nextdefsym = @intCast(u32, nexports); + dysymtab.iundefsym = dysymtab.nlocalsym + dysymtab.nextdefsym; + dysymtab.nundefsym = @intCast(u32, nundefs); + + const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const stubs = &text_segment.sections.items[self.stubs_section_index.?]; + const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const got = &data_const_segment.sections.items[self.got_section_index.?]; + const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const la_symbol_ptr = &data_segment.sections.items[self.la_symbol_ptr_section_index.?]; + + const nstubs = @intCast(u32, self.stubs.items.len); + const ngot_entries = @intCast(u32, self.got_entries.items.len); + + dysymtab.indirectsymoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + dysymtab.nindirectsyms = nstubs * 2 + ngot_entries; + + const needed_size = dysymtab.nindirectsyms * @sizeOf(u32); + seg.inner.filesize += needed_size; + + log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ + dysymtab.indirectsymoff, + dysymtab.indirectsymoff + needed_size, + }); + + var buf = try self.base.allocator.alloc(u8, needed_size); + defer self.base.allocator.free(buf); + + var stream = std.io.fixedBufferStream(buf); + var writer = stream.writer(); + + stubs.reserved1 = 0; + for (self.stubs.items) |id| { + try writer.writeIntLittle(u32, dysymtab.iundefsym + id); + } + 
+ got.reserved1 = nstubs; + for (self.got_entries.items) |entry| { + switch (entry.where) { + .import => { + try writer.writeIntLittle(u32, dysymtab.iundefsym + entry.where_index); + }, + .local => { + try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); + }, + } + } + + la_symbol_ptr.reserved1 = got.reserved1 + ngot_entries; + for (self.stubs.items) |id| { + try writer.writeIntLittle(u32, dysymtab.iundefsym + id); + } + + try self.base.file.?.pwriteAll(buf, dysymtab.indirectsymoff); +} + pub fn deinit(self: *MachO) void { if (self.d_sym) |*ds| { ds.deinit(self.base.allocator); @@ -970,6 +3014,8 @@ pub fn deinit(self: *MachO) void { self.stubs.deinit(self.base.allocator); self.stubs_map.deinit(self.base.allocator); self.strtab.deinit(self.base.allocator); + self.undefs.deinit(self.base.allocator); + self.tentatives.deinit(self.base.allocator); self.imports.deinit(self.base.allocator); self.globals.deinit(self.base.allocator); self.globals_free_list.deinit(self.base.allocator); @@ -981,10 +3027,40 @@ pub fn deinit(self: *MachO) void { } self.symbol_resolver.deinit(self.base.allocator); + for (self.objects.items) |object| { + object.deinit(); + self.base.allocator.destroy(object); + } + self.objects.deinit(self.base.allocator); + + for (self.archives.items) |archive| { + archive.deinit(); + self.base.allocator.destroy(archive); + } + self.archives.deinit(self.base.allocator); + + for (self.dylibs.items) |dylib| { + dylib.deinit(); + self.base.allocator.destroy(dylib); + } + self.dylibs.deinit(self.base.allocator); + for (self.load_commands.items) |*lc| { lc.deinit(self.base.allocator); } self.load_commands.deinit(self.base.allocator); + + // TODO dealloc all blocks + self.blocks.deinit(self.base.allocator); +} + +pub fn closeFiles(self: MachO) void { + for (self.objects.items) |object| { + object.closeFile(); + } + for (self.archives.items) |archive| { + archive.closeFile(); + } } fn freeTextBlock(self: *MachO, text_block: *TextBlock) void { @@ -2664,6 
+4740,60 @@ fn writeIndirectSymbolTable(self: *MachO) !void { self.load_commands_dirty = true; } +fn writeDices(self: *MachO) !void { + if (!self.has_dices) return; + + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].LinkeditData; + const fileoff = seg.inner.fileoff + seg.inner.filesize; + + var buf = std.ArrayList(u8).init(self.base.allocator); + defer buf.deinit(); + + var block: *TextBlock = self.blocks.get(.{ + .seg = self.text_segment_cmd_index orelse return, + .sect = self.text_section_index orelse return, + }) orelse return; + + while (block.prev) |prev| { + block = prev; + } + + const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const text_sect = text_seg.sections.items[self.text_section_index.?]; + + while (true) { + if (block.dices.items.len > 0) { + const sym = self.locals.items[block.local_sym_index]; + const base_off = try math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset); + + try buf.ensureUnusedCapacity(block.dices.items.len * @sizeOf(macho.data_in_code_entry)); + for (block.dices.items) |dice| { + const rebased_dice = macho.data_in_code_entry{ + .offset = base_off + dice.offset, + .length = dice.length, + .kind = dice.kind, + }; + buf.appendSliceAssumeCapacity(mem.asBytes(&rebased_dice)); + } + } + + if (block.next) |next| { + block = next; + } else break; + } + + const datasize = @intCast(u32, buf.items.len); + + dice_cmd.dataoff = @intCast(u32, fileoff); + dice_cmd.datasize = datasize; + seg.inner.filesize += datasize; + + log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ fileoff, fileoff + datasize }); + + try self.base.file.?.pwriteAll(buf.items, fileoff); +} + fn writeCodeSignaturePadding(self: *MachO) !void { // TODO figure out how not to rewrite padding every single time. 
const tracy = trace(@src()); @@ -2719,7 +4849,7 @@ fn writeCodeSignature(self: *MachO) !void { try self.base.file.?.pwriteAll(buffer, code_sig_cmd.dataoff); } -fn writeExportTrie(self: *MachO) !void { +fn writeExportInfo(self: *MachO) !void { if (!self.export_info_dirty) return; if (self.globals.items.len == 0) return; @@ -2779,6 +4909,34 @@ fn writeRebaseInfoTable(self: *MachO) !void { var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); defer pointers.deinit(); + { + var it = self.blocks.iterator(); + while (it.next()) |entry| { + const match = entry.key_ptr.*; + var block: *TextBlock = entry.value_ptr.*; + + if (match.seg == self.text_segment_cmd_index.?) continue; // __TEXT is non-writable + + const seg = self.load_commands.items[match.seg].Segment; + + while (true) { + const sym = self.locals.items[block.local_sym_index]; + const base_offset = sym.n_value - seg.inner.vmaddr; + + for (block.rebases.items) |offset| { + try pointers.append(.{ + .offset = base_offset + offset, + .segment_id = match.seg, + }); + } + + if (block.prev) |prev| { + block = prev; + } else break; + } + } + } + if (self.got_section_index) |idx| { const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; const sect = seg.sections.items[idx]; @@ -2837,7 +4995,7 @@ fn writeRebaseInfoTable(self: *MachO) !void { self.rebase_info_dirty = false; } -fn writeBindingInfoTable(self: *MachO) !void { +fn writeBindInfoTable(self: *MachO) !void { if (!self.binding_info_dirty) return; const tracy = trace(@src()); @@ -2865,6 +5023,37 @@ fn writeBindingInfoTable(self: *MachO) !void { } } + { + var it = self.blocks.iterator(); + while (it.next()) |entry| { + const match = entry.key_ptr.*; + var block: *TextBlock = entry.value_ptr.*; + + if (match.seg == self.text_segment_cmd_index.?) 
continue; // __TEXT is non-writable + + const seg = self.load_commands.items[match.seg].Segment; + + while (true) { + const sym = self.locals.items[block.local_sym_index]; + const base_offset = sym.n_value - seg.inner.vmaddr; + + for (block.bindings.items) |binding| { + const bind_sym = self.imports.items[binding.local_sym_index]; + try pointers.append(.{ + .offset = binding.offset + base_offset, + .segment_id = match.seg, + .dylib_ordinal = unpackDylibOrdinal(bind_sym.n_desc), + .name = self.getString(bind_sym.n_strx), + }); + } + + if (block.prev) |prev| { + block = prev; + } else break; + } + } + } + const size = try bind.bindInfoSize(pointers.items); var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); defer self.base.allocator.free(buffer); @@ -2890,7 +5079,7 @@ fn writeBindingInfoTable(self: *MachO) !void { self.binding_info_dirty = false; } -fn writeLazyBindingInfoTable(self: *MachO) !void { +fn writeLazyBindInfoTable(self: *MachO) !void { if (!self.lazy_binding_info_dirty) return; const tracy = trace(@src()); @@ -3134,7 +5323,7 @@ fn writeHeader(self: *MachO) !void { else => unreachable, } - if (self.hasTlvDescriptors()) { + if (self.tlv_section_index) |_| { header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; } @@ -3156,10 +5345,6 @@ pub fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) { std.math.maxInt(@TypeOf(actual_size)); } -fn hasTlvDescriptors(_: *MachO) bool { - return false; -} - pub fn makeString(self: *MachO, string: []const u8) !u32 { try self.strtab.ensureUnusedCapacity(self.base.allocator, string.len + 1); const new_off = @intCast(u32, self.strtab.items.len); @@ -3177,6 +5362,92 @@ pub fn getString(self: *MachO, off: u32) []const u8 { return mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr + off)); } +pub fn symbolIsStab(sym: macho.nlist_64) bool { + return (macho.N_STAB & sym.n_type) != 0; +} + +pub fn symbolIsPext(sym: macho.nlist_64) bool { + return (macho.N_PEXT & sym.n_type) != 0; +} + +pub fn symbolIsExt(sym: 
macho.nlist_64) bool { + return (macho.N_EXT & sym.n_type) != 0; +} + +pub fn symbolIsSect(sym: macho.nlist_64) bool { + const type_ = macho.N_TYPE & sym.n_type; + return type_ == macho.N_SECT; +} + +pub fn symbolIsUndf(sym: macho.nlist_64) bool { + const type_ = macho.N_TYPE & sym.n_type; + return type_ == macho.N_UNDF; +} + +pub fn symbolIsIndr(sym: macho.nlist_64) bool { + const type_ = macho.N_TYPE & sym.n_type; + return type_ == macho.N_INDR; +} + +pub fn symbolIsAbs(sym: macho.nlist_64) bool { + const type_ = macho.N_TYPE & sym.n_type; + return type_ == macho.N_ABS; +} + +pub fn symbolIsWeakDef(sym: macho.nlist_64) bool { + return (sym.n_desc & macho.N_WEAK_DEF) != 0; +} + +pub fn symbolIsWeakRef(sym: macho.nlist_64) bool { + return (sym.n_desc & macho.N_WEAK_REF) != 0; +} + +pub fn symbolIsTentative(sym: macho.nlist_64) bool { + if (!symbolIsUndf(sym)) return false; + return sym.n_value != 0; +} + +pub fn symbolIsNull(sym: macho.nlist_64) bool { + return sym.n_value == 0 and sym.n_desc == 0 and sym.n_type == 0 and sym.n_strx == 0 and sym.n_sect == 0; +} + +pub fn symbolIsTemp(sym: macho.nlist_64, sym_name: []const u8) bool { + if (!symbolIsSect(sym)) return false; + if (symbolIsExt(sym)) return false; + return mem.startsWith(u8, sym_name, "l") or mem.startsWith(u8, sym_name, "L"); +} + +pub fn sectionId(self: MachO, match: MatchingSection) u8 { + // TODO there might be a more generic way of doing this. 
+ var section: u8 = 0; + for (self.load_commands.items) |cmd, cmd_id| { + if (cmd != .Segment) break; + if (cmd_id == match.seg) { + section += @intCast(u8, match.sect) + 1; + break; + } + section += @intCast(u8, cmd.Segment.sections.items.len); + } + return section; +} + +pub fn unpackSectionId(self: MachO, section_id: u8) MatchingSection { + var match: MatchingSection = undefined; + var section: u8 = 0; + outer: for (self.load_commands.items) |cmd, cmd_id| { + assert(cmd == .Segment); + for (cmd.Segment.sections.items) |_, sect_id| { + section += 1; + if (section_id == section) { + match.seg = @intCast(u16, cmd_id); + match.sect = @intCast(u16, sect_id); + break :outer; + } + } + } + return match; +} + fn packDylibOrdinal(ordinal: u16) u16 { return ordinal * macho.N_SYMBOL_RESOLVER; } @@ -3184,3 +5455,16 @@ fn packDylibOrdinal(ordinal: u16) u16 { fn unpackDylibOrdinal(pack: u16) u16 { return @divExact(pack, macho.N_SYMBOL_RESOLVER); } + +pub fn findFirst(comptime T: type, haystack: []T, start: usize, predicate: anytype) usize { + if (!@hasDecl(@TypeOf(predicate), "predicate")) + @compileError("Predicate is required to define fn predicate(@This(), T) bool"); + + if (start == haystack.len) return start; + + var i = start; + while (i < haystack.len) : (i += 1) { + if (predicate.predicate(haystack[i])) break; + } + return i; +} diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index ca71b7613c..0e7f95a3d5 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -13,7 +13,7 @@ const fat = @import("fat.zig"); const Allocator = mem.Allocator; const Arch = std.Target.Cpu.Arch; const LibStub = @import("../tapi.zig").LibStub; -const Zld = @import("Zld.zig"); +const MachO = @import("../MachO.zig"); usingnamespace @import("commands.zig"); @@ -324,7 +324,7 @@ fn parseSymbols(self: *Dylib) !void { _ = try self.file.?.preadAll(strtab, symtab_cmd.stroff + self.library_offset); for (slice) |sym| { - const add_to_symtab = Zld.symbolIsExt(sym) and 
(Zld.symbolIsSect(sym) or Zld.symbolIsIndr(sym)); + const add_to_symtab = MachO.symbolIsExt(sym) and (MachO.symbolIsSect(sym) or MachO.symbolIsIndr(sym)); if (!add_to_symtab) continue; diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index f8e88673a1..eaab05140e 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -13,8 +13,8 @@ const sort = std.sort; const Allocator = mem.Allocator; const Arch = std.Target.Cpu.Arch; +const MachO = @import("../MachO.zig"); const TextBlock = @import("TextBlock.zig"); -const Zld = @import("Zld.zig"); usingnamespace @import("commands.zig"); @@ -307,8 +307,8 @@ const NlistWithIndex = struct { } }; - const start = Zld.findFirst(NlistWithIndex, symbols, 0, Predicate{ .addr = sect.addr }); - const end = Zld.findFirst(NlistWithIndex, symbols, start, Predicate{ .addr = sect.addr + sect.size }); + const start = MachO.findFirst(NlistWithIndex, symbols, 0, Predicate{ .addr = sect.addr }); + const end = MachO.findFirst(NlistWithIndex, symbols, start, Predicate{ .addr = sect.addr + sect.size }); return symbols[start..end]; } @@ -323,8 +323,8 @@ fn filterDice(dices: []macho.data_in_code_entry, start_addr: u64, end_addr: u64) } }; - const start = Zld.findFirst(macho.data_in_code_entry, dices, 0, Predicate{ .addr = start_addr }); - const end = Zld.findFirst(macho.data_in_code_entry, dices, start, Predicate{ .addr = end_addr }); + const start = MachO.findFirst(macho.data_in_code_entry, dices, 0, Predicate{ .addr = start_addr }); + const end = MachO.findFirst(macho.data_in_code_entry, dices, start, Predicate{ .addr = end_addr }); return dices[start..end]; } @@ -335,10 +335,10 @@ const TextBlockParser = struct { code: []u8, relocs: []macho.relocation_info, object: *Object, - zld: *Zld, + macho_file: *MachO, nlists: []NlistWithIndex, index: u32 = 0, - match: Zld.MatchingSection, + match: MachO.MatchingSection, fn peek(self: *TextBlockParser) ?NlistWithIndex { return if (self.index + 1 < self.nlists.len) 
self.nlists[self.index + 1] else null; @@ -349,10 +349,10 @@ const TextBlockParser = struct { }; fn lessThanBySeniority(context: SeniorityContext, lhs: NlistWithIndex, rhs: NlistWithIndex) bool { - if (!Zld.symbolIsExt(rhs.nlist)) { - return Zld.symbolIsTemp(lhs.nlist, context.object.getString(lhs.nlist.n_strx)); - } else if (Zld.symbolIsPext(rhs.nlist) or Zld.symbolIsWeakDef(rhs.nlist)) { - return !Zld.symbolIsExt(lhs.nlist); + if (!MachO.symbolIsExt(rhs.nlist)) { + return MachO.symbolIsTemp(lhs.nlist, context.object.getString(lhs.nlist.n_strx)); + } else if (MachO.symbolIsPext(rhs.nlist) or MachO.symbolIsWeakDef(rhs.nlist)) { + return !MachO.symbolIsExt(lhs.nlist); } else { return true; } @@ -383,7 +383,7 @@ const TextBlockParser = struct { const sym = self.object.symbols.items[nlist_with_index.index]; if (sym.payload != .regular) { log.err("expected a regular symbol, found {s}", .{sym.payload}); - log.err(" when remapping {s}", .{self.zld.getString(sym.strx)}); + log.err(" when remapping {s}", .{self.macho_file.getString(sym.strx)}); return error.SymbolIsNotRegular; } assert(sym.payload.regular.local_sym_index != 0); // This means the symbol has not been properly resolved. 
@@ -401,7 +401,7 @@ const TextBlockParser = struct { } const senior_nlist = aliases.pop(); - const senior_sym = self.zld.locals.items[senior_nlist.index]; + const senior_sym = self.macho_file.locals.items[senior_nlist.index]; assert(senior_sym.payload == .regular); senior_sym.payload.regular.segment_id = self.match.seg; senior_sym.payload.regular.section_id = self.match.sect; @@ -429,7 +429,7 @@ const TextBlockParser = struct { } } } - if (self.zld.globals.contains(self.zld.getString(senior_sym.strx))) break :blk .global; + if (self.macho_file.globals.contains(self.macho_file.getString(senior_sym.strx))) break :blk .global; break :blk .static; } else null; @@ -448,19 +448,19 @@ const TextBlockParser = struct { for (aliases.items) |alias| { block.aliases.appendAssumeCapacity(alias.index); - const sym = self.zld.locals.items[alias.index]; + const sym = self.macho_file.locals.items[alias.index]; const reg = &sym.payload.regular; reg.segment_id = self.match.seg; reg.section_id = self.match.sect; } } - try block.parseRelocsFromObject(relocs, object, .{ + try block.parseRelocsFromObject(self.allocator, relocs, object, .{ .base_addr = start_addr, - .zld = self.zld, + .macho_file = self.macho_file, }); - if (self.zld.has_dices) { + if (self.macho_file.has_dices) { const dices = filterDice( self.object.data_in_code_entries.items, senior_nlist.nlist.n_value, @@ -483,7 +483,7 @@ const TextBlockParser = struct { } }; -pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { +pub fn parseTextBlocks(self: *Object, macho_file: *MachO) !void { const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; log.debug("analysing {s}", .{self.name.?}); @@ -513,7 +513,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { }); // Get matching segment/section in the final artifact. 
- const match = (try zld.getMatchingSection(sect)) orelse { + const match = (try macho_file.getMatchingSection(sect)) orelse { log.debug("unhandled section", .{}); continue; }; @@ -538,7 +538,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { // duplicates at all? Need some benchmarks! // const is_splittable = false; - zld.has_dices = blk: { + macho_file.has_dices = blk: { if (self.text_section_index) |index| { if (index != id) break :blk false; if (self.data_in_code_entries.items.len == 0) break :blk false; @@ -546,7 +546,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { } break :blk false; }; - zld.has_stabs = zld.has_stabs or self.debug_info != null; + macho_file.has_stabs = macho_file.has_stabs or self.debug_info != null; { // next: { @@ -711,11 +711,11 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { defer self.allocator.free(sym_name); const block_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { - const block_local_sym_index = @intCast(u32, zld.locals.items.len); - try zld.locals.append(zld.allocator, .{ - .n_strx = try zld.makeString(sym_name), + const block_local_sym_index = @intCast(u32, macho_file.locals.items.len); + try macho_file.locals.append(macho_file.base.allocator, .{ + .n_strx = try macho_file.makeString(sym_name), .n_type = macho.N_SECT, - .n_sect = zld.sectionId(match), + .n_sect = macho_file.sectionId(match), .n_desc = 0, .n_value = sect.addr, }); @@ -726,20 +726,20 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { const block = try self.allocator.create(TextBlock); errdefer self.allocator.destroy(block); - block.* = TextBlock.init(self.allocator); + block.* = TextBlock.empty; block.local_sym_index = block_local_sym_index; block.code = try self.allocator.dupe(u8, code); block.size = sect.size; block.alignment = sect.@"align"; - try block.parseRelocsFromObject(relocs, self, .{ + try block.parseRelocsFromObject(self.allocator, relocs, self, .{ .base_addr = 0, - .zld = zld, + 
.macho_file = macho_file, }); - if (zld.has_dices) { + if (macho_file.has_dices) { const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + sect.size); - try block.dices.ensureTotalCapacity(dices.len); + try block.dices.ensureTotalCapacity(self.allocator, dices.len); for (dices) |dice| { block.dices.appendAssumeCapacity(.{ @@ -755,15 +755,13 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { // the filtered symbols and note which symbol is contained within so that // we can properly allocate addresses down the line. // While we're at it, we need to update segment,section mapping of each symbol too. - var contained = std.ArrayList(TextBlock.SymbolAtOffset).init(self.allocator); - defer contained.deinit(); - try contained.ensureTotalCapacity(filtered_nlists.len); + try block.contained.ensureTotalCapacity(self.allocator, filtered_nlists.len); for (filtered_nlists) |nlist_with_index| { const nlist = nlist_with_index.nlist; const local_sym_index = self.symbol_mapping.get(nlist_with_index.index) orelse unreachable; - const local = &zld.locals.items[local_sym_index]; - local.n_sect = zld.sectionId(match); + const local = &macho_file.locals.items[local_sym_index]; + local.n_sect = macho_file.sectionId(match); const stab: ?TextBlock.Stab = if (self.debug_info) |di| blk: { // TODO there has to be a better to handle this. @@ -781,19 +779,17 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { break :blk .static; } else null; - contained.appendAssumeCapacity(.{ + block.contained.appendAssumeCapacity(.{ .local_sym_index = local_sym_index, .offset = nlist.n_value - sect.addr, .stab = stab, }); } - block.contained = contained.toOwnedSlice(); - // Update target section's metadata // TODO should we update segment's size here too? // How does it tie with incremental space allocs? 
- const tseg = &zld.load_commands.items[match.seg].Segment; + const tseg = &macho_file.load_commands.items[match.seg].Segment; const tsect = &tseg.sections.items[match.sect]; const new_alignment = math.max(tsect.@"align", block.alignment); const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); @@ -801,12 +797,12 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { tsect.size = new_size; tsect.@"align" = new_alignment; - if (zld.blocks.getPtr(match)) |last| { + if (macho_file.blocks.getPtr(match)) |last| { last.*.next = block; block.prev = last.*; last.* = block; } else { - try zld.blocks.putNoClobber(zld.allocator, match, block); + try macho_file.blocks.putNoClobber(self.allocator, match, block); } try self.text_blocks.append(self.allocator, block); @@ -814,7 +810,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { } } -pub fn symbolFromReloc(self: *Object, zld: *Zld, rel: macho.relocation_info) !*Symbol { +pub fn symbolFromReloc(self: *Object, macho_file: *MachO, rel: macho.relocation_info) !*Symbol { const symbol = blk: { if (rel.r_extern == 1) { break :blk self.symbols.items[rel.r_symbolnum]; @@ -832,9 +828,9 @@ pub fn symbolFromReloc(self: *Object, zld: *Zld, rel: macho.relocation_info) !*S sectionName(sect), }); defer self.allocator.free(name); - const symbol = try zld.allocator.create(Symbol); + const symbol = try macho_file.allocator.create(Symbol); symbol.* = .{ - .strx = try zld.makeString(name), + .strx = try macho_file.makeString(name), .payload = .{ .regular = .{ .linkage = .translation_unit, diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig index ad2d4c11cf..0b6ff20d94 100644 --- a/src/link/MachO/TextBlock.zig +++ b/src/link/MachO/TextBlock.zig @@ -12,24 +12,64 @@ const meta = std.meta; const Allocator = mem.Allocator; const Arch = std.Target.Cpu.Arch; +const MachO = @import("../MachO.zig"); const Object = @import("Object.zig"); -const Zld = @import("Zld.zig"); -allocator: *Allocator, +/// Each decl 
always gets a local symbol with the fully qualified name. +/// The vaddr and size are found here directly. +/// The file offset is found by computing the vaddr offset from the section vaddr +/// the symbol references, and adding that to the file offset of the section. +/// If this field is 0, it means the codegen size = 0 and there is no symbol or +/// offset table entry. local_sym_index: u32, -stab: ?Stab = null, -aliases: std.ArrayList(u32), -references: std.AutoArrayHashMap(u32, void), -contained: ?[]SymbolAtOffset = null, + +/// List of symbol aliases pointing to the same block via different nlists +aliases: std.ArrayListUnmanaged(u32) = .{}, + +/// List of symbols contained within this block +contained: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, + +/// Code (may be non-relocated) this block represents code: []u8, -relocs: std.ArrayList(Relocation), + +/// Size and alignment of this text block +/// Unlike in Elf, we need to store the size of this symbol as part of +/// the TextBlock since macho.nlist_64 lacks this information. size: u64, alignment: u32, -rebases: std.ArrayList(u64), -bindings: std.ArrayList(SymbolAtOffset), -dices: std.ArrayList(macho.data_in_code_entry), -next: ?*TextBlock = null, -prev: ?*TextBlock = null, + +relocs: std.ArrayListUnmanaged(Relocation) = .{}, + +/// List of offsets contained within this block that need rebasing by the dynamic +/// loader in presence of ASLR +rebases: std.ArrayListUnmanaged(u64) = .{}, + +/// List of offsets contained within this block that will be dynamically bound +/// by the dynamic loader and contain pointers to resolved (at load time) extern +/// symbols (aka proxies aka imports) +bindings: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, + +/// List of data-in-code entries. This is currently specific to x86_64 only. +dices: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, + +/// Stab entry for this block. 
This is currently specific to a binary created +/// by linking object files in a traditional sense - in incremental sense, we +/// bypass stabs altogether to produce dSYM bundle directly with fully relocated +/// DWARF sections. +stab: ?Stab = null, + +/// Points to the previous and next neighbours +next: ?*TextBlock, +prev: ?*TextBlock, + +/// Previous/next linked list pointers. +/// This is the linked list node for this Decl's corresponding .debug_info tag. +dbg_info_prev: ?*TextBlock, +dbg_info_next: ?*TextBlock, +/// Offset into .debug_info pointing to the tag for this Decl. +dbg_info_off: u32, +/// Size of the .debug_info tag for this Decl, not including padding. +dbg_info_len: u32, pub const SymbolAtOffset = struct { local_sym_index: u32, @@ -42,11 +82,11 @@ pub const Stab = union(enum) { static, global, - pub fn asNlists(stab: Stab, local_sym_index: u32, zld: *Zld) ![]macho.nlist_64 { - var nlists = std.ArrayList(macho.nlist_64).init(zld.allocator); + pub fn asNlists(stab: Stab, local_sym_index: u32, macho_file: anytype) ![]macho.nlist_64 { + var nlists = std.ArrayList(macho.nlist_64).init(macho_file.base.allocator); defer nlists.deinit(); - const sym = zld.locals.items[local_sym_index]; + const sym = macho_file.locals.items[local_sym_index]; switch (stab) { .function => |size| { try nlists.ensureUnusedCapacity(4); @@ -130,7 +170,7 @@ pub const Relocation = struct { offset: u32, source_addr: u64, target_addr: u64, - zld: *Zld, + macho_file: *MachO, }; pub const Unsigned = struct { @@ -148,7 +188,7 @@ pub const Relocation = struct { pub fn resolve(self: Unsigned, args: ResolveArgs) !void { const result = blk: { if (self.subtractor) |subtractor| { - const sym = args.zld.locals.items[subtractor]; + const sym = args.macho_file.locals.items[subtractor]; break :blk @intCast(i64, args.target_addr) - @intCast(i64, sym.n_value) + self.addend; } else { break :blk @intCast(i64, args.target_addr) + self.addend; @@ -500,38 +540,59 @@ pub const Relocation = struct { } }; 
-pub fn init(allocator: *Allocator) TextBlock { - return .{ - .allocator = allocator, - .local_sym_index = undefined, - .aliases = std.ArrayList(u32).init(allocator), - .references = std.AutoArrayHashMap(u32, void).init(allocator), - .code = undefined, - .relocs = std.ArrayList(Relocation).init(allocator), - .size = undefined, - .alignment = undefined, - .rebases = std.ArrayList(u64).init(allocator), - .bindings = std.ArrayList(SymbolAtOffset).init(allocator), - .dices = std.ArrayList(macho.data_in_code_entry).init(allocator), - }; +pub const empty = TextBlock{ + .local_sym_index = 0, + .code = undefined, + .size = 0, + .alignment = 0, + .prev = null, + .next = null, + .dbg_info_prev = null, + .dbg_info_next = null, + .dbg_info_off = undefined, + .dbg_info_len = undefined, +}; + +pub fn deinit(self: *TextBlock, allocator: *Allocator) void { + self.dices.deinit(allocator); + self.bindings.deinit(allocator); + self.rebases.deinit(allocator); + self.relocs.deinit(allocator); + self.allocator.free(self.code); + self.contained.deinit(allocator); + self.aliases.deinit(allocator); } -pub fn deinit(self: *TextBlock) void { - self.aliases.deinit(); - self.references.deinit(); - if (self.contained) |contained| { - self.allocator.free(contained); +/// Returns how much room there is to grow in virtual address space. +/// File offset relocation happens transparently, so it is not included in +/// this calculation. +pub fn capacity(self: TextBlock, macho_file: MachO) u64 { + const self_sym = macho_file.locals.items[self.local_sym_index]; + if (self.next) |next| { + const next_sym = macho_file.locals.items[next.local_sym_index]; + return next_sym.n_value - self_sym.n_value; + } else { + // We are the last block. + // The capacity is limited only by virtual address space. 
+ return std.math.maxInt(u64) - self_sym.n_value; } - self.allocator.free(self.code); - self.relocs.deinit(); - self.rebases.deinit(); - self.bindings.deinit(); - self.dices.deinit(); +} + +pub fn freeListEligible(self: TextBlock, macho_file: MachO) bool { + // No need to keep a free list node for the last block. + const next = self.next orelse return false; + const self_sym = macho_file.locals.items[self.local_sym_index]; + const next_sym = macho_file.locals.items[next.local_sym_index]; + const cap = next_sym.n_value - self_sym.n_value; + const ideal_cap = MachO.padToIdeal(self.size); + if (cap <= ideal_cap) return false; + const surplus = cap - ideal_cap; + return surplus >= MachO.min_text_capacity; } const RelocContext = struct { base_addr: u64 = 0, - zld: *Zld, + macho_file: *MachO, }; fn initRelocFromObject(rel: macho.relocation_info, object: *Object, ctx: RelocContext) !Relocation { @@ -548,19 +609,19 @@ fn initRelocFromObject(rel: macho.relocation_info, object: *Object, ctx: RelocCo const local_sym_index = object.sections_as_symbols.get(sect_id) orelse blk: { const seg = object.load_commands.items[object.segment_cmd_index.?].Segment; const sect = seg.sections.items[sect_id]; - const match = (try ctx.zld.getMatchingSection(sect)) orelse unreachable; - const local_sym_index = @intCast(u32, ctx.zld.locals.items.len); - const sym_name = try std.fmt.allocPrint(ctx.zld.allocator, "l_{s}_{s}_{s}", .{ + const match = (try ctx.macho_file.getMatchingSection(sect)) orelse unreachable; + const local_sym_index = @intCast(u32, ctx.macho_file.locals.items.len); + const sym_name = try std.fmt.allocPrint(ctx.macho_file.base.allocator, "l_{s}_{s}_{s}", .{ object.name.?, commands.segmentName(sect), commands.sectionName(sect), }); - defer ctx.zld.allocator.free(sym_name); + defer ctx.macho_file.base.allocator.free(sym_name); - try ctx.zld.locals.append(ctx.zld.allocator, .{ - .n_strx = try ctx.zld.makeString(sym_name), + try 
ctx.macho_file.locals.append(ctx.macho_file.base.allocator, .{ + .n_strx = try ctx.macho_file.makeString(sym_name), .n_type = macho.N_SECT, - .n_sect = ctx.zld.sectionId(match), + .n_sect = ctx.macho_file.sectionId(match), .n_desc = 0, .n_value = sect.addr, }); @@ -574,12 +635,12 @@ fn initRelocFromObject(rel: macho.relocation_info, object: *Object, ctx: RelocCo const sym = object.symtab.items[rel.r_symbolnum]; const sym_name = object.getString(sym.n_strx); - if (Zld.symbolIsSect(sym) and !Zld.symbolIsExt(sym)) { + if (MachO.symbolIsSect(sym) and !MachO.symbolIsExt(sym)) { const where_index = object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable; parsed_rel.where = .local; parsed_rel.where_index = where_index; } else { - const resolv = ctx.zld.symbol_resolver.get(sym_name) orelse unreachable; + const resolv = ctx.macho_file.symbol_resolver.get(sym_name) orelse unreachable; switch (resolv.where) { .global => { parsed_rel.where = .local; @@ -599,6 +660,7 @@ fn initRelocFromObject(rel: macho.relocation_info, object: *Object, ctx: RelocCo pub fn parseRelocsFromObject( self: *TextBlock, + allocator: *Allocator, relocs: []macho.relocation_info, object: *Object, ctx: RelocContext, @@ -638,11 +700,11 @@ pub fn parseRelocsFromObject( const sym = object.symtab.items[rel.r_symbolnum]; const sym_name = object.getString(sym.n_strx); - if (Zld.symbolIsSect(sym) and !Zld.symbolIsExt(sym)) { + if (MachO.symbolIsSect(sym) and !MachO.symbolIsExt(sym)) { const where_index = object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable; subtractor = where_index; } else { - const resolv = ctx.zld.symbol_resolver.get(sym_name) orelse unreachable; + const resolv = ctx.macho_file.symbol_resolver.get(sym_name) orelse unreachable; assert(resolv.where == .global); subtractor = resolv.local_sym_index; } @@ -732,11 +794,7 @@ pub fn parseRelocsFromObject( else => unreachable, } - try self.relocs.append(parsed_rel); - - if (parsed_rel.where == .local) { - try 
self.references.put(parsed_rel.where_index, {}); - } + try self.relocs.append(allocator, parsed_rel); const is_via_got = switch (parsed_rel.payload) { .pointer_to_got => true, @@ -747,28 +805,30 @@ pub fn parseRelocsFromObject( }; if (is_via_got) blk: { - const key = Zld.GotIndirectionKey{ + const key = MachO.GotIndirectionKey{ .where = switch (parsed_rel.where) { .local => .local, .import => .import, }, .where_index = parsed_rel.where_index, }; - if (ctx.zld.got_entries.contains(key)) break :blk; + if (ctx.macho_file.got_entries_map.contains(key)) break :blk; - try ctx.zld.got_entries.putNoClobber(ctx.zld.allocator, key, {}); + const got_index = @intCast(u32, ctx.macho_file.got_entries.items.len); + try ctx.macho_file.got_entries.append(ctx.macho_file.base.allocator, key); + try ctx.macho_file.got_entries_map.putNoClobber(ctx.macho_file.base.allocator, key, got_index); } else if (parsed_rel.payload == .unsigned) { switch (parsed_rel.where) { .import => { - try self.bindings.append(.{ + try self.bindings.append(allocator, .{ .local_sym_index = parsed_rel.where_index, .offset = parsed_rel.offset, }); }, .local => { - const source_sym = ctx.zld.locals.items[self.local_sym_index]; - const match = ctx.zld.unpackSectionId(source_sym.n_sect); - const seg = ctx.zld.load_commands.items[match.seg].Segment; + const source_sym = ctx.macho_file.locals.items[self.local_sym_index]; + const match = ctx.macho_file.unpackSectionId(source_sym.n_sect); + const seg = ctx.macho_file.load_commands.items[match.seg].Segment; const sect = seg.sections.items[match.sect]; const sect_type = commands.sectionType(sect); @@ -778,12 +838,12 @@ pub fn parseRelocsFromObject( // TODO actually, a check similar to what dyld is doing, that is, verifying // that the segment is writable should be enough here. 
const is_right_segment = blk: { - if (ctx.zld.data_segment_cmd_index) |idx| { + if (ctx.macho_file.data_segment_cmd_index) |idx| { if (match.seg == idx) { break :blk true; } } - if (ctx.zld.data_const_segment_cmd_index) |idx| { + if (ctx.macho_file.data_const_segment_cmd_index) |idx| { if (match.seg == idx) { break :blk true; } @@ -804,15 +864,17 @@ pub fn parseRelocsFromObject( }; if (should_rebase) { - try self.rebases.append(parsed_rel.offset); + try self.rebases.append(allocator, parsed_rel.offset); } }, } } else if (parsed_rel.payload == .branch) blk: { if (parsed_rel.where != .import) break :blk; - if (ctx.zld.stubs.contains(parsed_rel.where_index)) break :blk; + if (ctx.macho_file.stubs_map.contains(parsed_rel.where_index)) break :blk; - try ctx.zld.stubs.putNoClobber(ctx.zld.allocator, parsed_rel.where_index, {}); + const stubs_index = @intCast(u32, ctx.macho_file.stubs.items.len); + try ctx.macho_file.stubs.append(ctx.macho_file.base.allocator, parsed_rel.where_index); + try ctx.macho_file.stubs_map.putNoClobber(ctx.macho_file.base.allocator, parsed_rel.where_index, stubs_index); } } } @@ -852,7 +914,7 @@ fn parseUnsigned( if (rel.r_extern == 0) { assert(out.where == .local); - const target_sym = ctx.zld.locals.items[out.where_index]; + const target_sym = ctx.macho_file.locals.items[out.where_index]; addend -= @intCast(i64, target_sym.n_value); } @@ -872,7 +934,7 @@ fn parseBranch(self: TextBlock, rel: macho.relocation_info, out: *Relocation, ct out.payload = .{ .branch = .{ - .arch = ctx.zld.target.?.cpu.arch, + .arch = ctx.macho_file.base.options.target.cpu.arch, }, }; } @@ -948,10 +1010,10 @@ fn parseSigned(self: TextBlock, rel: macho.relocation_info, out: *Relocation, ct var addend: i64 = mem.readIntLittle(i32, self.code[out.offset..][0..4]) + correction; if (rel.r_extern == 0) { - const source_sym = ctx.zld.locals.items[self.local_sym_index]; + const source_sym = ctx.macho_file.locals.items[self.local_sym_index]; const target_sym = switch (out.where) 
{ - .local => ctx.zld.locals.items[out.where_index], - .import => ctx.zld.imports.items[out.where_index], + .local => ctx.macho_file.locals.items[out.where_index], + .import => ctx.macho_file.imports.items[out.where_index], }; addend = @intCast(i64, source_sym.n_value + out.offset + 4) + addend - @intCast(i64, target_sym.n_value); } @@ -986,12 +1048,12 @@ fn parseLoad(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void }; } -pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { +pub fn resolveRelocs(self: *TextBlock, macho_file: *MachO) !void { for (self.relocs.items) |rel| { log.debug("relocating {}", .{rel}); const source_addr = blk: { - const sym = zld.locals.items[self.local_sym_index]; + const sym = macho_file.locals.items[self.local_sym_index]; break :blk sym.n_value + rel.offset; }; const target_addr = blk: { @@ -1004,9 +1066,9 @@ pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { }; if (is_via_got) { - const dc_seg = zld.load_commands.items[zld.data_const_segment_cmd_index.?].Segment; - const got = dc_seg.sections.items[zld.got_section_index.?]; - const got_index = zld.got_entries.getIndex(.{ + const dc_seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment; + const got = dc_seg.sections.items[macho_file.got_section_index.?]; + const got_index = macho_file.got_entries_map.get(.{ .where = switch (rel.where) { .local => .local, .import => .import, @@ -1014,10 +1076,10 @@ pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { .where_index = rel.where_index, }) orelse { const sym = switch (rel.where) { - .local => zld.locals.items[rel.where_index], - .import => zld.imports.items[rel.where_index], + .local => macho_file.locals.items[rel.where_index], + .import => macho_file.imports.items[rel.where_index], }; - log.err("expected GOT entry for symbol '{s}'", .{zld.getString(sym.n_strx)}); + log.err("expected GOT entry for symbol '{s}'", .{macho_file.getString(sym.n_strx)}); log.err(" this is an 
internal linker error", .{}); return error.FailedToResolveRelocationTarget; }; @@ -1026,11 +1088,11 @@ pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { switch (rel.where) { .local => { - const sym = zld.locals.items[rel.where_index]; + const sym = macho_file.locals.items[rel.where_index]; const is_tlv = is_tlv: { - const source_sym = zld.locals.items[self.local_sym_index]; - const match = zld.unpackSectionId(source_sym.n_sect); - const seg = zld.load_commands.items[match.seg].Segment; + const source_sym = macho_file.locals.items[self.local_sym_index]; + const match = macho_file.unpackSectionId(source_sym.n_sect); + const seg = macho_file.load_commands.items[match.seg].Segment; const sect = seg.sections.items[match.sect]; break :is_tlv commands.sectionType(sect) == macho.S_THREAD_LOCAL_VARIABLES; }; @@ -1040,11 +1102,11 @@ pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { // defined TLV template init section in the following order: // * wrt to __thread_data if defined, then // * wrt to __thread_bss - const seg = zld.load_commands.items[zld.data_segment_cmd_index.?].Segment; + const seg = macho_file.load_commands.items[macho_file.data_segment_cmd_index.?].Segment; const base_address = inner: { - if (zld.tlv_data_section_index) |i| { + if (macho_file.tlv_data_section_index) |i| { break :inner seg.sections.items[i].addr; - } else if (zld.tlv_bss_section_index) |i| { + } else if (macho_file.tlv_bss_section_index) |i| { break :inner seg.sections.items[i].addr; } else { log.err("threadlocal variables present but no initializer sections found", .{}); @@ -1059,12 +1121,12 @@ pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { break :blk sym.n_value; }, .import => { - const stubs_index = zld.stubs.getIndex(rel.where_index) orelse { + const stubs_index = macho_file.stubs_map.get(rel.where_index) orelse { // TODO verify in TextBlock that the symbol is indeed dynamically bound. break :blk 0; // Dynamically bound by dyld. 
}; - const segment = zld.load_commands.items[zld.text_segment_cmd_index.?].Segment; - const stubs = segment.sections.items[zld.stubs_section_index.?]; + const segment = macho_file.load_commands.items[macho_file.text_segment_cmd_index.?].Segment; + const stubs = segment.sections.items[macho_file.stubs_section_index.?]; break :blk stubs.addr + stubs_index * stubs.reserved2; }, } @@ -1078,14 +1140,14 @@ pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { .offset = rel.offset, .source_addr = source_addr, .target_addr = target_addr, - .zld = zld, + .macho_file = macho_file, }); } } -pub fn print_this(self: *const TextBlock, zld: *Zld) void { +pub fn print_this(self: *const TextBlock, macho_file: MachO) void { log.warn("TextBlock", .{}); - log.warn(" {}: {}", .{ self.local_sym_index, zld.locals.items[self.local_sym_index] }); + log.warn(" {}: {}", .{ self.local_sym_index, macho_file.locals.items[self.local_sym_index] }); if (self.stab) |stab| { log.warn(" stab: {}", .{stab}); } @@ -1125,11 +1187,11 @@ pub fn print_this(self: *const TextBlock, zld: *Zld) void { log.warn(" align = {}", .{self.alignment}); } -pub fn print(self: *const TextBlock, zld: *Zld) void { +pub fn print(self: *const TextBlock, macho_file: MachO) void { if (self.prev) |prev| { - prev.print(zld); + prev.print(macho_file); } - self.print_this(zld); + self.print_this(macho_file); } const RelocIterator = struct { @@ -1159,8 +1221,8 @@ fn filterRelocs(relocs: []macho.relocation_info, start_addr: u64, end_addr: u64) } }; - const start = Zld.findFirst(macho.relocation_info, relocs, 0, Predicate{ .addr = end_addr }); - const end = Zld.findFirst(macho.relocation_info, relocs, start, Predicate{ .addr = start_addr }); + const start = MachO.findFirst(macho.relocation_info, relocs, 0, Predicate{ .addr = end_addr }); + const end = MachO.findFirst(macho.relocation_info, relocs, start, Predicate{ .addr = start_addr }); return relocs[start..end]; } diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig 
deleted file mode 100644 index 41edbb5988..0000000000 --- a/src/link/MachO/Zld.zig +++ /dev/null @@ -1,3062 +0,0 @@ -const Zld = @This(); - -const std = @import("std"); -const assert = std.debug.assert; -const leb = std.leb; -const mem = std.mem; -const meta = std.meta; -const fs = std.fs; -const macho = std.macho; -const math = std.math; -const log = std.log.scoped(.zld); -const aarch64 = @import("../../codegen/aarch64.zig"); - -const Allocator = mem.Allocator; -const Archive = @import("Archive.zig"); -const CodeSignature = @import("CodeSignature.zig"); -const Dylib = @import("Dylib.zig"); -const Object = @import("Object.zig"); -const TextBlock = @import("TextBlock.zig"); -const Trie = @import("Trie.zig"); - -usingnamespace @import("commands.zig"); -usingnamespace @import("bind.zig"); - -allocator: *Allocator, - -target: ?std.Target = null, -page_size: ?u16 = null, -file: ?fs.File = null, -output: ?Output = null, - -// TODO these args will become obselete once Zld is coalesced with incremental -// linker. 
-stack_size: u64 = 0, - -objects: std.ArrayListUnmanaged(*Object) = .{}, -archives: std.ArrayListUnmanaged(*Archive) = .{}, -dylibs: std.ArrayListUnmanaged(*Dylib) = .{}, - -next_dylib_ordinal: u16 = 1, - -load_commands: std.ArrayListUnmanaged(LoadCommand) = .{}, - -pagezero_segment_cmd_index: ?u16 = null, -text_segment_cmd_index: ?u16 = null, -data_const_segment_cmd_index: ?u16 = null, -data_segment_cmd_index: ?u16 = null, -linkedit_segment_cmd_index: ?u16 = null, -dyld_info_cmd_index: ?u16 = null, -symtab_cmd_index: ?u16 = null, -dysymtab_cmd_index: ?u16 = null, -dylinker_cmd_index: ?u16 = null, -data_in_code_cmd_index: ?u16 = null, -function_starts_cmd_index: ?u16 = null, -main_cmd_index: ?u16 = null, -dylib_id_cmd_index: ?u16 = null, -version_min_cmd_index: ?u16 = null, -source_version_cmd_index: ?u16 = null, -uuid_cmd_index: ?u16 = null, -code_signature_cmd_index: ?u16 = null, - -// __TEXT segment sections -text_section_index: ?u16 = null, -stubs_section_index: ?u16 = null, -stub_helper_section_index: ?u16 = null, -text_const_section_index: ?u16 = null, -cstring_section_index: ?u16 = null, -ustring_section_index: ?u16 = null, -gcc_except_tab_section_index: ?u16 = null, -unwind_info_section_index: ?u16 = null, -eh_frame_section_index: ?u16 = null, - -objc_methlist_section_index: ?u16 = null, -objc_methname_section_index: ?u16 = null, -objc_methtype_section_index: ?u16 = null, -objc_classname_section_index: ?u16 = null, - -// __DATA_CONST segment sections -got_section_index: ?u16 = null, -mod_init_func_section_index: ?u16 = null, -mod_term_func_section_index: ?u16 = null, -data_const_section_index: ?u16 = null, - -objc_cfstring_section_index: ?u16 = null, -objc_classlist_section_index: ?u16 = null, -objc_imageinfo_section_index: ?u16 = null, - -// __DATA segment sections -tlv_section_index: ?u16 = null, -tlv_data_section_index: ?u16 = null, -tlv_bss_section_index: ?u16 = null, -la_symbol_ptr_section_index: ?u16 = null, -data_section_index: ?u16 = null, 
-bss_section_index: ?u16 = null, -common_section_index: ?u16 = null, - -objc_const_section_index: ?u16 = null, -objc_selrefs_section_index: ?u16 = null, -objc_classrefs_section_index: ?u16 = null, -objc_data_section_index: ?u16 = null, - -locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -globals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -imports: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -undefs: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -tentatives: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -symbol_resolver: std.StringArrayHashMapUnmanaged(SymbolWithLoc) = .{}, - -strtab: std.ArrayListUnmanaged(u8) = .{}, - -stubs: std.AutoArrayHashMapUnmanaged(u32, void) = .{}, -got_entries: std.AutoArrayHashMapUnmanaged(GotIndirectionKey, void) = .{}, - -stub_helper_stubs_start_off: ?u64 = null, - -blocks: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}, - -has_dices: bool = false, -has_stabs: bool = false, - -const SymbolWithLoc = struct { - // Table where the symbol can be found. 
- where: enum { - global, - import, - undef, - tentative, - }, - where_index: u32, - local_sym_index: u32 = 0, - file: u16 = 0, -}; - -pub const GotIndirectionKey = struct { - where: enum { - local, - import, - }, - where_index: u32, -}; - -pub const Output = struct { - tag: enum { exe, dylib }, - path: []const u8, - install_name: ?[]const u8 = null, -}; - -/// Default path to dyld -const DEFAULT_DYLD_PATH: [*:0]const u8 = "/usr/lib/dyld"; - -pub fn init(allocator: *Allocator) !Zld { - return Zld{ .allocator = allocator }; -} - -pub fn deinit(self: *Zld) void { - self.stubs.deinit(self.allocator); - self.got_entries.deinit(self.allocator); - - for (self.load_commands.items) |*lc| { - lc.deinit(self.allocator); - } - self.load_commands.deinit(self.allocator); - - for (self.objects.items) |object| { - object.deinit(); - self.allocator.destroy(object); - } - self.objects.deinit(self.allocator); - - for (self.archives.items) |archive| { - archive.deinit(); - self.allocator.destroy(archive); - } - self.archives.deinit(self.allocator); - - for (self.dylibs.items) |dylib| { - dylib.deinit(); - self.allocator.destroy(dylib); - } - self.dylibs.deinit(self.allocator); - - self.locals.deinit(self.allocator); - self.globals.deinit(self.allocator); - self.imports.deinit(self.allocator); - self.undefs.deinit(self.allocator); - self.tentatives.deinit(self.allocator); - - for (self.symbol_resolver.keys()) |key| { - self.allocator.free(key); - } - self.symbol_resolver.deinit(self.allocator); - - self.strtab.deinit(self.allocator); - - // TODO dealloc all blocks - self.blocks.deinit(self.allocator); -} - -pub fn closeFiles(self: Zld) void { - for (self.objects.items) |object| { - object.closeFile(); - } - for (self.archives.items) |archive| { - archive.closeFile(); - } - if (self.file) |f| f.close(); -} - -const LinkArgs = struct { - syslibroot: ?[]const u8, - libs: []const []const u8, - rpaths: []const []const u8, -}; - -pub fn link(self: *Zld, files: []const []const u8, output: 
Output, args: LinkArgs) !void { - if (files.len == 0) return error.NoInputFiles; - if (output.path.len == 0) return error.EmptyOutputPath; - - self.page_size = switch (self.target.?.cpu.arch) { - .aarch64 => 0x4000, - .x86_64 => 0x1000, - else => unreachable, - }; - self.output = output; - self.file = try fs.cwd().createFile(self.output.?.path, .{ - .truncate = true, - .read = true, - .mode = if (std.Target.current.os.tag == .windows) 0 else 0o777, - }); - - try self.populateMetadata(); - try self.parseInputFiles(files, args.syslibroot); - try self.parseLibs(args.libs, args.syslibroot); - try self.resolveSymbols(); - try self.parseTextBlocks(); - - { - // Add dyld_stub_binder as the final GOT entry. - const resolv = self.symbol_resolver.get("dyld_stub_binder") orelse unreachable; - try self.got_entries.putNoClobber(self.allocator, .{ - .where = .import, - .where_index = resolv.where_index, - }, {}); - } - - try self.sortSections(); - try self.addRpaths(args.rpaths); - try self.addDataInCodeLC(); - try self.addCodeSignatureLC(); - try self.allocateTextSegment(); - try self.allocateDataConstSegment(); - try self.allocateDataSegment(); - self.allocateLinkeditSegment(); - try self.allocateTextBlocks(); - - // log.warn("locals", .{}); - // for (self.locals.items) |sym, id| { - // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); - // } - - // log.warn("globals", .{}); - // for (self.globals.items) |sym, id| { - // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); - // } - - // log.warn("tentatives", .{}); - // for (self.tentatives.items) |sym, id| { - // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); - // } - - // log.warn("undefines", .{}); - // for (self.undefs.items) |sym, id| { - // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); - // } - - // log.warn("imports", .{}); - // for (self.imports.items) |sym, id| { - // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); - 
// } - - // log.warn("symbol resolver", .{}); - // for (self.symbol_resolver.keys()) |key| { - // log.warn(" {s} => {}", .{ key, self.symbol_resolver.get(key).? }); - // } - - // log.warn("mappings", .{}); - // for (self.objects.items) |object, id| { - // const object_id = @intCast(u16, id); - // log.warn(" in object {s}", .{object.name.?}); - // for (object.symtab.items) |sym, sym_id| { - // if (object.symbol_mapping.get(@intCast(u32, sym_id))) |local_id| { - // log.warn(" | {d} => {d}", .{ sym_id, local_id }); - // } else { - // log.warn(" | {d} no local mapping for {s}", .{ sym_id, object.getString(sym.n_strx) }); - // } - // } - // } - - // var it = self.blocks.iterator(); - // while (it.next()) |entry| { - // const seg = self.load_commands.items[entry.key_ptr.seg].Segment; - // const sect = seg.sections.items[entry.key_ptr.sect]; - - // log.warn("\n\n{s},{s} contents:", .{ segmentName(sect), sectionName(sect) }); - // log.warn(" {}", .{sect}); - // entry.value_ptr.*.print(self); - // } - - try self.flush(); -} - -fn parseInputFiles(self: *Zld, files: []const []const u8, syslibroot: ?[]const u8) !void { - for (files) |file_name| { - const full_path = full_path: { - var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; - const path = try std.fs.realpath(file_name, &buffer); - break :full_path try self.allocator.dupe(u8, path); - }; - - if (try Object.createAndParseFromPath(self.allocator, self.target.?.cpu.arch, full_path)) |object| { - try self.objects.append(self.allocator, object); - continue; - } - - if (try Archive.createAndParseFromPath(self.allocator, self.target.?.cpu.arch, full_path)) |archive| { - try self.archives.append(self.allocator, archive); - continue; - } - - if (try Dylib.createAndParseFromPath( - self.allocator, - self.target.?.cpu.arch, - full_path, - .{ .syslibroot = syslibroot }, - )) |dylibs| { - defer self.allocator.free(dylibs); - try self.dylibs.appendSlice(self.allocator, dylibs); - continue; - } - - log.warn("unknown filetype for 
positional input file: '{s}'", .{file_name}); - } -} - -fn parseLibs(self: *Zld, libs: []const []const u8, syslibroot: ?[]const u8) !void { - for (libs) |lib| { - if (try Dylib.createAndParseFromPath( - self.allocator, - self.target.?.cpu.arch, - lib, - .{ .syslibroot = syslibroot }, - )) |dylibs| { - defer self.allocator.free(dylibs); - try self.dylibs.appendSlice(self.allocator, dylibs); - continue; - } - - if (try Archive.createAndParseFromPath(self.allocator, self.target.?.cpu.arch, lib)) |archive| { - try self.archives.append(self.allocator, archive); - continue; - } - - log.warn("unknown filetype for a library: '{s}'", .{lib}); - } -} - -pub const MatchingSection = struct { - seg: u16, - sect: u16, -}; - -pub fn getMatchingSection(self: *Zld, sect: macho.section_64) !?MatchingSection { - const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const segname = segmentName(sect); - const sectname = sectionName(sect); - - const res: ?MatchingSection = blk: { - switch (sectionType(sect)) { - macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => { - if (self.text_const_section_index == null) { - self.text_const_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__const", .{}); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_const_section_index.?, - }; - }, - macho.S_CSTRING_LITERALS => { - if (mem.eql(u8, sectname, "__objc_methname")) { - // TODO it seems the common values within the sections in objects are deduplicated/merged - // on merging the sections' contents. 
- if (self.objc_methname_section_index == null) { - self.objc_methname_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__objc_methname", .{ - .flags = macho.S_CSTRING_LITERALS, - }); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_methname_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_methtype")) { - if (self.objc_methtype_section_index == null) { - self.objc_methtype_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__objc_methtype", .{ - .flags = macho.S_CSTRING_LITERALS, - }); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_methtype_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_classname")) { - if (self.objc_classname_section_index == null) { - self.objc_classname_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__objc_classname", .{}); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_classname_section_index.?, - }; - } - - if (self.cstring_section_index == null) { - self.cstring_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__cstring", .{ - .flags = macho.S_CSTRING_LITERALS, - }); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.cstring_section_index.?, - }; - }, - macho.S_LITERAL_POINTERS => { - if (mem.eql(u8, segname, "__DATA") and mem.eql(u8, sectname, "__objc_selrefs")) { - if (self.objc_selrefs_section_index == null) { - self.objc_selrefs_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__objc_selrefs", .{ - .flags = macho.S_LITERAL_POINTERS, - }); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.objc_selrefs_section_index.?, - }; - } - - // TODO investigate - break :blk null; - }, - 
macho.S_MOD_INIT_FUNC_POINTERS => { - if (self.mod_init_func_section_index == null) { - self.mod_init_func_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__mod_init_func", .{ - .flags = macho.S_MOD_INIT_FUNC_POINTERS, - }); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.mod_init_func_section_index.?, - }; - }, - macho.S_MOD_TERM_FUNC_POINTERS => { - if (self.mod_term_func_section_index == null) { - self.mod_term_func_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__mod_term_func", .{ - .flags = macho.S_MOD_TERM_FUNC_POINTERS, - }); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.mod_term_func_section_index.?, - }; - }, - macho.S_ZEROFILL => { - if (mem.eql(u8, sectname, "__common")) { - if (self.common_section_index == null) { - self.common_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__common", .{ - .flags = macho.S_ZEROFILL, - }); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.common_section_index.?, - }; - } else { - if (self.bss_section_index == null) { - self.bss_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__bss", .{ - .flags = macho.S_ZEROFILL, - }); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.bss_section_index.?, - }; - } - }, - macho.S_THREAD_LOCAL_VARIABLES => { - if (self.tlv_section_index == null) { - self.tlv_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__thread_vars", .{ - .flags = macho.S_THREAD_LOCAL_VARIABLES, - }); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.tlv_section_index.?, - }; - }, - macho.S_THREAD_LOCAL_REGULAR => { - if (self.tlv_data_section_index == null) { - 
self.tlv_data_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__thread_data", .{ - .flags = macho.S_THREAD_LOCAL_REGULAR, - }); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.tlv_data_section_index.?, - }; - }, - macho.S_THREAD_LOCAL_ZEROFILL => { - if (self.tlv_bss_section_index == null) { - self.tlv_bss_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__thread_bss", .{ - .flags = macho.S_THREAD_LOCAL_ZEROFILL, - }); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.tlv_bss_section_index.?, - }; - }, - macho.S_COALESCED => { - if (mem.eql(u8, "__TEXT", segname) and mem.eql(u8, "__eh_frame", sectname)) { - // TODO I believe __eh_frame is currently part of __unwind_info section - // in the latest ld64 output. - if (self.eh_frame_section_index == null) { - self.eh_frame_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__eh_frame", .{}); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.eh_frame_section_index.?, - }; - } - - // TODO audit this: is this the right mapping? 
- if (self.data_const_section_index == null) { - self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__const", .{}); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; - }, - macho.S_REGULAR => { - if (sectionIsCode(sect)) { - if (self.text_section_index == null) { - self.text_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__text", .{ - .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - }); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_section_index.?, - }; - } - if (sectionIsDebug(sect)) { - // TODO debug attributes - if (mem.eql(u8, "__LD", segname) and mem.eql(u8, "__compact_unwind", sectname)) { - log.debug("TODO compact unwind section: type 0x{x}, name '{s},{s}'", .{ - sect.flags, segname, sectname, - }); - } - break :blk null; - } - - if (mem.eql(u8, segname, "__TEXT")) { - if (mem.eql(u8, sectname, "__ustring")) { - if (self.ustring_section_index == null) { - self.ustring_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__ustring", .{}); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.ustring_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__gcc_except_tab")) { - if (self.gcc_except_tab_section_index == null) { - self.gcc_except_tab_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__gcc_except_tab", .{}); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.gcc_except_tab_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_methlist")) { - if (self.objc_methlist_section_index == null) { - self.objc_methlist_section_index = @intCast(u16, text_seg.sections.items.len); - try 
text_seg.addSection(self.allocator, "__objc_methlist", .{}); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_methlist_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__rodata") or - mem.eql(u8, sectname, "__typelink") or - mem.eql(u8, sectname, "__itablink") or - mem.eql(u8, sectname, "__gosymtab") or - mem.eql(u8, sectname, "__gopclntab")) - { - if (self.data_const_section_index == null) { - self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__const", .{}); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; - } else { - if (self.text_const_section_index == null) { - self.text_const_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.allocator, "__const", .{}); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_const_section_index.?, - }; - } - } - - if (mem.eql(u8, segname, "__DATA_CONST")) { - if (self.data_const_section_index == null) { - self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__const", .{}); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; - } - - if (mem.eql(u8, segname, "__DATA")) { - if (mem.eql(u8, sectname, "__const")) { - if (self.data_const_section_index == null) { - self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__const", .{}); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__cfstring")) { - if (self.objc_cfstring_section_index == null) { - self.objc_cfstring_section_index = @intCast(u16, data_const_seg.sections.items.len); - try 
data_const_seg.addSection(self.allocator, "__cfstring", .{}); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.objc_cfstring_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_classlist")) { - if (self.objc_classlist_section_index == null) { - self.objc_classlist_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__objc_classlist", .{}); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.objc_classlist_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_imageinfo")) { - if (self.objc_imageinfo_section_index == null) { - self.objc_imageinfo_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__objc_imageinfo", .{}); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.objc_imageinfo_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_const")) { - if (self.objc_const_section_index == null) { - self.objc_const_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__objc_const", .{}); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.objc_const_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_classrefs")) { - if (self.objc_classrefs_section_index == null) { - self.objc_classrefs_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__objc_classrefs", .{}); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.objc_classrefs_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_data")) { - if (self.objc_data_section_index == null) { - self.objc_data_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__objc_data", .{}); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = 
self.objc_data_section_index.?, - }; - } else { - if (self.data_section_index == null) { - self.data_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__data", .{}); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.data_section_index.?, - }; - } - } - - if (mem.eql(u8, "__LLVM", segname) and mem.eql(u8, "__asm", sectname)) { - log.debug("TODO LLVM asm section: type 0x{x}, name '{s},{s}'", .{ - sect.flags, segname, sectname, - }); - } - - break :blk null; - }, - else => break :blk null, - } - }; - - return res; -} - -fn sortSections(self: *Zld) !void { - var text_index_mapping = std.AutoHashMap(u16, u16).init(self.allocator); - defer text_index_mapping.deinit(); - var data_const_index_mapping = std.AutoHashMap(u16, u16).init(self.allocator); - defer data_const_index_mapping.deinit(); - var data_index_mapping = std.AutoHashMap(u16, u16).init(self.allocator); - defer data_index_mapping.deinit(); - - { - // __TEXT segment - const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - var sections = seg.sections.toOwnedSlice(self.allocator); - defer self.allocator.free(sections); - try seg.sections.ensureCapacity(self.allocator, sections.len); - - const indices = &[_]*?u16{ - &self.text_section_index, - &self.stubs_section_index, - &self.stub_helper_section_index, - &self.gcc_except_tab_section_index, - &self.cstring_section_index, - &self.ustring_section_index, - &self.text_const_section_index, - &self.objc_methname_section_index, - &self.objc_methtype_section_index, - &self.objc_classname_section_index, - &self.eh_frame_section_index, - }; - for (indices) |maybe_index| { - const new_index: u16 = if (maybe_index.*) |index| blk: { - const idx = @intCast(u16, seg.sections.items.len); - seg.sections.appendAssumeCapacity(sections[index]); - try text_index_mapping.putNoClobber(index, idx); - break :blk idx; - } else continue; - maybe_index.* = new_index; - } - } - - { - // 
__DATA_CONST segment - const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - var sections = seg.sections.toOwnedSlice(self.allocator); - defer self.allocator.free(sections); - try seg.sections.ensureCapacity(self.allocator, sections.len); - - const indices = &[_]*?u16{ - &self.got_section_index, - &self.mod_init_func_section_index, - &self.mod_term_func_section_index, - &self.data_const_section_index, - &self.objc_cfstring_section_index, - &self.objc_classlist_section_index, - &self.objc_imageinfo_section_index, - }; - for (indices) |maybe_index| { - const new_index: u16 = if (maybe_index.*) |index| blk: { - const idx = @intCast(u16, seg.sections.items.len); - seg.sections.appendAssumeCapacity(sections[index]); - try data_const_index_mapping.putNoClobber(index, idx); - break :blk idx; - } else continue; - maybe_index.* = new_index; - } - } - - { - // __DATA segment - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - var sections = seg.sections.toOwnedSlice(self.allocator); - defer self.allocator.free(sections); - try seg.sections.ensureCapacity(self.allocator, sections.len); - - // __DATA segment - const indices = &[_]*?u16{ - &self.la_symbol_ptr_section_index, - &self.objc_const_section_index, - &self.objc_selrefs_section_index, - &self.objc_classrefs_section_index, - &self.objc_data_section_index, - &self.data_section_index, - &self.tlv_section_index, - &self.tlv_data_section_index, - &self.tlv_bss_section_index, - &self.bss_section_index, - &self.common_section_index, - }; - for (indices) |maybe_index| { - const new_index: u16 = if (maybe_index.*) |index| blk: { - const idx = @intCast(u16, seg.sections.items.len); - seg.sections.appendAssumeCapacity(sections[index]); - try data_index_mapping.putNoClobber(index, idx); - break :blk idx; - } else continue; - maybe_index.* = new_index; - } - } - - { - var transient: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}; - try 
transient.ensureCapacity(self.allocator, self.blocks.count()); - - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const old = entry.key_ptr.*; - const sect = if (old.seg == self.text_segment_cmd_index.?) - text_index_mapping.get(old.sect).? - else if (old.seg == self.data_const_segment_cmd_index.?) - data_const_index_mapping.get(old.sect).? - else - data_index_mapping.get(old.sect).?; - transient.putAssumeCapacityNoClobber(.{ - .seg = old.seg, - .sect = sect, - }, entry.value_ptr.*); - } - - self.blocks.clearAndFree(self.allocator); - self.blocks.deinit(self.allocator); - self.blocks = transient; - } -} - -fn allocateTextSegment(self: *Zld) !void { - const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const nstubs = @intCast(u32, self.stubs.count()); - - const base_vmaddr = self.load_commands.items[self.pagezero_segment_cmd_index.?].Segment.inner.vmsize; - seg.inner.fileoff = 0; - seg.inner.vmaddr = base_vmaddr; - - // Set stubs and stub_helper sizes - const stubs = &seg.sections.items[self.stubs_section_index.?]; - const stub_helper = &seg.sections.items[self.stub_helper_section_index.?]; - stubs.size += nstubs * stubs.reserved2; - - const stub_size: u4 = switch (self.target.?.cpu.arch) { - .x86_64 => 10, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, - }; - stub_helper.size += nstubs * stub_size; - - var sizeofcmds: u64 = 0; - for (self.load_commands.items) |lc| { - sizeofcmds += lc.cmdsize(); - } - - try self.allocateSegment(self.text_segment_cmd_index.?, @sizeOf(macho.mach_header_64) + sizeofcmds); - - // Shift all sections to the back to minimize jump size between __TEXT and __DATA segments. 
- var min_alignment: u32 = 0; - for (seg.sections.items) |sect| { - const alignment = try math.powi(u32, 2, sect.@"align"); - min_alignment = math.max(min_alignment, alignment); - } - - assert(min_alignment > 0); - const last_sect_idx = seg.sections.items.len - 1; - const last_sect = seg.sections.items[last_sect_idx]; - const shift: u32 = blk: { - const diff = seg.inner.filesize - last_sect.offset - last_sect.size; - const factor = @divTrunc(diff, min_alignment); - break :blk @intCast(u32, factor * min_alignment); - }; - - if (shift > 0) { - for (seg.sections.items) |*sect| { - sect.offset += shift; - sect.addr += shift; - } - } -} - -fn allocateDataConstSegment(self: *Zld) !void { - const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const nentries = @intCast(u32, self.got_entries.count()); - - const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - seg.inner.fileoff = text_seg.inner.fileoff + text_seg.inner.filesize; - seg.inner.vmaddr = text_seg.inner.vmaddr + text_seg.inner.vmsize; - - // Set got size - const got = &seg.sections.items[self.got_section_index.?]; - got.size += nentries * @sizeOf(u64); - - try self.allocateSegment(self.data_const_segment_cmd_index.?, 0); -} - -fn allocateDataSegment(self: *Zld) !void { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const nstubs = @intCast(u32, self.stubs.count()); - - const data_const_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - seg.inner.fileoff = data_const_seg.inner.fileoff + data_const_seg.inner.filesize; - seg.inner.vmaddr = data_const_seg.inner.vmaddr + data_const_seg.inner.vmsize; - - // Set la_symbol_ptr and data size - const la_symbol_ptr = &seg.sections.items[self.la_symbol_ptr_section_index.?]; - const data = &seg.sections.items[self.data_section_index.?]; - la_symbol_ptr.size += nstubs * @sizeOf(u64); - data.size += @sizeOf(u64); // We need at least 8bytes for address of 
dyld_stub_binder - - try self.allocateSegment(self.data_segment_cmd_index.?, 0); -} - -fn allocateLinkeditSegment(self: *Zld) void { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const data_seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - seg.inner.fileoff = data_seg.inner.fileoff + data_seg.inner.filesize; - seg.inner.vmaddr = data_seg.inner.vmaddr + data_seg.inner.vmsize; -} - -fn allocateSegment(self: *Zld, index: u16, offset: u64) !void { - const seg = &self.load_commands.items[index].Segment; - - // Allocate the sections according to their alignment at the beginning of the segment. - var start: u64 = offset; - for (seg.sections.items) |*sect| { - const alignment = try math.powi(u32, 2, sect.@"align"); - const start_aligned = mem.alignForwardGeneric(u64, start, alignment); - const end_aligned = mem.alignForwardGeneric(u64, start_aligned + sect.size, alignment); - sect.offset = @intCast(u32, seg.inner.fileoff + start_aligned); - sect.addr = seg.inner.vmaddr + start_aligned; - start = end_aligned; - } - - const seg_size_aligned = mem.alignForwardGeneric(u64, start, self.page_size.?); - seg.inner.filesize = seg_size_aligned; - seg.inner.vmsize = seg_size_aligned; -} - -fn allocateTextBlocks(self: *Zld) !void { - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var block: *TextBlock = entry.value_ptr.*; - - // Find the first block - while (block.prev) |prev| { - block = prev; - } - - const seg = self.load_commands.items[match.seg].Segment; - const sect = seg.sections.items[match.sect]; - - var base_addr: u64 = sect.addr; - const n_sect = self.sectionId(match); - - log.debug(" within section {s},{s}", .{ segmentName(sect), sectionName(sect) }); - log.debug(" {}", .{sect}); - - while (true) { - const block_alignment = try math.powi(u32, 2, block.alignment); - base_addr = mem.alignForwardGeneric(u64, base_addr, block_alignment); - - const sym = 
&self.locals.items[block.local_sym_index]; - sym.n_value = base_addr; - sym.n_sect = n_sect; - - log.debug(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ - self.getString(sym.n_strx), - base_addr, - base_addr + block.size, - block.size, - block.alignment, - }); - - // Update each alias (if any) - for (block.aliases.items) |index| { - const alias_sym = &self.locals.items[index]; - alias_sym.n_value = base_addr; - alias_sym.n_sect = n_sect; - } - - // Update each symbol contained within the TextBlock - if (block.contained) |contained| { - for (contained) |sym_at_off| { - const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; - contained_sym.n_value = base_addr + sym_at_off.offset; - contained_sym.n_sect = n_sect; - } - } - - base_addr += block.size; - - if (block.next) |next| { - block = next; - } else break; - } - } - - // Update globals - for (self.symbol_resolver.values()) |resolv| { - if (resolv.where != .global) continue; - - assert(resolv.local_sym_index != 0); - const local_sym = self.locals.items[resolv.local_sym_index]; - const sym = &self.globals.items[resolv.where_index]; - sym.n_value = local_sym.n_value; - sym.n_sect = local_sym.n_sect; - } -} - -fn writeTextBlocks(self: *Zld) !void { - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var block: *TextBlock = entry.value_ptr.*; - - while (block.prev) |prev| { - block = prev; - } - - const seg = self.load_commands.items[match.seg].Segment; - const sect = seg.sections.items[match.sect]; - const sect_type = sectionType(sect); - - log.debug(" for section {s},{s}", .{ segmentName(sect), sectionName(sect) }); - log.debug(" {}", .{sect}); - - var code = try self.allocator.alloc(u8, sect.size); - defer self.allocator.free(code); - - if (sect_type == macho.S_ZEROFILL or sect_type == macho.S_THREAD_LOCAL_ZEROFILL) { - mem.set(u8, code, 0); - } else { - var base_off: u64 = 0; - - while (true) { - const block_alignment = try math.powi(u32, 2, 
block.alignment); - const aligned_base_off = mem.alignForwardGeneric(u64, base_off, block_alignment); - - const sym = self.locals.items[block.local_sym_index]; - log.debug(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ - self.getString(sym.n_strx), - aligned_base_off, - aligned_base_off + block.size, - block.size, - block.alignment, - }); - - try block.resolveRelocs(self); - mem.copy(u8, code[aligned_base_off..][0..block.size], block.code); - - // TODO NOP for machine code instead of just zeroing out - const padding_len = aligned_base_off - base_off; - mem.set(u8, code[base_off..][0..padding_len], 0); - - base_off = aligned_base_off + block.size; - - if (block.next) |next| { - block = next; - } else break; - } - - mem.set(u8, code[base_off..], 0); - } - - try self.file.?.pwriteAll(code, sect.offset); - } -} - -fn writeStubHelperCommon(self: *Zld) !void { - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stub_helper = &text_segment.sections.items[self.stub_helper_section_index.?]; - const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const got = &data_const_segment.sections.items[self.got_section_index.?]; - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const data = &data_segment.sections.items[self.data_section_index.?]; - - self.stub_helper_stubs_start_off = blk: { - switch (self.target.?.cpu.arch) { - .x86_64 => { - const code_size = 15; - var code: [code_size]u8 = undefined; - // lea %r11, [rip + disp] - code[0] = 0x4c; - code[1] = 0x8d; - code[2] = 0x1d; - { - const target_addr = data.addr + data.size - @sizeOf(u64); - const displacement = try math.cast(u32, target_addr - stub_helper.addr - 7); - mem.writeIntLittle(u32, code[3..7], displacement); - } - // push %r11 - code[7] = 0x41; - code[8] = 0x53; - // jmp [rip + disp] - code[9] = 0xff; - code[10] = 0x25; - { - const resolv = 
self.symbol_resolver.get("dyld_stub_binder") orelse unreachable; - const got_index = self.got_entries.getIndex(.{ - .where = .import, - .where_index = resolv.where_index, - }) orelse unreachable; - const addr = got.addr + got_index * @sizeOf(u64); - const displacement = try math.cast(u32, addr - stub_helper.addr - code_size); - mem.writeIntLittle(u32, code[11..], displacement); - } - try self.file.?.pwriteAll(&code, stub_helper.offset); - break :blk stub_helper.offset + code_size; - }, - .aarch64 => { - var code: [6 * @sizeOf(u32)]u8 = undefined; - data_blk_outer: { - const this_addr = stub_helper.addr; - const target_addr = data.addr + data.size - @sizeOf(u64); - data_blk: { - const displacement = math.cast(i21, target_addr - this_addr) catch break :data_blk; - // adr x17, disp - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adr(.x17, displacement).toU32()); - // nop - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32()); - break :data_blk_outer; - } - data_blk: { - const new_this_addr = this_addr + @sizeOf(u32); - const displacement = math.cast(i21, target_addr - new_this_addr) catch break :data_blk; - // nop - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32()); - // adr x17, disp - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.adr(.x17, displacement).toU32()); - break :data_blk_outer; - } - // Jump is too big, replace adr with adrp and add. - const this_page = @intCast(i32, this_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @intCast(i21, target_page - this_page); - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x17, pages).toU32()); - const narrowed = @truncate(u12, target_addr); - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.add(.x17, .x17, narrowed, false).toU32()); - } - // stp x16, x17, [sp, #-16]! 
- code[8] = 0xf0; - code[9] = 0x47; - code[10] = 0xbf; - code[11] = 0xa9; - binder_blk_outer: { - const resolv = self.symbol_resolver.get("dyld_stub_binder") orelse unreachable; - const got_index = self.got_entries.getIndex(.{ - .where = .import, - .where_index = resolv.where_index, - }) orelse unreachable; - const this_addr = stub_helper.addr + 3 * @sizeOf(u32); - const target_addr = got.addr + got_index * @sizeOf(u64); - binder_blk: { - const displacement = math.divExact(u64, target_addr - this_addr, 4) catch break :binder_blk; - const literal = math.cast(u18, displacement) catch break :binder_blk; - // ldr x16, label - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - // nop - mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.nop().toU32()); - break :binder_blk_outer; - } - binder_blk: { - const new_this_addr = this_addr + @sizeOf(u32); - const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch break :binder_blk; - const literal = math.cast(u18, displacement) catch break :binder_blk; - // Pad with nop to please division. - // nop - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.nop().toU32()); - // ldr x16, label - mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - break :binder_blk_outer; - } - // Use adrp followed by ldr(immediate). 
- const this_page = @intCast(i32, this_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @intCast(i21, target_page - this_page); - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.adrp(.x16, pages).toU32()); - const narrowed = @truncate(u12, target_addr); - const offset = try math.divExact(u12, narrowed, 8); - mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ - .register = .{ - .rn = .x16, - .offset = aarch64.Instruction.LoadStoreOffset.imm(offset), - }, - }).toU32()); - } - // br x16 - code[20] = 0x00; - code[21] = 0x02; - code[22] = 0x1f; - code[23] = 0xd6; - try self.file.?.pwriteAll(&code, stub_helper.offset); - break :blk stub_helper.offset + 6 * @sizeOf(u32); - }, - else => unreachable, - } - }; - - for (self.stubs.keys()) |_, i| { - const index = @intCast(u32, i); - // TODO weak bound pointers - try self.writeLazySymbolPointer(index); - try self.writeStub(index); - try self.writeStubInStubHelper(index); - } -} - -fn writeLazySymbolPointer(self: *Zld, index: u32) !void { - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stub_helper = text_segment.sections.items[self.stub_helper_section_index.?]; - const data_segment = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const la_symbol_ptr = data_segment.sections.items[self.la_symbol_ptr_section_index.?]; - - const stub_size: u4 = switch (self.target.?.cpu.arch) { - .x86_64 => 10, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, - }; - const stub_off = self.stub_helper_stubs_start_off.? 
+ index * stub_size; - const end = stub_helper.addr + stub_off - stub_helper.offset; - var buf: [@sizeOf(u64)]u8 = undefined; - mem.writeIntLittle(u64, &buf, end); - const off = la_symbol_ptr.offset + index * @sizeOf(u64); - log.debug("writing lazy symbol pointer entry 0x{x} at 0x{x}", .{ end, off }); - try self.file.?.pwriteAll(&buf, off); -} - -fn writeStub(self: *Zld, index: u32) !void { - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stubs = text_segment.sections.items[self.stubs_section_index.?]; - const data_segment = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const la_symbol_ptr = data_segment.sections.items[self.la_symbol_ptr_section_index.?]; - - const stub_off = stubs.offset + index * stubs.reserved2; - const stub_addr = stubs.addr + index * stubs.reserved2; - const la_ptr_addr = la_symbol_ptr.addr + index * @sizeOf(u64); - log.debug("writing stub at 0x{x}", .{stub_off}); - var code = try self.allocator.alloc(u8, stubs.reserved2); - defer self.allocator.free(code); - switch (self.target.?.cpu.arch) { - .x86_64 => { - assert(la_ptr_addr >= stub_addr + stubs.reserved2); - const displacement = try math.cast(u32, la_ptr_addr - stub_addr - stubs.reserved2); - // jmp - code[0] = 0xff; - code[1] = 0x25; - mem.writeIntLittle(u32, code[2..][0..4], displacement); - }, - .aarch64 => { - assert(la_ptr_addr >= stub_addr); - outer: { - const this_addr = stub_addr; - const target_addr = la_ptr_addr; - inner: { - const displacement = math.divExact(u64, target_addr - this_addr, 4) catch break :inner; - const literal = math.cast(u18, displacement) catch break :inner; - // ldr x16, literal - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - // nop - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32()); - break :outer; - } - inner: { - const new_this_addr = this_addr + @sizeOf(u32); - const displacement = math.divExact(u64, 
target_addr - new_this_addr, 4) catch break :inner; - const literal = math.cast(u18, displacement) catch break :inner; - // nop - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32()); - // ldr x16, literal - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - break :outer; - } - // Use adrp followed by ldr(immediate). - const this_page = @intCast(i32, this_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @intCast(i21, target_page - this_page); - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x16, pages).toU32()); - const narrowed = @truncate(u12, target_addr); - const offset = try math.divExact(u12, narrowed, 8); - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.ldr(.x16, .{ - .register = .{ - .rn = .x16, - .offset = aarch64.Instruction.LoadStoreOffset.imm(offset), - }, - }).toU32()); - } - // br x16 - mem.writeIntLittle(u32, code[8..12], aarch64.Instruction.br(.x16).toU32()); - }, - else => unreachable, - } - try self.file.?.pwriteAll(code, stub_off); -} - -fn writeStubInStubHelper(self: *Zld, index: u32) !void { - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stub_helper = text_segment.sections.items[self.stub_helper_section_index.?]; - - const stub_size: u4 = switch (self.target.?.cpu.arch) { - .x86_64 => 10, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, - }; - const stub_off = self.stub_helper_stubs_start_off.? + index * stub_size; - var code = try self.allocator.alloc(u8, stub_size); - defer self.allocator.free(code); - switch (self.target.?.cpu.arch) { - .x86_64 => { - const displacement = try math.cast( - i32, - @intCast(i64, stub_helper.offset) - @intCast(i64, stub_off) - stub_size, - ); - // pushq - code[0] = 0x68; - mem.writeIntLittle(u32, code[1..][0..4], 0x0); // Just a placeholder populated in `populateLazyBindOffsetsInStubHelper`. 
- // jmpq - code[5] = 0xe9; - mem.writeIntLittle(u32, code[6..][0..4], @bitCast(u32, displacement)); - }, - .aarch64 => { - const displacement = try math.cast(i28, @intCast(i64, stub_helper.offset) - @intCast(i64, stub_off) - 4); - const literal = @divExact(stub_size - @sizeOf(u32), 4); - // ldr w16, literal - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.ldr(.w16, .{ - .literal = literal, - }).toU32()); - // b disp - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.b(displacement).toU32()); - mem.writeIntLittle(u32, code[8..12], 0x0); // Just a placeholder populated in `populateLazyBindOffsetsInStubHelper`. - }, - else => unreachable, - } - try self.file.?.pwriteAll(code, stub_off); -} - -fn resolveSymbolsInObject(self: *Zld, object_id: u16) !void { - const object = self.objects.items[object_id]; - - log.debug("resolving symbols in '{s}'", .{object.name}); - - for (object.symtab.items) |sym, id| { - const sym_id = @intCast(u32, id); - const sym_name = object.getString(sym.n_strx); - - if (symbolIsStab(sym)) { - log.err("unhandled symbol type: stab", .{}); - log.err(" symbol '{s}'", .{sym_name}); - log.err(" first definition in '{s}'", .{object.name.?}); - return error.UnhandledSymbolType; - } - - if (symbolIsIndr(sym)) { - log.err("unhandled symbol type: indirect", .{}); - log.err(" symbol '{s}'", .{sym_name}); - log.err(" first definition in '{s}'", .{object.name.?}); - return error.UnhandledSymbolType; - } - - if (symbolIsAbs(sym)) { - log.err("unhandled symbol type: absolute", .{}); - log.err(" symbol '{s}'", .{sym_name}); - log.err(" first definition in '{s}'", .{object.name.?}); - return error.UnhandledSymbolType; - } - - if (symbolIsSect(sym)) { - // Defined symbol regardless of scope lands in the locals symbol table. 
- const n_strx = blk: { - if (self.symbol_resolver.get(sym_name)) |resolv| { - switch (resolv.where) { - .global => break :blk self.globals.items[resolv.where_index].n_strx, - .tentative => break :blk self.tentatives.items[resolv.where_index].n_strx, - .undef => break :blk self.undefs.items[resolv.where_index].n_strx, - .import => unreachable, - } - } - break :blk try self.makeString(sym_name); - }; - const local_sym_index = @intCast(u32, self.locals.items.len); - try self.locals.append(self.allocator, .{ - .n_strx = n_strx, - .n_type = macho.N_SECT, - .n_sect = 0, - .n_desc = 0, - .n_value = sym.n_value, - }); - try object.symbol_mapping.putNoClobber(self.allocator, sym_id, local_sym_index); - - // If the symbol's scope is not local aka translation unit, then we need work out - // if we should save the symbol as a global, or potentially flag the error. - if (!symbolIsExt(sym)) continue; - - const local = self.locals.items[local_sym_index]; - const resolv = self.symbol_resolver.getPtr(sym_name) orelse { - const global_sym_index = @intCast(u32, self.globals.items.len); - try self.globals.append(self.allocator, .{ - .n_strx = n_strx, - .n_type = sym.n_type, - .n_sect = 0, - .n_desc = sym.n_desc, - .n_value = sym.n_value, - }); - try self.symbol_resolver.putNoClobber(self.allocator, try self.allocator.dupe(u8, sym_name), .{ - .where = .global, - .where_index = global_sym_index, - .local_sym_index = local_sym_index, - .file = object_id, - }); - continue; - }; - - switch (resolv.where) { - .import => unreachable, - .global => { - const global = &self.globals.items[resolv.where_index]; - - if (!(symbolIsWeakDef(sym) or symbolIsPext(sym)) and - !(symbolIsWeakDef(global.*) or symbolIsPext(global.*))) - { - log.err("symbol '{s}' defined multiple times", .{sym_name}); - log.err(" first definition in '{s}'", .{self.objects.items[resolv.file].name.?}); - log.err(" next definition in '{s}'", .{object.name.?}); - return error.MultipleSymbolDefinitions; - } - - if 
(symbolIsWeakDef(sym) or symbolIsPext(sym)) continue; // Current symbol is weak, so skip it. - - // Otherwise, update the resolver and the global symbol. - global.n_type = sym.n_type; - resolv.local_sym_index = local_sym_index; - resolv.file = object_id; - - continue; - }, - .undef => { - const undef = &self.undefs.items[resolv.where_index]; - undef.* = .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - }, - .tentative => { - const tentative = &self.tentatives.items[resolv.where_index]; - tentative.* = .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - }, - } - - const global_sym_index = @intCast(u32, self.globals.items.len); - try self.globals.append(self.allocator, .{ - .n_strx = local.n_strx, - .n_type = sym.n_type, - .n_sect = 0, - .n_desc = sym.n_desc, - .n_value = sym.n_value, - }); - resolv.* = .{ - .where = .global, - .where_index = global_sym_index, - .local_sym_index = local_sym_index, - .file = object_id, - }; - } else if (symbolIsTentative(sym)) { - // Symbol is a tentative definition. 
- const resolv = self.symbol_resolver.getPtr(sym_name) orelse { - const tent_sym_index = @intCast(u32, self.tentatives.items.len); - try self.tentatives.append(self.allocator, .{ - .n_strx = try self.makeString(sym_name), - .n_type = sym.n_type, - .n_sect = 0, - .n_desc = sym.n_desc, - .n_value = sym.n_value, - }); - try self.symbol_resolver.putNoClobber(self.allocator, try self.allocator.dupe(u8, sym_name), .{ - .where = .tentative, - .where_index = tent_sym_index, - .file = object_id, - }); - continue; - }; - - switch (resolv.where) { - .import => unreachable, - .global => {}, - .undef => { - const undef = &self.undefs.items[resolv.where_index]; - const tent_sym_index = @intCast(u32, self.tentatives.items.len); - try self.tentatives.append(self.allocator, .{ - .n_strx = undef.n_strx, - .n_type = sym.n_type, - .n_sect = 0, - .n_desc = sym.n_desc, - .n_value = sym.n_value, - }); - resolv.* = .{ - .where = .tentative, - .where_index = tent_sym_index, - .file = object_id, - }; - undef.* = .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - }, - .tentative => { - const tentative = &self.tentatives.items[resolv.where_index]; - if (tentative.n_value >= sym.n_value) continue; - - tentative.n_desc = sym.n_desc; - tentative.n_value = sym.n_value; - resolv.file = object_id; - }, - } - } else { - // Symbol is undefined. - if (self.symbol_resolver.contains(sym_name)) continue; - - const undef_sym_index = @intCast(u32, self.undefs.items.len); - try self.undefs.append(self.allocator, .{ - .n_strx = try self.makeString(sym_name), - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - try self.symbol_resolver.putNoClobber(self.allocator, try self.allocator.dupe(u8, sym_name), .{ - .where = .undef, - .where_index = undef_sym_index, - .file = object_id, - }); - } - } -} - -fn resolveSymbols(self: *Zld) !void { - // TODO mimicking insertion of null symbol from incremental linker. - // This will need to moved. 
- try self.locals.append(self.allocator, .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - try self.strtab.append(self.allocator, 0); - - // First pass, resolve symbols in provided objects. - for (self.objects.items) |_, object_id| { - try self.resolveSymbolsInObject(@intCast(u16, object_id)); - } - - // Second pass, resolve symbols in static libraries. - var next_sym: usize = 0; - loop: while (true) : (next_sym += 1) { - if (next_sym == self.undefs.items.len) break; - - const sym = self.undefs.items[next_sym]; - if (symbolIsNull(sym)) continue; - - const sym_name = self.getString(sym.n_strx); - - for (self.archives.items) |archive| { - // Check if the entry exists in a static archive. - const offsets = archive.toc.get(sym_name) orelse { - // No hit. - continue; - }; - assert(offsets.items.len > 0); - - const object = try archive.parseObject(offsets.items[0]); - const object_id = @intCast(u16, self.objects.items.len); - try self.objects.append(self.allocator, object); - try self.resolveSymbolsInObject(object_id); - - continue :loop; - } - } - - // Convert any tentative definition into a regular symbol and allocate - // text blocks for each tentative defintion. 
- for (self.tentatives.items) |sym| { - if (symbolIsNull(sym)) continue; - - const sym_name = self.getString(sym.n_strx); - const match: MatchingSection = blk: { - if (self.common_section_index == null) { - const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.common_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__common", .{ - .flags = macho.S_ZEROFILL, - }); - } - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.common_section_index.?, - }; - }; - - const size = sym.n_value; - const code = try self.allocator.alloc(u8, size); - mem.set(u8, code, 0); - const alignment = (sym.n_desc >> 8) & 0x0f; - - const resolv = self.symbol_resolver.getPtr(sym_name) orelse unreachable; - const local_sym_index = @intCast(u32, self.locals.items.len); - var nlist = macho.nlist_64{ - .n_strx = sym.n_strx, - .n_type = macho.N_SECT, - .n_sect = self.sectionId(match), - .n_desc = 0, - .n_value = 0, - }; - try self.locals.append(self.allocator, nlist); - const global_sym_index = @intCast(u32, self.globals.items.len); - nlist.n_type |= macho.N_EXT; - try self.globals.append(self.allocator, nlist); - resolv.* = .{ - .where = .global, - .where_index = global_sym_index, - .local_sym_index = local_sym_index, - }; - - const block = try self.allocator.create(TextBlock); - errdefer self.allocator.destroy(block); - - block.* = TextBlock.init(self.allocator); - block.local_sym_index = local_sym_index; - block.code = code; - block.size = size; - block.alignment = alignment; - - // Update target section's metadata - // TODO should we update segment's size here too? - // How does it tie with incremental space allocs? 
- const tseg = &self.load_commands.items[match.seg].Segment; - const tsect = &tseg.sections.items[match.sect]; - const new_alignment = math.max(tsect.@"align", block.alignment); - const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); - const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; - tsect.size = new_size; - tsect.@"align" = new_alignment; - - if (self.blocks.getPtr(match)) |last| { - last.*.next = block; - block.prev = last.*; - last.* = block; - } else { - try self.blocks.putNoClobber(self.allocator, match, block); - } - } - - // Third pass, resolve symbols in dynamic libraries. - { - // Put dyld_stub_binder as an undefined special symbol. - const undef_sym_index = @intCast(u32, self.undefs.items.len); - try self.undefs.append(self.allocator, .{ - .n_strx = try self.makeString("dyld_stub_binder"), - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - try self.symbol_resolver.putNoClobber(self.allocator, try self.allocator.dupe(u8, "dyld_stub_binder"), .{ - .where = .undef, - .where_index = undef_sym_index, - }); - } - - var referenced = std.AutoHashMap(*Dylib, void).init(self.allocator); - defer referenced.deinit(); - - loop: for (self.undefs.items) |sym| { - if (symbolIsNull(sym)) continue; - - const sym_name = self.getString(sym.n_strx); - for (self.dylibs.items) |dylib| { - if (!dylib.symbols.contains(sym_name)) continue; - - if (!referenced.contains(dylib)) { - // Add LC_LOAD_DYLIB load command for each referenced dylib/stub. 
- dylib.ordinal = self.next_dylib_ordinal; - const dylib_id = dylib.id orelse unreachable; - var dylib_cmd = try createLoadDylibCommand( - self.allocator, - dylib_id.name, - dylib_id.timestamp, - dylib_id.current_version, - dylib_id.compatibility_version, - ); - errdefer dylib_cmd.deinit(self.allocator); - try self.load_commands.append(self.allocator, .{ .Dylib = dylib_cmd }); - self.next_dylib_ordinal += 1; - try referenced.putNoClobber(dylib, {}); - } - - const resolv = self.symbol_resolver.getPtr(sym_name) orelse unreachable; - const undef = &self.undefs.items[resolv.where_index]; - const import_sym_index = @intCast(u32, self.imports.items.len); - try self.imports.append(self.allocator, .{ - .n_strx = undef.n_strx, - .n_type = macho.N_UNDF | macho.N_EXT, - .n_sect = 0, - .n_desc = packDylibOrdinal(dylib.ordinal.?), - .n_value = 0, - }); - resolv.* = .{ - .where = .import, - .where_index = import_sym_index, - }; - undef.* = .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - - continue :loop; - } - } - - // Fourth pass, handle synthetic symbols and flag any undefined references. 
- if (self.symbol_resolver.getPtr("___dso_handle")) |resolv| blk: { - if (resolv.where != .undef) break :blk; - - const undef = &self.undefs.items[resolv.where_index]; - const match: MatchingSection = .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_section_index.?, - }; - const local_sym_index = @intCast(u32, self.locals.items.len); - var nlist = macho.nlist_64{ - .n_strx = undef.n_strx, - .n_type = macho.N_SECT, - .n_sect = self.sectionId(match), - .n_desc = 0, - .n_value = 0, - }; - try self.locals.append(self.allocator, nlist); - const global_sym_index = @intCast(u32, self.globals.items.len); - nlist.n_type |= macho.N_EXT; - nlist.n_desc = macho.N_WEAK_DEF; - try self.globals.append(self.allocator, nlist); - - undef.* = .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - resolv.* = .{ - .where = .global, - .where_index = global_sym_index, - .local_sym_index = local_sym_index, - }; - - // We create an empty atom for this symbol. - // TODO perhaps we should special-case special symbols? Create a separate - // linked list of atoms? 
- const block = try self.allocator.create(TextBlock); - errdefer self.allocator.destroy(block); - - block.* = TextBlock.init(self.allocator); - block.local_sym_index = local_sym_index; - block.code = try self.allocator.alloc(u8, 0); - block.size = 0; - block.alignment = 0; - - if (self.blocks.getPtr(match)) |last| { - last.*.next = block; - block.prev = last.*; - last.* = block; - } else { - try self.blocks.putNoClobber(self.allocator, match, block); - } - } - - var has_undefined = false; - for (self.undefs.items) |sym| { - if (symbolIsNull(sym)) continue; - - const sym_name = self.getString(sym.n_strx); - const resolv = self.symbol_resolver.get(sym_name) orelse unreachable; - - log.err("undefined reference to symbol '{s}'", .{sym_name}); - log.err(" first referenced in '{s}'", .{self.objects.items[resolv.file].name.?}); - has_undefined = true; - } - - if (has_undefined) return error.UndefinedSymbolReference; -} - -fn parseTextBlocks(self: *Zld) !void { - for (self.objects.items) |object| { - try object.parseTextBlocks(self); - } -} - -fn populateMetadata(self: *Zld) !void { - if (self.pagezero_segment_cmd_index == null) { - self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .Segment = SegmentCommand.empty("__PAGEZERO", .{ - .vmsize = 0x100000000, // size always set to 4GB - }), - }); - } - - if (self.text_segment_cmd_index == null) { - self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .Segment = SegmentCommand.empty("__TEXT", .{ - .vmaddr = 0x100000000, // always starts at 4GB - .maxprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE, - .initprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE, - }), - }); - } - - if (self.text_section_index == null) { - const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - self.text_section_index = @intCast(u16, text_seg.sections.items.len); - 
const alignment: u2 = switch (self.target.?.cpu.arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, // unhandled architecture type - }; - try text_seg.addSection(self.allocator, "__text", .{ - .@"align" = alignment, - .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - }); - } - - if (self.stubs_section_index == null) { - const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - self.stubs_section_index = @intCast(u16, text_seg.sections.items.len); - const alignment: u2 = switch (self.target.?.cpu.arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, // unhandled architecture type - }; - const stub_size: u4 = switch (self.target.?.cpu.arch) { - .x86_64 => 6, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, // unhandled architecture type - }; - try text_seg.addSection(self.allocator, "__stubs", .{ - .@"align" = alignment, - .flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - .reserved2 = stub_size, - }); - } - - if (self.stub_helper_section_index == null) { - const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - self.stub_helper_section_index = @intCast(u16, text_seg.sections.items.len); - const alignment: u2 = switch (self.target.?.cpu.arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, // unhandled architecture type - }; - const stub_helper_size: u6 = switch (self.target.?.cpu.arch) { - .x86_64 => 15, - .aarch64 => 6 * @sizeOf(u32), - else => unreachable, - }; - try text_seg.addSection(self.allocator, "__stub_helper", .{ - .size = stub_helper_size, - .@"align" = alignment, - .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - }); - } - - if (self.data_const_segment_cmd_index == null) { - self.data_const_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .Segment = 
SegmentCommand.empty("__DATA_CONST", .{ - .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, - .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, - }), - }); - } - - if (self.got_section_index == null) { - const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - self.got_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.allocator, "__got", .{ - .@"align" = 3, // 2^3 = @sizeOf(u64) - .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, - }); - } - - if (self.data_segment_cmd_index == null) { - self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .Segment = SegmentCommand.empty("__DATA", .{ - .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, - .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, - }), - }); - } - - if (self.la_symbol_ptr_section_index == null) { - const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.la_symbol_ptr_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__la_symbol_ptr", .{ - .@"align" = 3, // 2^3 = @sizeOf(u64) - .flags = macho.S_LAZY_SYMBOL_POINTERS, - }); - } - - if (self.data_section_index == null) { - const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.data_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.allocator, "__data", .{ - .@"align" = 3, // 2^3 = @sizeOf(u64) - }); - } - - if (self.linkedit_segment_cmd_index == null) { - self.linkedit_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .Segment = SegmentCommand.empty("__LINKEDIT", .{ - .maxprot = macho.VM_PROT_READ, - .initprot = macho.VM_PROT_READ, - }), - }); - } - - if (self.dyld_info_cmd_index == null) { - self.dyld_info_cmd_index = @intCast(u16, self.load_commands.items.len); - 
try self.load_commands.append(self.allocator, .{ - .DyldInfoOnly = .{ - .cmd = macho.LC_DYLD_INFO_ONLY, - .cmdsize = @sizeOf(macho.dyld_info_command), - .rebase_off = 0, - .rebase_size = 0, - .bind_off = 0, - .bind_size = 0, - .weak_bind_off = 0, - .weak_bind_size = 0, - .lazy_bind_off = 0, - .lazy_bind_size = 0, - .export_off = 0, - .export_size = 0, - }, - }); - } - - if (self.symtab_cmd_index == null) { - self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .Symtab = .{ - .cmd = macho.LC_SYMTAB, - .cmdsize = @sizeOf(macho.symtab_command), - .symoff = 0, - .nsyms = 0, - .stroff = 0, - .strsize = 0, - }, - }); - } - - if (self.dysymtab_cmd_index == null) { - self.dysymtab_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .Dysymtab = .{ - .cmd = macho.LC_DYSYMTAB, - .cmdsize = @sizeOf(macho.dysymtab_command), - .ilocalsym = 0, - .nlocalsym = 0, - .iextdefsym = 0, - .nextdefsym = 0, - .iundefsym = 0, - .nundefsym = 0, - .tocoff = 0, - .ntoc = 0, - .modtaboff = 0, - .nmodtab = 0, - .extrefsymoff = 0, - .nextrefsyms = 0, - .indirectsymoff = 0, - .nindirectsyms = 0, - .extreloff = 0, - .nextrel = 0, - .locreloff = 0, - .nlocrel = 0, - }, - }); - } - - if (self.dylinker_cmd_index == null) { - self.dylinker_cmd_index = @intCast(u16, self.load_commands.items.len); - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.dylinker_command) + mem.lenZ(DEFAULT_DYLD_PATH), - @sizeOf(u64), - )); - var dylinker_cmd = emptyGenericCommandWithData(macho.dylinker_command{ - .cmd = macho.LC_LOAD_DYLINKER, - .cmdsize = cmdsize, - .name = @sizeOf(macho.dylinker_command), - }); - dylinker_cmd.data = try self.allocator.alloc(u8, cmdsize - dylinker_cmd.inner.name); - mem.set(u8, dylinker_cmd.data, 0); - mem.copy(u8, dylinker_cmd.data, mem.spanZ(DEFAULT_DYLD_PATH)); - try self.load_commands.append(self.allocator, .{ .Dylinker = dylinker_cmd 
}); - } - - if (self.main_cmd_index == null and self.output.?.tag == .exe) { - self.main_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .Main = .{ - .cmd = macho.LC_MAIN, - .cmdsize = @sizeOf(macho.entry_point_command), - .entryoff = 0x0, - .stacksize = 0, - }, - }); - } - - if (self.dylib_id_cmd_index == null and self.output.?.tag == .dylib) { - self.dylib_id_cmd_index = @intCast(u16, self.load_commands.items.len); - var dylib_cmd = try createLoadDylibCommand( - self.allocator, - self.output.?.install_name.?, - 2, - 0x10000, // TODO forward user-provided versions - 0x10000, - ); - errdefer dylib_cmd.deinit(self.allocator); - dylib_cmd.inner.cmd = macho.LC_ID_DYLIB; - try self.load_commands.append(self.allocator, .{ .Dylib = dylib_cmd }); - } - - if (self.version_min_cmd_index == null) { - self.version_min_cmd_index = @intCast(u16, self.load_commands.items.len); - const cmd: u32 = switch (self.target.?.os.tag) { - .macos => macho.LC_VERSION_MIN_MACOSX, - .ios => macho.LC_VERSION_MIN_IPHONEOS, - .tvos => macho.LC_VERSION_MIN_TVOS, - .watchos => macho.LC_VERSION_MIN_WATCHOS, - else => unreachable, // wrong OS - }; - const ver = self.target.?.os.version_range.semver.min; - const version = ver.major << 16 | ver.minor << 8 | ver.patch; - try self.load_commands.append(self.allocator, .{ - .VersionMin = .{ - .cmd = cmd, - .cmdsize = @sizeOf(macho.version_min_command), - .version = version, - .sdk = version, - }, - }); - } - - if (self.source_version_cmd_index == null) { - self.source_version_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .SourceVersion = .{ - .cmd = macho.LC_SOURCE_VERSION, - .cmdsize = @sizeOf(macho.source_version_command), - .version = 0x0, - }, - }); - } - - if (self.uuid_cmd_index == null) { - self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len); - var uuid_cmd: macho.uuid_command = .{ - .cmd = macho.LC_UUID, - 
.cmdsize = @sizeOf(macho.uuid_command), - .uuid = undefined, - }; - std.crypto.random.bytes(&uuid_cmd.uuid); - try self.load_commands.append(self.allocator, .{ .Uuid = uuid_cmd }); - } -} - -fn addDataInCodeLC(self: *Zld) !void { - if (self.data_in_code_cmd_index == null) { - self.data_in_code_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .LinkeditData = .{ - .cmd = macho.LC_DATA_IN_CODE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, - }, - }); - } -} - -fn addCodeSignatureLC(self: *Zld) !void { - if (self.code_signature_cmd_index == null and self.target.?.cpu.arch == .aarch64) { - self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.allocator, .{ - .LinkeditData = .{ - .cmd = macho.LC_CODE_SIGNATURE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, - }, - }); - } -} - -fn addRpaths(self: *Zld, rpaths: []const []const u8) !void { - for (rpaths) |rpath| { - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.rpath_command) + rpath.len + 1, - @sizeOf(u64), - )); - var rpath_cmd = emptyGenericCommandWithData(macho.rpath_command{ - .cmd = macho.LC_RPATH, - .cmdsize = cmdsize, - .path = @sizeOf(macho.rpath_command), - }); - rpath_cmd.data = try self.allocator.alloc(u8, cmdsize - rpath_cmd.inner.path); - mem.set(u8, rpath_cmd.data, 0); - mem.copy(u8, rpath_cmd.data, rpath); - try self.load_commands.append(self.allocator, .{ .Rpath = rpath_cmd }); - } -} - -fn flush(self: *Zld) !void { - try self.writeTextBlocks(); - try self.writeStubHelperCommon(); - - if (self.common_section_index) |index| { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[index]; - sect.offset = 0; - } - - if (self.bss_section_index) |index| { - const seg = 
&self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[index]; - sect.offset = 0; - } - - if (self.tlv_bss_section_index) |index| { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[index]; - sect.offset = 0; - } - - try self.writeGotEntries(); - try self.setEntryPoint(); - try self.writeRebaseInfoTable(); - try self.writeBindInfoTable(); - try self.writeLazyBindInfoTable(); - try self.writeExportInfo(); - try self.writeDices(); - - { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - symtab.symoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - } - - try self.writeSymbolTable(); - try self.writeStringTable(); - - { - // Seal __LINKEDIT size - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size.?); - } - - if (self.target.?.cpu.arch == .aarch64) { - try self.writeCodeSignaturePadding(); - } - - try self.writeLoadCommands(); - try self.writeHeader(); - - if (self.target.?.cpu.arch == .aarch64) { - try self.writeCodeSignature(); - } - - if (comptime std.Target.current.isDarwin() and std.Target.current.cpu.arch == .aarch64) { - const out_path = self.output.?.path; - try fs.cwd().copyFile(out_path, fs.cwd(), out_path, .{}); - } -} - -fn writeGotEntries(self: *Zld) !void { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = seg.sections.items[self.got_section_index.?]; - - var buffer = try self.allocator.alloc(u8, self.got_entries.count() * @sizeOf(u64)); - defer self.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - var writer = stream.writer(); - - for (self.got_entries.keys()) |key| { - const address: u64 = switch (key.where) { - .local => 
self.locals.items[key.where_index].n_value, - .import => 0, - }; - try writer.writeIntLittle(u64, address); - } - - log.debug("writing GOT pointers at 0x{x} to 0x{x}", .{ sect.offset, sect.offset + buffer.len }); - - try self.file.?.pwriteAll(buffer, sect.offset); -} - -fn setEntryPoint(self: *Zld) !void { - if (self.output.?.tag != .exe) return; - - // TODO we should respect the -entry flag passed in by the user to set a custom - // entrypoint. For now, assume default of `_main`. - const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const resolv = self.symbol_resolver.get("_main") orelse { - log.err("'_main' export not found", .{}); - return error.MissingMainEntrypoint; - }; - assert(resolv.where == .global); - const sym = self.globals.items[resolv.where_index]; - const ec = &self.load_commands.items[self.main_cmd_index.?].Main; - ec.entryoff = @intCast(u32, sym.n_value - seg.inner.vmaddr); - ec.stacksize = self.stack_size; -} - -fn writeRebaseInfoTable(self: *Zld) !void { - var pointers = std.ArrayList(Pointer).init(self.allocator); - defer pointers.deinit(); - - { - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var block: *TextBlock = entry.value_ptr.*; - - if (match.seg == self.text_segment_cmd_index.?) 
continue; // __TEXT is non-writable - - const seg = self.load_commands.items[match.seg].Segment; - - while (true) { - const sym = self.locals.items[block.local_sym_index]; - const base_offset = sym.n_value - seg.inner.vmaddr; - - for (block.rebases.items) |offset| { - try pointers.append(.{ - .offset = base_offset + offset, - .segment_id = match.seg, - }); - } - - if (block.prev) |prev| { - block = prev; - } else break; - } - } - } - - if (self.got_section_index) |idx| { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); - - for (self.got_entries.keys()) |key, i| { - if (key.where == .import) continue; - - try pointers.append(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - }); - } - } - - if (self.la_symbol_ptr_section_index) |idx| { - const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_segment_cmd_index.?); - - try pointers.ensureUnusedCapacity(self.stubs.count()); - for (self.stubs.keys()) |_, i| { - pointers.appendAssumeCapacity(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - }); - } - } - - std.sort.sort(Pointer, pointers.items, {}, pointerCmp); - - const size = try rebaseInfoSize(pointers.items); - var buffer = try self.allocator.alloc(u8, @intCast(usize, size)); - defer self.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - try writeRebaseInfo(pointers.items, stream.writer()); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - dyld_info.rebase_off = @intCast(u32, seg.inner.fileoff); - 
dyld_info.rebase_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @sizeOf(u64))); - seg.inner.filesize += dyld_info.rebase_size; - - log.debug("writing rebase info from 0x{x} to 0x{x}", .{ dyld_info.rebase_off, dyld_info.rebase_off + dyld_info.rebase_size }); - - try self.file.?.pwriteAll(buffer, dyld_info.rebase_off); -} - -fn writeBindInfoTable(self: *Zld) !void { - var pointers = std.ArrayList(Pointer).init(self.allocator); - defer pointers.deinit(); - - if (self.got_section_index) |idx| { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); - - for (self.got_entries.keys()) |key, i| { - if (key.where == .local) continue; - - const sym = self.imports.items[key.where_index]; - try pointers.append(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - .dylib_ordinal = unpackDylibOrdinal(sym.n_desc), - .name = self.getString(sym.n_strx), - }); - } - } - - { - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var block: *TextBlock = entry.value_ptr.*; - - if (match.seg == self.text_segment_cmd_index.?) 
continue; // __TEXT is non-writable - - const seg = self.load_commands.items[match.seg].Segment; - - while (true) { - const sym = self.locals.items[block.local_sym_index]; - const base_offset = sym.n_value - seg.inner.vmaddr; - - for (block.bindings.items) |binding| { - const bind_sym = self.imports.items[binding.local_sym_index]; - try pointers.append(.{ - .offset = binding.offset + base_offset, - .segment_id = match.seg, - .dylib_ordinal = unpackDylibOrdinal(bind_sym.n_desc), - .name = self.getString(bind_sym.n_strx), - }); - } - - if (block.prev) |prev| { - block = prev; - } else break; - } - } - } - - const size = try bindInfoSize(pointers.items); - var buffer = try self.allocator.alloc(u8, @intCast(usize, size)); - defer self.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - try writeBindInfo(pointers.items, stream.writer()); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - dyld_info.bind_off = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - dyld_info.bind_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64))); - seg.inner.filesize += dyld_info.bind_size; - - log.debug("writing binding info from 0x{x} to 0x{x}", .{ dyld_info.bind_off, dyld_info.bind_off + dyld_info.bind_size }); - - try self.file.?.pwriteAll(buffer, dyld_info.bind_off); -} - -fn writeLazyBindInfoTable(self: *Zld) !void { - var pointers = std.ArrayList(Pointer).init(self.allocator); - defer pointers.deinit(); - - if (self.la_symbol_ptr_section_index) |idx| { - const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_segment_cmd_index.?); - - try pointers.ensureUnusedCapacity(self.stubs.count()); - - for (self.stubs.keys()) |key, i| { - const sym = 
self.imports.items[key]; - pointers.appendAssumeCapacity(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - .dylib_ordinal = unpackDylibOrdinal(sym.n_desc), - .name = self.getString(sym.n_strx), - }); - } - } - - const size = try lazyBindInfoSize(pointers.items); - var buffer = try self.allocator.alloc(u8, @intCast(usize, size)); - defer self.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - try writeLazyBindInfo(pointers.items, stream.writer()); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - dyld_info.lazy_bind_off = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - dyld_info.lazy_bind_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64))); - seg.inner.filesize += dyld_info.lazy_bind_size; - - log.debug("writing lazy binding info from 0x{x} to 0x{x}", .{ dyld_info.lazy_bind_off, dyld_info.lazy_bind_off + dyld_info.lazy_bind_size }); - - try self.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off); - try self.populateLazyBindOffsetsInStubHelper(buffer); -} - -fn populateLazyBindOffsetsInStubHelper(self: *Zld, buffer: []const u8) !void { - var stream = std.io.fixedBufferStream(buffer); - var reader = stream.reader(); - var offsets = std.ArrayList(u32).init(self.allocator); - try offsets.append(0); - defer offsets.deinit(); - var valid_block = false; - - while (true) { - const inst = reader.readByte() catch |err| switch (err) { - error.EndOfStream => break, - else => return err, - }; - const opcode: u8 = inst & macho.BIND_OPCODE_MASK; - - switch (opcode) { - macho.BIND_OPCODE_DO_BIND => { - valid_block = true; - }, - macho.BIND_OPCODE_DONE => { - if (valid_block) { - const offset = try stream.getPos(); - try offsets.append(@intCast(u32, offset)); - } - valid_block = false; - }, - macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => { - var next = try 
reader.readByte(); - while (next != @as(u8, 0)) { - next = try reader.readByte(); - } - }, - macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => { - _ = try leb.readULEB128(u64, reader); - }, - macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => { - _ = try leb.readULEB128(u64, reader); - }, - macho.BIND_OPCODE_SET_ADDEND_SLEB => { - _ = try leb.readILEB128(i64, reader); - }, - else => {}, - } - } - assert(self.stubs.count() <= offsets.items.len); - - const stub_size: u4 = switch (self.target.?.cpu.arch) { - .x86_64 => 10, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, - }; - const off: u4 = switch (self.target.?.cpu.arch) { - .x86_64 => 1, - .aarch64 => 2 * @sizeOf(u32), - else => unreachable, - }; - var buf: [@sizeOf(u32)]u8 = undefined; - for (self.stubs.keys()) |_, index| { - const placeholder_off = self.stub_helper_stubs_start_off.? + index * stub_size + off; - mem.writeIntLittle(u32, &buf, offsets.items[index]); - try self.file.?.pwriteAll(&buf, placeholder_off); - } -} - -fn writeExportInfo(self: *Zld) !void { - var trie = Trie.init(self.allocator); - defer trie.deinit(); - - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const base_address = text_segment.inner.vmaddr; - - // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. 
- log.debug("writing export trie", .{}); - - for (self.globals.items) |sym| { - const sym_name = self.getString(sym.n_strx); - log.debug(" | putting '{s}' defined at 0x{x}", .{ sym_name, sym.n_value }); - - try trie.put(.{ - .name = sym_name, - .vmaddr_offset = sym.n_value - base_address, - .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, - }); - } - - try trie.finalize(); - - var buffer = try self.allocator.alloc(u8, @intCast(usize, trie.size)); - defer self.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - const nwritten = try trie.write(stream.writer()); - assert(nwritten == trie.size); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - dyld_info.export_off = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - dyld_info.export_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64))); - seg.inner.filesize += dyld_info.export_size; - - log.debug("writing export info from 0x{x} to 0x{x}", .{ dyld_info.export_off, dyld_info.export_off + dyld_info.export_size }); - - try self.file.?.pwriteAll(buffer, dyld_info.export_off); -} - -fn writeSymbolTable(self: *Zld) !void { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - - var locals = std.ArrayList(macho.nlist_64).init(self.allocator); - defer locals.deinit(); - try locals.appendSlice(self.locals.items); - - if (self.has_stabs) { - for (self.objects.items) |object| { - if (object.debug_info == null) continue; - - // Open scope - try locals.ensureUnusedCapacity(4); - locals.appendAssumeCapacity(.{ - .n_strx = try self.makeString(object.tu_comp_dir.?), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - locals.appendAssumeCapacity(.{ - .n_strx = try self.makeString(object.tu_name.?), - .n_type = 
macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - locals.appendAssumeCapacity(.{ - .n_strx = try self.makeString(object.name.?), - .n_type = macho.N_OSO, - .n_sect = 0, - .n_desc = 1, - .n_value = object.mtime orelse 0, - }); - - for (object.text_blocks.items) |block| { - if (block.stab) |stab| { - const nlists = try stab.asNlists(block.local_sym_index, self); - defer self.allocator.free(nlists); - try locals.appendSlice(nlists); - } else { - const contained = block.contained orelse continue; - for (contained) |sym_at_off| { - const stab = sym_at_off.stab orelse continue; - const nlists = try stab.asNlists(sym_at_off.local_sym_index, self); - defer self.allocator.free(nlists); - try locals.appendSlice(nlists); - } - } - } - - // Close scope - locals.appendAssumeCapacity(.{ - .n_strx = 0, - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - } - } - - const nlocals = locals.items.len; - const nexports = self.globals.items.len; - const nundefs = self.imports.items.len; - - const locals_off = symtab.symoff + symtab.nsyms * @sizeOf(macho.nlist_64); - const locals_size = nlocals * @sizeOf(macho.nlist_64); - log.debug("writing local symbols from 0x{x} to 0x{x}", .{ locals_off, locals_size + locals_off }); - try self.file.?.pwriteAll(mem.sliceAsBytes(locals.items), locals_off); - - const exports_off = locals_off + locals_size; - const exports_size = nexports * @sizeOf(macho.nlist_64); - log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off }); - try self.file.?.pwriteAll(mem.sliceAsBytes(self.globals.items), exports_off); - - const undefs_off = exports_off + exports_size; - const undefs_size = nundefs * @sizeOf(macho.nlist_64); - log.debug("writing undefined symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off }); - try self.file.?.pwriteAll(mem.sliceAsBytes(self.imports.items), undefs_off); - - symtab.nsyms += @intCast(u32, nlocals + nexports + nundefs); - 
seg.inner.filesize += locals_size + exports_size + undefs_size; - - // Update dynamic symbol table. - const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; - dysymtab.nlocalsym += @intCast(u32, nlocals); - dysymtab.iextdefsym = dysymtab.nlocalsym; - dysymtab.nextdefsym = @intCast(u32, nexports); - dysymtab.iundefsym = dysymtab.nlocalsym + dysymtab.nextdefsym; - dysymtab.nundefsym = @intCast(u32, nundefs); - - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stubs = &text_segment.sections.items[self.stubs_section_index.?]; - const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const got = &data_const_segment.sections.items[self.got_section_index.?]; - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const la_symbol_ptr = &data_segment.sections.items[self.la_symbol_ptr_section_index.?]; - - const nstubs = @intCast(u32, self.stubs.count()); - const ngot_entries = @intCast(u32, self.got_entries.count()); - - dysymtab.indirectsymoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - dysymtab.nindirectsyms = nstubs * 2 + ngot_entries; - - const needed_size = dysymtab.nindirectsyms * @sizeOf(u32); - seg.inner.filesize += needed_size; - - log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ - dysymtab.indirectsymoff, - dysymtab.indirectsymoff + needed_size, - }); - - var buf = try self.allocator.alloc(u8, needed_size); - defer self.allocator.free(buf); - - var stream = std.io.fixedBufferStream(buf); - var writer = stream.writer(); - - stubs.reserved1 = 0; - for (self.stubs.keys()) |key| { - try writer.writeIntLittle(u32, dysymtab.iundefsym + key); - } - - got.reserved1 = nstubs; - for (self.got_entries.keys()) |key| { - switch (key.where) { - .import => { - try writer.writeIntLittle(u32, dysymtab.iundefsym + key.where_index); - }, - .local => { - try writer.writeIntLittle(u32, 
macho.INDIRECT_SYMBOL_LOCAL); - }, - } - } - - la_symbol_ptr.reserved1 = got.reserved1 + ngot_entries; - for (self.stubs.keys()) |key| { - try writer.writeIntLittle(u32, dysymtab.iundefsym + key); - } - - try self.file.?.pwriteAll(buf, dysymtab.indirectsymoff); -} - -fn writeStringTable(self: *Zld) !void { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - symtab.stroff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - symtab.strsize = @intCast(u32, mem.alignForwardGeneric(u64, self.strtab.items.len, @alignOf(u64))); - seg.inner.filesize += symtab.strsize; - - log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); - - try self.file.?.pwriteAll(self.strtab.items, symtab.stroff); - - if (symtab.strsize > self.strtab.items.len and self.target.?.cpu.arch == .x86_64) { - // This is the last section, so we need to pad it out. 
- try self.file.?.pwriteAll(&[_]u8{0}, seg.inner.fileoff + seg.inner.filesize - 1); - } -} - -fn writeDices(self: *Zld) !void { - if (!self.has_dices) return; - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].LinkeditData; - const fileoff = seg.inner.fileoff + seg.inner.filesize; - - var buf = std.ArrayList(u8).init(self.allocator); - defer buf.deinit(); - - var block: *TextBlock = self.blocks.get(.{ - .seg = self.text_segment_cmd_index orelse return, - .sect = self.text_section_index orelse return, - }) orelse return; - - while (block.prev) |prev| { - block = prev; - } - - const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const text_sect = text_seg.sections.items[self.text_section_index.?]; - - while (true) { - if (block.dices.items.len > 0) { - const sym = self.locals.items[block.local_sym_index]; - const base_off = try math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset); - - try buf.ensureUnusedCapacity(block.dices.items.len * @sizeOf(macho.data_in_code_entry)); - for (block.dices.items) |dice| { - const rebased_dice = macho.data_in_code_entry{ - .offset = base_off + dice.offset, - .length = dice.length, - .kind = dice.kind, - }; - buf.appendSliceAssumeCapacity(mem.asBytes(&rebased_dice)); - } - } - - if (block.next) |next| { - block = next; - } else break; - } - - const datasize = @intCast(u32, buf.items.len); - - dice_cmd.dataoff = @intCast(u32, fileoff); - dice_cmd.datasize = datasize; - seg.inner.filesize += datasize; - - log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ fileoff, fileoff + datasize }); - - try self.file.?.pwriteAll(buf.items, fileoff); -} - -fn writeCodeSignaturePadding(self: *Zld) !void { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const code_sig_cmd = &self.load_commands.items[self.code_signature_cmd_index.?].LinkeditData; - const 
fileoff = seg.inner.fileoff + seg.inner.filesize; - const needed_size = CodeSignature.calcCodeSignaturePaddingSize( - self.output.?.path, - fileoff, - self.page_size.?, - ); - code_sig_cmd.dataoff = @intCast(u32, fileoff); - code_sig_cmd.datasize = needed_size; - - // Advance size of __LINKEDIT segment - seg.inner.filesize += needed_size; - seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size.?); - - log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ fileoff, fileoff + needed_size }); - - // Pad out the space. We need to do this to calculate valid hashes for everything in the file - // except for code signature data. - try self.file.?.pwriteAll(&[_]u8{0}, fileoff + needed_size - 1); -} - -fn writeCodeSignature(self: *Zld) !void { - const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const code_sig_cmd = self.load_commands.items[self.code_signature_cmd_index.?].LinkeditData; - - var code_sig = CodeSignature.init(self.allocator, self.page_size.?); - defer code_sig.deinit(); - try code_sig.calcAdhocSignature( - self.file.?, - self.output.?.path, - text_seg.inner, - code_sig_cmd, - .Exe, - ); - - var buffer = try self.allocator.alloc(u8, code_sig.size()); - defer self.allocator.free(buffer); - var stream = std.io.fixedBufferStream(buffer); - try code_sig.write(stream.writer()); - - log.debug("writing code signature from 0x{x} to 0x{x}", .{ code_sig_cmd.dataoff, code_sig_cmd.dataoff + buffer.len }); - try self.file.?.pwriteAll(buffer, code_sig_cmd.dataoff); -} - -fn writeLoadCommands(self: *Zld) !void { - var sizeofcmds: u32 = 0; - for (self.load_commands.items) |lc| { - sizeofcmds += lc.cmdsize(); - } - - var buffer = try self.allocator.alloc(u8, sizeofcmds); - defer self.allocator.free(buffer); - var writer = std.io.fixedBufferStream(buffer).writer(); - for (self.load_commands.items) |lc| { - try lc.write(writer); - } - - const off = @sizeOf(macho.mach_header_64); - log.debug("writing {} 
load commands from 0x{x} to 0x{x}", .{ self.load_commands.items.len, off, off + sizeofcmds }); - try self.file.?.pwriteAll(buffer, off); -} - -fn writeHeader(self: *Zld) !void { - var header = emptyHeader(.{ - .flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE | macho.MH_TWOLEVEL, - }); - - switch (self.target.?.cpu.arch) { - .aarch64 => { - header.cputype = macho.CPU_TYPE_ARM64; - header.cpusubtype = macho.CPU_SUBTYPE_ARM_ALL; - }, - .x86_64 => { - header.cputype = macho.CPU_TYPE_X86_64; - header.cpusubtype = macho.CPU_SUBTYPE_X86_64_ALL; - }, - else => return error.UnsupportedCpuArchitecture, - } - - switch (self.output.?.tag) { - .exe => { - header.filetype = macho.MH_EXECUTE; - }, - .dylib => { - header.filetype = macho.MH_DYLIB; - header.flags |= macho.MH_NO_REEXPORTED_DYLIBS; - }, - } - - if (self.tlv_section_index) |_| - header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; - - header.ncmds = @intCast(u32, self.load_commands.items.len); - header.sizeofcmds = 0; - - for (self.load_commands.items) |cmd| { - header.sizeofcmds += cmd.cmdsize(); - } - - log.debug("writing Mach-O header {}", .{header}); - - try self.file.?.pwriteAll(mem.asBytes(&header), 0); -} - -pub fn makeString(self: *Zld, string: []const u8) !u32 { - try self.strtab.ensureUnusedCapacity(self.allocator, string.len + 1); - const new_off = @intCast(u32, self.strtab.items.len); - - log.debug("writing new string '{s}' at offset 0x{x}", .{ string, new_off }); - - self.strtab.appendSliceAssumeCapacity(string); - self.strtab.appendAssumeCapacity(0); - - return new_off; -} - -pub fn getString(self: *Zld, off: u32) []const u8 { - assert(off < self.strtab.items.len); - return mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr + off)); -} - -pub fn symbolIsStab(sym: macho.nlist_64) bool { - return (macho.N_STAB & sym.n_type) != 0; -} - -pub fn symbolIsPext(sym: macho.nlist_64) bool { - return (macho.N_PEXT & sym.n_type) != 0; -} - -pub fn symbolIsExt(sym: macho.nlist_64) bool { - return 
(macho.N_EXT & sym.n_type) != 0; -} - -pub fn symbolIsSect(sym: macho.nlist_64) bool { - const type_ = macho.N_TYPE & sym.n_type; - return type_ == macho.N_SECT; -} - -pub fn symbolIsUndf(sym: macho.nlist_64) bool { - const type_ = macho.N_TYPE & sym.n_type; - return type_ == macho.N_UNDF; -} - -pub fn symbolIsIndr(sym: macho.nlist_64) bool { - const type_ = macho.N_TYPE & sym.n_type; - return type_ == macho.N_INDR; -} - -pub fn symbolIsAbs(sym: macho.nlist_64) bool { - const type_ = macho.N_TYPE & sym.n_type; - return type_ == macho.N_ABS; -} - -pub fn symbolIsWeakDef(sym: macho.nlist_64) bool { - return (sym.n_desc & macho.N_WEAK_DEF) != 0; -} - -pub fn symbolIsWeakRef(sym: macho.nlist_64) bool { - return (sym.n_desc & macho.N_WEAK_REF) != 0; -} - -pub fn symbolIsTentative(sym: macho.nlist_64) bool { - if (!symbolIsUndf(sym)) return false; - return sym.n_value != 0; -} - -pub fn symbolIsNull(sym: macho.nlist_64) bool { - return sym.n_value == 0 and sym.n_desc == 0 and sym.n_type == 0 and sym.n_strx == 0 and sym.n_sect == 0; -} - -pub fn symbolIsTemp(sym: macho.nlist_64, sym_name: []const u8) bool { - if (!symbolIsSect(sym)) return false; - if (symbolIsExt(sym)) return false; - return mem.startsWith(u8, sym_name, "l") or mem.startsWith(u8, sym_name, "L"); -} - -pub fn sectionId(self: Zld, match: MatchingSection) u8 { - // TODO there might be a more generic way of doing this. 
- var section: u8 = 0; - for (self.load_commands.items) |cmd, cmd_id| { - if (cmd != .Segment) break; - if (cmd_id == match.seg) { - section += @intCast(u8, match.sect) + 1; - break; - } - section += @intCast(u8, cmd.Segment.sections.items.len); - } - return section; -} - -pub fn unpackSectionId(self: Zld, section_id: u8) MatchingSection { - var match: MatchingSection = undefined; - var section: u8 = 0; - outer: for (self.load_commands.items) |cmd, cmd_id| { - assert(cmd == .Segment); - for (cmd.Segment.sections.items) |_, sect_id| { - section += 1; - if (section_id == section) { - match.seg = @intCast(u16, cmd_id); - match.sect = @intCast(u16, sect_id); - break :outer; - } - } - } - return match; -} - -fn packDylibOrdinal(ordinal: u16) u16 { - return ordinal * macho.N_SYMBOL_RESOLVER; -} - -fn unpackDylibOrdinal(pack: u16) u16 { - return @divExact(pack, macho.N_SYMBOL_RESOLVER); -} - -pub fn findFirst(comptime T: type, haystack: []T, start: usize, predicate: anytype) usize { - if (!@hasDecl(@TypeOf(predicate), "predicate")) - @compileError("Predicate is required to define fn predicate(@This(), T) bool"); - - if (start == haystack.len) return start; - - var i = start; - while (i < haystack.len) : (i += 1) { - if (predicate.predicate(haystack[i])) break; - } - return i; -} From 4bc72c48b76eb59e6f1eb160d3d590606e7ae090 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 18 Jul 2021 18:33:23 +0200 Subject: [PATCH 69/81] macho: temporarily dupe a few linkedit fns so that traditional linker works --- src/link/MachO.zig | 267 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 261 insertions(+), 6 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 66a1ee4e04..8aac8820da 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -583,7 +583,6 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { const is_lib = self.base.options.output_mode == .Lib; const is_dyn_lib = self.base.options.link_mode == .Dynamic and is_lib; const 
is_exe_or_dyn_lib = is_dyn_lib or self.base.options.output_mode == .Exe; - const target = self.base.options.target; const stack_size = self.base.options.stack_size_override orelse 0; const allow_shlib_undefined = self.base.options.allow_shlib_undefined orelse !self.base.options.is_native_os; @@ -2755,6 +2754,7 @@ fn addRpaths(self: *MachO, rpaths: []const []const u8) !void { } fn flushZld(self: *MachO) !void { + self.load_commands_dirty = true; try self.writeTextBlocks(); try self.writeStubHelperCommon(); @@ -2778,10 +2778,10 @@ fn flushZld(self: *MachO) !void { try self.writeGotEntries(); try self.setEntryPoint(); - try self.writeRebaseInfoTable(); - try self.writeBindInfoTable(); - try self.writeLazyBindInfoTable(); - try self.writeExportInfo(); + try self.writeRebaseInfoTableZld(); + try self.writeBindInfoTableZld(); + try self.writeLazyBindInfoTableZld(); + try self.writeExportInfoZld(); try self.writeDices(); { @@ -2791,7 +2791,7 @@ fn flushZld(self: *MachO) !void { } try self.writeSymbolTable(); - try self.writeStringTable(); + try self.writeStringTableZld(); { // Seal __LINKEDIT size @@ -2856,6 +2856,244 @@ fn setEntryPoint(self: *MachO) !void { ec.stacksize = self.base.options.stack_size_override orelse 0; } +fn writeRebaseInfoTableZld(self: *MachO) !void { + var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); + defer pointers.deinit(); + + { + var it = self.blocks.iterator(); + while (it.next()) |entry| { + const match = entry.key_ptr.*; + var block: *TextBlock = entry.value_ptr.*; + + if (match.seg == self.text_segment_cmd_index.?) 
continue; // __TEXT is non-writable + + const seg = self.load_commands.items[match.seg].Segment; + + while (true) { + const sym = self.locals.items[block.local_sym_index]; + const base_offset = sym.n_value - seg.inner.vmaddr; + + for (block.rebases.items) |offset| { + try pointers.append(.{ + .offset = base_offset + offset, + .segment_id = match.seg, + }); + } + + if (block.prev) |prev| { + block = prev; + } else break; + } + } + } + + if (self.got_section_index) |idx| { + const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const sect = seg.sections.items[idx]; + const base_offset = sect.addr - seg.inner.vmaddr; + const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); + + for (self.got_entries.items) |entry, i| { + if (entry.where == .import) continue; + + try pointers.append(.{ + .offset = base_offset + i * @sizeOf(u64), + .segment_id = segment_id, + }); + } + } + + if (self.la_symbol_ptr_section_index) |idx| { + const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = seg.sections.items[idx]; + const base_offset = sect.addr - seg.inner.vmaddr; + const segment_id = @intCast(u16, self.data_segment_cmd_index.?); + + try pointers.ensureUnusedCapacity(self.stubs.items.len); + for (self.stubs.items) |_, i| { + pointers.appendAssumeCapacity(.{ + .offset = base_offset + i * @sizeOf(u64), + .segment_id = segment_id, + }); + } + } + + std.sort.sort(bind.Pointer, pointers.items, {}, bind.pointerCmp); + + const size = try bind.rebaseInfoSize(pointers.items); + var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); + defer self.base.allocator.free(buffer); + + var stream = std.io.fixedBufferStream(buffer); + try bind.writeRebaseInfo(pointers.items, stream.writer()); + + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; + dyld_info.rebase_off = @intCast(u32, 
seg.inner.fileoff); + dyld_info.rebase_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @sizeOf(u64))); + seg.inner.filesize += dyld_info.rebase_size; + + log.debug("writing rebase info from 0x{x} to 0x{x}", .{ dyld_info.rebase_off, dyld_info.rebase_off + dyld_info.rebase_size }); + + try self.base.file.?.pwriteAll(buffer, dyld_info.rebase_off); +} + +fn writeBindInfoTableZld(self: *MachO) !void { + var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); + defer pointers.deinit(); + + if (self.got_section_index) |idx| { + const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const sect = seg.sections.items[idx]; + const base_offset = sect.addr - seg.inner.vmaddr; + const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); + + for (self.got_entries.items) |entry, i| { + if (entry.where == .local) continue; + + const sym = self.imports.items[entry.where_index]; + try pointers.append(.{ + .offset = base_offset + i * @sizeOf(u64), + .segment_id = segment_id, + .dylib_ordinal = unpackDylibOrdinal(sym.n_desc), + .name = self.getString(sym.n_strx), + }); + } + } + + { + var it = self.blocks.iterator(); + while (it.next()) |entry| { + const match = entry.key_ptr.*; + var block: *TextBlock = entry.value_ptr.*; + + if (match.seg == self.text_segment_cmd_index.?) 
continue; // __TEXT is non-writable + + const seg = self.load_commands.items[match.seg].Segment; + + while (true) { + const sym = self.locals.items[block.local_sym_index]; + const base_offset = sym.n_value - seg.inner.vmaddr; + + for (block.bindings.items) |binding| { + const bind_sym = self.imports.items[binding.local_sym_index]; + try pointers.append(.{ + .offset = binding.offset + base_offset, + .segment_id = match.seg, + .dylib_ordinal = unpackDylibOrdinal(bind_sym.n_desc), + .name = self.getString(bind_sym.n_strx), + }); + } + + if (block.prev) |prev| { + block = prev; + } else break; + } + } + } + + const size = try bind.bindInfoSize(pointers.items); + var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); + defer self.base.allocator.free(buffer); + + var stream = std.io.fixedBufferStream(buffer); + try bind.writeBindInfo(pointers.items, stream.writer()); + + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; + dyld_info.bind_off = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + dyld_info.bind_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64))); + seg.inner.filesize += dyld_info.bind_size; + + log.debug("writing binding info from 0x{x} to 0x{x}", .{ dyld_info.bind_off, dyld_info.bind_off + dyld_info.bind_size }); + + try self.base.file.?.pwriteAll(buffer, dyld_info.bind_off); +} + +fn writeLazyBindInfoTableZld(self: *MachO) !void { + var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); + defer pointers.deinit(); + + if (self.la_symbol_ptr_section_index) |idx| { + const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = seg.sections.items[idx]; + const base_offset = sect.addr - seg.inner.vmaddr; + const segment_id = @intCast(u16, self.data_segment_cmd_index.?); + + try pointers.ensureUnusedCapacity(self.stubs.items.len); + + for 
(self.stubs.items) |import_id, i| { + const sym = self.imports.items[import_id]; + pointers.appendAssumeCapacity(.{ + .offset = base_offset + i * @sizeOf(u64), + .segment_id = segment_id, + .dylib_ordinal = unpackDylibOrdinal(sym.n_desc), + .name = self.getString(sym.n_strx), + }); + } + } + + const size = try bind.lazyBindInfoSize(pointers.items); + var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); + defer self.base.allocator.free(buffer); + + var stream = std.io.fixedBufferStream(buffer); + try bind.writeLazyBindInfo(pointers.items, stream.writer()); + + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; + dyld_info.lazy_bind_off = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + dyld_info.lazy_bind_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64))); + seg.inner.filesize += dyld_info.lazy_bind_size; + + log.debug("writing lazy binding info from 0x{x} to 0x{x}", .{ dyld_info.lazy_bind_off, dyld_info.lazy_bind_off + dyld_info.lazy_bind_size }); + + try self.base.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off); + try self.populateLazyBindOffsetsInStubHelper(buffer); +} + +fn writeExportInfoZld(self: *MachO) !void { + var trie = Trie.init(self.base.allocator); + defer trie.deinit(); + + const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const base_address = text_segment.inner.vmaddr; + + // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. 
+ log.debug("writing export trie", .{}); + + for (self.globals.items) |sym| { + const sym_name = self.getString(sym.n_strx); + log.debug(" | putting '{s}' defined at 0x{x}", .{ sym_name, sym.n_value }); + + try trie.put(.{ + .name = sym_name, + .vmaddr_offset = sym.n_value - base_address, + .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, + }); + } + + try trie.finalize(); + + var buffer = try self.base.allocator.alloc(u8, @intCast(usize, trie.size)); + defer self.base.allocator.free(buffer); + + var stream = std.io.fixedBufferStream(buffer); + const nwritten = try trie.write(stream.writer()); + assert(nwritten == trie.size); + + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; + dyld_info.export_off = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + dyld_info.export_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64))); + seg.inner.filesize += dyld_info.export_size; + + log.debug("writing export info from 0x{x} to 0x{x}", .{ dyld_info.export_off, dyld_info.export_off + dyld_info.export_size }); + + try self.base.file.?.pwriteAll(buffer, dyld_info.export_off); +} + fn writeSymbolTable(self: *MachO) !void { const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; @@ -5222,6 +5460,23 @@ fn writeStringTable(self: *MachO) !void { self.strtab_dirty = false; } +fn writeStringTableZld(self: *MachO) !void { + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; + symtab.stroff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + symtab.strsize = @intCast(u32, mem.alignForwardGeneric(u64, self.strtab.items.len, @alignOf(u64))); + seg.inner.filesize += symtab.strsize; + + log.debug("writing string table from 
0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); + + try self.base.file.?.pwriteAll(self.strtab.items, symtab.stroff); + + if (symtab.strsize > self.strtab.items.len and self.base.options.target.cpu.arch == .x86_64) { + // This is the last section, so we need to pad it out. + try self.base.file.?.pwriteAll(&[_]u8{0}, seg.inner.fileoff + seg.inner.filesize - 1); + } +} + fn updateLinkeditSegmentSizes(self: *MachO) !void { if (!self.load_commands_dirty) return; From 1843ecf51b240c43a4a9a9cadbcc1286b9b9f41a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 20 Jul 2021 10:07:46 +0200 Subject: [PATCH 70/81] macho: add export to the symbol resolver in updateDeclExports so that we can track globals for symbol resolution like in the traditional linker. --- src/link/MachO.zig | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 8aac8820da..f5e31c2eeb 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -3726,7 +3726,7 @@ pub fn updateDeclExports( const i = if (self.globals_free_list.popOrNull()) |i| i else blk: { _ = self.globals.addOneAssumeCapacity(); self.export_info_dirty = true; - break :blk self.globals.items.len - 1; + break :blk @intCast(u32, self.globals.items.len - 1); }; self.globals.items[i] = .{ .n_strx = name_str_index, @@ -3735,6 +3735,14 @@ pub fn updateDeclExports( .n_desc = n_desc, .n_value = decl_sym.n_value, }; + const resolv_name = try self.base.allocator.dupe(u8, exp_name); + const resolv = try self.symbol_resolver.getOrPut(self.base.allocator, resolv_name); + defer if (resolv.found_existing) self.base.allocator.free(resolv_name); + resolv.value_ptr.* = .{ + .where = .global, + .where_index = i, + .local_sym_index = decl.link.macho.local_sym_index, + }; exp.link.macho.sym_index = @intCast(u32, i); } From a442b165f1e219b429e497e1de26780612762871 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 20 Jul 2021 20:33:07 +0200 Subject: [PATCH 71/81] macho: 
add stub relocs when adding extern fn in self-hosted. --- src/codegen.zig | 51 +++++------ src/link/MachO.zig | 162 +++++++++++++++++------------------ src/link/MachO/Object.zig | 4 +- src/link/MachO/TextBlock.zig | 2 +- 4 files changed, 105 insertions(+), 114 deletions(-) diff --git a/src/codegen.zig b/src/codegen.zig index ec75cbadc6..7cb7119f0d 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -2523,36 +2523,29 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } } else if (func_value.castTag(.extern_fn)) |func_payload| { const decl = func_payload.data; - const decl_name = try std.fmt.allocPrint(self.bin_file.allocator, "_{s}", .{decl.name}); - defer self.bin_file.allocator.free(decl_name); - const already_defined = macho_file.symbol_resolver.contains(decl_name); - const resolv = macho_file.symbol_resolver.get(decl_name) orelse blk: { - break :blk try macho_file.addExternFn(decl_name); - }; - const start = self.code.items.len; - const len: usize = blk: { - switch (arch) { - .x86_64 => { - // callq - try self.code.ensureCapacity(self.code.items.len + 5); - self.code.appendSliceAssumeCapacity(&[5]u8{ 0xe8, 0x0, 0x0, 0x0, 0x0 }); - break :blk 5; - }, - .aarch64 => { - // bl - writeInt(u32, try self.code.addManyAsArray(4), 0); - break :blk 4; - }, - else => unreachable, // unsupported architecture on MachO - } - }; - try macho_file.stub_fixups.append(self.bin_file.allocator, .{ - .symbol = resolv.where_index, - .already_defined = already_defined, - .start = start, - .len = len, + const where_index = try macho_file.addExternFn(mem.spanZ(decl.name)); + const offset = @intCast(u32, self.code.items.len); + switch (arch) { + .x86_64 => { + // callq + try self.code.ensureCapacity(self.code.items.len + 5); + self.code.appendSliceAssumeCapacity(&[5]u8{ 0xe8, 0x0, 0x0, 0x0, 0x0 }); + }, + .aarch64 => { + // bl + writeInt(u32, try self.code.addManyAsArray(4), Instruction.bl(0).toU32()); + }, + else => unreachable, // unsupported architecture on MachO + } + // Add 
relocation to the decl. + try decl.link.macho.relocs.append(self.bin_file.allocator, .{ + .offset = offset, + .where = .import, + .where_index = where_index, + .payload = .{ .branch = .{ + .arch = arch, + } }, }); - // We mark the space and fix it up later. } else { return self.fail(inst.base.src, "TODO implement calling bitcasted functions", .{}); } diff --git a/src/link/MachO.zig b/src/link/MachO.zig index f5e31c2eeb..452b6992bc 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -159,6 +159,14 @@ strtab_needs_relocation: bool = false, has_dices: bool = false, has_stabs: bool = false, +pending_updates: std.ArrayListUnmanaged(struct { + kind: enum { + got, + stub, + }, + index: u32, +}) = .{}, + /// A list of text blocks that have surplus capacity. This list can have false /// positives, as functions grow and shrink over time, only sometimes being added /// or removed from the freelist. @@ -179,6 +187,7 @@ text_block_free_list: std.ArrayListUnmanaged(*TextBlock) = .{}, /// Pointer to the last allocated text block last_text_block: ?*TextBlock = null, +managed_blocks: std.ArrayListUnmanaged(TextBlock) = .{}, blocks: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}, /// A list of all PIE fixups required for this run of the linker. @@ -190,13 +199,6 @@ blocks: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}, /// backends. pie_fixups: std.ArrayListUnmanaged(PIEFixup) = .{}, -/// A list of all stub (extern decls) fixups required for this run of the linker. -/// Warning, this is currently NOT thread-safe. See the TODO below. -/// TODO Move this list inside `updateDecl` where it should be allocated -/// prior to calling `generateSymbol`, and then immediately deallocated -/// rather than sitting in the global scope. -stub_fixups: std.ArrayListUnmanaged(StubFixup) = .{}, - const SymbolWithLoc = struct { // Table where the symbol can be found. 
where: enum { @@ -229,19 +231,6 @@ pub const PIEFixup = struct { size: usize, }; -pub const StubFixup = struct { - /// Id of extern (lazy) symbol. - symbol: u32, - /// Signals whether the symbol has already been declared before. If so, - /// then there is no need to rewrite the stub entry and related. - already_defined: bool, - /// Where in the byte stream we should perform the fixup. - start: usize, - /// The length of the byte stream. For x86_64, this will be - /// variable. For aarch64, it will be fixed at 4 bytes. - len: usize, -}; - /// When allocating, the ideal_capacity is calculated by /// actual_capacity + (actual_capacity / ideal_factor) const ideal_factor = 2; @@ -2244,9 +2233,7 @@ fn resolveSymbols(self: *MachO) !void { .local_sym_index = local_sym_index, }; - const block = try self.base.allocator.create(TextBlock); - errdefer self.base.allocator.destroy(block); - + const block = try self.managed_blocks.addOne(self.base.allocator); block.* = TextBlock.empty; block.local_sym_index = local_sym_index; block.code = code; @@ -2382,9 +2369,7 @@ fn resolveSymbols(self: *MachO) !void { // We create an empty atom for this symbol. // TODO perhaps we should special-case special symbols? Create a separate // linked list of atoms? 
- const block = try self.base.allocator.create(TextBlock); - errdefer self.base.allocator.destroy(block); - + const block = try self.managed_blocks.addOne(self.base.allocator); block.* = TextBlock.empty; block.local_sym_index = local_sym_index; block.code = try self.base.allocator.alloc(u8, 0); @@ -3243,9 +3228,8 @@ pub fn deinit(self: *MachO) void { ds.deinit(self.base.allocator); } + self.pending_updates.deinit(self.base.allocator); self.pie_fixups.deinit(self.base.allocator); - self.stub_fixups.deinit(self.base.allocator); - self.text_block_free_list.deinit(self.base.allocator); self.got_entries.deinit(self.base.allocator); self.got_entries_map.deinit(self.base.allocator); self.got_entries_free_list.deinit(self.base.allocator); @@ -3288,8 +3272,12 @@ pub fn deinit(self: *MachO) void { } self.load_commands.deinit(self.base.allocator); - // TODO dealloc all blocks + for (self.managed_blocks.items) |*block| { + block.deinit(self.base.allocator); + } + self.managed_blocks.deinit(self.base.allocator); self.blocks.deinit(self.base.allocator); + self.text_block_free_list.deinit(self.base.allocator); } pub fn closeFiles(self: MachO) void { @@ -3302,6 +3290,9 @@ pub fn closeFiles(self: MachO) void { } fn freeTextBlock(self: *MachO, text_block: *TextBlock) void { + log.debug("freeTextBlock {*}", .{text_block}); + // text_block.deinit(self.base.allocator); + var already_have_free_list_node = false; { var i: usize = 0; @@ -3467,18 +3458,22 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { .val = decl.val, }, &code_buffer, .none); - const code = switch (res) { - .externally_managed => |x| x, - .appended => code_buffer.items, - .fail => |em| { - // Clear any PIE fixups for this decl. - self.pie_fixups.shrinkRetainingCapacity(0); - // Clear any stub fixups for this decl. 
- self.stub_fixups.shrinkRetainingCapacity(0); - decl.analysis = .codegen_failure; - try module.failed_decls.put(module.gpa, decl, em); - return; - }, + const code = blk: { + switch (res) { + .externally_managed => |x| break :blk x, + .appended => { + decl.link.macho.code = code_buffer.toOwnedSlice(); + log.warn("WAT", .{}); + break :blk decl.link.macho.code; + }, + .fail => |em| { + // Clear any PIE fixups for this decl. + self.pie_fixups.shrinkRetainingCapacity(0); + decl.analysis = .codegen_failure; + try module.failed_decls.put(module.gpa, decl, em); + return; + }, + } }; const required_alignment = decl.ty.abiAlignment(self.base.options.target); @@ -3559,12 +3554,12 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { switch (self.base.options.target.cpu.arch) { .x86_64 => { const displacement = try math.cast(u32, target_addr - this_addr - 4); - mem.writeIntLittle(u32, code_buffer.items[fixup.offset..][0..4], displacement); + mem.writeIntLittle(u32, decl.link.macho.code[fixup.offset..][0..4], displacement); }, .aarch64 => { // TODO optimize instruction based on jump length (use ldr(literal) + nop if possible). 
{ - const inst = code_buffer.items[fixup.offset..][0..4]; + const inst = decl.link.macho.code[fixup.offset..][0..4]; var parsed = mem.bytesAsValue(meta.TagPayload( aarch64.Instruction, aarch64.Instruction.pc_relative_address, @@ -3576,7 +3571,7 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { parsed.immlo = @truncate(u2, pages); } { - const inst = code_buffer.items[fixup.offset + 4 ..][0..4]; + const inst = decl.link.macho.code[fixup.offset + 4 ..][0..4]; var parsed = mem.bytesAsValue(meta.TagPayload( aarch64.Instruction, aarch64.Instruction.load_store_register, @@ -3590,39 +3585,24 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { } } - // Resolve stubs (if any) - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stubs = text_segment.sections.items[self.stubs_section_index.?]; - for (self.stub_fixups.items) |fixup| { - const stubs_index = self.stubs_map.get(fixup.symbol) orelse unreachable; - const stub_addr = stubs.addr + stubs_index * stubs.reserved2; - const text_addr = symbol.n_value + fixup.start; - switch (self.base.options.target.cpu.arch) { - .x86_64 => { - assert(stub_addr >= text_addr + fixup.len); - const displacement = try math.cast(u32, stub_addr - text_addr - fixup.len); - var placeholder = code_buffer.items[fixup.start + fixup.len - @sizeOf(u32) ..][0..@sizeOf(u32)]; - mem.writeIntSliceLittle(u32, placeholder, displacement); - }, - .aarch64 => { - assert(stub_addr >= text_addr); - const displacement = try math.cast(i28, stub_addr - text_addr); - var placeholder = code_buffer.items[fixup.start..][0..fixup.len]; - mem.writeIntSliceLittle(u32, placeholder, aarch64.Instruction.bl(displacement).toU32()); - }, - else => unreachable, // unsupported target architecture - } - if (!fixup.already_defined) { - try self.writeStub(stubs_index); - try self.writeStubInStubHelper(stubs_index); - try self.writeLazySymbolPointer(stubs_index); + // Resolve 
relocations + try decl.link.macho.resolveRelocs(self); - self.rebase_info_dirty = true; - self.lazy_binding_info_dirty = true; + // Apply pending updates + while (self.pending_updates.popOrNull()) |update| { + switch (update.kind) { + .got => unreachable, + .stub => { + try self.writeStub(update.index); + try self.writeStubInStubHelper(update.index); + try self.writeLazySymbolPointer(update.index); + self.rebase_info_dirty = true; + self.lazy_binding_info_dirty = true; + }, } } - self.stub_fixups.shrinkRetainingCapacity(0); + const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; const text_section = text_segment.sections.items[self.text_section_index.?]; const section_offset = symbol.n_value - text_section.addr; const file_offset = text_section.offset + section_offset; @@ -3756,6 +3736,7 @@ pub fn deleteExport(self: *MachO, exp: Export) void { } pub fn freeDecl(self: *MachO, decl: *Module.Decl) void { + log.debug("freeDecl {*}", .{decl}); // Appending to free lists is allowed to fail because the free lists are heuristics based anyway. self.freeTextBlock(&decl.link.macho); if (decl.link.macho.local_sym_index != 0) { @@ -4314,7 +4295,8 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, // should be deleted because the block that it points to has grown to take up // more of the extra capacity. 
if (!big_block.freeListEligible(self.*)) { - _ = self.text_block_free_list.swapRemove(i); + const bl = self.text_block_free_list.swapRemove(i); + bl.deinit(self.base.allocator); } else { i += 1; } @@ -4386,25 +4368,43 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, return vaddr; } -pub fn addExternFn(self: *MachO, name: []const u8) !SymbolWithLoc { - log.debug("adding new extern function '{s}' with dylib ordinal 1", .{name}); +pub fn addExternFn(self: *MachO, name: []const u8) !u32 { + const sym_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{name}); + const already_defined = self.symbol_resolver.contains(sym_name); + + if (already_defined) { + const resolv = self.symbol_resolver.get(sym_name) orelse unreachable; + self.base.allocator.free(sym_name); + return resolv.where_index; + } + + log.debug("adding new extern function '{s}' with dylib ordinal 1", .{sym_name}); const import_sym_index = @intCast(u32, self.imports.items.len); try self.imports.append(self.base.allocator, .{ - .n_strx = try self.makeString(name), + .n_strx = try self.makeString(sym_name), .n_type = macho.N_UNDF | macho.N_EXT, .n_sect = 0, .n_desc = packDylibOrdinal(1), .n_value = 0, }); - const resolv = .{ + try self.symbol_resolver.putNoClobber(self.base.allocator, sym_name, .{ .where = .import, .where_index = import_sym_index, - }; - try self.symbol_resolver.putNoClobber(self.base.allocator, try self.base.allocator.dupe(u8, name), resolv); + }); + const stubs_index = @intCast(u32, self.stubs.items.len); try self.stubs.append(self.base.allocator, import_sym_index); try self.stubs_map.putNoClobber(self.base.allocator, import_sym_index, stubs_index); - return resolv; + + // TODO discuss this. The caller context expects codegen.InnerError{ OutOfMemory, CodegenFail }, + // which obviously doesn't include file writing op errors. So instead of trying to write the stub + // entry right here and now, queue it up and dispose of when updating decl. 
+ try self.pending_updates.append(self.base.allocator, .{ + .kind = .stub, + .index = stubs_index, + }); + + return import_sym_index; } const NextSegmentAddressAndOffset = struct { diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index eaab05140e..407918456b 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -723,9 +723,7 @@ pub fn parseTextBlocks(self: *Object, macho_file: *MachO) !void { break :blk block_local_sym_index; }; - const block = try self.allocator.create(TextBlock); - errdefer self.allocator.destroy(block); - + const block = try macho_file.managed_blocks.addOne(macho_file.base.allocator); block.* = TextBlock.empty; block.local_sym_index = block_local_sym_index; block.code = try self.allocator.dupe(u8, code); diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig index 0b6ff20d94..d28b9630e6 100644 --- a/src/link/MachO/TextBlock.zig +++ b/src/link/MachO/TextBlock.zig @@ -558,9 +558,9 @@ pub fn deinit(self: *TextBlock, allocator: *Allocator) void { self.bindings.deinit(allocator); self.rebases.deinit(allocator); self.relocs.deinit(allocator); - self.allocator.free(self.code); self.contained.deinit(allocator); self.aliases.deinit(allocator); + allocator.free(self.code); } /// Returns how much room there is to grow in virtual address space. From 5276ce8e638d3d295c443d8bc33e5c51538ae540 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 20 Jul 2021 23:37:22 +0200 Subject: [PATCH 72/81] macho: use adapters to directly reference strtab Thanks to this, we no longer need to do allocs per symbol name landing in the symbol resolver, plus we do not need to actively track if the string was already inserted into the string table. 
--- src/link/MachO.zig | 149 +++++++++++++++++++++++------------ src/link/MachO/TextBlock.zig | 10 ++- 2 files changed, 105 insertions(+), 54 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 452b6992bc..f271e00950 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -127,7 +127,7 @@ globals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, imports: std.ArrayListUnmanaged(macho.nlist_64) = .{}, undefs: std.ArrayListUnmanaged(macho.nlist_64) = .{}, tentatives: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -symbol_resolver: std.StringArrayHashMapUnmanaged(SymbolWithLoc) = .{}, +symbol_resolver: std.AutoHashMapUnmanaged(u32, SymbolWithLoc) = .{}, locals_free_list: std.ArrayListUnmanaged(u32) = .{}, globals_free_list: std.ArrayListUnmanaged(u32) = .{}, @@ -135,6 +135,7 @@ globals_free_list: std.ArrayListUnmanaged(u32) = .{}, stub_helper_stubs_start_off: ?u64 = null, strtab: std.ArrayListUnmanaged(u8) = .{}, +strtab_dir: std.HashMapUnmanaged(u32, u32, StringIndexContext, std.hash_map.default_max_load_percentage) = .{}, got_entries: std.ArrayListUnmanaged(GotIndirectionKey) = .{}, got_entries_map: std.AutoHashMapUnmanaged(GotIndirectionKey, u32) = .{}, @@ -199,6 +200,33 @@ blocks: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}, /// backends. 
pie_fixups: std.ArrayListUnmanaged(PIEFixup) = .{}, +const StringIndexContext = struct { + strtab: *std.ArrayListUnmanaged(u8), + + pub fn eql(self: StringIndexContext, a: u32, b: u32) bool { + return a == b; + } + + pub fn hash(self: StringIndexContext, x: u32) u64 { + const x_slice = mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr) + x); + return std.hash_map.hashString(x_slice); + } +}; + +pub const StringSliceAdapter = struct { + strtab: *std.ArrayListUnmanaged(u8), + + pub fn eql(self: StringSliceAdapter, a_slice: []const u8, b: u32) bool { + const b_slice = mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr) + b); + return mem.eql(u8, a_slice, b_slice); + } + + pub fn hash(self: StringSliceAdapter, adapted_key: []const u8) u64 { + _ = self; + return std.hash_map.hashString(adapted_key); + } +}; + const SymbolWithLoc = struct { // Table where the symbol can be found. where: enum { @@ -882,7 +910,10 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { { // Add dyld_stub_binder as the final GOT entry. 
- const resolv = self.symbol_resolver.get("dyld_stub_binder") orelse unreachable; + const n_strx = self.strtab_dir.getAdapted(@as([]const u8, "dyld_stub_binder"), StringSliceAdapter{ + .strtab = &self.strtab, + }) orelse unreachable; + const resolv = self.symbol_resolver.get(n_strx) orelse unreachable; const got_index = @intCast(u32, self.got_entries.items.len); const got_entry = GotIndirectionKey{ .where = .import, @@ -1722,14 +1753,17 @@ fn allocateTextBlocks(self: *MachO) !void { } // Update globals - for (self.symbol_resolver.values()) |resolv| { - if (resolv.where != .global) continue; + { + var sym_it = self.symbol_resolver.valueIterator(); + while (sym_it.next()) |resolv| { + if (resolv.where != .global) continue; - assert(resolv.local_sym_index != 0); - const local_sym = self.locals.items[resolv.local_sym_index]; - const sym = &self.globals.items[resolv.where_index]; - sym.n_value = local_sym.n_value; - sym.n_sect = local_sym.n_sect; + assert(resolv.local_sym_index != 0); + const local_sym = self.locals.items[resolv.local_sym_index]; + const sym = &self.globals.items[resolv.where_index]; + sym.n_value = local_sym.n_value; + sym.n_sect = local_sym.n_sect; + } } } @@ -1821,7 +1855,10 @@ fn writeStubHelperCommon(self: *MachO) !void { code[9] = 0xff; code[10] = 0x25; { - const resolv = self.symbol_resolver.get("dyld_stub_binder") orelse unreachable; + const n_strx = self.strtab_dir.getAdapted(@as([]const u8, "dyld_stub_binder"), StringSliceAdapter{ + .strtab = &self.strtab, + }) orelse unreachable; + const resolv = self.symbol_resolver.get(n_strx) orelse unreachable; const got_index = self.got_entries_map.get(.{ .where = .import, .where_index = resolv.where_index, @@ -1869,7 +1906,10 @@ fn writeStubHelperCommon(self: *MachO) !void { code[10] = 0xbf; code[11] = 0xa9; binder_blk_outer: { - const resolv = self.symbol_resolver.get("dyld_stub_binder") orelse unreachable; + const n_strx = self.strtab_dir.getAdapted(@as([]const u8, "dyld_stub_binder"), 
StringSliceAdapter{ + .strtab = &self.strtab, + }) orelse unreachable; + const resolv = self.symbol_resolver.get(n_strx) orelse unreachable; const got_index = self.got_entries_map.get(.{ .where = .import, .where_index = resolv.where_index, @@ -1965,19 +2005,9 @@ fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void { return error.UnhandledSymbolType; } + const n_strx = try self.makeString(sym_name); if (symbolIsSect(sym)) { // Defined symbol regardless of scope lands in the locals symbol table. - const n_strx = blk: { - if (self.symbol_resolver.get(sym_name)) |resolv| { - switch (resolv.where) { - .global => break :blk self.globals.items[resolv.where_index].n_strx, - .tentative => break :blk self.tentatives.items[resolv.where_index].n_strx, - .undef => break :blk self.undefs.items[resolv.where_index].n_strx, - .import => unreachable, - } - } - break :blk try self.makeString(sym_name); - }; const local_sym_index = @intCast(u32, self.locals.items.len); try self.locals.append(self.base.allocator, .{ .n_strx = n_strx, @@ -1993,7 +2023,7 @@ fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void { if (!symbolIsExt(sym)) continue; const local = self.locals.items[local_sym_index]; - const resolv = self.symbol_resolver.getPtr(sym_name) orelse { + const resolv = self.symbol_resolver.getPtr(n_strx) orelse { const global_sym_index = @intCast(u32, self.globals.items.len); try self.globals.append(self.base.allocator, .{ .n_strx = n_strx, @@ -2002,7 +2032,7 @@ fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void { .n_desc = sym.n_desc, .n_value = sym.n_value, }); - try self.symbol_resolver.putNoClobber(self.base.allocator, try self.base.allocator.dupe(u8, sym_name), .{ + try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ .where = .global, .where_index = global_sym_index, .local_sym_index = local_sym_index, @@ -2072,7 +2102,7 @@ fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void { }; } else if (symbolIsTentative(sym)) { // 
Symbol is a tentative definition. - const resolv = self.symbol_resolver.getPtr(sym_name) orelse { + const resolv = self.symbol_resolver.getPtr(n_strx) orelse { const tent_sym_index = @intCast(u32, self.tentatives.items.len); try self.tentatives.append(self.base.allocator, .{ .n_strx = try self.makeString(sym_name), @@ -2081,7 +2111,7 @@ fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void { .n_desc = sym.n_desc, .n_value = sym.n_value, }); - try self.symbol_resolver.putNoClobber(self.base.allocator, try self.base.allocator.dupe(u8, sym_name), .{ + try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ .where = .tentative, .where_index = tent_sym_index, .file = object_id, @@ -2126,7 +2156,7 @@ fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void { } } else { // Symbol is undefined. - if (self.symbol_resolver.contains(sym_name)) continue; + if (self.symbol_resolver.contains(n_strx)) continue; const undef_sym_index = @intCast(u32, self.undefs.items.len); try self.undefs.append(self.base.allocator, .{ @@ -2136,7 +2166,7 @@ fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void { .n_desc = 0, .n_value = 0, }); - try self.symbol_resolver.putNoClobber(self.base.allocator, try self.base.allocator.dupe(u8, sym_name), .{ + try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ .where = .undef, .where_index = undef_sym_index, .file = object_id, @@ -2214,7 +2244,7 @@ fn resolveSymbols(self: *MachO) !void { mem.set(u8, code, 0); const alignment = (sym.n_desc >> 8) & 0x0f; - const resolv = self.symbol_resolver.getPtr(sym_name) orelse unreachable; + const resolv = self.symbol_resolver.getPtr(sym.n_strx) orelse unreachable; const local_sym_index = @intCast(u32, self.locals.items.len); var nlist = macho.nlist_64{ .n_strx = sym.n_strx, @@ -2263,15 +2293,16 @@ fn resolveSymbols(self: *MachO) !void { // Third pass, resolve symbols in dynamic libraries. { // Put dyld_stub_binder as an undefined special symbol. 
+ const n_strx = try self.makeString("dyld_stub_binder"); const undef_sym_index = @intCast(u32, self.undefs.items.len); try self.undefs.append(self.base.allocator, .{ - .n_strx = try self.makeString("dyld_stub_binder"), + .n_strx = n_strx, .n_type = macho.N_UNDF, .n_sect = 0, .n_desc = 0, .n_value = 0, }); - try self.symbol_resolver.putNoClobber(self.base.allocator, try self.base.allocator.dupe(u8, "dyld_stub_binder"), .{ + try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ .where = .undef, .where_index = undef_sym_index, }); @@ -2304,7 +2335,7 @@ fn resolveSymbols(self: *MachO) !void { try referenced.putNoClobber(dylib, {}); } - const resolv = self.symbol_resolver.getPtr(sym_name) orelse unreachable; + const resolv = self.symbol_resolver.getPtr(sym.n_strx) orelse unreachable; const undef = &self.undefs.items[resolv.where_index]; const import_sym_index = @intCast(u32, self.imports.items.len); try self.imports.append(self.base.allocator, .{ @@ -2331,7 +2362,10 @@ fn resolveSymbols(self: *MachO) !void { } // Fourth pass, handle synthetic symbols and flag any undefined references. 
- if (self.symbol_resolver.getPtr("___dso_handle")) |resolv| blk: { + if (self.strtab_dir.getAdapted(@as([]const u8, "___dso_handle"), StringSliceAdapter{ + .strtab = &self.strtab, + })) |n_strx| blk: { + const resolv = self.symbol_resolver.getPtr(n_strx) orelse break :blk; if (resolv.where != .undef) break :blk; const undef = &self.undefs.items[resolv.where_index]; @@ -2390,7 +2424,7 @@ fn resolveSymbols(self: *MachO) !void { if (symbolIsNull(sym)) continue; const sym_name = self.getString(sym.n_strx); - const resolv = self.symbol_resolver.get(sym_name) orelse unreachable; + const resolv = self.symbol_resolver.get(sym.n_strx) orelse unreachable; log.err("undefined reference to symbol '{s}'", .{sym_name}); log.err(" first referenced in '{s}'", .{self.objects.items[resolv.file].name.?}); @@ -2830,10 +2864,13 @@ fn setEntryPoint(self: *MachO) !void { // TODO we should respect the -entry flag passed in by the user to set a custom // entrypoint. For now, assume default of `_main`. const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const resolv = self.symbol_resolver.get("_main") orelse { + const n_strx = self.strtab_dir.getAdapted(@as([]const u8, "_main"), StringSliceAdapter{ + .strtab = &self.strtab, + }) orelse { log.err("'_main' export not found", .{}); return error.MissingMainEntrypoint; }; + const resolv = self.symbol_resolver.get(n_strx) orelse unreachable; assert(resolv.where == .global); const sym = self.globals.items[resolv.where_index]; const ec = &self.load_commands.items[self.main_cmd_index.?].Main; @@ -3235,6 +3272,7 @@ pub fn deinit(self: *MachO) void { self.got_entries_free_list.deinit(self.base.allocator); self.stubs.deinit(self.base.allocator); self.stubs_map.deinit(self.base.allocator); + self.strtab_dir.deinit(self.base.allocator); self.strtab.deinit(self.base.allocator); self.undefs.deinit(self.base.allocator); self.tentatives.deinit(self.base.allocator); @@ -3243,10 +3281,6 @@ pub fn deinit(self: *MachO) void { 
self.globals_free_list.deinit(self.base.allocator); self.locals.deinit(self.base.allocator); self.locals_free_list.deinit(self.base.allocator); - - for (self.symbol_resolver.keys()) |key| { - self.base.allocator.free(key); - } self.symbol_resolver.deinit(self.base.allocator); for (self.objects.items) |object| { @@ -3715,9 +3749,7 @@ pub fn updateDeclExports( .n_desc = n_desc, .n_value = decl_sym.n_value, }; - const resolv_name = try self.base.allocator.dupe(u8, exp_name); - const resolv = try self.symbol_resolver.getOrPut(self.base.allocator, resolv_name); - defer if (resolv.found_existing) self.base.allocator.free(resolv_name); + const resolv = try self.symbol_resolver.getOrPut(self.base.allocator, name_str_index); resolv.value_ptr.* = .{ .where = .global, .where_index = i, @@ -4233,17 +4265,19 @@ pub fn populateMissingMetadata(self: *MachO) !void { }); self.load_commands_dirty = true; } - if (!self.symbol_resolver.contains("dyld_stub_binder")) { + if (!self.strtab_dir.containsAdapted(@as([]const u8, "dyld_stub_binder"), StringSliceAdapter{ + .strtab = &self.strtab, + })) { const import_sym_index = @intCast(u32, self.imports.items.len); + const n_strx = try self.makeString("dyld_stub_binder"); try self.imports.append(self.base.allocator, .{ - .n_strx = try self.makeString("dyld_stub_binder"), + .n_strx = n_strx, .n_type = macho.N_UNDF | macho.N_EXT, .n_sect = 0, .n_desc = packDylibOrdinal(1), .n_value = 0, }); - const name = try self.base.allocator.dupe(u8, "dyld_stub_binder"); - try self.symbol_resolver.putNoClobber(self.base.allocator, name, .{ + try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ .where = .import, .where_index = import_sym_index, }); @@ -4370,24 +4404,26 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, pub fn addExternFn(self: *MachO, name: []const u8) !u32 { const sym_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{name}); - const already_defined = 
self.symbol_resolver.contains(sym_name); + defer self.base.allocator.free(sym_name); - if (already_defined) { - const resolv = self.symbol_resolver.get(sym_name) orelse unreachable; - self.base.allocator.free(sym_name); + if (self.strtab_dir.getAdapted(@as([]const u8, sym_name), StringSliceAdapter{ + .strtab = &self.strtab, + })) |n_strx| { + const resolv = self.symbol_resolver.get(n_strx) orelse unreachable; return resolv.where_index; } log.debug("adding new extern function '{s}' with dylib ordinal 1", .{sym_name}); const import_sym_index = @intCast(u32, self.imports.items.len); + const n_strx = try self.makeString(sym_name); try self.imports.append(self.base.allocator, .{ - .n_strx = try self.makeString(sym_name), + .n_strx = n_strx, .n_type = macho.N_UNDF | macho.N_EXT, .n_sect = 0, .n_desc = packDylibOrdinal(1), .n_value = 0, }); - try self.symbol_resolver.putNoClobber(self.base.allocator, sym_name, .{ + try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ .where = .import, .where_index = import_sym_index, }); @@ -5609,6 +5645,11 @@ pub fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) { } pub fn makeString(self: *MachO, string: []const u8) !u32 { + if (self.strtab_dir.getAdapted(@as([]const u8, string), StringSliceAdapter{ .strtab = &self.strtab })) |off| { + log.debug("reusing string '{s}' at offset 0x{x}", .{ string, off }); + return off; + } + try self.strtab.ensureUnusedCapacity(self.base.allocator, string.len + 1); const new_off = @intCast(u32, self.strtab.items.len); @@ -5617,6 +5658,10 @@ pub fn makeString(self: *MachO, string: []const u8) !u32 { self.strtab.appendSliceAssumeCapacity(string); self.strtab.appendAssumeCapacity(0); + try self.strtab_dir.putContext(self.base.allocator, new_off, new_off, StringIndexContext{ + .strtab = &self.strtab, + }); + return new_off; } diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig index d28b9630e6..e7c0139623 100644 --- a/src/link/MachO/TextBlock.zig +++ 
b/src/link/MachO/TextBlock.zig @@ -640,7 +640,10 @@ fn initRelocFromObject(rel: macho.relocation_info, object: *Object, ctx: RelocCo parsed_rel.where = .local; parsed_rel.where_index = where_index; } else { - const resolv = ctx.macho_file.symbol_resolver.get(sym_name) orelse unreachable; + const n_strx = ctx.macho_file.strtab_dir.getAdapted(@as([]const u8, sym_name), MachO.StringSliceAdapter{ + .strtab = &ctx.macho_file.strtab, + }) orelse unreachable; + const resolv = ctx.macho_file.symbol_resolver.get(n_strx) orelse unreachable; switch (resolv.where) { .global => { parsed_rel.where = .local; @@ -704,7 +707,10 @@ pub fn parseRelocsFromObject( const where_index = object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable; subtractor = where_index; } else { - const resolv = ctx.macho_file.symbol_resolver.get(sym_name) orelse unreachable; + const n_strx = ctx.macho_file.strtab_dir.getAdapted(@as([]const u8, sym_name), MachO.StringSliceAdapter{ + .strtab = &ctx.macho_file.strtab, + }) orelse unreachable; + const resolv = ctx.macho_file.symbol_resolver.get(n_strx) orelse unreachable; assert(resolv.where == .global); subtractor = resolv.local_sym_index; } From 3bfde76cff60a0d9c70d8e660dd633bed17a314f Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 21 Jul 2021 15:46:55 +0200 Subject: [PATCH 73/81] macho: fix text block management For the time being, until we rewrite how atoms are handled across linkers, store two tables in the MachO linker: one for TextBlocks directly created and managed by the linker, and one for TextBlocks that were spawned by Module.Decl. This allows for correct memory clean up after linking is done. 
--- src/link/MachO.zig | 37 +++++++++++++++++++++++++++++-------- src/link/MachO/Object.zig | 3 ++- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index f271e00950..6fc5989a1d 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -188,9 +188,21 @@ text_block_free_list: std.ArrayListUnmanaged(*TextBlock) = .{}, /// Pointer to the last allocated text block last_text_block: ?*TextBlock = null, -managed_blocks: std.ArrayListUnmanaged(TextBlock) = .{}, +/// List of TextBlocks that are owned directly by the linker. +/// Currently these are only TextBlocks that are the result of linking +/// object files. TextBlock which take part in incremental linking are +/// at present owned by Module.Decl. +/// TODO consolidate this. +managed_blocks: std.ArrayListUnmanaged(*TextBlock) = .{}, + blocks: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}, +/// List of Decls that are currently alive. +/// We store them here so that we can properly dispose of any allocated +/// memory within the TextBlock in the incremental linker. +/// TODO consolidate this. +decls: std.ArrayListUnmanaged(*Module.Decl) = .{}, + /// A list of all PIE fixups required for this run of the linker. /// Warning, this is currently NOT thread-safe. See the TODO below. 
/// TODO Move this list inside `updateDecl` where it should be allocated @@ -203,7 +215,7 @@ pie_fixups: std.ArrayListUnmanaged(PIEFixup) = .{}, const StringIndexContext = struct { strtab: *std.ArrayListUnmanaged(u8), - pub fn eql(self: StringIndexContext, a: u32, b: u32) bool { + pub fn eql(_: StringIndexContext, a: u32, b: u32) bool { return a == b; } @@ -2224,7 +2236,6 @@ fn resolveSymbols(self: *MachO) !void { for (self.tentatives.items) |sym| { if (symbolIsNull(sym)) continue; - const sym_name = self.getString(sym.n_strx); const match: MatchingSection = blk: { if (self.common_section_index == null) { const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; @@ -2263,12 +2274,13 @@ fn resolveSymbols(self: *MachO) !void { .local_sym_index = local_sym_index, }; - const block = try self.managed_blocks.addOne(self.base.allocator); + const block = try self.base.allocator.create(TextBlock); block.* = TextBlock.empty; block.local_sym_index = local_sym_index; block.code = code; block.size = size; block.alignment = alignment; + try self.managed_blocks.append(self.base.allocator, block); // Update target section's metadata // TODO should we update segment's size here too? @@ -2403,12 +2415,13 @@ fn resolveSymbols(self: *MachO) !void { // We create an empty atom for this symbol. // TODO perhaps we should special-case special symbols? Create a separate // linked list of atoms? 
- const block = try self.managed_blocks.addOne(self.base.allocator); + const block = try self.base.allocator.create(TextBlock); block.* = TextBlock.empty; block.local_sym_index = local_sym_index; block.code = try self.base.allocator.alloc(u8, 0); block.size = 0; block.alignment = 0; + try self.managed_blocks.append(self.base.allocator, block); if (self.blocks.getPtr(match)) |last| { last.*.next = block; @@ -3306,12 +3319,18 @@ pub fn deinit(self: *MachO) void { } self.load_commands.deinit(self.base.allocator); - for (self.managed_blocks.items) |*block| { + for (self.managed_blocks.items) |block| { block.deinit(self.base.allocator); + self.base.allocator.destroy(block); } self.managed_blocks.deinit(self.base.allocator); self.blocks.deinit(self.base.allocator); self.text_block_free_list.deinit(self.base.allocator); + + for (self.decls.items) |decl| { + decl.link.macho.deinit(self.base.allocator); + } + self.decls.deinit(self.base.allocator); } pub fn closeFiles(self: MachO) void { @@ -3325,7 +3344,7 @@ pub fn closeFiles(self: MachO) void { fn freeTextBlock(self: *MachO, text_block: *TextBlock) void { log.debug("freeTextBlock {*}", .{text_block}); - // text_block.deinit(self.base.allocator); + text_block.deinit(self.base.allocator); var already_have_free_list_node = false; { @@ -3412,6 +3431,9 @@ pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void { try self.locals.ensureUnusedCapacity(self.base.allocator, 1); try self.got_entries.ensureUnusedCapacity(self.base.allocator, 1); + try self.decls.ensureUnusedCapacity(self.base.allocator, 1); + + self.decls.appendAssumeCapacity(decl); if (self.locals_free_list.popOrNull()) |i| { log.debug("reusing symbol index {d} for {s}", .{ i, decl.name }); @@ -3497,7 +3519,6 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { .externally_managed => |x| break :blk x, .appended => { decl.link.macho.code = code_buffer.toOwnedSlice(); - log.warn("WAT", .{}); break :blk decl.link.macho.code; }, 
.fail => |em| { diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 407918456b..7291e0929c 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -723,12 +723,13 @@ pub fn parseTextBlocks(self: *Object, macho_file: *MachO) !void { break :blk block_local_sym_index; }; - const block = try macho_file.managed_blocks.addOne(macho_file.base.allocator); + const block = try macho_file.base.allocator.create(TextBlock); block.* = TextBlock.empty; block.local_sym_index = block_local_sym_index; block.code = try self.allocator.dupe(u8, code); block.size = sect.size; block.alignment = sect.@"align"; + try macho_file.managed_blocks.append(macho_file.base.allocator, block); try block.parseRelocsFromObject(self.allocator, relocs, self, .{ .base_addr = 0, From 845c906e6a2ed9206840bd189d85bf9525687102 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 21 Jul 2021 17:58:05 +0200 Subject: [PATCH 74/81] macho: add relocations for GOT cells in self-hosted compiler. --- src/codegen.zig | 51 +++++++++++++++++++++++----------------- src/link/MachO.zig | 58 ++++++---------------------------------------- 2 files changed, 37 insertions(+), 72 deletions(-) diff --git a/src/codegen.zig b/src/codegen.zig index 7cb7119f0d..5d57cf0728 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -2506,7 +2506,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { }) orelse unreachable; break :blk got.addr + got_index * @sizeOf(u64); }; - log.debug("got_addr = 0x{x}", .{got_addr}); switch (arch) { .x86_64 => { try self.genSetReg(inst.base.src, Type.initTag(.u64), .rax, .{ .memory = got_addr }); @@ -3864,19 +3863,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .memory => |addr| { if (self.bin_file.options.pie) { // PC-relative displacement to the entry in the GOT table. - // TODO we should come up with our own, backend independent relocation types - // which each backend (Elf, MachO, etc.) would then translate into an actual - // fixup when linking. 
- // adrp reg, pages - if (self.bin_file.cast(link.File.MachO)) |macho_file| { - try macho_file.pie_fixups.append(self.bin_file.allocator, .{ - .target_addr = addr, - .offset = self.code.items.len, - .size = 4, - }); - } else { - return self.fail(src, "TODO implement genSetReg for PIE GOT indirection on this platform", .{}); - } + // adrp + const offset = @intCast(u32, self.code.items.len); mem.writeIntLittle( u32, try self.code.addManyAsArray(4), @@ -3889,6 +3877,26 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .offset = Instruction.LoadStoreOffset.imm(0), }, }).toU32()); + + if (self.bin_file.cast(link.File.MachO)) |macho_file| { + const decl = macho_file.active_decl.?; + // Page reloc for adrp instruction. + try decl.link.macho.relocs.append(self.bin_file.allocator, .{ + .offset = offset, + .where = .local, + .where_index = decl.link.macho.local_sym_index, + .payload = .{ .page = .{ .kind = .got } }, + }); + // Pageoff reloc for adrp instruction. + try decl.link.macho.relocs.append(self.bin_file.allocator, .{ + .offset = offset + 4, + .where = .local, + .where_index = decl.link.macho.local_sym_index, + .payload = .{ .page_off = .{ .kind = .got } }, + }); + } else { + return self.fail(src, "TODO implement genSetReg for PIE GOT indirection on this platform", .{}); + } } else { // The value is in memory at a hard-coded address. // If the type is a pointer, it means the pointer address is at this memory location. @@ -4128,6 +4136,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const abi_size = ty.abiSize(self.target.*); const encoder = try X8664Encoder.init(self.code, 10); + const offset = @intCast(u32, self.code.items.len); // LEA reg, [] // We encode the instruction FIRST because prefixes may or may not appear. 
@@ -4141,14 +4150,14 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { encoder.modRm_RIPDisp32(reg.low_id()); encoder.disp32(0); - // TODO we should come up with our own, backend independent relocation types - // which each backend (Elf, MachO, etc.) would then translate into an actual - // fixup when linking. if (self.bin_file.cast(link.File.MachO)) |macho_file| { - try macho_file.pie_fixups.append(self.bin_file.allocator, .{ - .target_addr = x, - .offset = self.code.items.len - 4, - .size = 4, + const decl = macho_file.active_decl.?; + // Load reloc for LEA instruction. + try decl.link.macho.relocs.append(self.bin_file.allocator, .{ + .offset = offset, + .where = .local, + .where_index = decl.link.macho.local_sym_index, + .payload = .{ .load = .{ .kind = .got } }, }); } else { return self.fail(src, "TODO implement genSetReg for PIE GOT indirection on this platform", .{}); diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 6fc5989a1d..52329e47d1 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -203,14 +203,11 @@ blocks: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}, /// TODO consolidate this. decls: std.ArrayListUnmanaged(*Module.Decl) = .{}, -/// A list of all PIE fixups required for this run of the linker. -/// Warning, this is currently NOT thread-safe. See the TODO below. -/// TODO Move this list inside `updateDecl` where it should be allocated -/// prior to calling `generateSymbol`, and then immediately deallocated -/// rather than sitting in the global scope. -/// TODO We should also rewrite this using generic relocations common to all -/// backends. -pie_fixups: std.ArrayListUnmanaged(PIEFixup) = .{}, +/// Currently active Module.Decl. +/// TODO this might not be necessary if we figure out how to pass Module.Decl instance +/// to codegen.genSetReg() or alterntively move PIE displacement for MCValue{ .memory = x } +/// somewhere else in the codegen. 
+active_decl: ?*Module.Decl = null, const StringIndexContext = struct { strtab: *std.ArrayListUnmanaged(u8), @@ -3279,7 +3276,6 @@ pub fn deinit(self: *MachO) void { } self.pending_updates.deinit(self.base.allocator); - self.pie_fixups.deinit(self.base.allocator); self.got_entries.deinit(self.base.allocator); self.got_entries_map.deinit(self.base.allocator); self.got_entries_free_list.deinit(self.base.allocator); @@ -3497,6 +3493,8 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { } } + self.active_decl = decl; + const res = if (debug_buffers) |*dbg| try codegen.generateSymbol(&self.base, decl.srcLoc(), .{ .ty = decl.ty, @@ -3522,8 +3520,6 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { break :blk decl.link.macho.code; }, .fail => |em| { - // Clear any PIE fixups for this decl. - self.pie_fixups.shrinkRetainingCapacity(0); decl.analysis = .codegen_failure; try module.failed_decls.put(module.gpa, decl, em); return; @@ -3600,46 +3596,6 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { try self.writeGotEntry(got_index); } - // Calculate displacements to target addr (if any). - while (self.pie_fixups.popOrNull()) |fixup| { - assert(fixup.size == 4); - const this_addr = symbol.n_value + fixup.offset; - const target_addr = fixup.target_addr; - - switch (self.base.options.target.cpu.arch) { - .x86_64 => { - const displacement = try math.cast(u32, target_addr - this_addr - 4); - mem.writeIntLittle(u32, decl.link.macho.code[fixup.offset..][0..4], displacement); - }, - .aarch64 => { - // TODO optimize instruction based on jump length (use ldr(literal) + nop if possible). 
- { - const inst = decl.link.macho.code[fixup.offset..][0..4]; - var parsed = mem.bytesAsValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.pc_relative_address, - ), inst); - const this_page = @intCast(i32, this_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @bitCast(u21, @intCast(i21, target_page - this_page)); - parsed.immhi = @truncate(u19, pages >> 2); - parsed.immlo = @truncate(u2, pages); - } - { - const inst = decl.link.macho.code[fixup.offset + 4 ..][0..4]; - var parsed = mem.bytesAsValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), inst); - const narrowed = @truncate(u12, target_addr); - const offset = try math.divExact(u12, narrowed, 8); - parsed.offset = offset; - } - }, - else => unreachable, // unsupported target architecture - } - } - // Resolve relocations try decl.link.macho.resolveRelocs(self); From e05b1e0e07708ca16c1a90e51a668faf51d883f4 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 21 Jul 2021 23:05:03 +0200 Subject: [PATCH 75/81] macho: fix reloc generation for stubs and GOT entries The current approach is somewhat hacky, however, works well for one-off self-hosted linking. 
--- src/codegen.zig | 63 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/src/codegen.zig b/src/codegen.zig index 5d57cf0728..0339cf29f8 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -2523,21 +2523,25 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } else if (func_value.castTag(.extern_fn)) |func_payload| { const decl = func_payload.data; const where_index = try macho_file.addExternFn(mem.spanZ(decl.name)); - const offset = @intCast(u32, self.code.items.len); - switch (arch) { - .x86_64 => { - // callq - try self.code.ensureCapacity(self.code.items.len + 5); - self.code.appendSliceAssumeCapacity(&[5]u8{ 0xe8, 0x0, 0x0, 0x0, 0x0 }); - }, - .aarch64 => { - // bl - writeInt(u32, try self.code.addManyAsArray(4), Instruction.bl(0).toU32()); - }, - else => unreachable, // unsupported architecture on MachO - } + const offset = blk: { + switch (arch) { + .x86_64 => { + // callq + try self.code.ensureCapacity(self.code.items.len + 5); + self.code.appendSliceAssumeCapacity(&[5]u8{ 0xe8, 0x0, 0x0, 0x0, 0x0 }); + break :blk @intCast(u32, self.code.items.len) - 4; + }, + .aarch64 => { + const offset = @intCast(u32, self.code.items.len); + // bl + writeInt(u32, try self.code.addManyAsArray(4), Instruction.bl(0).toU32()); + break :blk offset; + }, + else => unreachable, // unsupported architecture on MachO + } + }; // Add relocation to the decl. - try decl.link.macho.relocs.append(self.bin_file.allocator, .{ + try macho_file.active_decl.?.link.macho.relocs.append(self.bin_file.allocator, .{ .offset = offset, .where = .import, .where_index = where_index, @@ -3879,19 +3883,29 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { }).toU32()); if (self.bin_file.cast(link.File.MachO)) |macho_file| { + // TODO this is super awkward. We are reversing the address of the GOT entry here. + // We should probably have it cached or move the reloc adding somewhere else. 
+ const got_addr = blk: { + const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment; + const got = seg.sections.items[macho_file.got_section_index.?]; + break :blk got.addr; + }; + const where_index = blk: for (macho_file.got_entries.items) |key, id| { + if (got_addr + id * @sizeOf(u64) == addr) break :blk key.where_index; + } else unreachable; const decl = macho_file.active_decl.?; // Page reloc for adrp instruction. try decl.link.macho.relocs.append(self.bin_file.allocator, .{ .offset = offset, .where = .local, - .where_index = decl.link.macho.local_sym_index, + .where_index = where_index, .payload = .{ .page = .{ .kind = .got } }, }); // Pageoff reloc for adrp instruction. try decl.link.macho.relocs.append(self.bin_file.allocator, .{ .offset = offset + 4, .where = .local, - .where_index = decl.link.macho.local_sym_index, + .where_index = where_index, .payload = .{ .page_off = .{ .kind = .got } }, }); } else { @@ -4136,7 +4150,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const abi_size = ty.abiSize(self.target.*); const encoder = try X8664Encoder.init(self.code, 10); - const offset = @intCast(u32, self.code.items.len); // LEA reg, [] // We encode the instruction FIRST because prefixes may or may not appear. @@ -4150,13 +4163,25 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { encoder.modRm_RIPDisp32(reg.low_id()); encoder.disp32(0); + const offset = @intCast(u32, self.code.items.len); + if (self.bin_file.cast(link.File.MachO)) |macho_file| { + // TODO this is super awkward. We are reversing the address of the GOT entry here. + // We should probably have it cached or move the reloc adding somewhere else. 
+ const got_addr = blk: { + const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment; + const got = seg.sections.items[macho_file.got_section_index.?]; + break :blk got.addr; + }; + const where_index = blk: for (macho_file.got_entries.items) |key, id| { + if (got_addr + id * @sizeOf(u64) == x) break :blk key.where_index; + } else unreachable; const decl = macho_file.active_decl.?; // Load reloc for LEA instruction. try decl.link.macho.relocs.append(self.bin_file.allocator, .{ - .offset = offset, + .offset = offset - 4, .where = .local, - .where_index = decl.link.macho.local_sym_index, + .where_index = where_index, .payload = .{ .load = .{ .kind = .got } }, }); } else { From d0edd37f690c3e6cf3f8a7fc7a27016ba9b010ce Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 21 Jul 2021 23:38:20 +0200 Subject: [PATCH 76/81] macho: fix bug when freeing Decl Take into account that an already freed Decl will no longer be available as `decl.link.macho` causing a potential "inactive union field" panic. --- src/link/MachO.zig | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 52329e47d1..9bf8ec4a75 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -197,11 +197,11 @@ managed_blocks: std.ArrayListUnmanaged(*TextBlock) = .{}, blocks: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}, -/// List of Decls that are currently alive. +/// Table of Decls that are currently alive. /// We store them here so that we can properly dispose of any allocated /// memory within the TextBlock in the incremental linker. /// TODO consolidate this. -decls: std.ArrayListUnmanaged(*Module.Decl) = .{}, +decls: std.AutoArrayHashMapUnmanaged(*Module.Decl, void) = .{}, /// Currently active Module.Decl. 
/// TODO this might not be necessary if we figure out how to pass Module.Decl instance @@ -3323,7 +3323,7 @@ pub fn deinit(self: *MachO) void { self.blocks.deinit(self.base.allocator); self.text_block_free_list.deinit(self.base.allocator); - for (self.decls.items) |decl| { + for (self.decls.keys()) |decl| { decl.link.macho.deinit(self.base.allocator); } self.decls.deinit(self.base.allocator); @@ -3427,9 +3427,8 @@ pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void { try self.locals.ensureUnusedCapacity(self.base.allocator, 1); try self.got_entries.ensureUnusedCapacity(self.base.allocator, 1); - try self.decls.ensureUnusedCapacity(self.base.allocator, 1); - self.decls.appendAssumeCapacity(decl); + try self.decls.putNoClobber(self.base.allocator, decl, {}); if (self.locals_free_list.popOrNull()) |i| { log.debug("reusing symbol index {d} for {s}", .{ i, decl.name }); @@ -3598,6 +3597,9 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { // Resolve relocations try decl.link.macho.resolveRelocs(self); + // TODO this requires further investigation: should we dispose of resolved relocs, or keep them + // so that we can reapply them when moving/growing sections? + decl.link.macho.relocs.clearRetainingCapacity(); // Apply pending updates while (self.pending_updates.popOrNull()) |update| { @@ -3746,6 +3748,7 @@ pub fn deleteExport(self: *MachO, exp: Export) void { pub fn freeDecl(self: *MachO, decl: *Module.Decl) void { log.debug("freeDecl {*}", .{decl}); + _ = self.decls.swapRemove(decl); // Appending to free lists is allowed to fail because the free lists are heuristics based anyway. self.freeTextBlock(&decl.link.macho); if (decl.link.macho.local_sym_index != 0) { From ca90efe88e3b354884a82d341936e5a0724d74c8 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 22 Jul 2021 14:05:12 +0200 Subject: [PATCH 77/81] macho: fix memory leaks when emptying TextBlocks This happens on every call to `TextBlock.empty` by the `Module`. 
--- src/link/MachO.zig | 31 +++++++++++++++++++++--------- src/link/MachO/Object.zig | 3 ++- src/link/MachO/TextBlock.zig | 37 ++++++++++++++++++------------------ 3 files changed, 42 insertions(+), 29 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 6481bfa847..d1d25a313c 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1828,7 +1828,7 @@ fn writeTextBlocks(self: *MachO) !void { }); try block.resolveRelocs(self); - mem.copy(u8, code[aligned_base_off..][0..block.size], block.code); + mem.copy(u8, code[aligned_base_off..][0..block.size], block.code.items); // TODO NOP for machine code instead of just zeroing out const padding_len = aligned_base_off - base_off; @@ -2262,6 +2262,7 @@ fn resolveSymbols(self: *MachO) !void { const size = sym.n_value; const code = try self.base.allocator.alloc(u8, size); + defer self.base.allocator.free(code); mem.set(u8, code, 0); const alignment = (sym.n_desc >> 8) & 0x0f; @@ -2287,11 +2288,12 @@ fn resolveSymbols(self: *MachO) !void { const block = try self.base.allocator.create(TextBlock); block.* = TextBlock.empty; block.local_sym_index = local_sym_index; - block.code = code; block.size = size; block.alignment = alignment; try self.managed_blocks.append(self.base.allocator, block); + try block.code.appendSlice(self.base.allocator, code); + // Update target section's metadata // TODO should we update segment's size here too? // How does it tie with incremental space allocs? 
@@ -2428,7 +2430,6 @@ fn resolveSymbols(self: *MachO) !void { const block = try self.base.allocator.create(TextBlock); block.* = TextBlock.empty; block.local_sym_index = local_sym_index; - block.code = try self.base.allocator.alloc(u8, 0); block.size = 0; block.alignment = 0; try self.managed_blocks.append(self.base.allocator, block); @@ -3527,7 +3528,13 @@ pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liv try codegen.generateFunction(&self.base, decl.srcLoc(), func, air, liveness, &code_buffer, .none); switch (res) { .appended => { - decl.link.macho.code = code_buffer.toOwnedSlice(); + // TODO clearing the code and relocs buffer should probably be orchestrated + // in a different, smarter, more automatic way somewhere else, in a more centralised + // way than this. + // If we don't clear the buffers here, we are up for some nasty surprises when + // this TextBlock is reused later on and was not freed by freeTextBlock(). + decl.link.macho.code.clearAndFree(self.base.allocator); + try decl.link.macho.code.appendSlice(self.base.allocator, code_buffer.items); }, .fail => |em| { decl.analysis = .codegen_failure; @@ -3536,9 +3543,9 @@ pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liv }, } - const symbol = try self.placeDecl(decl, decl.link.macho.code.len); + const symbol = try self.placeDecl(decl, decl.link.macho.code.items.len); - try self.writeCode(symbol, decl.link.macho.code); + try self.writeCode(symbol, decl.link.macho.code.items); if (debug_buffers) |db| { try self.d_sym.?.commitDeclDebugInfo( @@ -3613,8 +3620,14 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { switch (res) { .externally_managed => |x| break :blk x, .appended => { - decl.link.macho.code = code_buffer.toOwnedSlice(); - break :blk decl.link.macho.code; + // TODO clearing the code and relocs buffer should probably be orchestrated + // in a different, smarter, more automatic way somewhere else, in a more 
centralised + // way than this. + // If we don't clear the buffers here, we are up for some nasty surprises when + // this TextBlock is reused later on and was not freed by freeTextBlock(). + decl.link.macho.code.clearAndFree(self.base.allocator); + try decl.link.macho.code.appendSlice(self.base.allocator, code_buffer.items); + break :blk decl.link.macho.code.items; }, .fail => |em| { decl.analysis = .codegen_failure; @@ -3705,7 +3718,7 @@ fn placeDecl(self: *MachO, decl: *Module.Decl, code_len: usize) !*macho.nlist_64 try decl.link.macho.resolveRelocs(self); // TODO this requires further investigation: should we dispose of resolved relocs, or keep them // so that we can reapply them when moving/growing sections? - decl.link.macho.relocs.clearRetainingCapacity(); + decl.link.macho.relocs.clearAndFree(self.base.allocator); // Apply pending updates while (self.pending_updates.popOrNull()) |update| { diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 7291e0929c..46d82dec31 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -726,11 +726,12 @@ pub fn parseTextBlocks(self: *Object, macho_file: *MachO) !void { const block = try macho_file.base.allocator.create(TextBlock); block.* = TextBlock.empty; block.local_sym_index = block_local_sym_index; - block.code = try self.allocator.dupe(u8, code); block.size = sect.size; block.alignment = sect.@"align"; try macho_file.managed_blocks.append(macho_file.base.allocator, block); + try block.code.appendSlice(macho_file.base.allocator, code); + try block.parseRelocsFromObject(self.allocator, relocs, self, .{ .base_addr = 0, .macho_file = macho_file, diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig index e7c0139623..9dc02d1f4d 100644 --- a/src/link/MachO/TextBlock.zig +++ b/src/link/MachO/TextBlock.zig @@ -30,7 +30,7 @@ aliases: std.ArrayListUnmanaged(u32) = .{}, contained: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, /// Code (may be non-relocated) this block 
represents -code: []u8, +code: std.ArrayListUnmanaged(u8) = .{}, /// Size and alignment of this text block /// Unlike in Elf, we need to store the size of this symbol as part of @@ -196,9 +196,9 @@ pub const Relocation = struct { }; if (self.is_64bit) { - mem.writeIntLittle(u64, args.block.code[args.offset..][0..8], @bitCast(u64, result)); + mem.writeIntLittle(u64, args.block.code.items[args.offset..][0..8], @bitCast(u64, result)); } else { - mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @truncate(u32, @bitCast(u64, result))); + mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @truncate(u32, @bitCast(u64, result))); } } @@ -226,7 +226,7 @@ pub const Relocation = struct { i28, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr), ); - const code = args.block.code[args.offset..][0..4]; + const code = args.block.code.items[args.offset..][0..4]; var inst = aarch64.Instruction{ .unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload( aarch64.Instruction, @@ -241,7 +241,7 @@ pub const Relocation = struct { i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4, ); - mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @bitCast(u32, displacement)); + mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, displacement)); }, else => return error.UnsupportedCpuArchitecture, } @@ -269,7 +269,7 @@ pub const Relocation = struct { const target_page = @intCast(i32, target_addr >> 12); const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); - const code = args.block.code[args.offset..][0..4]; + const code = args.block.code.items[args.offset..][0..4]; var inst = aarch64.Instruction{ .pc_relative_address = mem.bytesToValue(meta.TagPayload( aarch64.Instruction, @@ -315,7 +315,7 @@ pub const Relocation = struct { }; pub fn resolve(self: PageOff, args: ResolveArgs) !void { - const code = args.block.code[args.offset..][0..4]; + const code = 
args.block.code.items[args.offset..][0..4]; switch (self.kind) { .page => { @@ -445,7 +445,7 @@ pub const Relocation = struct { pub const PointerToGot = struct { pub fn resolve(_: PointerToGot, args: ResolveArgs) !void { const result = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr)); - mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @bitCast(u32, result)); + mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, result)); } pub fn format(self: PointerToGot, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { @@ -466,7 +466,7 @@ pub const Relocation = struct { i32, target_addr - @intCast(i64, args.source_addr) - self.correction - 4, ); - mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @bitCast(u32, displacement)); + mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, displacement)); } pub fn format(self: Signed, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { @@ -489,13 +489,13 @@ pub const Relocation = struct { pub fn resolve(self: Load, args: ResolveArgs) !void { if (self.kind == .tlvp) { // We need to rewrite the opcode from movq to leaq. 
- args.block.code[args.offset - 2] = 0x8d; + args.block.code.items[args.offset - 2] = 0x8d; } const displacement = try math.cast( i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4 + self.addend, ); - mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @bitCast(u32, displacement)); + mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, displacement)); } pub fn format(self: Load, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { @@ -542,7 +542,6 @@ pub const Relocation = struct { pub const empty = TextBlock{ .local_sym_index = 0, - .code = undefined, .size = 0, .alignment = 0, .prev = null, @@ -560,7 +559,7 @@ pub fn deinit(self: *TextBlock, allocator: *Allocator) void { self.relocs.deinit(allocator); self.contained.deinit(allocator); self.aliases.deinit(allocator); - allocator.free(self.code); + self.code.deinit(allocator); } /// Returns how much room there is to grow in virtual address space. 
@@ -914,9 +913,9 @@ fn parseUnsigned( }; var addend: i64 = if (is_64bit) - mem.readIntLittle(i64, self.code[out.offset..][0..8]) + mem.readIntLittle(i64, self.code.items[out.offset..][0..8]) else - mem.readIntLittle(i32, self.code[out.offset..][0..4]); + mem.readIntLittle(i32, self.code.items[out.offset..][0..4]); if (rel.r_extern == 0) { assert(out.where == .local); @@ -970,7 +969,7 @@ fn parsePageOff(self: TextBlock, rel: macho.relocation_info, out: *Relocation, a const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); const op_kind: ?Relocation.PageOff.OpKind = blk: { if (rel_type != .ARM64_RELOC_PAGEOFF12) break :blk null; - const op_kind: Relocation.PageOff.OpKind = if (isArithmeticOp(self.code[out.offset..][0..4])) + const op_kind: Relocation.PageOff.OpKind = if (isArithmeticOp(self.code.items[out.offset..][0..4])) .arithmetic else .load; @@ -1013,7 +1012,7 @@ fn parseSigned(self: TextBlock, rel: macho.relocation_info, out: *Relocation, ct .X86_64_RELOC_SIGNED_4 => 4, else => unreachable, }; - var addend: i64 = mem.readIntLittle(i32, self.code[out.offset..][0..4]) + correction; + var addend: i64 = mem.readIntLittle(i32, self.code.items[out.offset..][0..4]) + correction; if (rel.r_extern == 0) { const source_sym = ctx.macho_file.locals.items[self.local_sym_index]; @@ -1038,7 +1037,7 @@ fn parseLoad(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); const addend: i32 = if (rel_type == .X86_64_RELOC_GOT) - mem.readIntLittle(i32, self.code[out.offset..][0..4]) + mem.readIntLittle(i32, self.code.items[out.offset..][0..4]) else 0; @@ -1173,7 +1172,7 @@ pub fn print_this(self: *const TextBlock, macho_file: MachO) void { } } } - log.warn(" code.len = {}", .{self.code.len}); + log.warn(" code.len = {}", .{self.code.items.len}); if (self.relocs.items.len > 0) { log.warn(" relocations:", .{}); for (self.relocs.items) |rel| { From 773863150a96fcb9ddb3eccb585d10342d10cb78 Mon Sep 
17 00:00:00 2001 From: Jakub Konka Date: Thu, 22 Jul 2021 14:50:06 +0200 Subject: [PATCH 78/81] macho: fix incorrect prealloc in traditional path --- src/link/MachO.zig | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index d1d25a313c..d742e6ec12 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -425,7 +425,8 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { } } - if (build_options.is_stage1) { + const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1; + if (use_stage1) { return self.linkWithZld(comp); } else { switch (self.base.options.effectiveOutputMode()) { @@ -3153,7 +3154,7 @@ fn writeSymbolTable(self: *MachO) !void { if (object.debug_info == null) continue; // Open scope - try locals.ensureUnusedCapacity(4); + try locals.ensureUnusedCapacity(3); locals.appendAssumeCapacity(.{ .n_strx = try self.makeString(object.tu_comp_dir.?), .n_type = macho.N_SO, @@ -3192,7 +3193,7 @@ fn writeSymbolTable(self: *MachO) !void { } // Close scope - locals.appendAssumeCapacity(.{ + try locals.append(.{ .n_strx = 0, .n_type = macho.N_SO, .n_sect = 0, From 4fd0cb7618ffb5428981672f6a21c411599f51b2 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 22 Jul 2021 16:00:31 +0200 Subject: [PATCH 79/81] macho: sort nlists within object before filtering by type Previously, we'd filter the nlists assuming they were correctly ordered by type: local < extern defined < undefined within the object's symbol table but this doesn't seem to be guaranteed, therefore, we sort by type and address in one go, and filter defined from undefined afterwards. 
--- src/link/MachO/Object.zig | 82 +++++++++++++++------------------------ 1 file changed, 31 insertions(+), 51 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 46d82dec31..c5ff19b9be 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -295,7 +295,20 @@ const NlistWithIndex = struct { index: u32, fn lessThan(_: void, lhs: NlistWithIndex, rhs: NlistWithIndex) bool { - return lhs.nlist.n_value < rhs.nlist.n_value; + // We sort by type: defined < undefined, and + // afterwards by address in each group. Normally, dysymtab should + // be enough to guarantee the sort, but turns out not every compiler + // is kind enough to specify the symbols in the correct order. + if (MachO.symbolIsSect(lhs.nlist)) { + if (MachO.symbolIsSect(rhs.nlist)) { + // Same group, sort by address. + return lhs.nlist.n_value < rhs.nlist.n_value; + } else { + return true; + } + } else { + return false; + } } fn filterInSection(symbols: []NlistWithIndex, sect: macho.section_64) []NlistWithIndex { @@ -488,22 +501,27 @@ pub fn parseTextBlocks(self: *Object, macho_file: *MachO) !void { log.debug("analysing {s}", .{self.name.?}); - const dysymtab = self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; - // We only care about defined symbols, so filter every other out. - const nlists = self.symtab.items[dysymtab.ilocalsym..dysymtab.iundefsym]; + // You would expect that the symbol table is at least pre-sorted based on symbol's type: + // local < extern defined < undefined. Unfortunately, this is not guaranteed! For instance, + // the GO compiler does not necessarily respect that therefore we sort immediately by type + // and address within. 
+ var sorted_all_nlists = std.ArrayList(NlistWithIndex).init(self.allocator); + defer sorted_all_nlists.deinit(); + try sorted_all_nlists.ensureTotalCapacity(self.symtab.items.len); - var sorted_nlists = std.ArrayList(NlistWithIndex).init(self.allocator); - defer sorted_nlists.deinit(); - try sorted_nlists.ensureTotalCapacity(nlists.len); - - for (nlists) |nlist, index| { - sorted_nlists.appendAssumeCapacity(.{ + for (self.symtab.items) |nlist, index| { + sorted_all_nlists.appendAssumeCapacity(.{ .nlist = nlist, - .index = @intCast(u32, index + dysymtab.ilocalsym), + .index = @intCast(u32, index), }); } - sort.sort(NlistWithIndex, sorted_nlists.items, {}, NlistWithIndex.lessThan); + sort.sort(NlistWithIndex, sorted_all_nlists.items, {}, NlistWithIndex.lessThan); + + const dysymtab = self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; + + // We only care about defined symbols, so filter every other out. + const sorted_nlists = sorted_all_nlists.items[dysymtab.ilocalsym..dysymtab.iundefsym]; for (seg.sections.items) |sect, id| { const sect_id = @intCast(u8, id); @@ -530,7 +548,7 @@ pub fn parseTextBlocks(self: *Object, macho_file: *MachO) !void { const relocs = mem.bytesAsSlice(macho.relocation_info, raw_relocs); // Symbols within this section only. - const filtered_nlists = NlistWithIndex.filterInSection(sorted_nlists.items, sect); + const filtered_nlists = NlistWithIndex.filterInSection(sorted_nlists, sect); // Is there any padding between symbols within the section? 
// const is_splittable = self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; @@ -810,44 +828,6 @@ pub fn parseTextBlocks(self: *Object, macho_file: *MachO) !void { } } -pub fn symbolFromReloc(self: *Object, macho_file: *MachO, rel: macho.relocation_info) !*Symbol { - const symbol = blk: { - if (rel.r_extern == 1) { - break :blk self.symbols.items[rel.r_symbolnum]; - } else { - const sect_id = @intCast(u8, rel.r_symbolnum - 1); - const symbol = self.sections_as_symbols.get(sect_id) orelse symbol: { - // We need a valid pointer to Symbol even if there is no symbol, so we create a - // dummy symbol upfront which will later be populated when created a TextBlock from - // the target section here. - const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; - const sect = seg.sections.items[sect_id]; - const name = try std.fmt.allocPrint(self.allocator, "l_{s}_{s}_{s}", .{ - self.name.?, - segmentName(sect), - sectionName(sect), - }); - defer self.allocator.free(name); - const symbol = try macho_file.allocator.create(Symbol); - symbol.* = .{ - .strx = try macho_file.makeString(name), - .payload = .{ - .regular = .{ - .linkage = .translation_unit, - .address = sect.addr, - .file = self, - }, - }, - }; - try self.sections_as_symbols.putNoClobber(self.allocator, sect_id, symbol); - break :symbol symbol; - }; - break :blk symbol; - } - }; - return symbol; -} - fn parseSymtab(self: *Object) !void { const index = self.symtab_cmd_index orelse return; const symtab_cmd = self.load_commands.items[index].Symtab; From a4feb97cdfb330207f3da05402983bf3a71de64e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 22 Jul 2021 22:19:15 +0200 Subject: [PATCH 80/81] macho: assign and cache section ordinals upon creation then, when sorting sections within segments, clear and redo the ordinals since we re-apply them to symbols anyway. It is vital to have the ordinals consistent with parsing and resolving relocs however. 
--- src/link/MachO.zig | 117 +++++++++++++++++++++++++---------- src/link/MachO/Object.zig | 4 +- src/link/MachO/TextBlock.zig | 6 +- 3 files changed, 88 insertions(+), 39 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index d742e6ec12..b2f048d78b 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -167,6 +167,9 @@ strtab_needs_relocation: bool = false, has_dices: bool = false, has_stabs: bool = false, +section_ordinals: std.ArrayListUnmanaged(MatchingSection) = .{}, +section_to_ordinal: std.AutoHashMapUnmanaged(MatchingSection, u8) = .{}, + pending_updates: std.ArrayListUnmanaged(struct { kind: enum { got, @@ -925,6 +928,13 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { else => unreachable, }; + // Initialize section ordinals with null ordinal pointing at + // PAGEZERO segment. + try self.section_ordinals.append(self.base.allocator, .{ + .seg = 0, + .sect = 0, + }); + try self.populateMetadata(); try self.parseInputFiles(positionals.items, self.base.options.sysroot); try self.parseLibs(libs.items, self.base.options.sysroot); @@ -1482,6 +1492,10 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio } }; + if (res) |match| { + try self.createSectionOrdinal(match); + } + return res; } @@ -1606,6 +1620,38 @@ fn sortSections(self: *MachO) !void { self.blocks.deinit(self.base.allocator); self.blocks = transient; } + + { + // Create new section ordinals. 
+ self.section_ordinals.clearRetainingCapacity(); + self.section_to_ordinal.clearRetainingCapacity(); + // First ordinal is always null + self.section_ordinals.appendAssumeCapacity(.{ + .seg = 0, + .sect = 0, + }); + const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + for (text_seg.sections.items) |_, sect_id| { + try self.createSectionOrdinal(.{ + .seg = self.text_segment_cmd_index.?, + .sect = @intCast(u16, sect_id), + }); + } + const data_const_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + for (data_const_seg.sections.items) |_, sect_id| { + try self.createSectionOrdinal(.{ + .seg = self.data_const_segment_cmd_index.?, + .sect = @intCast(u16, sect_id), + }); + } + const data_seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; + for (data_seg.sections.items) |_, sect_id| { + try self.createSectionOrdinal(.{ + .seg = self.data_segment_cmd_index.?, + .sect = @intCast(u16, sect_id), + }); + } + } } fn allocateTextSegment(self: *MachO) !void { @@ -1732,7 +1778,7 @@ fn allocateTextBlocks(self: *MachO) !void { const sect = seg.sections.items[match.sect]; var base_addr: u64 = sect.addr; - const n_sect = self.sectionId(match); + const n_sect = self.section_to_ordinal.get(match) orelse unreachable; log.debug(" within section {s},{s}", .{ commands.segmentName(sect), commands.sectionName(sect) }); log.debug(" {}", .{sect}); @@ -2260,6 +2306,7 @@ fn resolveSymbols(self: *MachO) !void { .sect = self.common_section_index.?, }; }; + try self.createSectionOrdinal(match); const size = sym.n_value; const code = try self.base.allocator.alloc(u8, size); @@ -2272,7 +2319,7 @@ fn resolveSymbols(self: *MachO) !void { var nlist = macho.nlist_64{ .n_strx = sym.n_strx, .n_type = macho.N_SECT, - .n_sect = self.sectionId(match), + .n_sect = self.section_to_ordinal.get(match) orelse unreachable, .n_desc = 0, .n_value = 0, }; @@ -2402,7 +2449,7 @@ fn resolveSymbols(self: *MachO) !void { var nlist = 
macho.nlist_64{ .n_strx = undef.n_strx, .n_type = macho.N_SECT, - .n_sect = self.sectionId(match), + .n_sect = self.section_to_ordinal.get(match) orelse unreachable, .n_desc = 0, .n_value = 0, }; @@ -2498,6 +2545,10 @@ fn populateMetadata(self: *MachO) !void { .@"align" = alignment, .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, }); + try self.createSectionOrdinal(.{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }); } if (self.stubs_section_index == null) { @@ -2518,6 +2569,10 @@ fn populateMetadata(self: *MachO) !void { .flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, .reserved2 = stub_size, }); + try self.createSectionOrdinal(.{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stubs_section_index.?, + }); } if (self.stub_helper_section_index == null) { @@ -2538,6 +2593,10 @@ fn populateMetadata(self: *MachO) !void { .@"align" = alignment, .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, }); + try self.createSectionOrdinal(.{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stub_helper_section_index.?, + }); } if (self.data_const_segment_cmd_index == null) { @@ -2557,6 +2616,10 @@ fn populateMetadata(self: *MachO) !void { .@"align" = 3, // 2^3 = @sizeOf(u64) .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, }); + try self.createSectionOrdinal(.{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.got_section_index.?, + }); } if (self.data_segment_cmd_index == null) { @@ -2576,6 +2639,10 @@ fn populateMetadata(self: *MachO) !void { .@"align" = 3, // 2^3 = @sizeOf(u64) .flags = macho.S_LAZY_SYMBOL_POINTERS, }); + try self.createSectionOrdinal(.{ + .seg = self.data_segment_cmd_index.?, + .sect = self.la_symbol_ptr_section_index.?, + }); } if (self.data_section_index == null) { @@ -2584,6 +2651,10 @@ fn populateMetadata(self: *MachO) !void { try data_seg.addSection(self.base.allocator, 
"__data", .{ .@"align" = 3, // 2^3 = @sizeOf(u64) }); + try self.createSectionOrdinal(.{ + .seg = self.data_segment_cmd_index.?, + .sect = self.data_section_index.?, + }); } if (self.linkedit_segment_cmd_index == null) { @@ -3290,6 +3361,8 @@ pub fn deinit(self: *MachO) void { ds.deinit(self.base.allocator); } + self.section_ordinals.deinit(self.base.allocator); + self.section_to_ordinal.deinit(self.base.allocator); self.pending_updates.deinit(self.base.allocator); self.got_entries.deinit(self.base.allocator); self.got_entries_map.deinit(self.base.allocator); @@ -5816,37 +5889,6 @@ pub fn symbolIsTemp(sym: macho.nlist_64, sym_name: []const u8) bool { return mem.startsWith(u8, sym_name, "l") or mem.startsWith(u8, sym_name, "L"); } -pub fn sectionId(self: MachO, match: MatchingSection) u8 { - // TODO there might be a more generic way of doing this. - var section: u8 = 0; - for (self.load_commands.items) |cmd, cmd_id| { - if (cmd != .Segment) break; - if (cmd_id == match.seg) { - section += @intCast(u8, match.sect) + 1; - break; - } - section += @intCast(u8, cmd.Segment.sections.items.len); - } - return section; -} - -pub fn unpackSectionId(self: MachO, section_id: u8) MatchingSection { - var match: MatchingSection = undefined; - var section: u8 = 0; - outer: for (self.load_commands.items) |cmd, cmd_id| { - assert(cmd == .Segment); - for (cmd.Segment.sections.items) |_, sect_id| { - section += 1; - if (section_id == section) { - match.seg = @intCast(u16, cmd_id); - match.sect = @intCast(u16, sect_id); - break :outer; - } - } - } - return match; -} - fn packDylibOrdinal(ordinal: u16) u16 { return ordinal * macho.N_SYMBOL_RESOLVER; } @@ -5867,3 +5909,10 @@ pub fn findFirst(comptime T: type, haystack: []T, start: usize, predicate: anyty } return i; } + +fn createSectionOrdinal(self: *MachO, match: MatchingSection) !void { + if (self.section_to_ordinal.contains(match)) return; + const ordinal = @intCast(u8, self.section_ordinals.items.len); + try 
self.section_ordinals.append(self.base.allocator, match); + try self.section_to_ordinal.putNoClobber(self.base.allocator, match, ordinal); +} diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index c5ff19b9be..846b87a65a 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -733,7 +733,7 @@ pub fn parseTextBlocks(self: *Object, macho_file: *MachO) !void { try macho_file.locals.append(macho_file.base.allocator, .{ .n_strx = try macho_file.makeString(sym_name), .n_type = macho.N_SECT, - .n_sect = macho_file.sectionId(match), + .n_sect = macho_file.section_to_ordinal.get(match) orelse unreachable, .n_desc = 0, .n_value = sect.addr, }); @@ -779,7 +779,7 @@ pub fn parseTextBlocks(self: *Object, macho_file: *MachO) !void { const nlist = nlist_with_index.nlist; const local_sym_index = self.symbol_mapping.get(nlist_with_index.index) orelse unreachable; const local = &macho_file.locals.items[local_sym_index]; - local.n_sect = macho_file.sectionId(match); + local.n_sect = macho_file.section_to_ordinal.get(match) orelse unreachable; const stab: ?TextBlock.Stab = if (self.debug_info) |di| blk: { // TODO there has to be a better to handle this. 
diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig index 9dc02d1f4d..17d1d82db8 100644 --- a/src/link/MachO/TextBlock.zig +++ b/src/link/MachO/TextBlock.zig @@ -620,7 +620,7 @@ fn initRelocFromObject(rel: macho.relocation_info, object: *Object, ctx: RelocCo try ctx.macho_file.locals.append(ctx.macho_file.base.allocator, .{ .n_strx = try ctx.macho_file.makeString(sym_name), .n_type = macho.N_SECT, - .n_sect = ctx.macho_file.sectionId(match), + .n_sect = ctx.macho_file.section_to_ordinal.get(match) orelse unreachable, .n_desc = 0, .n_value = sect.addr, }); @@ -832,7 +832,7 @@ pub fn parseRelocsFromObject( }, .local => { const source_sym = ctx.macho_file.locals.items[self.local_sym_index]; - const match = ctx.macho_file.unpackSectionId(source_sym.n_sect); + const match = ctx.macho_file.section_ordinals.items[source_sym.n_sect]; const seg = ctx.macho_file.load_commands.items[match.seg].Segment; const sect = seg.sections.items[match.sect]; const sect_type = commands.sectionType(sect); @@ -1096,7 +1096,7 @@ pub fn resolveRelocs(self: *TextBlock, macho_file: *MachO) !void { const sym = macho_file.locals.items[rel.where_index]; const is_tlv = is_tlv: { const source_sym = macho_file.locals.items[self.local_sym_index]; - const match = macho_file.unpackSectionId(source_sym.n_sect); + const match = macho_file.section_ordinals.items[source_sym.n_sect]; const seg = macho_file.load_commands.items[match.seg].Segment; const sect = seg.sections.items[match.sect]; break :is_tlv commands.sectionType(sect) == macho.S_THREAD_LOCAL_VARIABLES; From 1beda818e1c10bde98b35759b3c131a864be58d9 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 23 Jul 2021 11:51:48 +0200 Subject: [PATCH 81/81] macho: re-enable parsing sections into atoms However, make it default only when building in release modes since it's a prelude to advanced dead code stripping not very useful in debug. 
--- src/link/MachO.zig | 123 +++++++------ src/link/MachO/Object.zig | 347 +++++++++++++++++------------------ src/link/MachO/TextBlock.zig | 76 ++++---- 3 files changed, 273 insertions(+), 273 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index b2f048d78b..a8749c1dfb 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -965,60 +965,7 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void { try self.allocateDataSegment(); self.allocateLinkeditSegment(); try self.allocateTextBlocks(); - - // log.warn("locals", .{}); - // for (self.locals.items) |sym, id| { - // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); - // } - - // log.warn("globals", .{}); - // for (self.globals.items) |sym, id| { - // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); - // } - - // log.warn("tentatives", .{}); - // for (self.tentatives.items) |sym, id| { - // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); - // } - - // log.warn("undefines", .{}); - // for (self.undefs.items) |sym, id| { - // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); - // } - - // log.warn("imports", .{}); - // for (self.imports.items) |sym, id| { - // log.warn(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); - // } - - // log.warn("symbol resolver", .{}); - // for (self.symbol_resolver.keys()) |key| { - // log.warn(" {s} => {}", .{ key, self.symbol_resolver.get(key).? 
}); - // } - - // log.warn("mappings", .{}); - // for (self.objects.items) |object, id| { - // const object_id = @intCast(u16, id); - // log.warn(" in object {s}", .{object.name.?}); - // for (object.symtab.items) |sym, sym_id| { - // if (object.symbol_mapping.get(@intCast(u32, sym_id))) |local_id| { - // log.warn(" | {d} => {d}", .{ sym_id, local_id }); - // } else { - // log.warn(" | {d} no local mapping for {s}", .{ sym_id, object.getString(sym.n_strx) }); - // } - // } - // } - - // var it = self.blocks.iterator(); - // while (it.next()) |entry| { - // const seg = self.load_commands.items[entry.key_ptr.seg].Segment; - // const sect = seg.sections.items[entry.key_ptr.sect]; - - // log.warn("\n\n{s},{s} contents:", .{ segmentName(sect), sectionName(sect) }); - // log.warn(" {}", .{sect}); - // entry.value_ptr.*.print(self); - // } - + self.printSymtabAndTextBlock(); try self.flushZld(); } @@ -2086,6 +2033,7 @@ fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void { .n_value = sym.n_value, }); try object.symbol_mapping.putNoClobber(self.base.allocator, sym_id, local_sym_index); + try object.reverse_symbol_mapping.putNoClobber(self.base.allocator, local_sym_index, sym_id); // If the symbol's scope is not local aka translation unit, then we need work out // if we should save the symbol as a global, or potentially flag the error. 
@@ -5916,3 +5864,70 @@ fn createSectionOrdinal(self: *MachO, match: MatchingSection) !void { try self.section_ordinals.append(self.base.allocator, match); try self.section_to_ordinal.putNoClobber(self.base.allocator, match, ordinal); } + +fn printSymtabAndTextBlock(self: *MachO) void { + log.debug("locals", .{}); + for (self.locals.items) |sym, id| { + log.debug(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); + } + + log.debug("globals", .{}); + for (self.globals.items) |sym, id| { + log.debug(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); + } + + log.debug("tentatives", .{}); + for (self.tentatives.items) |sym, id| { + log.debug(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); + } + + log.debug("undefines", .{}); + for (self.undefs.items) |sym, id| { + log.debug(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); + } + + log.debug("imports", .{}); + for (self.imports.items) |sym, id| { + log.debug(" {d}: {s}, {}", .{ id, self.getString(sym.n_strx), sym }); + } + + { + log.debug("symbol resolver", .{}); + var it = self.symbol_resolver.keyIterator(); + while (it.next()) |key_ptr| { + const sym_name = self.getString(key_ptr.*); + log.debug(" {s} => {}", .{ sym_name, self.symbol_resolver.get(key_ptr.*).? 
}); + } + } + + log.debug("mappings", .{}); + for (self.objects.items) |object| { + log.debug(" in object {s}", .{object.name.?}); + for (object.symtab.items) |sym, sym_id| { + if (object.symbol_mapping.get(@intCast(u32, sym_id))) |local_id| { + log.debug(" | {d} => {d}", .{ sym_id, local_id }); + } else { + log.debug(" | {d} no local mapping for {s}", .{ sym_id, object.getString(sym.n_strx) }); + } + } + } + + { + var it = self.blocks.iterator(); + while (it.next()) |entry| { + const seg = self.load_commands.items[entry.key_ptr.seg].Segment; + const sect = seg.sections.items[entry.key_ptr.sect]; + + var block: *TextBlock = entry.value_ptr.*; + + log.debug("\n\n{s},{s} contents:", .{ commands.segmentName(sect), commands.sectionName(sect) }); + log.debug("{}", .{sect}); + log.debug("{}", .{block}); + + while (block.prev) |prev| { + block = prev; + log.debug("{}", .{block}); + } + } + } +} diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 846b87a65a..fc17669e04 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -55,7 +55,11 @@ mtime: ?u64 = null, text_blocks: std.ArrayListUnmanaged(*TextBlock) = .{}, sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{}, + +// TODO symbol mapping and its inverse can probably be simple arrays +// instead of hash maps. 
symbol_mapping: std.AutoHashMapUnmanaged(u32, u32) = .{}, +reverse_symbol_mapping: std.AutoHashMapUnmanaged(u32, u32) = .{}, const DebugInfo = struct { inner: dwarf.DwarfInfo, @@ -164,6 +168,7 @@ pub fn deinit(self: *Object) void { self.text_blocks.deinit(self.allocator); self.sections_as_symbols.deinit(self.allocator); self.symbol_mapping.deinit(self.allocator); + self.reverse_symbol_mapping.deinit(self.allocator); if (self.debug_info) |*db| { db.deinit(self.allocator); @@ -367,7 +372,7 @@ const TextBlockParser = struct { } else if (MachO.symbolIsPext(rhs.nlist) or MachO.symbolIsWeakDef(rhs.nlist)) { return !MachO.symbolIsExt(lhs.nlist); } else { - return true; + return false; } } @@ -392,15 +397,7 @@ const TextBlockParser = struct { } else null; for (aliases.items) |*nlist_with_index| { - nlist_with_index.index = self.symbol_mapping.get(nlist_with_index.index); - const sym = self.object.symbols.items[nlist_with_index.index]; - if (sym.payload != .regular) { - log.err("expected a regular symbol, found {s}", .{sym.payload}); - log.err(" when remapping {s}", .{self.macho_file.getString(sym.strx)}); - return error.SymbolIsNotRegular; - } - assert(sym.payload.regular.local_sym_index != 0); // This means the symbol has not been properly resolved. 
- nlist_with_index.index = sym.payload.regular.local_sym_index; + nlist_with_index.index = self.object.symbol_mapping.get(nlist_with_index.index) orelse unreachable; } if (aliases.items.len > 1) { @@ -409,15 +406,13 @@ const TextBlockParser = struct { NlistWithIndex, aliases.items, SeniorityContext{ .object = self.object }, - @This().lessThanBySeniority, + TextBlockParser.lessThanBySeniority, ); } const senior_nlist = aliases.pop(); - const senior_sym = self.macho_file.locals.items[senior_nlist.index]; - assert(senior_sym.payload == .regular); - senior_sym.payload.regular.segment_id = self.match.seg; - senior_sym.payload.regular.section_id = self.match.sect; + const senior_sym = &self.macho_file.locals.items[senior_nlist.index]; + senior_sym.n_sect = self.macho_file.section_to_ordinal.get(self.match) orelse unreachable; const start_addr = senior_nlist.nlist.n_value - self.section.addr; const end_addr = if (next_nlist) |n| n.nlist.n_value - self.section.addr else self.section.size; @@ -442,33 +437,29 @@ const TextBlockParser = struct { } } } - if (self.macho_file.globals.contains(self.macho_file.getString(senior_sym.strx))) break :blk .global; + // TODO + // if (self.macho_file.globals.contains(self.macho_file.getString(senior_sym.strx))) break :blk .global; break :blk .static; } else null; - const block = try self.allocator.create(TextBlock); - errdefer self.allocator.destroy(block); - - block.* = TextBlock.init(self.allocator); + const block = try self.macho_file.base.allocator.create(TextBlock); + block.* = TextBlock.empty; block.local_sym_index = senior_nlist.index; block.stab = stab; - block.code = try self.allocator.dupe(u8, code); block.size = size; block.alignment = actual_align; + try self.macho_file.managed_blocks.append(self.macho_file.base.allocator, block); - if (aliases.items.len > 0) { - try block.aliases.ensureTotalCapacity(aliases.items.len); - for (aliases.items) |alias| { - block.aliases.appendAssumeCapacity(alias.index); + try 
block.code.appendSlice(self.macho_file.base.allocator, code); - const sym = self.macho_file.locals.items[alias.index]; - const reg = &sym.payload.regular; - reg.segment_id = self.match.seg; - reg.section_id = self.match.sect; - } + try block.aliases.ensureTotalCapacity(self.macho_file.base.allocator, aliases.items.len); + for (aliases.items) |alias| { + block.aliases.appendAssumeCapacity(alias.index); + const sym = &self.macho_file.locals.items[alias.index]; + sym.n_sect = self.macho_file.section_to_ordinal.get(self.match) orelse unreachable; } - try block.parseRelocsFromObject(self.allocator, relocs, object, .{ + try block.parseRelocsFromObject(self.macho_file.base.allocator, self.relocs, self.object, .{ .base_addr = start_addr, .macho_file = self.macho_file, }); @@ -479,7 +470,7 @@ const TextBlockParser = struct { senior_nlist.nlist.n_value, senior_nlist.nlist.n_value + size, ); - try block.dices.ensureTotalCapacity(dices.len); + try block.dices.ensureTotalCapacity(self.macho_file.base.allocator, dices.len); for (dices) |dice| { block.dices.appendAssumeCapacity(.{ @@ -518,10 +509,22 @@ pub fn parseTextBlocks(self: *Object, macho_file: *MachO) !void { sort.sort(NlistWithIndex, sorted_all_nlists.items, {}, NlistWithIndex.lessThan); - const dysymtab = self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; + // Well, shit, sometimes compilers skip the dysymtab load command altogether, meaning we + // have to infer the start of undef section in the symtab ourselves. + const iundefsym = if (self.dysymtab_cmd_index) |cmd_index| blk: { + const dysymtab = self.load_commands.items[cmd_index].Dysymtab; + break :blk dysymtab.iundefsym; + } else blk: { + var iundefsym: usize = sorted_all_nlists.items.len; + while (iundefsym > 0) : (iundefsym -= 1) { + const nlist = sorted_all_nlists.items[iundefsym]; + if (MachO.symbolIsSect(nlist.nlist)) break; + } + break :blk iundefsym; + }; // We only care about defined symbols, so filter every other out. 
- const sorted_nlists = sorted_all_nlists.items[dysymtab.ilocalsym..dysymtab.iundefsym]; + const sorted_nlists = sorted_all_nlists.items[0..iundefsym]; for (seg.sections.items) |sect, id| { const sect_id = @intCast(u8, id); @@ -550,11 +553,12 @@ pub fn parseTextBlocks(self: *Object, macho_file: *MachO) !void { // Symbols within this section only. const filtered_nlists = NlistWithIndex.filterInSection(sorted_nlists, sect); - // Is there any padding between symbols within the section? - // const is_splittable = self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; - // TODO is it perhaps worth skip parsing subsections in Debug mode and not worry about - // duplicates at all? Need some benchmarks! - // const is_splittable = false; + // In release mode, if the object file was generated with dead code stripping optimisations, + // note it now and parse sections as atoms. + const is_splittable = blk: { + if (macho_file.base.options.optimize_mode == .Debug) break :blk false; + break :blk self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; + }; macho_file.has_dices = blk: { if (self.text_section_index) |index| { @@ -566,157 +570,152 @@ pub fn parseTextBlocks(self: *Object, macho_file: *MachO) !void { }; macho_file.has_stabs = macho_file.has_stabs or self.debug_info != null; - { - // next: { - // if (is_splittable) blocks: { - // if (filtered_nlists.len == 0) break :blocks; + next: { + if (is_splittable) blocks: { + if (filtered_nlists.len == 0) break :blocks; - // // If the first nlist does not match the start of the section, - // // then we need encapsulate the memory range [section start, first symbol) - // // as a temporary symbol and insert the matching TextBlock. 
- // const first_nlist = filtered_nlists[0].nlist; - // if (first_nlist.n_value > sect.addr) { - // const symbol = self.sections_as_symbols.get(sect_id) orelse symbol: { - // const name = try std.fmt.allocPrint(self.allocator, "l_{s}_{s}_{s}", .{ - // self.name.?, - // segmentName(sect), - // sectionName(sect), - // }); - // defer self.allocator.free(name); - // const symbol = try zld.allocator.create(Symbol); - // symbol.* = .{ - // .strx = try zld.makeString(name), - // .payload = .{ .undef = .{} }, - // }; - // try self.sections_as_symbols.putNoClobber(self.allocator, sect_id, symbol); - // break :symbol symbol; - // }; + // If the first nlist does not match the start of the section, + // then we need to encapsulate the memory range [section start, first symbol) + // as a temporary symbol and insert the matching TextBlock. + const first_nlist = filtered_nlists[0].nlist; + if (first_nlist.n_value > sect.addr) { + const sym_name = try std.fmt.allocPrint(self.allocator, "l_{s}_{s}_{s}", .{ + self.name.?, + segmentName(sect), + sectionName(sect), + }); + defer self.allocator.free(sym_name); - // const local_sym_index = @intCast(u32, zld.locals.items.len); - // symbol.payload = .{ - // .regular = .{ - // .linkage = .translation_unit, - // .address = sect.addr, - // .segment_id = match.seg, - // .section_id = match.sect, - // .file = self, - // .local_sym_index = local_sym_index, - // }, - // }; - // try zld.locals.append(zld.allocator, symbol); + const block_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { + const block_local_sym_index = @intCast(u32, macho_file.locals.items.len); + try macho_file.locals.append(macho_file.base.allocator, .{ + .n_strx = try macho_file.makeString(sym_name), + .n_type = macho.N_SECT, + .n_sect = macho_file.section_to_ordinal.get(match) orelse unreachable, + .n_desc = 0, + .n_value = sect.addr, + }); + try self.sections_as_symbols.putNoClobber(self.allocator, sect_id, block_local_sym_index); + break :blk 
block_local_sym_index; + }; - // const block_code = code[0 .. first_nlist.n_value - sect.addr]; - // const block_size = block_code.len; + const block_code = code[0 .. first_nlist.n_value - sect.addr]; + const block_size = block_code.len; - // const block = try self.allocator.create(TextBlock); - // errdefer self.allocator.destroy(block); + const block = try macho_file.base.allocator.create(TextBlock); + block.* = TextBlock.empty; + block.local_sym_index = block_local_sym_index; + block.size = block_size; + block.alignment = sect.@"align"; + try macho_file.managed_blocks.append(macho_file.base.allocator, block); - // block.* = TextBlock.init(self.allocator); - // block.local_sym_index = local_sym_index; - // block.code = try self.allocator.dupe(u8, block_code); - // block.size = block_size; - // block.alignment = sect.@"align"; + try block.code.appendSlice(macho_file.base.allocator, block_code); - // const block_relocs = filterRelocs(relocs, 0, block_size); - // if (block_relocs.len > 0) { - // try self.parseRelocs(zld, block_relocs, block, 0); - // } + try block.parseRelocsFromObject(self.allocator, relocs, self, .{ + .base_addr = 0, + .macho_file = macho_file, + }); - // if (zld.has_dices) { - // const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + block_size); - // try block.dices.ensureTotalCapacity(dices.len); + if (macho_file.has_dices) { + const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + block_size); + try block.dices.ensureTotalCapacity(macho_file.base.allocator, dices.len); - // for (dices) |dice| { - // block.dices.appendAssumeCapacity(.{ - // .offset = dice.offset - try math.cast(u32, sect.addr), - // .length = dice.length, - // .kind = dice.kind, - // }); - // } - // } + for (dices) |dice| { + block.dices.appendAssumeCapacity(.{ + .offset = dice.offset - try math.cast(u32, sect.addr), + .length = dice.length, + .kind = dice.kind, + }); + } + } - // // Update target section's metadata - // // 
TODO should we update segment's size here too? - // // How does it tie with incremental space allocs? - // const tseg = &zld.load_commands.items[match.seg].Segment; - // const tsect = &tseg.sections.items[match.sect]; - // const new_alignment = math.max(tsect.@"align", block.alignment); - // const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); - // const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; - // tsect.size = new_size; - // tsect.@"align" = new_alignment; + // Update target section's metadata + // TODO should we update segment's size here too? + // How does it tie with incremental space allocs? + const tseg = &macho_file.load_commands.items[match.seg].Segment; + const tsect = &tseg.sections.items[match.sect]; + const new_alignment = math.max(tsect.@"align", block.alignment); + const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); + const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; + tsect.size = new_size; + tsect.@"align" = new_alignment; - // if (zld.blocks.getPtr(match)) |last| { - // last.*.next = block; - // block.prev = last.*; - // last.* = block; - // } else { - // try zld.blocks.putNoClobber(zld.allocator, match, block); - // } + if (macho_file.blocks.getPtr(match)) |last| { + last.*.next = block; + block.prev = last.*; + last.* = block; + } else { + try macho_file.blocks.putNoClobber(macho_file.base.allocator, match, block); + } - // try self.text_blocks.append(self.allocator, block); - // } + try self.text_blocks.append(self.allocator, block); + } - // var parser = TextBlockParser{ - // .allocator = self.allocator, - // .section = sect, - // .code = code, - // .relocs = relocs, - // .object = self, - // .zld = zld, - // .nlists = filtered_nlists, - // .match = match, - // }; + var parser = TextBlockParser{ + .allocator = self.allocator, + .section = sect, + .code = code, + .relocs = relocs, + .object = self, + .macho_file = macho_file, + 
.nlists = filtered_nlists, + .match = match, + }; - // while (try parser.next()) |block| { - // const sym = zld.locals.items[block.local_sym_index]; - // const reg = &sym.payload.regular; - // if (reg.file) |file| { - // if (file != self) { - // log.debug("deduping definition of {s} in {s}", .{ zld.getString(sym.strx), self.name.? }); - // block.deinit(); - // self.allocator.destroy(block); - // continue; - // } - // } + while (try parser.next()) |block| { + const sym = macho_file.locals.items[block.local_sym_index]; + const is_ext = blk: { + const orig_sym_id = self.reverse_symbol_mapping.get(block.local_sym_index) orelse unreachable; + break :blk MachO.symbolIsExt(self.symtab.items[orig_sym_id]); + }; + if (is_ext) { + if (macho_file.symbol_resolver.get(sym.n_strx)) |resolv| { + assert(resolv.where == .global); + const global_object = macho_file.objects.items[resolv.file]; + if (global_object != self) { + log.debug("deduping definition of {s} in {s}", .{ + macho_file.getString(sym.n_strx), + self.name.?, + }); + log.debug(" already defined in {s}", .{global_object.name.?}); + continue; + } + } + } - // if (reg.address == sect.addr) { - // if (self.sections_as_symbols.get(sect_id)) |alias| { - // // Add alias. - // const local_sym_index = @intCast(u32, zld.locals.items.len); - // const reg_alias = &alias.payload.regular; - // reg_alias.segment_id = match.seg; - // reg_alias.section_id = match.sect; - // reg_alias.local_sym_index = local_sym_index; - // try block.aliases.append(local_sym_index); - // try zld.locals.append(zld.allocator, alias); - // } - // } + if (sym.n_value == sect.addr) { + if (self.sections_as_symbols.get(sect_id)) |alias| { + // In x86_64 relocs, it can so happen that the compiler refers to the same + // atom by both the actual assigned symbol and the start of the section. In this + // case, we need to link the two together so add an alias. 
+ try block.aliases.append(macho_file.base.allocator, alias); + } + } - // // Update target section's metadata - // // TODO should we update segment's size here too? - // // How does it tie with incremental space allocs? - // const tseg = &zld.load_commands.items[match.seg].Segment; - // const tsect = &tseg.sections.items[match.sect]; - // const new_alignment = math.max(tsect.@"align", block.alignment); - // const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); - // const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; - // tsect.size = new_size; - // tsect.@"align" = new_alignment; + // Update target section's metadata + // TODO should we update segment's size here too? + // How does it tie with incremental space allocs? + const tseg = &macho_file.load_commands.items[match.seg].Segment; + const tsect = &tseg.sections.items[match.sect]; + const new_alignment = math.max(tsect.@"align", block.alignment); + const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); + const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; + tsect.size = new_size; + tsect.@"align" = new_alignment; - // if (zld.blocks.getPtr(match)) |last| { - // last.*.next = block; - // block.prev = last.*; - // last.* = block; - // } else { - // try zld.blocks.putNoClobber(zld.allocator, match, block); - // } + if (macho_file.blocks.getPtr(match)) |last| { + last.*.next = block; + block.prev = last.*; + last.* = block; + } else { + try macho_file.blocks.putNoClobber(macho_file.base.allocator, match, block); + } - // try self.text_blocks.append(self.allocator, block); - // } + try self.text_blocks.append(self.allocator, block); + } - // break :next; - // } + break :next; + } // Since there is no symbol to refer to this block, we create // a temp one, unless we already did that when working out the relocations @@ -757,7 +756,7 @@ pub fn parseTextBlocks(self: *Object, macho_file: *MachO) !void { if 
(macho_file.has_dices) { const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + sect.size); - try block.dices.ensureTotalCapacity(self.allocator, dices.len); + try block.dices.ensureTotalCapacity(macho_file.base.allocator, dices.len); for (dices) |dice| { block.dices.appendAssumeCapacity(.{ @@ -820,7 +819,7 @@ pub fn parseTextBlocks(self: *Object, macho_file: *MachO) !void { block.prev = last.*; last.* = block; } else { - try macho_file.blocks.putNoClobber(self.allocator, match, block); + try macho_file.blocks.putNoClobber(macho_file.base.allocator, match, block); } try self.text_blocks.append(self.allocator, block); diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig index 17d1d82db8..8dca7bc37b 100644 --- a/src/link/MachO/TextBlock.zig +++ b/src/link/MachO/TextBlock.zig @@ -75,6 +75,21 @@ pub const SymbolAtOffset = struct { local_sym_index: u32, offset: u64, stab: ?Stab = null, + + pub fn format( + self: SymbolAtOffset, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "{{ {d}: .offset = {d}", .{ self.local_sym_index, self.offset }); + if (self.stab) |stab| { + try std.fmt.format(writer, ", .stab = {any}", .{stab}); + } + try std.fmt.format(writer, " }}", .{}); + } }; pub const Stab = union(enum) { @@ -1150,53 +1165,24 @@ pub fn resolveRelocs(self: *TextBlock, macho_file: *MachO) !void { } } -pub fn print_this(self: *const TextBlock, macho_file: MachO) void { - log.warn("TextBlock", .{}); - log.warn(" {}: {}", .{ self.local_sym_index, macho_file.locals.items[self.local_sym_index] }); +pub fn format(self: TextBlock, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "TextBlock {{ ", .{}); + try std.fmt.format(writer, ".local_sym_index = {d}, ", .{self.local_sym_index}); + try std.fmt.format(writer, ".aliases = {any}, ", 
.{self.aliases.items}); + try std.fmt.format(writer, ".contained = {any}, ", .{self.contained.items}); + try std.fmt.format(writer, ".code = {*}, ", .{self.code.items}); + try std.fmt.format(writer, ".size = {d}, ", .{self.size}); + try std.fmt.format(writer, ".alignment = {d}, ", .{self.alignment}); + try std.fmt.format(writer, ".relocs = {any}, ", .{self.relocs.items}); + try std.fmt.format(writer, ".rebases = {any}, ", .{self.rebases.items}); + try std.fmt.format(writer, ".bindings = {any}, ", .{self.bindings.items}); + try std.fmt.format(writer, ".dices = {any}, ", .{self.dices.items}); if (self.stab) |stab| { - log.warn(" stab: {}", .{stab}); + try std.fmt.format(writer, ".stab = {any}, ", .{stab}); } - if (self.aliases.items.len > 0) { - log.warn(" aliases: {any}", .{self.aliases.items}); - } - if (self.references.count() > 0) { - log.warn(" references: {any}", .{self.references.keys()}); - } - if (self.contained) |contained| { - log.warn(" contained symbols:", .{}); - for (contained) |sym_at_off| { - if (sym_at_off.stab) |stab| { - log.warn(" {}: {}, stab: {}", .{ sym_at_off.offset, sym_at_off.local_sym_index, stab }); - } else { - log.warn(" {}: {}", .{ sym_at_off.offset, sym_at_off.local_sym_index }); - } - } - } - log.warn(" code.len = {}", .{self.code.items.len}); - if (self.relocs.items.len > 0) { - log.warn(" relocations:", .{}); - for (self.relocs.items) |rel| { - log.warn(" {}", .{rel}); - } - } - if (self.rebases.items.len > 0) { - log.warn(" rebases: {any}", .{self.rebases.items}); - } - if (self.bindings.items.len > 0) { - log.warn(" bindings: {any}", .{self.bindings.items}); - } - if (self.dices.items.len > 0) { - log.warn(" dices: {any}", .{self.dices.items}); - } - log.warn(" size = {}", .{self.size}); - log.warn(" align = {}", .{self.alignment}); -} - -pub fn print(self: *const TextBlock, macho_file: MachO) void { - if (self.prev) |prev| { - prev.print(macho_file); - } - self.print_this(macho_file); + try std.fmt.format(writer, "}}", .{}); } 
const RelocIterator = struct {