From 9306dbd6194251e816f11bc8f420fac0d1ca8835 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 7 Sep 2020 07:18:44 +0200 Subject: [PATCH] Fix bug where __text section would get overwritten Fixes a bug where the last written load command would accidentally overwrite the beginning of the __text section. Also defines missing MachO constants and relocation structs/enums. Signed-off-by: Jakub Konka --- lib/std/macho.zig | 117 +++++++++++++++++++++++++++++++-- src-self-hosted/link/MachO.zig | 35 ++++++---- 2 files changed, 132 insertions(+), 20 deletions(-) diff --git a/lib/std/macho.zig b/lib/std/macho.zig index 5217a73763..d3296ee171 100644 --- a/lib/std/macho.zig +++ b/lib/std/macho.zig @@ -647,6 +647,32 @@ pub const nlist_64 = extern struct { n_value: u64, }; +/// Format of a relocation entry of a Mach-O file. Modified from the 4.3BSD +/// format. The modifications from the original format were changing the value +/// of the r_symbolnum field for "local" (r_extern == 0) relocation entries. +/// This modification is required to support symbols in an arbitrary number of +/// sections not just the three sections (text, data and bss) in a 4.3BSD file. +/// Also the last 4 bits have had the r_type tag added to them. +pub const relocation_info = packed struct { + /// offset in the section to what is being relocated + r_address: i32, + + /// symbol index if r_extern == 1 or section ordinal if r_extern == 0 + r_symbolnum: u24, + + /// was relocated pc relative already + r_pcrel: u1, + + /// 0=byte, 1=word, 2=long, 3=quad + r_length: u2, + + /// does not include value of sym referenced + r_extern: u1, + + /// if not 0, machine specific relocation type + r_type: u4, +}; + /// After MacOS X 10.1 when a new load command is added that is required to be /// understood by the dynamic linker for the image to execute properly the /// LC_REQ_DYLD bit will be or'ed into the load command constant. 
If the dynamic @@ -1086,13 +1112,58 @@ pub const N_ECOML = 0xe8; /// second stab entry with length information pub const N_LENG = 0xfe; -/// If a segment contains any sections marked with S_ATTR_DEBUG then all -/// sections in that segment must have this attribute. No section other than -/// a section marked with this attribute may reference the contents of this -/// section. A section with this attribute may contain no symbols and must have -/// a section type S_REGULAR. The static linker will not copy section contents -/// from sections with this attribute into its output file. These sections -/// generally contain DWARF debugging info. +// For the two types of symbol pointers sections and the symbol stubs section +// they have indirect symbol table entries. For each of the entries in the +// section the indirect symbol table entries, in corresponding order in the +// indirect symbol table, start at the index stored in the reserved1 field +// of the section structure. Since the indirect symbol table entries +// correspond to the entries in the section the number of indirect symbol table +// entries is inferred from the size of the section divided by the size of the +// entries in the section. For symbol pointers sections the size of the entries +// in the section is 4 bytes and for symbol stubs sections the byte size of the +// stubs is stored in the reserved2 field of the section structure. 
+ +/// section with only non-lazy symbol pointers +pub const S_NON_LAZY_SYMBOL_POINTERS = 0x6; + +/// section with only lazy symbol pointers +pub const S_LAZY_SYMBOL_POINTERS = 0x7; + +/// section with only symbol stubs, byte size of stub in the reserved2 field +pub const S_SYMBOL_STUBS = 0x8; + +/// section with only function pointers for initialization +pub const S_MOD_INIT_FUNC_POINTERS = 0x9; + +/// section with only function pointers for termination +pub const S_MOD_TERM_FUNC_POINTERS = 0xa; + +/// section contains symbols that are to be coalesced +pub const S_COALESCED = 0xb; + +/// zero fill on demand section (that can be larger than 4 gigabytes) +pub const S_GB_ZEROFILL = 0xc; + +/// section with only pairs of function pointers for interposing +pub const S_INTERPOSING = 0xd; + +/// section with only 16 byte literals +pub const S_16BYTE_LITERALS = 0xe; + +/// section contains DTrace Object Format +pub const S_DTRACE_DOF = 0xf; + +/// section with only lazy symbol pointers to lazy loaded dylibs +pub const S_LAZY_DYLIB_SYMBOL_POINTERS = 0x10; + +// If a segment contains any sections marked with S_ATTR_DEBUG then all +// sections in that segment must have this attribute. No section other than +// a section marked with this attribute may reference the contents of this +// section. A section with this attribute may contain no symbols and must have +// a section type S_REGULAR. The static linker will not copy section contents +// from sections with this attribute into its output file. These sections +// generally contain DWARF debugging info. 
+ /// a debug section pub const S_ATTR_DEBUG = 0x02000000; @@ -1154,3 +1225,35 @@ pub const VM_PROT_WRITE: vm_prot_t = 0x2; /// VM execute permission pub const VM_PROT_EXECUTE: vm_prot_t = 0x4; + +pub const reloc_type_x86_64 = packed enum(u4) { + /// for absolute addresses + X86_64_RELOC_UNSIGNED = 0, + + /// for signed 32-bit displacement + X86_64_RELOC_SIGNED, + + /// a CALL/JMP instruction with 32-bit displacement + X86_64_RELOC_BRANCH, + + /// a MOVQ load of a GOT entry + X86_64_RELOC_GOT_LOAD, + + /// other GOT references + X86_64_RELOC_GOT, + + /// must be followed by a X86_64_RELOC_UNSIGNED + X86_64_RELOC_SUBTRACTOR, + + /// for signed 32-bit displacement with a -1 addend + X86_64_RELOC_SIGNED_1, + + /// for signed 32-bit displacement with a -2 addend + X86_64_RELOC_SIGNED_2, + + /// for signed 32-bit displacement with a -4 addend + X86_64_RELOC_SIGNED_4, + + /// for thread local variables + X86_64_RELOC_TLV, +}; diff --git a/src-self-hosted/link/MachO.zig b/src-self-hosted/link/MachO.zig index 27d0488f25..4b068d45bd 100644 --- a/src-self-hosted/link/MachO.zig +++ b/src-self-hosted/link/MachO.zig @@ -32,6 +32,20 @@ const LoadCommand = union(enum) { .Dysymtab => |x| x.cmdsize, }; } + + pub fn write(self: LoadCommand, file: *fs.File, offset: u64) !void { + return switch (self) { + .Segment => |cmd| writeGeneric(cmd, file, offset), + .LinkeditData => |cmd| writeGeneric(cmd, file, offset), + .Symtab => |cmd| writeGeneric(cmd, file, offset), + .Dysymtab => |cmd| writeGeneric(cmd, file, offset), + }; + } + + fn writeGeneric(cmd: anytype, file: *fs.File, offset: u64) !void { + const slice = [1]@TypeOf(cmd){cmd}; + return file.pwriteAll(mem.sliceAsBytes(slice[0..1]), offset); + } }; base: File, @@ -258,8 +272,7 @@ pub fn flush(self: *MachO, module: *Module) !void { var last_cmd_offset: usize = @sizeOf(macho.mach_header_64); for (self.load_commands.items) |cmd| { - const cmd_to_write = [1]@TypeOf(cmd){cmd}; - try 
self.base.file.?.pwriteAll(mem.sliceAsBytes(cmd_to_write[0..1]), last_cmd_offset); + try cmd.write(&self.base.file.?, last_cmd_offset); last_cmd_offset += cmd.cmdsize(); } const off = @sizeOf(macho.mach_header_64) + @sizeOf(macho.segment_command_64); @@ -346,19 +359,18 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { .n_desc = 0, .n_value = addr, }; - self.offset_table.items[decl.link.macho.offset_table_index.?] = addr; + // Since we updated the vaddr and the size, each corresponding export symbol also needs to be updated. + const decl_exports = module.decl_exports.get(decl) orelse &[0]*Module.Export{}; + try self.updateDeclExports(module, decl, decl_exports); try self.writeSymbol(decl.link.macho.symbol_table_index.?); const text_section = self.sections.items[self.text_section_index.?]; const section_offset = symbol.n_value - text_section.addr; const file_offset = text_section.offset + section_offset; log.debug("file_offset 0x{x}\n", .{file_offset}); - try self.base.file.?.pwriteAll(code, file_offset); - // Since we updated the vaddr and the size, each corresponding export symbol also needs to be updated. 
- const decl_exports = module.decl_exports.get(decl) orelse &[0]*Module.Export{}; - return self.updateDeclExports(module, decl, decl_exports); + try self.base.file.?.pwriteAll(code, file_offset); } pub fn updateDeclLineNumber(self: *MachO, module: *Module, decl: *const Module.Decl) !void {} @@ -374,7 +386,7 @@ pub fn updateDeclExports( if (decl.link.macho.symbol_table_index == null) return; - var decl_sym = self.symbol_table.items[decl.link.macho.symbol_table_index.?]; + const decl_sym = &self.symbol_table.items[decl.link.macho.symbol_table_index.?]; // TODO implement if (exports.len == 0) return; @@ -504,10 +516,7 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, const text_capacity = self.allocatedSize(text_section.offset); const needed_size = (addr + new_block_size) - text_section.addr; log.debug("text capacity 0x{x}, needed size 0x{x}\n", .{ text_capacity, needed_size }); - - if (needed_size > text_capacity) { - // TODO handle growth - } + assert(needed_size <= text_capacity); // TODO handle growth self.last_text_block = text_block; text_section.size = needed_size; @@ -659,7 +668,7 @@ fn writeSymbol(self: *MachO, index: usize) !void { defer tracy.end(); const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - var sym = [1]macho.nlist_64{self.symbol_table.items[index]}; + const sym = [1]macho.nlist_64{self.symbol_table.items[index]}; const off = symtab.symoff + @sizeOf(macho.nlist_64) * index; log.debug("writing symbol {} at 0x{x}\n", .{ sym[0], off }); try self.base.file.?.pwriteAll(mem.sliceAsBytes(sym[0..1]), off);