const TextBlock = @This();

const std = @import("std");
const build_options = @import("build_options");
const aarch64 = @import("../../codegen/aarch64.zig");
const assert = std.debug.assert;
const commands = @import("commands.zig");
const log = std.log.scoped(.text_block);
const macho = std.macho;
const math = std.math;
const mem = std.mem;
const meta = std.meta;

const Allocator = mem.Allocator;
const Arch = std.Target.Cpu.Arch;
const MachO = @import("../MachO.zig");
const Object = @import("Object.zig");

/// Each decl always gets a local symbol with the fully qualified name.
/// The vaddr and size are found here directly.
/// The file offset is found by computing the vaddr offset from the section vaddr
/// the symbol references, and adding that to the file offset of the section.
/// If this field is 0, it means the codegen size = 0 and there is no symbol or
/// offset table entry.
local_sym_index: u32,

/// List of symbol aliases pointing to the same block via different nlists
aliases: std.ArrayListUnmanaged(u32) = .{},

/// List of symbols contained within this block
contained: std.ArrayListUnmanaged(SymbolAtOffset) = .{},

/// Code (which may not be relocated yet) that this block represents
code: std.ArrayListUnmanaged(u8) = .{},

/// Size and alignment of this text block.
/// Unlike in ELF, we need to store the size of this symbol as part of
/// the TextBlock since macho.nlist_64 lacks this information.
size: u64,
alignment: u32,

relocs: std.ArrayListUnmanaged(Relocation) = .{},

/// List of offsets contained within this block that need rebasing by the dynamic
/// loader in the presence of ASLR
rebases: std.ArrayListUnmanaged(u64) = .{},

/// List of offsets contained within this block that will be dynamically bound
/// by the dynamic loader and contain pointers to resolved (at load time) extern
/// symbols (aka proxies aka imports)
bindings: std.ArrayListUnmanaged(SymbolAtOffset) = .{},

/// List of lazy bindings
lazy_bindings: std.ArrayListUnmanaged(SymbolAtOffset) = .{},

/// List of data-in-code entries. This is currently specific to x86_64 only.
dices: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{},

/// Stab entry for this block. This is currently specific to a binary created
/// by linking object files in the traditional sense; in the incremental sense, we
/// bypass stabs altogether and produce a dSYM bundle directly, with fully relocated
/// DWARF sections.
stab: ?Stab = null,

/// Points to the previous and next neighbours
next: ?*TextBlock,
prev: ?*TextBlock,

/// Previous/next linked list pointers.
/// This is the linked list node for this Decl's corresponding .debug_info tag.
dbg_info_prev: ?*TextBlock,
dbg_info_next: ?*TextBlock,

/// Offset into .debug_info pointing to the tag for this Decl.
dbg_info_off: u32,

/// Size of the .debug_info tag for this Decl, not including padding.
dbg_info_len: u32,

dirty: bool = true,

pub const SymbolAtOffset = struct {
    local_sym_index: u32,
    offset: u64,
    stab: ?Stab = null,

    pub fn format(
        self: SymbolAtOffset,
        comptime fmt: []const u8,
        options: std.fmt.FormatOptions,
        writer: anytype,
    ) !void {
        _ = fmt;
        _ = options;
        try std.fmt.format(writer, "{{ {d}: .offset = {d}", .{ self.local_sym_index, self.offset });
        if (self.stab) |stab| {
            try std.fmt.format(writer, ", .stab = {any}", .{stab});
        }
        try std.fmt.format(writer, " }}", .{});
    }
};

pub const Stab = union(enum) {
    function: u64,
    static,
    global,

    pub fn asNlists(stab: Stab, local_sym_index: u32, macho_file: anytype) ![]macho.nlist_64 {
        var nlists = std.ArrayList(macho.nlist_64).init(macho_file.base.allocator);
        defer nlists.deinit();

        const sym = macho_file.locals.items[local_sym_index];
        switch (stab) {
            .function => |size| {
                try nlists.ensureUnusedCapacity(4);
                nlists.appendAssumeCapacity(.{
                    .n_strx = 0,
                    .n_type = macho.N_BNSYM,
                    .n_sect = sym.n_sect,
                    .n_desc = 0,
                    .n_value = sym.n_value,
                });
                nlists.appendAssumeCapacity(.{
                    .n_strx = sym.n_strx,
                    .n_type = macho.N_FUN,
                    .n_sect = sym.n_sect,
                    .n_desc = 0,
                    .n_value = sym.n_value,
                });
                nlists.appendAssumeCapacity(.{
                    .n_strx = 0,
                    .n_type = macho.N_FUN,
                    .n_sect = 0,
                    .n_desc = 0,
                    .n_value = size,
                });
                nlists.appendAssumeCapacity(.{
                    .n_strx = 0,
                    .n_type = macho.N_ENSYM,
                    .n_sect = sym.n_sect,
                    .n_desc = 0,
                    .n_value = size,
                });
            },
            .global => {
                try nlists.append(.{
                    .n_strx = sym.n_strx,
                    .n_type = macho.N_GSYM,
                    .n_sect = 0,
                    .n_desc = 0,
                    .n_value = 0,
                });
            },
            .static => {
                try nlists.append(.{
                    .n_strx = sym.n_strx,
                    .n_type = macho.N_STSYM,
                    .n_sect = sym.n_sect,
                    .n_desc = 0,
                    .n_value = sym.n_value,
                });
            },
        }

        return nlists.toOwnedSlice();
    }
};
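// For reference, a `.function` stab of size 0x40 expands (via asNlists above)
// into the classic four-entry bracket that traditional linkers emit. Values
// here are purely illustrative:
//
//   N_BNSYM              n_value = function address
//   N_FUN   (named)      n_value = function address
//   N_FUN   (anonymous)  n_value = 0x40 (the function's size)
//   N_ENSYM              n_value = 0x40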
pub const Relocation = struct {
    /// Offset within the `block`'s code buffer.
    /// Note that the relocation size can be inferred from the relocation's kind.
    offset: u32,

    where: enum {
        local,
        undef,
    },

    where_index: u32,

    payload: union(enum) {
        unsigned: Unsigned,
        branch: Branch,
        page: Page,
        page_off: PageOff,
        pointer_to_got: PointerToGot,
        signed: Signed,
        load: Load,
    },

    const ResolveArgs = struct {
        block: *TextBlock,
        offset: u32,
        source_addr: u64,
        target_addr: u64,
        macho_file: *MachO,
    };

    pub const Unsigned = struct {
        subtractor: ?u32,

        /// Addend embedded directly in the relocation slot
        addend: i64,

        /// Extracted from r_length:
        /// => 3 implies true
        /// => 2 implies false
        /// => * is unreachable
        is_64bit: bool,

        pub fn resolve(self: Unsigned, args: ResolveArgs) !void {
            const result = blk: {
                if (self.subtractor) |subtractor| {
                    const sym = args.macho_file.locals.items[subtractor];
                    break :blk @intCast(i64, args.target_addr) - @intCast(i64, sym.n_value) + self.addend;
                } else {
                    break :blk @intCast(i64, args.target_addr) + self.addend;
                }
            };

            if (self.is_64bit) {
                mem.writeIntLittle(u64, args.block.code.items[args.offset..][0..8], @bitCast(u64, result));
            } else {
                mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @truncate(u32, @bitCast(u64, result)));
            }
        }

        pub fn format(self: Unsigned, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
            _ = fmt;
            _ = options;
            try std.fmt.format(writer, "Unsigned {{ ", .{});
            if (self.subtractor) |sub| {
                try std.fmt.format(writer, ".subtractor = {}, ", .{sub});
            }
            try std.fmt.format(writer, ".addend = {}, ", .{self.addend});
            const length: usize = if (self.is_64bit) 8 else 4;
            try std.fmt.format(writer, ".length = {}, ", .{length});
            try std.fmt.format(writer, "}}", .{});
        }
    };

    pub const Branch = struct {
        arch: Arch,

        pub fn resolve(self: Branch, args: ResolveArgs) !void {
            switch (self.arch) {
                .aarch64 => {
                    const displacement = try math.cast(
                        i28,
                        @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr),
                    );
                    const code = args.block.code.items[args.offset..][0..4];
                    var inst = aarch64.Instruction{
                        .unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload(
                            aarch64.Instruction,
                            aarch64.Instruction.unconditional_branch_immediate,
                        ), code),
                    };
                    inst.unconditional_branch_immediate.imm26 = @truncate(u26, @bitCast(u28, displacement >> 2));
                    mem.writeIntLittle(u32, code, inst.toU32());
                },
                .x86_64 => {
                    const displacement = try math.cast(
                        i32,
                        @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4,
                    );
                    mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, displacement));
                },
                else => return error.UnsupportedCpuArchitecture,
            }
        }

        pub fn format(self: Branch, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
            _ = self;
            _ = fmt;
            _ = options;
            try std.fmt.format(writer, "Branch {{}}", .{});
        }
    };
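    // A minimal sanity check of the displacement packing in Branch.resolve:
    // AArch64 B/BL encodes a word (4-byte) count, so a forward branch of
    // +8 bytes becomes imm26 = 2. The displacement value is made up for the
    // test; real displacements come from symbol addresses.
    test "aarch64 branch displacement is encoded in words" {
        const displacement: i28 = 8;
        const imm26 = @truncate(u26, @bitCast(u28, displacement >> 2));
        try std.testing.expectEqual(@as(u26, 2), imm26);
    }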
    pub const Page = struct {
        kind: enum {
            page,
            got,
            tlvp,
        },
        addend: u32 = 0,

        pub fn resolve(self: Page, args: ResolveArgs) !void {
            const target_addr = args.target_addr + self.addend;
            const source_page = @intCast(i32, args.source_addr >> 12);
            const target_page = @intCast(i32, target_addr >> 12);
            const pages = @bitCast(u21, @intCast(i21, target_page - source_page));

            const code = args.block.code.items[args.offset..][0..4];
            var inst = aarch64.Instruction{
                .pc_relative_address = mem.bytesToValue(meta.TagPayload(
                    aarch64.Instruction,
                    aarch64.Instruction.pc_relative_address,
                ), code),
            };
            inst.pc_relative_address.immhi = @truncate(u19, pages >> 2);
            inst.pc_relative_address.immlo = @truncate(u2, pages);
            mem.writeIntLittle(u32, code, inst.toU32());
        }

        pub fn format(self: Page, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
            _ = fmt;
            _ = options;
            try std.fmt.format(writer, "Page {{ ", .{});
            switch (self.kind) {
                .page => {},
                .got => {
                    try std.fmt.format(writer, ".got, ", .{});
                },
                .tlvp => {
                    try std.fmt.format(writer, ".tlvp", .{});
                },
            }
            try std.fmt.format(writer, ".addend = {}, ", .{self.addend});
            try std.fmt.format(writer, "}}", .{});
        }
    };

    pub const PageOff = struct {
        kind: enum {
            page,
            got,
            tlvp,
        },
        addend: u32 = 0,
        op_kind: ?OpKind = null,

        pub const OpKind = enum {
            arithmetic,
            load,
        };

        pub fn resolve(self: PageOff, args: ResolveArgs) !void {
            const code = args.block.code.items[args.offset..][0..4];

            switch (self.kind) {
                .page => {
                    const target_addr = args.target_addr + self.addend;
                    const narrowed = @truncate(u12, target_addr);

                    const op_kind = self.op_kind orelse unreachable;
                    var inst: aarch64.Instruction = blk: {
                        switch (op_kind) {
                            .arithmetic => {
                                break :blk .{
                                    .add_subtract_immediate = mem.bytesToValue(meta.TagPayload(
                                        aarch64.Instruction,
                                        aarch64.Instruction.add_subtract_immediate,
                                    ), code),
                                };
                            },
                            .load => {
                                break :blk .{
                                    .load_store_register = mem.bytesToValue(meta.TagPayload(
                                        aarch64.Instruction,
                                        aarch64.Instruction.load_store_register,
                                    ), code),
                                };
                            },
                        }
                    };

                    if (op_kind == .arithmetic) {
                        inst.add_subtract_immediate.imm12 = narrowed;
                    } else {
                        const offset: u12 = blk: {
                            if (inst.load_store_register.size == 0) {
                                if (inst.load_store_register.v == 1) {
                                    // 128-bit SIMD is scaled by 16.
                                    break :blk try math.divExact(u12, narrowed, 16);
                                }
                                // Otherwise, 8-bit SIMD or ldrb.
                                break :blk narrowed;
                            } else {
                                const denom: u4 = try math.powi(u4, 2, inst.load_store_register.size);
                                break :blk try math.divExact(u12, narrowed, denom);
                            }
                        };
                        inst.load_store_register.offset = offset;
                    }

                    mem.writeIntLittle(u32, code, inst.toU32());
                },
                .got => {
                    const narrowed = @truncate(u12, args.target_addr);

                    var inst: aarch64.Instruction = .{
                        .load_store_register = mem.bytesToValue(meta.TagPayload(
                            aarch64.Instruction,
                            aarch64.Instruction.load_store_register,
                        ), code),
                    };
                    const offset = try math.divExact(u12, narrowed, 8);
                    inst.load_store_register.offset = offset;
                    mem.writeIntLittle(u32, code, inst.toU32());
                },
                .tlvp => {
                    const RegInfo = struct {
                        rd: u5,
                        rn: u5,
                        size: u1,
                    };
                    const reg_info: RegInfo = blk: {
                        if (isArithmeticOp(code)) {
                            const inst = mem.bytesToValue(meta.TagPayload(
                                aarch64.Instruction,
                                aarch64.Instruction.add_subtract_immediate,
                            ), code);
                            break :blk .{
                                .rd = inst.rd,
                                .rn = inst.rn,
                                .size = inst.sf,
                            };
                        } else {
                            const inst = mem.bytesToValue(meta.TagPayload(
                                aarch64.Instruction,
                                aarch64.Instruction.load_store_register,
                            ), code);
                            break :blk .{
                                .rd = inst.rt,
                                .rn = inst.rn,
                                .size = @truncate(u1, inst.size),
                            };
                        }
                    };
                    const narrowed = @truncate(u12, args.target_addr);
                    var inst = aarch64.Instruction{
                        .add_subtract_immediate = .{
                            .rd = reg_info.rd,
                            .rn = reg_info.rn,
                            .imm12 = narrowed,
                            .sh = 0,
                            .s = 0,
                            .op = 0,
                            .sf = reg_info.size,
                        },
                    };
                    mem.writeIntLittle(u32, code, inst.toU32());
                },
            }
        }

        pub fn format(self: PageOff, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
            _ = fmt;
            _ = options;
            try std.fmt.format(writer, "PageOff {{ ", .{});
            switch (self.kind) {
                .page => {},
                .got => {
                    try std.fmt.format(writer, ".got, ", .{});
                },
                .tlvp => {
                    try std.fmt.format(writer, ".tlvp, ", .{});
                },
            }
            try std.fmt.format(writer, ".addend = {}, ", .{self.addend});
            try std.fmt.format(writer, ".op_kind = {s}, ", .{self.op_kind});
            try std.fmt.format(writer, "}}", .{});
        }
    };
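    // Two illustrative notes on the resolvers above (all addresses made up):
    //
    // Page.resolve: ADRP materializes the 4 KiB page of the target, so only
    // the page numbers (addr >> 12) matter; the low 12 bits are irrelevant.
    //
    // PageOff.resolve: a load/store immediate is scaled by the access size.
    // For `ldr x0, [x8, #off]` the `size` field is 3 (8-byte access), so the
    // low 12 bits of the target must be divisible by 2^3 and are encoded as
    // narrowed / 8; a misaligned target makes math.divExact return an error
    // instead of silently producing a bad encoding.
    test "adrp counts pages, not bytes" {
        const source_addr: u64 = 0x100004010;
        const target_addr: u64 = 0x100009008;
        const source_page = @intCast(i32, source_addr >> 12);
        const target_page = @intCast(i32, target_addr >> 12);
        try std.testing.expectEqual(@as(i32, 5), target_page - source_page);
    }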
    pub const PointerToGot = struct {
        pub fn resolve(_: PointerToGot, args: ResolveArgs) !void {
            const result = try math.cast(
                i32,
                @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr),
            );
            mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, result));
        }

        pub fn format(self: PointerToGot, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
            _ = self;
            _ = fmt;
            _ = options;
            try std.fmt.format(writer, "PointerToGot {{}}", .{});
        }
    };

    pub const Signed = struct {
        addend: i64,
        correction: i4,

        pub fn resolve(self: Signed, args: ResolveArgs) !void {
            const target_addr = @intCast(i64, args.target_addr) + self.addend;
            const displacement = try math.cast(
                i32,
                target_addr - @intCast(i64, args.source_addr) - self.correction - 4,
            );
            mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, displacement));
        }

        pub fn format(self: Signed, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
            _ = fmt;
            _ = options;
            try std.fmt.format(writer, "Signed {{ ", .{});
            try std.fmt.format(writer, ".addend = {}, ", .{self.addend});
            try std.fmt.format(writer, ".correction = {}, ", .{self.correction});
            try std.fmt.format(writer, "}}", .{});
        }
    };

    pub const Load = struct {
        kind: enum {
            got,
            tlvp,
        },
        addend: i32 = 0,

        pub fn resolve(self: Load, args: ResolveArgs) !void {
            if (self.kind == .tlvp) {
                // We need to rewrite the opcode from movq to leaq.
                args.block.code.items[args.offset - 2] = 0x8d;
            }
            const displacement = try math.cast(
                i32,
                @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4 + self.addend,
            );
            mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, displacement));
        }

        pub fn format(self: Load, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
            _ = fmt;
            _ = options;
            try std.fmt.format(writer, "Load {{ ", .{});
            try std.fmt.format(writer, "{s}, ", .{self.kind});
            try std.fmt.format(writer, ".addend = {}, ", .{self.addend});
            try std.fmt.format(writer, "}}", .{});
        }
    };

    pub fn resolve(self: Relocation, args: ResolveArgs) !void {
        switch (self.payload) {
            .unsigned => |unsigned| try unsigned.resolve(args),
            .branch => |branch| try branch.resolve(args),
            .page => |page| try page.resolve(args),
            .page_off => |page_off| try page_off.resolve(args),
            .pointer_to_got => |pointer_to_got| try pointer_to_got.resolve(args),
            .signed => |signed| try signed.resolve(args),
            .load => |load| try load.resolve(args),
        }
    }

    pub fn format(self: Relocation, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
        try std.fmt.format(writer, "Relocation {{ ", .{});
        try std.fmt.format(writer, ".offset = {}, ", .{self.offset});
        try std.fmt.format(writer, ".where = {}, ", .{self.where});
        try std.fmt.format(writer, ".where_index = {d}, ", .{self.where_index});
        switch (self.payload) {
            .unsigned => |unsigned| try unsigned.format(fmt, options, writer),
            .branch => |branch| try branch.format(fmt, options, writer),
            .page => |page| try page.format(fmt, options, writer),
            .page_off => |page_off| try page_off.format(fmt, options, writer),
            .pointer_to_got => |pointer_to_got| try pointer_to_got.format(fmt, options, writer),
            .signed => |signed| try signed.format(fmt, options, writer),
            .load => |load| try load.format(fmt, options, writer),
        }
        try std.fmt.format(writer, "}}", .{});
    }
};

pub const empty = TextBlock{
    .local_sym_index = 0,
    .size = 0,
    .alignment = 0,
    .prev = null,
    .next = null,
    .dbg_info_prev = null,
    .dbg_info_next = null,
    .dbg_info_off = undefined,
    .dbg_info_len = undefined,
};
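// Typical lifecycle, as a sketch: a block starts out as `empty`, the linker
// fills in local_sym_index/size/alignment and appends to the unmanaged lists
// above, and the owner must eventually call `deinit` below, since every one of
// those lists owns heap memory.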
pub fn deinit(self: *TextBlock, allocator: *Allocator) void {
    self.dices.deinit(allocator);
    self.lazy_bindings.deinit(allocator);
    self.bindings.deinit(allocator);
    self.rebases.deinit(allocator);
    self.relocs.deinit(allocator);
    self.contained.deinit(allocator);
    self.aliases.deinit(allocator);
    self.code.deinit(allocator);
}

/// Returns how much room there is to grow in virtual address space.
/// File offset relocation happens transparently, so it is not included in
/// this calculation.
pub fn capacity(self: TextBlock, macho_file: MachO) u64 {
    const self_sym = macho_file.locals.items[self.local_sym_index];
    if (self.next) |next| {
        const next_sym = macho_file.locals.items[next.local_sym_index];
        return next_sym.n_value - self_sym.n_value;
    } else {
        // We are the last block.
        // The capacity is limited only by virtual address space.
        return std.math.maxInt(u64) - self_sym.n_value;
    }
}

pub fn freeListEligible(self: TextBlock, macho_file: MachO) bool {
    // No need to keep a free list node for the last block.
    const next = self.next orelse return false;
    const self_sym = macho_file.locals.items[self.local_sym_index];
    const next_sym = macho_file.locals.items[next.local_sym_index];
    const cap = next_sym.n_value - self_sym.n_value;
    const ideal_cap = MachO.padToIdeal(self.size);
    if (cap <= ideal_cap) return false;
    const surplus = cap - ideal_cap;
    return surplus >= MachO.min_text_capacity;
}
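// Worked example with made-up addresses: if this block's symbol sits at
// n_value 0x1000 with size 0x20 and the next block's symbol at 0x2000, then
// capacity() is 0x1000 bytes. The block only earns a free-list node when
// capacity() - MachO.padToIdeal(size) still clears MachO.min_text_capacity,
// i.e. when the gap is big enough to be worth tracking for reuse.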
const RelocContext = struct {
    base_addr: u64 = 0,
    allocator: *Allocator,
    object: *Object,
    macho_file: *MachO,
};

fn initRelocFromObject(rel: macho.relocation_info, context: RelocContext) !Relocation {
    var parsed_rel = Relocation{
        .offset = @intCast(u32, @intCast(u64, rel.r_address) - context.base_addr),
        .where = undefined,
        .where_index = undefined,
        .payload = undefined,
    };

    if (rel.r_extern == 0) {
        const sect_id = @intCast(u16, rel.r_symbolnum - 1);
        const local_sym_index = context.object.sections_as_symbols.get(sect_id) orelse blk: {
            const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].Segment;
            const sect = seg.sections.items[sect_id];
            const match = (try context.macho_file.getMatchingSection(sect)) orelse unreachable;
            const local_sym_index = @intCast(u32, context.macho_file.locals.items.len);
            const sym_name = try std.fmt.allocPrint(context.allocator, "l_{s}_{s}_{s}", .{
                context.object.name,
                commands.segmentName(sect),
                commands.sectionName(sect),
            });
            defer context.allocator.free(sym_name);
            try context.macho_file.locals.append(context.allocator, .{
                .n_strx = try context.macho_file.makeString(sym_name),
                .n_type = macho.N_SECT,
                .n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(match).? + 1),
                .n_desc = 0,
                .n_value = sect.addr,
            });
            try context.object.sections_as_symbols.putNoClobber(context.allocator, sect_id, local_sym_index);
            break :blk local_sym_index;
        };
        parsed_rel.where = .local;
        parsed_rel.where_index = local_sym_index;
    } else {
        const sym = context.object.symtab.items[rel.r_symbolnum];
        const sym_name = context.object.getString(sym.n_strx);

        if (MachO.symbolIsSect(sym) and !MachO.symbolIsExt(sym)) {
            const where_index = context.object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable;
            parsed_rel.where = .local;
            parsed_rel.where_index = where_index;
        } else {
            const n_strx = context.macho_file.strtab_dir.getAdapted(@as([]const u8, sym_name), MachO.StringSliceAdapter{
                .strtab = &context.macho_file.strtab,
            }) orelse unreachable;
            const resolv = context.macho_file.symbol_resolver.get(n_strx) orelse unreachable;
            switch (resolv.where) {
                .global => {
                    parsed_rel.where = .local;
                    parsed_rel.where_index = resolv.local_sym_index;
                },
                .undef => {
                    parsed_rel.where = .undef;
                    parsed_rel.where_index = resolv.where_index;
                },
            }
        }
    }

    return parsed_rel;
}

pub fn parseRelocs(self: *TextBlock, relocs: []macho.relocation_info, context: RelocContext) !void {
    const filtered_relocs = filterRelocs(relocs, context.base_addr, context.base_addr + self.size);
    var it = RelocIterator{
        .buffer = filtered_relocs,
    };

    var addend: u32 = 0;
    var subtractor: ?u32 = null;

    const arch = context.macho_file.base.options.target.cpu.arch;
    while (it.next()) |rel| {
        if (isAddend(rel, arch)) {
            // ADDEND doesn't patch the TextBlock itself; it only carries state
            // for the relocation that follows, so record it and carry on.
            assert(addend == 0); // Oh no, addend was not reset!
            addend = rel.r_symbolnum;

            // Verify ADDEND is followed by a PAGE21 or PAGEOFF12.
            const next = @intToEnum(macho.reloc_type_arm64, it.peek().r_type);
            switch (next) {
                .ARM64_RELOC_PAGE21, .ARM64_RELOC_PAGEOFF12 => {},
                else => {
                    log.err("unexpected relocation type: expected PAGE21 or PAGEOFF12, found {s}", .{next});
                    return error.UnexpectedRelocationType;
                },
            }
            continue;
        }

        if (isSubtractor(rel, arch)) {
            // SUBTRACTOR likewise only records the symbol to subtract for the
            // UNSIGNED relocation that follows, so record it and carry on.
            assert(subtractor == null); // Oh no, subtractor was not reset!
            assert(rel.r_extern == 1);

            const sym = context.object.symtab.items[rel.r_symbolnum];
            const sym_name = context.object.getString(sym.n_strx);

            if (MachO.symbolIsSect(sym) and !MachO.symbolIsExt(sym)) {
                const where_index = context.object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable;
                subtractor = where_index;
            } else {
                const n_strx = context.macho_file.strtab_dir.getAdapted(@as([]const u8, sym_name), MachO.StringSliceAdapter{
                    .strtab = &context.macho_file.strtab,
                }) orelse unreachable;
                const resolv = context.macho_file.symbol_resolver.get(n_strx) orelse unreachable;
                assert(resolv.where == .global);
                subtractor = resolv.local_sym_index;
            }

            // Verify SUBTRACTOR is followed by UNSIGNED.
            switch (arch) {
                .aarch64 => {
                    const next = @intToEnum(macho.reloc_type_arm64, it.peek().r_type);
                    if (next != .ARM64_RELOC_UNSIGNED) {
                        log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next});
                        return error.UnexpectedRelocationType;
                    }
                },
                .x86_64 => {
                    const next = @intToEnum(macho.reloc_type_x86_64, it.peek().r_type);
                    if (next != .X86_64_RELOC_UNSIGNED) {
                        log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next});
                        return error.UnexpectedRelocationType;
                    }
                },
                else => unreachable,
            }
            continue;
        }

        var parsed_rel = try initRelocFromObject(rel, context);

        switch (arch) {
            .aarch64 => {
                const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type);
                switch (rel_type) {
                    .ARM64_RELOC_ADDEND => unreachable,
                    .ARM64_RELOC_SUBTRACTOR => unreachable,
                    .ARM64_RELOC_BRANCH26 => {
                        self.parseBranch(rel, &parsed_rel, context);
                    },
                    .ARM64_RELOC_UNSIGNED => {
                        self.parseUnsigned(rel, &parsed_rel, subtractor, context);
                        subtractor = null;
                    },
                    .ARM64_RELOC_PAGE21,
                    .ARM64_RELOC_GOT_LOAD_PAGE21,
                    .ARM64_RELOC_TLVP_LOAD_PAGE21,
                    => {
                        self.parsePage(rel, &parsed_rel, addend);
                        if (rel_type == .ARM64_RELOC_PAGE21) addend = 0;
                    },
                    .ARM64_RELOC_PAGEOFF12,
                    .ARM64_RELOC_GOT_LOAD_PAGEOFF12,
                    .ARM64_RELOC_TLVP_LOAD_PAGEOFF12,
                    => {
                        self.parsePageOff(rel, &parsed_rel, addend);
                        if (rel_type == .ARM64_RELOC_PAGEOFF12) addend = 0;
                    },
                    .ARM64_RELOC_POINTER_TO_GOT => {
                        self.parsePointerToGot(rel, &parsed_rel);
                    },
                }
            },
            .x86_64 => {
                switch (@intToEnum(macho.reloc_type_x86_64, rel.r_type)) {
                    .X86_64_RELOC_SUBTRACTOR => unreachable,
                    .X86_64_RELOC_BRANCH => {
                        self.parseBranch(rel, &parsed_rel, context);
                    },
                    .X86_64_RELOC_UNSIGNED => {
                        self.parseUnsigned(rel, &parsed_rel, subtractor, context);
                        subtractor = null;
                    },
                    .X86_64_RELOC_SIGNED,
                    .X86_64_RELOC_SIGNED_1,
                    .X86_64_RELOC_SIGNED_2,
                    .X86_64_RELOC_SIGNED_4,
                    => {
                        self.parseSigned(rel, &parsed_rel, context);
                    },
                    .X86_64_RELOC_GOT_LOAD,
                    .X86_64_RELOC_GOT,
                    .X86_64_RELOC_TLV,
                    => {
                        self.parseLoad(rel, &parsed_rel);
                    },
                }
            },
            else => unreachable,
        }

        try self.relocs.append(context.allocator, parsed_rel);

        const is_via_got = switch (parsed_rel.payload) {
            .pointer_to_got => true,
            .load => |load| load.kind == .got,
            .page => |page| page.kind == .got,
            .page_off => |page_off| page_off.kind == .got,
            else => false,
        };

        if (is_via_got) blk: {
            const key = MachO.GotIndirectionKey{
                .where = switch (parsed_rel.where) {
                    .local => .local,
                    .undef => .undef,
                },
                .where_index = parsed_rel.where_index,
            };
            if (context.macho_file.got_entries_map.contains(key)) break :blk;

            const atom = try context.macho_file.createGotAtom(key);
            try context.macho_file.got_entries_map.putNoClobber(context.macho_file.base.allocator, key, atom);
            const match = MachO.MatchingSection{
                .seg = context.macho_file.data_const_segment_cmd_index.?,
                .sect = context.macho_file.got_section_index.?,
            };
            _ = try context.macho_file.allocateAtom(atom, match);
            context.macho_file.dyld_info_dirty = true;
        } else if (parsed_rel.payload == .unsigned) {
            switch (parsed_rel.where) {
                .undef => {
                    try self.bindings.append(context.allocator, .{
                        .local_sym_index = parsed_rel.where_index,
                        .offset = parsed_rel.offset,
                    });
                },
                .local => {
                    const source_sym = context.macho_file.locals.items[self.local_sym_index];
                    const match = context.macho_file.section_ordinals.keys()[source_sym.n_sect - 1];
                    const seg = context.macho_file.load_commands.items[match.seg].Segment;
                    const sect = seg.sections.items[match.sect];
                    const sect_type = commands.sectionType(sect);

                    const should_rebase = rebase: {
                        if (!parsed_rel.payload.unsigned.is_64bit) break :rebase false;
                        // TODO: a check similar to what dyld does, i.e. verifying that
                        // the segment is writable, should be enough here.
                        const is_right_segment = blk: {
                            if (context.macho_file.data_segment_cmd_index) |idx| {
                                if (match.seg == idx) {
                                    break :blk true;
                                }
                            }
                            if (context.macho_file.data_const_segment_cmd_index) |idx| {
                                if (match.seg == idx) {
                                    break :blk true;
                                }
                            }
                            break :blk false;
                        };
                        if (!is_right_segment) break :rebase false;

                        if (sect_type != macho.S_LITERAL_POINTERS and
                            sect_type != macho.S_REGULAR and
                            sect_type != macho.S_MOD_INIT_FUNC_POINTERS and
                            sect_type != macho.S_MOD_TERM_FUNC_POINTERS)
                        {
                            break :rebase false;
                        }

                        break :rebase true;
                    };

                    if (should_rebase) {
                        try self.rebases.append(context.allocator, parsed_rel.offset);
                    }
                },
            }
        } else if (parsed_rel.payload == .branch) blk: {
            if (parsed_rel.where != .undef) break :blk;
            if (context.macho_file.stubs_map.contains(parsed_rel.where_index)) break :blk;

            const stub_helper_atom = try context.macho_file.createStubHelperAtom();
            const laptr_atom = try context.macho_file.createLazyPointerAtom(
                stub_helper_atom.local_sym_index,
                parsed_rel.where_index,
            );
            const stub_atom = try context.macho_file.createStubAtom(laptr_atom.local_sym_index);
            try context.macho_file.stubs_map.putNoClobber(context.allocator, parsed_rel.where_index, stub_atom);

            _ = try context.macho_file.allocateAtom(stub_helper_atom, .{
                .seg = context.macho_file.text_segment_cmd_index.?,
                .sect = context.macho_file.stub_helper_section_index.?,
            });
            _ = try context.macho_file.allocateAtom(laptr_atom, .{
                .seg = context.macho_file.data_segment_cmd_index.?,
                .sect = context.macho_file.la_symbol_ptr_section_index.?,
            });
            _ = try context.macho_file.allocateAtom(stub_atom, .{
                .seg = context.macho_file.text_segment_cmd_index.?,
                .sect = context.macho_file.stubs_section_index.?,
            });
        }
    }
}

fn isAddend(rel: macho.relocation_info, arch: Arch) bool {
    if (arch != .aarch64) return false;
    return @intToEnum(macho.reloc_type_arm64, rel.r_type) == .ARM64_RELOC_ADDEND;
}

fn isSubtractor(rel: macho.relocation_info, arch: Arch) bool {
    return switch (arch) {
        .aarch64 => @intToEnum(macho.reloc_type_arm64, rel.r_type) == .ARM64_RELOC_SUBTRACTOR,
        .x86_64 => @intToEnum(macho.reloc_type_x86_64, rel.r_type) == .X86_64_RELOC_SUBTRACTOR,
        else => unreachable,
    };
}
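// On aarch64, ADDEND and SUBTRACTOR are "prefix" records: they patch nothing
// themselves and only parameterize the next relocation. For example, the pair
// (ARM64_RELOC_ADDEND 0x10, ARM64_RELOC_PAGE21) means "PAGE21 against
// target + 0x10", which is why parseRelocs stashes the value and asserts on
// the type of the record that follows.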
fn parseUnsigned(
    self: TextBlock,
    rel: macho.relocation_info,
    out: *Relocation,
    subtractor: ?u32,
    context: RelocContext,
) void {
    assert(rel.r_pcrel == 0);

    const is_64bit: bool = switch (rel.r_length) {
        3 => true,
        2 => false,
        else => unreachable,
    };

    var addend: i64 = if (is_64bit)
        mem.readIntLittle(i64, self.code.items[out.offset..][0..8])
    else
        mem.readIntLittle(i32, self.code.items[out.offset..][0..4]);

    if (rel.r_extern == 0) {
        assert(out.where == .local);
        const target_sym = context.macho_file.locals.items[out.where_index];
        addend -= @intCast(i64, target_sym.n_value);
    }

    out.payload = .{
        .unsigned = .{
            .subtractor = subtractor,
            .is_64bit = is_64bit,
            .addend = addend,
        },
    };
}

fn parseBranch(self: TextBlock, rel: macho.relocation_info, out: *Relocation, context: RelocContext) void {
    _ = self;
    assert(rel.r_pcrel == 1);
    assert(rel.r_length == 2);
    out.payload = .{
        .branch = .{
            .arch = context.macho_file.base.options.target.cpu.arch,
        },
    };
}

fn parsePage(self: TextBlock, rel: macho.relocation_info, out: *Relocation, addend: u32) void {
    _ = self;
    assert(rel.r_pcrel == 1);
    assert(rel.r_length == 2);
    out.payload = .{
        .page = .{
            .kind = switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) {
                .ARM64_RELOC_PAGE21 => .page,
                .ARM64_RELOC_GOT_LOAD_PAGE21 => .got,
                .ARM64_RELOC_TLVP_LOAD_PAGE21 => .tlvp,
                else => unreachable,
            },
            .addend = addend,
        },
    };
}

fn parsePageOff(self: TextBlock, rel: macho.relocation_info, out: *Relocation, addend: u32) void {
    assert(rel.r_pcrel == 0);
    assert(rel.r_length == 2);

    const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type);
    const op_kind: ?Relocation.PageOff.OpKind = blk: {
        if (rel_type != .ARM64_RELOC_PAGEOFF12) break :blk null;
        const op_kind: Relocation.PageOff.OpKind = if (isArithmeticOp(self.code.items[out.offset..][0..4]))
            .arithmetic
        else
            .load;
        break :blk op_kind;
    };

    out.payload = .{
        .page_off = .{
            .kind = switch (rel_type) {
                .ARM64_RELOC_PAGEOFF12 => .page,
                .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => .got,
                .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => .tlvp,
                else => unreachable,
            },
            .addend = addend,
            .op_kind = op_kind,
        },
    };
}

fn parsePointerToGot(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void {
    _ = self;
    assert(rel.r_pcrel == 1);
    assert(rel.r_length == 2);
    out.payload = .{
        .pointer_to_got = .{},
    };
}

fn parseSigned(self: TextBlock, rel: macho.relocation_info, out: *Relocation, context: RelocContext) void {
    assert(rel.r_pcrel == 1);
    assert(rel.r_length == 2);

    const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type);
    const correction: i4 = switch (rel_type) {
        .X86_64_RELOC_SIGNED => 0,
        .X86_64_RELOC_SIGNED_1 => 1,
        .X86_64_RELOC_SIGNED_2 => 2,
        .X86_64_RELOC_SIGNED_4 => 4,
        else => unreachable,
    };
    var addend: i64 = mem.readIntLittle(i32, self.code.items[out.offset..][0..4]) + correction;

    if (rel.r_extern == 0) {
        const source_sym = context.macho_file.locals.items[self.local_sym_index];
        const target_sym = switch (out.where) {
            .local => context.macho_file.locals.items[out.where_index],
            .undef => context.macho_file.undefs.items[out.where_index],
        };
        addend = @intCast(i64, source_sym.n_value + out.offset + 4) + addend - @intCast(i64, target_sym.n_value);
    }

    out.payload = .{
        .signed = .{
            .correction = correction,
            .addend = addend,
        },
    };
}

fn parseLoad(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void {
    assert(rel.r_pcrel == 1);
    assert(rel.r_length == 2);

    const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type);
    const addend: i32 = if (rel_type == .X86_64_RELOC_GOT)
        mem.readIntLittle(i32, self.code.items[out.offset..][0..4])
    else
        0;

    out.payload = .{
        .load = .{
            .kind = switch (rel_type) {
                .X86_64_RELOC_GOT_LOAD, .X86_64_RELOC_GOT => .got,
                .X86_64_RELOC_TLV => .tlvp,
                else => unreachable,
            },
            .addend = addend,
        },
    };
}
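// Note on parseSigned above: the X86_64_RELOC_SIGNED_{1,2,4} variants exist
// because the relocated 32-bit field may sit 1, 2, or 4 bytes before the end
// of the instruction (e.g. when an immediate operand follows it); the
// correction shifts the RIP-relative displacement accordingly. resolveRelocs
// below is the second phase: parseRelocs only records *what* to patch, and
// once every atom has been assigned an address, this pass computes the
// concrete source/target addresses and rewrites the code bytes in place.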
pub fn resolveRelocs(self: *TextBlock, macho_file: *MachO) !void {
    for (self.relocs.items) |rel| {
        log.debug("relocating {}", .{rel});

        const source_addr = blk: {
            const sym = macho_file.locals.items[self.local_sym_index];
            break :blk sym.n_value + rel.offset;
        };
        const target_addr = blk: {
            const is_via_got = switch (rel.payload) {
                .pointer_to_got => true,
                .page => |page| page.kind == .got,
                .page_off => |page_off| page_off.kind == .got,
                .load => |load| load.kind == .got,
                else => false,
            };

            if (is_via_got) {
                const atom = macho_file.got_entries_map.get(.{
                    .where = switch (rel.where) {
                        .local => .local,
                        .undef => .undef,
                    },
                    .where_index = rel.where_index,
                }) orelse {
                    const sym = switch (rel.where) {
                        .local => macho_file.locals.items[rel.where_index],
                        .undef => macho_file.undefs.items[rel.where_index],
                    };
                    log.err("expected GOT entry for symbol '{s}'", .{macho_file.getString(sym.n_strx)});
                    log.err(" this is an internal linker error", .{});
                    return error.FailedToResolveRelocationTarget;
                };
                break :blk macho_file.locals.items[atom.local_sym_index].n_value;
            }

            switch (rel.where) {
                .local => {
                    const sym = macho_file.locals.items[rel.where_index];
                    const is_tlv = is_tlv: {
                        const source_sym = macho_file.locals.items[self.local_sym_index];
                        const match = macho_file.section_ordinals.keys()[source_sym.n_sect - 1];
                        const seg = macho_file.load_commands.items[match.seg].Segment;
                        const sect = seg.sections.items[match.sect];
                        break :is_tlv commands.sectionType(sect) == macho.S_THREAD_LOCAL_VARIABLES;
                    };
                    if (is_tlv) {
                        // For TLV relocations, the value written is the displacement from the
                        // TLV initializer (either a value in __thread_data, or zero-init in
                        // __thread_bss) to the first defined TLV template init section, chosen
                        // in the following order:
                        // * with respect to __thread_data, if defined; otherwise
                        // * with respect to __thread_bss
                        const seg = macho_file.load_commands.items[macho_file.data_segment_cmd_index.?].Segment;
                        const base_address = inner: {
                            if (macho_file.tlv_data_section_index) |i| {
                                break :inner seg.sections.items[i].addr;
                            } else if (macho_file.tlv_bss_section_index) |i| {
                                break :inner seg.sections.items[i].addr;
                            } else {
                                log.err("threadlocal variables present but no initializer sections found", .{});
                                log.err(" __thread_data not found", .{});
                                log.err(" __thread_bss not found", .{});
                                return error.FailedToResolveRelocationTarget;
                            }
                        };
                        break :blk sym.n_value - base_address;
                    }

                    break :blk sym.n_value;
                },
                .undef => {
                    const atom = macho_file.stubs_map.get(rel.where_index) orelse {
                        // TODO this is required for incremental linking when we don't have every
                        // symbol resolved at the time we create relocations. In that case, we may
                        // insert a branch reloc to an undef symbol which happens to be defined
                        // within the binary. The undef we point at will then be a null (free)
                        // symbol which we should remove/repurpose. To circumvent this (for now),
                        // we check if the symbol we point to is garbage, and if so we fall back
                        // to the symbol resolver to find the target by name.
                        const n_strx = macho_file.undefs.items[rel.where_index].n_strx;
                        if (macho_file.symbol_resolver.get(n_strx)) |resolv| inner: {
                            if (resolv.where != .global) break :inner;
                            break :blk macho_file.globals.items[resolv.where_index].n_value;
                        }
                        // TODO verify in TextBlock that the symbol is indeed dynamically bound.
                        break :blk 0; // Dynamically bound by dyld.
                    };
                    break :blk macho_file.locals.items[atom.local_sym_index].n_value;
                },
            }
        };

        log.debug(" | source_addr = 0x{x}", .{source_addr});
        log.debug(" | target_addr = 0x{x}", .{target_addr});

        try rel.resolve(.{
            .block = self,
            .offset = rel.offset,
            .source_addr = source_addr,
            .target_addr = target_addr,
            .macho_file = macho_file,
        });
    }
}

pub fn format(self: TextBlock, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
    _ = fmt;
    _ = options;
    try std.fmt.format(writer, "TextBlock {{ ", .{});
    try std.fmt.format(writer, ".local_sym_index = {d}, ", .{self.local_sym_index});
    try std.fmt.format(writer, ".aliases = {any}, ", .{self.aliases.items});
    try std.fmt.format(writer, ".contained = {any}, ", .{self.contained.items});
    try std.fmt.format(writer, ".code = {*}, ", .{self.code.items});
    try std.fmt.format(writer, ".size = {d}, ", .{self.size});
    try std.fmt.format(writer, ".alignment = {d}, ", .{self.alignment});
    try std.fmt.format(writer, ".relocs = {any}, ", .{self.relocs.items});
    try std.fmt.format(writer, ".rebases = {any}, ", .{self.rebases.items});
    try std.fmt.format(writer, ".bindings = {any}, ", .{self.bindings.items});
    try std.fmt.format(writer, ".dices = {any}, ", .{self.dices.items});
    if (self.stab) |stab| {
        try std.fmt.format(writer, ".stab = {any}, ", .{stab});
    }
    try std.fmt.format(writer, "}}", .{});
}

const RelocIterator = struct {
    buffer: []const macho.relocation_info,
    index: i32 = -1,

    pub fn next(self: *RelocIterator) ?macho.relocation_info {
        self.index += 1;
        if (self.index < self.buffer.len) {
            return self.buffer[@intCast(u32, self.index)];
        }
        return null;
    }

    pub fn peek(self: RelocIterator) macho.relocation_info {
        assert(self.index + 1 < self.buffer.len);
        return self.buffer[@intCast(u32, self.index + 1)];
    }
};

/// Returns the subslice of `relocs` whose r_address falls within
/// [start_addr, end_addr). The two findFirst probes rely on relocation
/// records being sorted by descending r_address, as emitted by compilers.
fn filterRelocs(relocs: []macho.relocation_info, start_addr: u64, end_addr: u64) []macho.relocation_info {
    const Predicate = struct {
        addr: u64,

        pub fn predicate(self: @This(), rel: macho.relocation_info) bool {
            return rel.r_address < self.addr;
        }
    };

    const start = MachO.findFirst(macho.relocation_info, relocs, 0, Predicate{ .addr = end_addr });
    const end = MachO.findFirst(macho.relocation_info, relocs, start, Predicate{ .addr = start_addr });

    return relocs[start..end];
}

/// Returns true if the instruction belongs to the AArch64 "data processing
/// (immediate)" group, i.e. bits 26-28 of the encoding are 0b100. This is what
/// distinguishes an ADD/SUB immediate from a load/store when fixing up
/// PAGEOFF12 relocations.
inline fn isArithmeticOp(inst: *const [4]u8) bool {
    const group_decode = @truncate(u5, inst[3]);
    return ((group_decode >> 2) == 4);
}
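// A self-contained check of the group decode above, using two hand-assembled
// instructions (assumed encodings, cross-checked against the ARMv8 encoding
// tables): `add x0, x1, #1` (0x91000420) is data-processing (immediate),
// while `ldr x0, [x1]` (0xf9400020) belongs to the load/store group.
test "isArithmeticOp distinguishes add from ldr" {
    var add_inst: [4]u8 = undefined;
    var ldr_inst: [4]u8 = undefined;
    mem.writeIntLittle(u32, &add_inst, 0x91000420);
    mem.writeIntLittle(u32, &ldr_inst, 0xf9400020);
    try std.testing.expect(isArithmeticOp(&add_inst));
    try std.testing.expect(!isArithmeticOp(&ldr_inst));
}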