From 407745a5e91685d52189548620d112a4b34c8127 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 17 Jul 2021 00:00:18 +0200 Subject: [PATCH] zld: simplify and move Relocations into TextBlock It makes sense to have them as a dependent type since they only ever deal with TextBlocks. Simplify Relocations to rely on symbol indices and symbol resolver rather than pointers. --- CMakeLists.txt | 1 - src/link/MachO/Object.zig | 117 +---- src/link/MachO/TextBlock.zig | 905 ++++++++++++++++++++++++++++++++++- src/link/MachO/Zld.zig | 50 +- src/link/MachO/reloc.zig | 840 -------------------------------- 5 files changed, 937 insertions(+), 976 deletions(-) delete mode 100644 src/link/MachO/reloc.zig diff --git a/CMakeLists.txt b/CMakeLists.txt index ea3cd5107e..83352beea8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -586,7 +586,6 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/link/MachO/Zld.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/bind.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/commands.zig" - "${CMAKE_SOURCE_DIR}/src/link/MachO/reloc.zig" "${CMAKE_SOURCE_DIR}/src/link/Wasm.zig" "${CMAKE_SOURCE_DIR}/src/link/tapi.zig" "${CMAKE_SOURCE_DIR}/src/link/tapi/parse.zig" diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 1c074a97c7..031d71bd9d 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -290,19 +290,6 @@ pub fn readLoadCommands(self: *Object, reader: anytype) !void { } } -fn findFirst(comptime T: type, haystack: []T, start: usize, predicate: anytype) usize { - if (!@hasDecl(@TypeOf(predicate), "predicate")) - @compileError("Predicate is required to define fn predicate(@This(), T) bool"); - - if (start == haystack.len) return start; - - var i = start; - while (i < haystack.len) : (i += 1) { - if (predicate.predicate(haystack[i])) break; - } - return i; -} - const NlistWithIndex = struct { nlist: macho.nlist_64, index: u32, @@ -315,44 +302,29 @@ const NlistWithIndex = struct { const Predicate = struct { addr: u64, - fn predicate(self: @This(), symbol: NlistWithIndex) bool { + pub fn predicate(self: @This(), symbol: NlistWithIndex) bool { return symbol.nlist.n_value >= self.addr; } }; - const start = findFirst(NlistWithIndex, symbols, 0, Predicate{ .addr = sect.addr }); - const end = findFirst(NlistWithIndex, symbols, start, Predicate{ .addr = sect.addr + sect.size }); + const start = Zld.findFirst(NlistWithIndex, symbols, 0, Predicate{ .addr = sect.addr }); + const end = Zld.findFirst(NlistWithIndex, symbols, start, Predicate{ .addr = sect.addr + sect.size }); return symbols[start..end]; } }; -fn filterRelocs(relocs: []macho.relocation_info, start_addr: u64, end_addr: u64) []macho.relocation_info { - const Predicate = struct { - addr: u64, - - fn predicate(self: @This(), rel: macho.relocation_info) bool { - return rel.r_address < self.addr; - } - }; - - const start = findFirst(macho.relocation_info, relocs, 0, Predicate{ .addr = end_addr }); - const end = findFirst(macho.relocation_info, relocs, start, Predicate{ .addr = start_addr }); - - return relocs[start..end]; -} - fn filterDice(dices: []macho.data_in_code_entry, start_addr: u64, end_addr: u64) []macho.data_in_code_entry { const Predicate = struct { addr: u64, - fn predicate(self: @This(), dice: macho.data_in_code_entry) bool { + pub fn predicate(self: @This(), dice: macho.data_in_code_entry) bool { return dice.offset >= self.addr; } }; - const start = findFirst(macho.data_in_code_entry, dices, 0, Predicate{ .addr = start_addr }); - const end = findFirst(macho.data_in_code_entry, dices, start, Predicate{ .addr = end_addr }); + const start = Zld.findFirst(macho.data_in_code_entry, dices, 0, Predicate{ .addr = start_addr }); + const end = Zld.findFirst(macho.data_in_code_entry, dices, start, Predicate{ .addr = end_addr }); return dices[start..end]; } @@ -483,10 +455,10 @@ const TextBlockParser = struct { } } - const relocs = filterRelocs(self.relocs, start_addr, end_addr); - if (relocs.len > 0) { - try self.object.parseRelocs(self.zld, relocs, block, start_addr); - } + try block.parseRelocsFromObject(relocs, object, .{ + .base_addr = start_addr, + .zld = self.zld, + }); if (self.zld.has_dices) { const dices = filterDice( @@ -745,8 +717,6 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { .n_desc = 0, .n_value = sect.addr, }); - const block_local = &zld.locals.items[block_local_sym_index]; - block_local.n_sect = zld.sectionId(match); const block = try self.allocator.create(TextBlock); errdefer self.allocator.destroy(block); @@ -757,69 +727,10 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !void { block.size = sect.size; block.alignment = sect.@"align"; - try block.relocs.ensureTotalCapacity(relocs.len); - for (relocs) |rel| { - const out_rel: TextBlock.Relocation = outer: { - if (rel.r_extern == 0) { - const rel_sect_id = @intCast(u16, rel.r_symbolnum - 1); - const sect_sym_index = self.sections_as_symbols.get(rel_sect_id) orelse blk: { - const sect_sym_index = @intCast(u32, zld.locals.items.len); - const sect_sym_name = try std.fmt.allocPrint(self.allocator, "l_{s}_{s}_{s}", .{ - self.name.?, - segmentName(sect), - sectionName(sect), - }); - defer self.allocator.free(sect_sym_name); - try zld.locals.append(zld.allocator, .{ - .n_strx = try zld.makeString(sect_sym_name), - .n_type = macho.N_SECT, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - try self.sections_as_symbols.putNoClobber(self.allocator, rel_sect_id, sect_sym_index); - break :blk sect_sym_index; - }; - break :outer .{ - .inner = rel, - .where = .local, - .where_index = sect_sym_index, - }; - } - - const rel_sym = self.symtab.items[rel.r_symbolnum]; - const rel_sym_name = self.getString(rel_sym.n_strx); - - if (Zld.symbolIsSect(rel_sym) and !Zld.symbolIsExt(rel_sym)) { - const where_index = self.symbol_mapping.get(rel.r_symbolnum) orelse unreachable; - break :outer .{ - .inner = rel, - .where = .local, - .where_index = where_index, - }; - } - - const resolv = zld.symbol_resolver.get(rel_sym_name) orelse unreachable; - switch (resolv.where) { - .global => { - break :outer .{ - .inner = rel, - .where = .local, - .where_index = resolv.local_sym_index, - }; - }, - .import => { - break :outer .{ - .inner = rel, - .where = .import, - .where_index = resolv.where_index, - }; - }, - else => unreachable, - } - }; - block.relocs.appendAssumeCapacity(out_rel); - } + try block.parseRelocsFromObject(relocs, self, .{ + .base_addr = 0, + .zld = zld, + }); if (zld.has_dices) { const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + sect.size); diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig index 04cb33855c..20283dfc9d 100644 --- a/src/link/MachO/TextBlock.zig +++ b/src/link/MachO/TextBlock.zig @@ -1,6 +1,7 @@ const TextBlock = @This(); const std = @import("std"); +const assert = std.debug.assert; const commands = @import("commands.zig"); const log = std.log.scoped(.text_block); const macho = std.macho; @@ -8,6 +9,7 @@ const mem = std.mem; const Allocator = mem.Allocator; const Arch = std.Target.Cpu.Arch; +const Object = @import("Object.zig"); const Zld = @import("Zld.zig"); allocator: *Allocator, @@ -102,12 +104,396 @@ pub const Stab = union(enum) { }; pub const Relocation = struct { - inner: macho.relocation_info, + /// Offset within the `block`s code buffer. + /// Note relocation size can be inferred by relocation's kind. + offset: u32, + where: enum { local, import, }, + where_index: u32, + + payload: union(enum) { + unsigned: Unsigned, + branch: Branch, + page: Page, + page_off: PageOff, + pointer_to_got: PointerToGot, + signed: Signed, + load: Load, + }, + + const ResolveArgs = struct { + block: *TextBlock, + offset: u32, + source_addr: u64, + target_addr: u64, + zld: *Zld, + }; + + pub const Unsigned = struct { + subtractor: ?u32, + + /// Addend embedded directly in the relocation slot + addend: i64, + + /// Extracted from r_length: + /// => 3 implies true + /// => 2 implies false + /// => * is unreachable + is_64bit: bool, + + pub fn resolve(self: Unsigned, args: ResolveArgs) !void { + const result = if (self.subtractor) |subtractor| + @intCast(i64, args.target_addr) - @intCast(i64, subtractor.payload.regular.address) + self.addend + else + @intCast(i64, args.target_addr) + self.addend; + + if (self.is_64bit) { + mem.writeIntLittle(u64, args.block.code[args.offset..][0..8], @bitCast(u64, result)); + } else { + mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @truncate(u32, @bitCast(u64, result))); + } + } + + pub fn format(self: Unsigned, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "Unsigned {{ ", .{}); + if (self.subtractor) |sub| { + try std.fmt.format(writer, ".subtractor = {}, ", .{sub}); + } + try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); + const length: usize = if (self.is_64bit) 8 else 4; + try std.fmt.format(writer, ".length = {}, ", .{length}); + try std.fmt.format(writer, "}}", .{}); + } + }; + + pub const Branch = struct { + arch: Arch, + + pub fn resolve(self: Branch, args: ResolveArgs) !void { + switch (self.arch) { + .aarch64 => { + const displacement = try math.cast( + i28, + @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr), + ); + const code = args.block.code[args.offset..][0..4]; + var inst = aarch64.Instruction{ + .unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.unconditional_branch_immediate, + ), code), + }; + inst.unconditional_branch_immediate.imm26 = @truncate(u26, @bitCast(u28, displacement >> 2)); + mem.writeIntLittle(u32, code, inst.toU32()); + }, + .x86_64 => { + const displacement = try math.cast( + i32, + @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4, + ); + mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @bitCast(u32, displacement)); + }, + else => return error.UnsupportedCpuArchitecture, + } + } + + pub fn format(self: Branch, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = self; + _ = fmt; + _ = options; + try std.fmt.format(writer, "Branch {{}}", .{}); + } + }; + + pub const Page = struct { + kind: enum { + page, + got, + tlvp, + }, + addend: u32 = 0, + + pub fn resolve(self: Page, args: ResolveArgs) !void { + const target_addr = args.target_addr + self.addend; + const source_page = @intCast(i32, args.source_addr >> 12); + const target_page = @intCast(i32, target_addr >> 12); + const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); + + const code = args.block.code[args.offset..][0..4]; + var inst = aarch64.Instruction{ + .pc_relative_address = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.pc_relative_address, + ), code), + }; + inst.pc_relative_address.immhi = @truncate(u19, pages >> 2); + inst.pc_relative_address.immlo = @truncate(u2, pages); + + mem.writeIntLittle(u32, code, inst.toU32()); + } + + pub fn format(self: Page, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "Page {{ ", .{}); + switch (self.kind) { + .page => {}, + .got => { + try std.fmt.format(writer, ".got, ", .{}); + }, + .tlvp => { + try std.fmt.format(writer, ".tlvp", .{}); + }, + } + try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); + try std.fmt.format(writer, "}}", .{}); + } + }; + + pub const PageOff = struct { + kind: enum { + page, + got, + tlvp, + }, + addend: u32 = 0, + op_kind: ?OpKind = null, + + pub const OpKind = enum { + arithmetic, + load, + }; + + pub fn resolve(self: PageOff, args: ResolveArgs) !void { + const code = args.block.code[args.offset..][0..4]; + + switch (self.kind) { + .page => { + const target_addr = args.target_addr + self.addend; + const narrowed = @truncate(u12, target_addr); + + const op_kind = self.op_kind orelse unreachable; + var inst: aarch64.Instruction = blk: { + switch (op_kind) { + .arithmetic => { + break :blk .{ + .add_subtract_immediate = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.add_subtract_immediate, + ), code), + }; + }, + .load => { + break :blk .{ + .load_store_register = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code), + }; + }, + } + }; + + if (op_kind == .arithmetic) { + inst.add_subtract_immediate.imm12 = narrowed; + } else { + const offset: u12 = blk: { + if (inst.load_store_register.size == 0) { + if (inst.load_store_register.v == 1) { + // 128-bit SIMD is scaled by 16. + break :blk try math.divExact(u12, narrowed, 16); + } + // Otherwise, 8-bit SIMD or ldrb. + break :blk narrowed; + } else { + const denom: u4 = try math.powi(u4, 2, inst.load_store_register.size); + break :blk try math.divExact(u12, narrowed, denom); + } + }; + inst.load_store_register.offset = offset; + } + + mem.writeIntLittle(u32, code, inst.toU32()); + }, + .got => { + const narrowed = @truncate(u12, args.target_addr); + var inst: aarch64.Instruction = .{ + .load_store_register = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code), + }; + const offset = try math.divExact(u12, narrowed, 8); + inst.load_store_register.offset = offset; + mem.writeIntLittle(u32, code, inst.toU32()); + }, + .tlvp => { + const RegInfo = struct { + rd: u5, + rn: u5, + size: u1, + }; + const reg_info: RegInfo = blk: { + if (isArithmeticOp(code)) { + const inst = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.add_subtract_immediate, + ), code); + break :blk .{ + .rd = inst.rd, + .rn = inst.rn, + .size = inst.sf, + }; + } else { + const inst = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code); + break :blk .{ + .rd = inst.rt, + .rn = inst.rn, + .size = @truncate(u1, inst.size), + }; + } + }; + const narrowed = @truncate(u12, args.target_addr); + var inst = aarch64.Instruction{ + .add_subtract_immediate = .{ + .rd = reg_info.rd, + .rn = reg_info.rn, + .imm12 = narrowed, + .sh = 0, + .s = 0, + .op = 0, + .sf = reg_info.size, + }, + }; + mem.writeIntLittle(u32, code, inst.toU32()); + }, + } + } + + pub fn format(self: PageOff, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "PageOff {{ ", .{}); + switch (self.kind) { + .page => {}, + .got => { + try std.fmt.format(writer, ".got, ", .{}); + }, + .tlvp => { + try std.fmt.format(writer, ".tlvp, ", .{}); + }, + } + try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); + try std.fmt.format(writer, ".op_kind = {s}, ", .{self.op_kind}); + try std.fmt.format(writer, "}}", .{}); + } + }; + + pub const PointerToGot = struct { + pub fn resolve(_: PointerToGot, args: ResolveArgs) !void { + const result = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr)); + mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @bitCast(u32, result)); + } + + pub fn format(self: PointerToGot, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = self; + _ = fmt; + _ = options; + try std.fmt.format(writer, "PointerToGot {{}}", .{}); + } + }; + + pub const Signed = struct { + addend: i64, + correction: i4, + + pub fn resolve(self: Signed, args: ResolveArgs) !void { + const target_addr = @intCast(i64, args.target_addr) + self.addend; + const displacement = try math.cast( + i32, + target_addr - @intCast(i64, args.source_addr) - self.correction - 4, + ); + mem.writeIntLittle(u32, block.code[offset..][0..4], @bitCast(u32, displacement)); + } + + pub fn format(self: Signed, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "Signed {{ ", .{}); + try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); + try std.fmt.format(writer, ".correction = {}, ", .{self.correction}); + try std.fmt.format(writer, "}}", .{}); + } + }; + + pub const Load = struct { + kind: enum { + got, + tlvp, + }, + addend: i32 = 0, + + pub fn resolve(self: Load, block: *TextBlock, offset: u32, args: ResolveArgs) !void { + if (self.kind == .tlvp) { + // We need to rewrite the opcode from movq to leaq. + block.code[offset - 2] = 0x8d; + } + const displacement = try math.cast( + i32, + @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4 + self.addend, + ); + mem.writeIntLittle(u32, block.code[offset..][0..4], @bitCast(u32, displacement)); + } + + pub fn format(self: Load, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "Load {{ ", .{}); + try std.fmt.format(writer, "{s}, ", .{self.kind}); + try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); + try std.fmt.format(writer, "}}", .{}); + } + }; + + pub fn resolve(self: Relocation, block: *TextBlock, args: ResolveArgs) !void { + switch (self.payload) { + .unsigned => |unsigned| try unsigned.resolve(block, self.offset, args), + .branch => |branch| try branch.resolve(block, self.offset, args), + .page => |page| try page.resolve(block, self.offset, args), + .page_off => |page_off| try page_off.resolve(block, self.offset, args), + .pointer_to_got => |pointer_to_got| try pointer_to_got.resolve(block, self.offset, args), + .signed => |signed| try signed.resolve(block, self.offset, args), + .load => |load| try load.resolve(block, self.offset, args), + } + } + + pub fn format(self: Relocation, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + try std.fmt.format(writer, "Relocation {{ ", .{}); + try std.fmt.format(writer, ".offset = {}, ", .{self.offset}); + try std.fmt.format(writer, ".where = {}, ", .{self.where}); + try std.fmt.format(writer, ".where_index = {d}, ", .{self.where_index}); + + switch (self.payload) { + .unsigned => |unsigned| try unsigned.format(fmt, options, writer), + .branch => |branch| try branch.format(fmt, options, writer), + .page => |page| try page.format(fmt, options, writer), + .page_off => |page_off| try page_off.format(fmt, options, writer), + .pointer_to_got => |pointer_to_got| try pointer_to_got.format(fmt, options, writer), + .signed => |signed| try signed.format(fmt, options, writer), + .load => |load| try load.format(fmt, options, writer), + } + + try std.fmt.format(writer, "}}", .{}); + } }; pub fn init(allocator: *Allocator) TextBlock { @@ -139,6 +525,462 @@ pub fn deinit(self: *TextBlock) void { self.dices.deinit(); } +const RelocContext = struct { + base_addr: u64 = 0, + zld: *Zld, +}; + +fn initRelocFromObject(rel: macho.relocation_info, object: *Object, ctx: RelocContext) !Relocation { + var parsed_rel = Relocation{ + .offset = @intCast(u32, @intCast(u64, rel.r_address) - ctx.base_addr), + .where = undefined, + .where_index = undefined, + .payload = undefined, + }; + + if (rel.r_extern == 0) { + const sect_id = @intCast(u16, rel.r_symbolnum - 1); + + const local_sym_index = object.sections_as_symbols.get(sect_id) orelse blk: { + const seg = object.load_commands.items[object.segment_cmd_index.?].Segment; + const sect = seg.sections.items[sect_id]; + const local_sym_index = @intCast(u32, ctx.zld.locals.items.len); + const sym_name = try std.fmt.allocPrint(ctx.zld.allocator, "l_{s}_{s}_{s}", .{ + object.name.?, + commands.segmentName(sect), + commands.sectionName(sect), + }); + defer ctx.zld.allocator.free(sym_name); + + try ctx.zld.locals.append(ctx.zld.allocator, .{ + .n_strx = try ctx.zld.makeString(sym_name), + .n_type = macho.N_SECT, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + try object.sections_as_symbols.putNoClobber(object.allocator, sect_id, local_sym_index); + break :blk local_sym_index; + }; + + parsed_rel.where = .local; + parsed_rel.where_index = local_sym_index; + } else { + const sym = object.symtab.items[rel.r_symbolnum]; + const sym_name = object.getString(sym.n_strx); + + if (Zld.symbolIsSect(sym) and !Zld.symbolIsExt(sym)) { + const where_index = object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable; + parsed_rel.where = .local; + parsed_rel.where_index = where_index; + } else { + const resolv = ctx.zld.symbol_resolver.get(sym_name) orelse unreachable; + switch (resolv.where) { + .global => { + parsed_rel.where = .local; + parsed_rel.where_index = resolv.local_sym_index; + }, + .import => { + parsed_rel.where = .import; + parsed_rel.where_index = resolv.where_index; + }, + else => unreachable, + } + } + } + + return parsed_rel; +} + +pub fn parseRelocsFromObject( + self: *TextBlock, + relocs: []macho.relocation_info, + object: *Object, + ctx: RelocContext, +) !void { + const filtered_relocs = filterRelocs(relocs, ctx.base_addr, ctx.base_addr + self.size); + var it = RelocIterator{ + .buffer = filtered_relocs, + }; + + var addend: u32 = 0; + var subtractor: ?u32 = null; + + while (it.next()) |rel| { + if (isAddend(rel, object.arch.?)) { + // Addend is not a relocation with effect on the TextBlock, so + // parse it and carry on. + assert(addend == 0); // Oh no, addend was not reset! + addend = rel.r_symbolnum; + + // Verify ADDEND is followed by a load. + const next = @intToEnum(macho.reloc_type_arm64, it.peek().r_type); + switch (next) { + .ARM64_RELOC_PAGE21, .ARM64_RELOC_PAGEOFF12 => {}, + else => { + log.err("unexpected relocation type: expected PAGE21 or PAGEOFF12, found {s}", .{next}); + return error.UnexpectedRelocationType; + }, + } + continue; + } + + if (isSubtractor(rel, object.arch.?)) { + // Subtractor is not a relocation with effect on the TextBlock, so + // parse it and carry on. + assert(subtractor == null); // Oh no, subtractor was not reset! + assert(rel.r_extern == 1); + const sym = object.symtab.items[rel.r_symbolnum]; + const sym_name = object.getString(sym.n_strx); + + if (Zld.symbolIsSect(sym) and !Zld.symbolIsExt(sym)) { + const where_index = object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable; + subtractor = where_index; + } else { + const resolv = ctx.zld.symbol_resolver.get(sym_name) orelse unreachable; + assert(resolv.where == .global); + subtractor = resolv.local_sym_index; + } + + // Verify SUBTRACTOR is followed by UNSIGNED. + switch (object.arch.?) { + .aarch64 => { + const next = @intToEnum(macho.reloc_type_arm64, it.peek().r_type); + if (next != .ARM64_RELOC_UNSIGNED) { + log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); + return error.UnexpectedRelocationType; + } + }, + .x86_64 => { + const next = @intToEnum(macho.reloc_type_x86_64, it.peek().r_type); + if (next != .X86_64_RELOC_UNSIGNED) { + log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); + return error.UnexpectedRelocationType; + } + }, + else => unreachable, + } + continue; + } + + var parsed_rel = try initRelocFromObject(rel, object, ctx); + + switch (object.arch.?) { + .aarch64 => { + const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); + switch (rel_type) { + .ARM64_RELOC_ADDEND => unreachable, + .ARM64_RELOC_SUBTRACTOR => unreachable, + .ARM64_RELOC_BRANCH26 => { + self.parseBranch(rel, &parsed_rel, ctx); + }, + .ARM64_RELOC_UNSIGNED => { + self.parseUnsigned(rel, &parsed_rel, subtractor, ctx); + subtractor = null; + }, + .ARM64_RELOC_PAGE21, + .ARM64_RELOC_GOT_LOAD_PAGE21, + .ARM64_RELOC_TLVP_LOAD_PAGE21, + => { + self.parsePage(rel, &parsed_rel, addend); + if (rel_type == .ARM64_RELOC_PAGE21) + addend = 0; + }, + .ARM64_RELOC_PAGEOFF12, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12, + .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, + => { + self.parsePageOff(rel, &parsed_rel, addend, ctx); + if (rel_type == .ARM64_RELOC_PAGEOFF12) + addend = 0; + }, + .ARM64_RELOC_POINTER_TO_GOT => { + self.parsePointerToGot(rel, &parsed_rel); + }, + } + }, + .x86_64 => { + switch (@intToEnum(macho.reloc_type_x86_64, rel.r_type)) { + .X86_64_RELOC_SUBTRACTOR => unreachable, + .X86_64_RELOC_BRANCH => { + self.parseBranch(rel, &parsed_rel, ctx); + }, + .X86_64_RELOC_UNSIGNED => { + self.parseUnsigned(rel, &parsed_rel, subtractor, ctx); + subtractor = null; + }, + .X86_64_RELOC_SIGNED, + .X86_64_RELOC_SIGNED_1, + .X86_64_RELOC_SIGNED_2, + .X86_64_RELOC_SIGNED_4, + => { + self.parseSigned(rel, &parsed_rel, ctx); + }, + .X86_64_RELOC_GOT_LOAD, + .X86_64_RELOC_GOT, + .X86_64_RELOC_TLV, + => { + self.parseLoad(rel, &parsed_rel); + }, + } + }, + else => unreachable, + } + + try self.relocs.append(parsed_rel); + + if (parsed_rel.where == .local) { + try self.references.put(parsed_rel.where_index, {}); + } + + const is_via_got = switch (parsed_rel.payload) { + .pointer_to_got => true, + .load => |load| load.kind == .got, + .page => |page| page.kind == .got, + .page_off => |page_off| page_off.kind == .got, + else => false, + }; + + if (is_via_got) blk: { + const key = Zld.GotIndirectionKey{ + .where = switch (parsed_rel.where) { + .local => .local, + .import => .import, + }, + .where_index = parsed_rel.where_index, + }; + if (ctx.zld.got_entries.contains(key)) break :blk; + + try ctx.zld.got_entries.putNoClobber(ctx.zld.allocator, key, {}); + } else if (parsed_rel.payload == .unsigned) { + switch (parsed_rel.where) { + .import => { + log.warn("WAT {s}", .{ctx.zld.getString(ctx.zld.imports.items[parsed_rel.where_index].n_strx)}); + try self.bindings.append(.{ + .local_sym_index = parsed_rel.where_index, + .offset = parsed_rel.offset, + }); + }, + .local => { + const source_sym = ctx.zld.locals.items[self.local_sym_index]; + const match = ctx.zld.unpackSectionId(source_sym.n_sect); + const seg = ctx.zld.load_commands.items[match.seg].Segment; + const sect = seg.sections.items[match.sect]; + const sect_type = commands.sectionType(sect); + + const should_rebase = rebase: { + if (!parsed_rel.payload.unsigned.is_64bit) break :rebase false; + + // TODO actually, a check similar to what dyld is doing, that is, verifying + // that the segment is writable should be enough here. + const is_right_segment = blk: { + if (ctx.zld.data_segment_cmd_index) |idx| { + if (match.seg == idx) { + break :blk true; + } + } + if (ctx.zld.data_const_segment_cmd_index) |idx| { + if (match.seg == idx) { + break :blk true; + } + } + break :blk false; + }; + + if (!is_right_segment) break :rebase false; + if (sect_type != macho.S_LITERAL_POINTERS and + sect_type != macho.S_REGULAR and + sect_type != macho.S_MOD_INIT_FUNC_POINTERS and + sect_type != macho.S_MOD_TERM_FUNC_POINTERS) + { + break :rebase false; + } + + break :rebase true; + }; + + if (should_rebase) { + try self.rebases.append(parsed_rel.offset); + } + }, + } + } else if (parsed_rel.payload == .branch) blk: { + if (parsed_rel.where != .import) break :blk; + if (ctx.zld.stubs.contains(parsed_rel.where_index)) break :blk; + + try ctx.zld.stubs.putNoClobber(ctx.zld.allocator, parsed_rel.where_index, {}); + } + } +} + +fn isAddend(rel: macho.relocation_info, arch: Arch) bool { + if (arch != .aarch64) return false; + return @intToEnum(macho.reloc_type_arm64, rel.r_type) == .ARM64_RELOC_ADDEND; +} + +fn isSubtractor(rel: macho.relocation_info, arch: Arch) bool { + return switch (arch) { + .aarch64 => @intToEnum(macho.reloc_type_arm64, rel.r_type) == .ARM64_RELOC_SUBTRACTOR, + .x86_64 => @intToEnum(macho.reloc_type_x86_64, rel.r_type) == .X86_64_RELOC_SUBTRACTOR, + else => unreachable, + }; +} + +fn parseUnsigned( + self: TextBlock, + rel: macho.relocation_info, + out: *Relocation, + subtractor: ?u32, + ctx: RelocContext, +) void { + assert(rel.r_pcrel == 0); + + const is_64bit: bool = switch (rel.r_length) { + 3 => true, + 2 => false, + else => unreachable, + }; + + var addend: i64 = if (is_64bit) + mem.readIntLittle(i64, self.code[out.offset..][0..8]) + else + mem.readIntLittle(i32, self.code[out.offset..][0..4]); + + if (rel.r_extern == 0) { + assert(out.where == .local); + const target_sym = ctx.zld.locals.items[out.where_index]; + addend -= @intCast(i64, target_sym.n_value); + } + + out.payload = .{ + .unsigned = .{ + .subtractor = subtractor, + .is_64bit = is_64bit, + .addend = addend, + }, + }; +} + +fn parseBranch(self: TextBlock, rel: macho.relocation_info, out: *Relocation, ctx: RelocContext) void { + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + out.payload = .{ + .branch = .{ + .arch = ctx.zld.target.?.cpu.arch, + }, + }; +} + +fn parsePage(self: TextBlock, rel: macho.relocation_info, out: *Relocation, addend: u32) void { + _ = self; + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + out.payload = .{ + .page = .{ + .kind = switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) { + .ARM64_RELOC_PAGE21 => .page, + .ARM64_RELOC_GOT_LOAD_PAGE21 => .got, + .ARM64_RELOC_TLVP_LOAD_PAGE21 => .tlvp, + else => unreachable, + }, + .addend = addend, + }, + }; +} + +fn parsePageOff(self: TextBlock, rel: macho.relocation_info, out: *Relocation, addend: u32, ctx: RelocContext) void { + assert(rel.r_pcrel == 0); + assert(rel.r_length == 2); + + const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); + const op_kind: ?Relocation.PageOff.OpKind = blk: { + if (rel_type != .ARM64_RELOC_PAGEOFF12) break :blk null; + const op_kind: Relocation.PageOff.OpKind = if (isArithmeticOp(self.code[out.offset..][0..4])) + .arithmetic + else + .load; + break :blk op_kind; + }; + + out.payload = .{ + .page_off = .{ + .kind = switch (rel_type) { + .ARM64_RELOC_PAGEOFF12 => .page, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => .got, + .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => .tlvp, + else => unreachable, + }, + .addend = addend, + .op_kind = op_kind, + }, + }; +} + +fn parsePointerToGot(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void { + _ = self; + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + out.payload = .{ + .pointer_to_got = .{}, + }; +} + +fn parseSigned(self: TextBlock, rel: macho.relocation_info, out: *Relocation, ctx: RelocContext) void { + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); + const correction: i4 = switch (rel_type) { + .X86_64_RELOC_SIGNED => 0, + .X86_64_RELOC_SIGNED_1 => 1, + .X86_64_RELOC_SIGNED_2 => 2, + .X86_64_RELOC_SIGNED_4 => 4, + else => unreachable, + }; + var addend: i64 = mem.readIntLittle(i32, self.code[out.offset..][0..4]) + correction; + + if (rel.r_extern == 0) { + const source_sym = ctx.zld.locals.items[self.local_sym_index]; + const target_sym = switch (out.where) { + .local => ctx.zld.locals.items[out.where_index], + .import => ctx.zld.imports.items[out.where_index], + }; + addend = @intCast(i64, source_sym.n_value + out.offset + 4) + addend - @intCast(i64, target_sym.n_value); + } + + out.payload = .{ + .signed = .{ + .correction = correction, + .addend = addend, + }, + }; +} + +fn parseLoad(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void { + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); + const addend: i32 = if (rel_type == .X86_64_RELOC_GOT) + mem.readIntLittle(i32, self.code[out.offset..][0..4]) + else + 0; + + out.payload = .{ + .load = .{ + .kind = switch (rel_type) { + .X86_64_RELOC_GOT_LOAD, .X86_64_RELOC_GOT => .got, + .X86_64_RELOC_TLV => .tlvp, + else => unreachable, + }, + .addend = addend, + }, + }; +} + pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { for (self.relocs.items) |rel| { log.debug("relocating {}", .{rel}); @@ -148,7 +990,15 @@ pub fn resolveRelocs(self: *TextBlock, zld: *Zld) !void { break :blk sym.n_value + rel.offset; }; const target_addr = blk: { - if (isGotIndirection(rel, zld.target.?.cpu.arch)) { + const is_via_got = switch (rel.payload) { + .pointer_to_got => true, + .page => |page| page.kind == .got, + .page_off => |page_off| page_off.kind == .got, + .load => |load| load.kind == .got, + else => false, + }; + + if (is_via_got) { const dc_seg = zld.load_commands.items[zld.data_const_segment_cmd_index.?].Segment; const got = dc_seg.sections.items[zld.got_section_index.?]; const got_index = rel.target.got_index orelse { @@ -270,21 +1120,40 @@ pub fn print(self: *const TextBlock, zld: *Zld) void { self.print_this(zld); } -fn isGotIndirection(rel: macho.relocation_info, arch: Arch) bool { - return switch (arch) { - .aarch64 => switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) { - .ARM64_RELOC_POINTER_TO_GOT, - .ARM64_RELOC_GOT_LOAD_PAGE21, - .ARM64_RELOC_GOT_LOAD_PAGEOFF12, - => true, - else => false, - }, - .x86_64 => switch (@intToEnum(macho.reloc_type_x86_64, rel.r_type)) { - .X86_64_RELOC_GOT, - .X86_64_RELOC_GOT_LOAD, - => true, - else => false, - }, - else => unreachable, +const RelocIterator = struct { + buffer: []const macho.relocation_info, + index: i32 = -1, + + pub fn next(self: *RelocIterator) ?macho.relocation_info { + self.index += 1; + if (self.index < self.buffer.len) { + return self.buffer[@intCast(u32, self.index)]; + } + return null; + } + + pub fn peek(self: RelocIterator) macho.relocation_info { + assert(self.index + 1 < self.buffer.len); + return self.buffer[@intCast(u32, self.index + 1)]; + } +}; + +fn filterRelocs(relocs: []macho.relocation_info, start_addr: u64, end_addr: u64) []macho.relocation_info { + const Predicate = struct { + addr: u64, + + pub fn predicate(self: @This(), rel: macho.relocation_info) bool { + return rel.r_address < self.addr; + } }; + + const start = Zld.findFirst(macho.relocation_info, relocs, 0, Predicate{ .addr = end_addr }); + const end = Zld.findFirst(macho.relocation_info, relocs, start, Predicate{ .addr = start_addr }); + + return relocs[start..end]; +} + +inline fn isArithmeticOp(inst: *const [4]u8) bool { + const group_decode = @truncate(u5, inst[3]); + return ((group_decode >> 2) == 4); } diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 862e6b5b0c..67cd007ebf 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -108,8 +108,8 @@ symbol_resolver: std.StringArrayHashMapUnmanaged(SymbolWithLoc) = .{}, strtab: std.ArrayListUnmanaged(u8) = .{}, -// stubs: std.ArrayListUnmanaged(*Symbol) = .{}, -got_entries: std.ArrayListUnmanaged(GotEntry) = .{}, +stubs: std.AutoArrayHashMapUnmanaged(u32, void) = .{}, +got_entries: std.AutoArrayHashMapUnmanaged(GotIndirectionKey, void) = .{}, stub_helper_stubs_start_off: ?u64 = null, @@ -131,20 +131,12 @@ const SymbolWithLoc = struct { file: u16 = 0, }; -pub const GotEntry = struct { - /// GOT entry can either be a local pointer or an extern (nonlazy) import. - kind: enum { +pub const GotIndirectionKey = struct { + where: enum { local, import, }, - - /// Id to the macho.nlist_64 from the respective table: either locals or nonlazy imports. - /// TODO I'm more and more inclined to just manage a single, max two symbol tables - /// rather than 4 as we currently do, but I'll follow up in the future PR. - local_sym_index: u32, - - /// Index of this entry in the GOT. - got_index: u32, + where_index: u32, }; pub const Output = struct { @@ -161,7 +153,7 @@ pub fn init(allocator: *Allocator) !Zld { } pub fn deinit(self: *Zld) void { - // self.stubs.deinit(self.allocator); + self.stubs.deinit(self.allocator); self.got_entries.deinit(self.allocator); for (self.load_commands.items) |*lc| { @@ -3043,3 +3035,33 @@ pub fn sectionId(self: Zld, match: MatchingSection) u8 { } return section; } + +pub fn unpackSectionId(self: Zld, section_id: u8) MatchingSection { + var match: MatchingSection = undefined; + var section: u8 = 0; + outer: for (self.load_commands.items) |cmd, cmd_id| { + assert(cmd == .Segment); + for (cmd.Segment.sections.items) |_, sect_id| { + section += 1; + if (section_id == section) { + match.seg = @intCast(u16, cmd_id); + match.sect = @intCast(u16, sect_id); + break :outer; + } + } + } + return match; +} + +pub fn findFirst(comptime T: type, haystack: []T, start: usize, predicate: anytype) usize { + if (!@hasDecl(@TypeOf(predicate), "predicate")) + @compileError("Predicate is required to define fn predicate(@This(), T) bool"); + + if (start == haystack.len) return start; + + var i = start; + while (i < haystack.len) : (i += 1) { + if (predicate.predicate(haystack[i])) break; + } + return i; +} diff --git a/src/link/MachO/reloc.zig b/src/link/MachO/reloc.zig deleted file mode 100644 index 1d0c0466d6..0000000000 --- a/src/link/MachO/reloc.zig +++ /dev/null @@ -1,840 +0,0 @@ -const std = @import("std"); -const aarch64 = @import("../../codegen/aarch64.zig"); -const assert = std.debug.assert; -const commands = @import("commands.zig"); -const log = std.log.scoped(.reloc); -const macho = std.macho; -const math = std.math; -const mem = std.mem; -const meta = std.meta; - -const Allocator = mem.Allocator; -const Arch = std.Target.Cpu.Arch; -const Object = @import("Object.zig"); -const Symbol = @import("Symbol.zig"); -const TextBlock = @import("TextBlock.zig"); -const Zld = @import("Zld.zig"); - -pub const Relocation = struct { - /// Offset within the `block`s code buffer. - /// Note relocation size can be inferred by relocation's kind. - offset: u32, - - /// Target symbol: either a regular or a proxy. - target: *Symbol, - - payload: union(enum) { - unsigned: Unsigned, - branch: Branch, - page: Page, - page_off: PageOff, - pointer_to_got: PointerToGot, - signed: Signed, - load: Load, - }, - - const ResolveArgs = struct { - block: *TextBlock, - offset: u32, - source_addr: u64, - target_addr: u64, - }; - - pub const Unsigned = struct { - subtractor: ?*Symbol = null, - - /// Addend embedded directly in the relocation slot - addend: i64, - - /// Extracted from r_length: - /// => 3 implies true - /// => 2 implies false - /// => * is unreachable - is_64bit: bool, - - pub fn resolve(self: Unsigned, args: ResolveArgs) !void { - const result = if (self.subtractor) |subtractor| - @intCast(i64, args.target_addr) - @intCast(i64, subtractor.payload.regular.address) + self.addend - else - @intCast(i64, args.target_addr) + self.addend; - - if (self.is_64bit) { - mem.writeIntLittle(u64, args.block.code[args.offset..][0..8], @bitCast(u64, result)); - } else { - mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @truncate(u32, @bitCast(u64, result))); - } - } - - pub fn format(self: Unsigned, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "Unsigned {{ ", .{}); - if (self.subtractor) |sub| { - try std.fmt.format(writer, ".subtractor = {}, ", .{sub}); - } - try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); - const length: usize = if (self.is_64bit) 8 else 4; - try std.fmt.format(writer, ".length = {}, ", .{length}); - try std.fmt.format(writer, "}}", .{}); - } - }; - - pub const Branch = struct { - arch: Arch, - - pub fn resolve(self: Branch, args: ResolveArgs) !void { - switch (self.arch) { - .aarch64 => { - const displacement = try math.cast( - i28, - @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr), - ); - const code = args.block.code[args.offset..][0..4]; - var inst = aarch64.Instruction{ - .unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.unconditional_branch_immediate, - ), code), - }; - inst.unconditional_branch_immediate.imm26 = @truncate(u26, @bitCast(u28, displacement >> 2)); - mem.writeIntLittle(u32, code, inst.toU32()); - }, - .x86_64 => { - const displacement = try math.cast( - i32, - @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4, - ); - mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @bitCast(u32, displacement)); - }, - else => return error.UnsupportedCpuArchitecture, - } - } - - pub fn format(self: Branch, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = self; - _ = fmt; - _ = options; - try std.fmt.format(writer, "Branch {{}}", .{}); - } - }; - - pub const Page = struct { - kind: enum { - page, - got, - tlvp, - }, - addend: ?u32 = null, - - pub fn resolve(self: Page, args: ResolveArgs) !void { - const target_addr = if (self.addend) |addend| args.target_addr + addend else args.target_addr; - const source_page = @intCast(i32, args.source_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); - - const code = args.block.code[args.offset..][0..4]; - var inst = aarch64.Instruction{ - .pc_relative_address = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.pc_relative_address, - ), code), - }; - inst.pc_relative_address.immhi = @truncate(u19, pages >> 2); - inst.pc_relative_address.immlo = @truncate(u2, pages); - - mem.writeIntLittle(u32, code, inst.toU32()); - } - - pub fn format(self: Page, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "Page {{ ", .{}); - switch (self.kind) { - .page => {}, - .got => { - try std.fmt.format(writer, ".got, ", .{}); - }, - .tlvp => { - try std.fmt.format(writer, ".tlvp", .{}); - }, - } - if (self.addend) |add| { - try std.fmt.format(writer, ".addend = {}, ", .{add}); - } - try std.fmt.format(writer, "}}", .{}); - } - }; - - pub const PageOff = struct { - kind: enum { - page, - got, - tlvp, - }, - addend: ?u32 = null, - op_kind: ?OpKind = null, - - pub const OpKind = enum { - arithmetic, - load, - }; - - pub fn resolve(self: PageOff, args: ResolveArgs) !void { - const code = args.block.code[args.offset..][0..4]; - - switch (self.kind) { - .page => { - const target_addr = if (self.addend) |addend| args.target_addr + addend else args.target_addr; - const narrowed = @truncate(u12, target_addr); - - const op_kind = self.op_kind orelse unreachable; - var inst: aarch64.Instruction = blk: { - switch (op_kind) { - .arithmetic => { - break :blk .{ - .add_subtract_immediate = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.add_subtract_immediate, - ), code), - }; - }, - .load => { - break :blk .{ - .load_store_register = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), code), - }; - }, - } - }; - - if (op_kind == .arithmetic) { - inst.add_subtract_immediate.imm12 = narrowed; - } else { - const offset: u12 = blk: { - if (inst.load_store_register.size == 0) { - if (inst.load_store_register.v == 1) { - // 128-bit SIMD is scaled by 16. - break :blk try math.divExact(u12, narrowed, 16); - } - // Otherwise, 8-bit SIMD or ldrb. - break :blk narrowed; - } else { - const denom: u4 = try math.powi(u4, 2, inst.load_store_register.size); - break :blk try math.divExact(u12, narrowed, denom); - } - }; - inst.load_store_register.offset = offset; - } - - mem.writeIntLittle(u32, code, inst.toU32()); - }, - .got => { - const narrowed = @truncate(u12, args.target_addr); - var inst: aarch64.Instruction = .{ - .load_store_register = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), code), - }; - const offset = try math.divExact(u12, narrowed, 8); - inst.load_store_register.offset = offset; - mem.writeIntLittle(u32, code, inst.toU32()); - }, - .tlvp => { - const RegInfo = struct { - rd: u5, - rn: u5, - size: u1, - }; - const reg_info: RegInfo = blk: { - if (isArithmeticOp(code)) { - const inst = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.add_subtract_immediate, - ), code); - break :blk .{ - .rd = inst.rd, - .rn = inst.rn, - .size = inst.sf, - }; - } else { - const inst = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), code); - break :blk .{ - .rd = inst.rt, - .rn = inst.rn, - .size = @truncate(u1, inst.size), - }; - } - }; - const narrowed = @truncate(u12, args.target_addr); - var inst = aarch64.Instruction{ - .add_subtract_immediate = .{ - .rd = reg_info.rd, - .rn = reg_info.rn, - .imm12 = narrowed, - .sh = 0, - .s = 0, - .op = 0, - .sf = reg_info.size, - }, - }; - mem.writeIntLittle(u32, code, inst.toU32()); - }, - } - } - - pub fn format(self: PageOff, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "PageOff {{ ", .{}); - switch (self.kind) { - .page => {}, - .got => { - try std.fmt.format(writer, ".got, ", .{}); - }, - .tlvp => { - try std.fmt.format(writer, ".tlvp, ", .{}); - }, - } - if (self.addend) |add| { - try std.fmt.format(writer, ".addend = {}, ", .{add}); - } - if (self.op_kind) |op| { - try std.fmt.format(writer, ".op_kind = {s}, ", .{op}); - } - try std.fmt.format(writer, "}}", .{}); - } - }; - - pub const PointerToGot = struct { - pub fn resolve(_: PointerToGot, args: ResolveArgs) !void { - const result = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr)); - mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @bitCast(u32, result)); - } - - pub fn format(self: PointerToGot, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = self; - _ = fmt; - _ = options; - try std.fmt.format(writer, "PointerToGot {{}}", .{}); - } - }; - - pub const Signed = struct { - addend: i64, - correction: i4, - - pub fn resolve(self: Signed, args: ResolveArgs) !void { - const target_addr = @intCast(i64, args.target_addr) + self.addend; - const displacement = try math.cast( - i32, - target_addr - @intCast(i64, args.source_addr) - self.correction - 4, - ); - mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @bitCast(u32, displacement)); - } - - pub fn format(self: Signed, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "Signed {{ ", .{}); - try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); - try std.fmt.format(writer, ".correction = {}, ", .{self.correction}); - try std.fmt.format(writer, "}}", .{}); - } - }; - - pub const Load = struct { - kind: enum { - got, - tlvp, - }, - addend: ?i32 = null, - - pub fn resolve(self: Load, args: ResolveArgs) !void { - if (self.kind == .tlvp) { - // We need to rewrite the opcode from movq to leaq. - args.block.code[args.offset - 2] = 0x8d; - } - const addend = if (self.addend) |addend| addend else 0; - const displacement = try math.cast( - i32, - @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4 + addend, - ); - mem.writeIntLittle(u32, args.block.code[args.offset..][0..4], @bitCast(u32, displacement)); - } - - pub fn format(self: Load, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "Load {{ ", .{}); - try std.fmt.format(writer, "{s}, ", .{self.kind}); - if (self.addend) |addend| { - try std.fmt.format(writer, ".addend = {}, ", .{addend}); - } - try std.fmt.format(writer, "}}", .{}); - } - }; - - pub fn resolve(self: Relocation, block: *TextBlock, source_addr: u64, target_addr: u64) !void { - const args = ResolveArgs{ - .block = block, - .offset = self.offset, - .source_addr = source_addr, - .target_addr = target_addr, - }; - switch (self.payload) { - .unsigned => |unsigned| try unsigned.resolve(args), - .branch => |branch| try branch.resolve(args), - .page => |page| try page.resolve(args), - .page_off => |page_off| try page_off.resolve(args), - .pointer_to_got => |pointer_to_got| try pointer_to_got.resolve(args), - .signed => |signed| try signed.resolve(args), - .load => |load| try load.resolve(args), - } - } - - pub fn format(self: Relocation, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - try std.fmt.format(writer, "Relocation {{ ", .{}); - try std.fmt.format(writer, ".offset = {}, ", .{self.offset}); - try std.fmt.format(writer, ".target = {}, ", .{self.target}); - - switch (self.payload) { - .unsigned => |unsigned| try unsigned.format(fmt, options, writer), - .branch => |branch| try branch.format(fmt, options, writer), - .page => |page| try page.format(fmt, options, writer), - .page_off => |page_off| try page_off.format(fmt, options, writer), - .pointer_to_got => |pointer_to_got| try pointer_to_got.format(fmt, options, writer), - .signed => |signed| try signed.format(fmt, options, writer), - .load => |load| try load.format(fmt, options, writer), - } - - try std.fmt.format(writer, "}}", .{}); - } -}; - -pub const RelocIterator = struct { - buffer: []const macho.relocation_info, - index: i32 = -1, - - pub fn next(self: *RelocIterator) ?macho.relocation_info { - self.index += 1; - if (self.index < self.buffer.len) { - return self.buffer[@intCast(u32, self.index)]; - } - return null; - } - - pub fn peek(self: RelocIterator) macho.relocation_info { - assert(self.index + 1 < self.buffer.len); - return self.buffer[@intCast(u32, self.index + 1)]; - } -}; - -pub const Parser = struct { - object: *Object, - zld: *Zld, - it: *RelocIterator, - block: *TextBlock, - - /// Base address of the parsed text block in the source section. - base_addr: u64, - - /// Used only when targeting aarch64 - addend: ?u32 = null, - - /// Parsed subtractor symbol from _RELOC_SUBTRACTOR reloc type. - subtractor: ?*Symbol = null, - - pub fn parse(self: *Parser) !void { - while (self.it.next()) |rel| { - const out_rel = blk: { - switch (self.object.arch.?) { - .aarch64 => { - const out_rel = switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) { - .ARM64_RELOC_BRANCH26 => try self.parseBranch(rel), - .ARM64_RELOC_SUBTRACTOR => { - // Subtractor is not a relocation with effect on the TextBlock, so - // parse it and carry on. - try self.parseSubtractor(rel); - - // Verify SUBTRACTOR is followed by UNSIGNED. - const next = @intToEnum(macho.reloc_type_arm64, self.it.peek().r_type); - if (next != .ARM64_RELOC_UNSIGNED) { - log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); - return error.UnexpectedRelocationType; - } - continue; - }, - .ARM64_RELOC_UNSIGNED => try self.parseUnsigned(rel), - .ARM64_RELOC_ADDEND => { - // Addend is not a relocation with effect on the TextBlock, so - // parse it and carry on. - try self.parseAddend(rel); - - // Verify ADDEND is followed by a load. - const next = @intToEnum(macho.reloc_type_arm64, self.it.peek().r_type); - switch (next) { - .ARM64_RELOC_PAGE21, .ARM64_RELOC_PAGEOFF12 => {}, - else => { - log.err("unexpected relocation type: expected PAGE21 or PAGEOFF12, found {s}", .{next}); - return error.UnexpectedRelocationType; - }, - } - continue; - }, - .ARM64_RELOC_PAGE21, - .ARM64_RELOC_GOT_LOAD_PAGE21, - .ARM64_RELOC_TLVP_LOAD_PAGE21, - => try self.parsePage(rel), - .ARM64_RELOC_PAGEOFF12, - .ARM64_RELOC_GOT_LOAD_PAGEOFF12, - .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, - => try self.parsePageOff(rel), - .ARM64_RELOC_POINTER_TO_GOT => try self.parsePointerToGot(rel), - }; - break :blk out_rel; - }, - .x86_64 => { - const out_rel = switch (@intToEnum(macho.reloc_type_x86_64, rel.r_type)) { - .X86_64_RELOC_BRANCH => try self.parseBranch(rel), - .X86_64_RELOC_SUBTRACTOR => { - // Subtractor is not a relocation with effect on the TextBlock, so - // parse it and carry on. - try self.parseSubtractor(rel); - - // Verify SUBTRACTOR is followed by UNSIGNED. - const next = @intToEnum(macho.reloc_type_x86_64, self.it.peek().r_type); - if (next != .X86_64_RELOC_UNSIGNED) { - log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); - return error.UnexpectedRelocationType; - } - continue; - }, - .X86_64_RELOC_UNSIGNED => try self.parseUnsigned(rel), - .X86_64_RELOC_SIGNED, - .X86_64_RELOC_SIGNED_1, - .X86_64_RELOC_SIGNED_2, - .X86_64_RELOC_SIGNED_4, - => try self.parseSigned(rel), - .X86_64_RELOC_GOT_LOAD, - .X86_64_RELOC_GOT, - .X86_64_RELOC_TLV, - => try self.parseLoad(rel), - }; - break :blk out_rel; - }, - else => unreachable, - } - }; - try self.block.relocs.append(out_rel); - - if (out_rel.target.payload == .regular) { - try self.block.references.put(out_rel.target.payload.regular.local_sym_index, {}); - } - - const is_via_got = switch (out_rel.payload) { - .pointer_to_got => true, - .load => |load| load.kind == .got, - .page => |page| page.kind == .got, - .page_off => |page_off| page_off.kind == .got, - else => false, - }; - - if (is_via_got and out_rel.target.got_index == null) { - const index = @intCast(u32, self.zld.got_entries.items.len); - out_rel.target.got_index = index; - try self.zld.got_entries.append(self.zld.allocator, out_rel.target); - - log.debug("adding GOT entry for symbol {s} at index {}", .{ - self.zld.getString(out_rel.target.strx), - index, - }); - } else if (out_rel.payload == .unsigned) { - const sym = out_rel.target; - switch (sym.payload) { - .proxy => |proxy| { - try self.block.bindings.append(.{ - .local_sym_index = proxy.local_sym_index, - .offset = out_rel.offset, - }); - }, - else => { - const source_sym = self.zld.locals.items[self.block.local_sym_index]; - const source_reg = &source_sym.payload.regular; - const seg = self.zld.load_commands.items[source_reg.segment_id].Segment; - const sect = seg.sections.items[source_reg.section_id]; - const sect_type = commands.sectionType(sect); - - const should_rebase = rebase: { - if (!out_rel.payload.unsigned.is_64bit) break :rebase false; - - // TODO actually, a check similar to what dyld is doing, that is, verifying - // that the segment is writable should be enough here. - const is_right_segment = blk: { - if (self.zld.data_segment_cmd_index) |idx| { - if (source_reg.segment_id == idx) { - break :blk true; - } - } - if (self.zld.data_const_segment_cmd_index) |idx| { - if (source_reg.segment_id == idx) { - break :blk true; - } - } - break :blk false; - }; - - if (!is_right_segment) break :rebase false; - if (sect_type != macho.S_LITERAL_POINTERS and - sect_type != macho.S_REGULAR and - sect_type != macho.S_MOD_INIT_FUNC_POINTERS and - sect_type != macho.S_MOD_TERM_FUNC_POINTERS) - { - break :rebase false; - } - - break :rebase true; - }; - - if (should_rebase) { - try self.block.rebases.append(out_rel.offset); - } - }, - } - } else if (out_rel.payload == .branch) blk: { - const sym = out_rel.target; - - if (sym.stubs_index != null) break :blk; - if (sym.payload != .proxy) break :blk; - - const index = @intCast(u32, self.zld.stubs.items.len); - sym.stubs_index = index; - try self.zld.stubs.append(self.zld.allocator, sym); - - log.debug("adding stub entry for symbol {s} at index {}", .{ self.zld.getString(sym.strx), index }); - } - } - } - - fn parseBaseRelInfo(self: *Parser, rel: macho.relocation_info) !Relocation { - const offset = @intCast(u32, @intCast(u64, rel.r_address) - self.base_addr); - const target = try self.object.symbolFromReloc(self.zld, rel); - return Relocation{ - .offset = offset, - .target = target, - .payload = undefined, - }; - } - - fn parseUnsigned(self: *Parser, rel: macho.relocation_info) !Relocation { - defer { - // Reset parser's subtractor state - self.subtractor = null; - } - - assert(rel.r_pcrel == 0); - - var parsed = try self.parseBaseRelInfo(rel); - const is_64bit: bool = switch (rel.r_length) { - 3 => true, - 2 => false, - else => unreachable, - }; - - var addend: i64 = if (is_64bit) - mem.readIntLittle(i64, self.block.code[parsed.offset..][0..8]) - else - mem.readIntLittle(i32, self.block.code[parsed.offset..][0..4]); - - if (rel.r_extern == 0) { - addend -= @intCast(i64, parsed.target.payload.regular.address); - } - - parsed.payload = .{ - .unsigned = .{ - .subtractor = self.subtractor, - .is_64bit = is_64bit, - .addend = addend, - }, - }; - - return parsed; - } - - fn parseBranch(self: *Parser, rel: macho.relocation_info) !Relocation { - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - var parsed = try self.parseBaseRelInfo(rel); - parsed.payload = .{ - .branch = .{ - .arch = self.object.arch.?, - }, - }; - return parsed; - } - - fn parsePage(self: *Parser, rel: macho.relocation_info) !Relocation { - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - - defer if (rel_type == .ARM64_RELOC_PAGE21) { - // Reset parser's addend state - self.addend = null; - }; - - const addend = if (rel_type == .ARM64_RELOC_PAGE21) - self.addend - else - null; - - var parsed = try self.parseBaseRelInfo(rel); - parsed.payload = .{ - .page = .{ - .kind = switch (rel_type) { - .ARM64_RELOC_PAGE21 => .page, - .ARM64_RELOC_GOT_LOAD_PAGE21 => .got, - .ARM64_RELOC_TLVP_LOAD_PAGE21 => .tlvp, - else => unreachable, - }, - .addend = addend, - }, - }; - return parsed; - } - - fn parsePageOff(self: *Parser, rel: macho.relocation_info) !Relocation { - assert(rel.r_pcrel == 0); - assert(rel.r_length == 2); - - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - - defer if (rel_type == .ARM64_RELOC_PAGEOFF12) { - // Reset parser's addend state - self.addend = null; - }; - - const addend = if (rel_type == .ARM64_RELOC_PAGEOFF12) - self.addend - else - null; - - var parsed = try self.parseBaseRelInfo(rel); - const op_kind: ?Relocation.PageOff.OpKind = blk: { - if (rel_type != .ARM64_RELOC_PAGEOFF12) break :blk null; - const op_kind: Relocation.PageOff.OpKind = if (isArithmeticOp(self.block.code[parsed.offset..][0..4])) - .arithmetic - else - .load; - break :blk op_kind; - }; - - parsed.payload = .{ - .page_off = .{ - .kind = switch (rel_type) { - .ARM64_RELOC_PAGEOFF12 => .page, - .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => .got, - .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => .tlvp, - else => unreachable, - }, - .addend = addend, - .op_kind = op_kind, - }, - }; - return parsed; - } - - fn parsePointerToGot(self: *Parser, rel: macho.relocation_info) !Relocation { - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - var parsed = try self.parseBaseRelInfo(rel); - parsed.payload = .{ - .pointer_to_got = .{}, - }; - return parsed; - } - - fn parseAddend(self: *Parser, rel: macho.relocation_info) !void { - assert(rel.r_pcrel == 0); - assert(rel.r_extern == 0); - assert(self.addend == null); - - self.addend = rel.r_symbolnum; - } - - fn parseSigned(self: *Parser, rel: macho.relocation_info) !Relocation { - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - var parsed = try self.parseBaseRelInfo(rel); - const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - const correction: i4 = switch (rel_type) { - .X86_64_RELOC_SIGNED => 0, - .X86_64_RELOC_SIGNED_1 => 1, - .X86_64_RELOC_SIGNED_2 => 2, - .X86_64_RELOC_SIGNED_4 => 4, - else => unreachable, - }; - var addend: i64 = mem.readIntLittle(i32, self.block.code[parsed.offset..][0..4]) + correction; - - if (rel.r_extern == 0) { - const source_sym = self.zld.locals.items[self.block.local_sym_index].payload.regular; - const source_addr = source_sym.address + parsed.offset + 4; - const target_sym = parsed.target.payload.regular; - addend = @intCast(i64, source_addr) + addend - @intCast(i64, target_sym.address); - } - - parsed.payload = .{ - .signed = .{ - .correction = correction, - .addend = addend, - }, - }; - - return parsed; - } - - fn parseSubtractor(self: *Parser, rel: macho.relocation_info) !void { - assert(rel.r_pcrel == 0); - assert(self.subtractor == null); - - self.subtractor = try self.object.symbolFromReloc(self.zld, rel); - } - - fn parseLoad(self: *Parser, rel: macho.relocation_info) !Relocation { - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - var parsed = try self.parseBaseRelInfo(rel); - const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - const addend = if (rel_type == .X86_64_RELOC_GOT) - mem.readIntLittle(i32, self.block.code[parsed.offset..][0..4]) - else - null; - - parsed.payload = .{ - .load = .{ - .kind = switch (rel_type) { - .X86_64_RELOC_GOT_LOAD, .X86_64_RELOC_GOT => .got, - .X86_64_RELOC_TLV => .tlvp, - else => unreachable, - }, - .addend = addend, - }, - }; - return parsed; - } -}; - -inline fn isArithmeticOp(inst: *const [4]u8) bool { - const group_decode = @truncate(u5, inst[3]); - return ((group_decode >> 2) == 4); -}