Merge pull request #9676 from ziglang/zld-incr

MachO: merges stage1 with self-hosted codepath
Jakub Konka 2021-09-14 14:20:11 +02:00 committed by GitHub
commit 85f065a511
10 changed files with 3171 additions and 4139 deletions


@ -574,11 +574,11 @@ set(ZIG_STAGE2_SOURCES
"${CMAKE_SOURCE_DIR}/src/link/Plan9/aout.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/Archive.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/Atom.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/CodeSignature.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/DebugSymbols.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/Dylib.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/Object.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/TextBlock.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/Trie.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/bind.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/commands.zig"


@ -601,35 +601,35 @@ pub const segment_command = extern struct {
/// command and their size is reflected in cmdsize.
pub const segment_command_64 = extern struct {
/// LC_SEGMENT_64
cmd: u32,
cmd: u32 = LC_SEGMENT_64,
/// includes sizeof section_64 structs
cmdsize: u32,
cmdsize: u32 = @sizeOf(segment_command_64),
/// segment name
segname: [16]u8,
/// memory address of this segment
vmaddr: u64,
vmaddr: u64 = 0,
/// memory size of this segment
vmsize: u64,
vmsize: u64 = 0,
/// file offset of this segment
fileoff: u64,
fileoff: u64 = 0,
/// amount to map from the file
filesize: u64,
filesize: u64 = 0,
/// maximum VM protection
maxprot: vm_prot_t,
maxprot: vm_prot_t = VM_PROT_NONE,
/// initial VM protection
initprot: vm_prot_t,
initprot: vm_prot_t = VM_PROT_NONE,
/// number of sections in segment
nsects: u32,
flags: u32,
nsects: u32 = 0,
flags: u32 = 0,
};
/// A segment is made up of zero or more sections. Non-MH_OBJECT files have
@ -700,34 +700,34 @@ pub const section_64 = extern struct {
segname: [16]u8,
/// memory address of this section
addr: u64,
addr: u64 = 0,
/// size in bytes of this section
size: u64,
size: u64 = 0,
/// file offset of this section
offset: u32,
offset: u32 = 0,
/// section alignment (power of 2)
@"align": u32,
@"align": u32 = 0,
/// file offset of relocation entries
reloff: u32,
reloff: u32 = 0,
/// number of relocation entries
nreloc: u32,
nreloc: u32 = 0,
/// flags (section type and attributes)
flags: u32,
flags: u32 = S_REGULAR,
/// reserved (for offset or index)
reserved1: u32,
reserved1: u32 = 0,
/// reserved (for count or sizeof)
reserved2: u32,
reserved2: u32 = 0,
/// reserved
reserved3: u32,
reserved3: u32 = 0,
};
pub const nlist = extern struct {

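The defaults added to segment_command_64 and section_64 above let call sites name only the fields that matter. A minimal sketch of the payoff (makeStaticString is the 16-byte name helper this diff has MachO.zig export; the asserts are illustrative):

const std = @import("std");
const macho = std.macho;

fn makeStaticString(bytes: []const u8) [16]u8 {
    // Pad a name into Mach-O's fixed 16-byte name fields.
    var buf = [_]u8{0} ** 16;
    std.mem.copy(u8, &buf, bytes);
    return buf;
}

comptime {
    // An empty __TEXT segment now needs only its name; cmd, cmdsize,
    // protections, counts, and flags all come from the new defaults.
    const text_seg = macho.segment_command_64{
        .segname = makeStaticString("__TEXT"),
    };
    std.debug.assert(text_seg.cmd == macho.LC_SEGMENT_64);
    std.debug.assert(text_seg.nsects == 0 and text_seg.flags == 0);
}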

@ -2816,24 +2816,21 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
if (self.air.value(callee)) |func_value| {
if (func_value.castTag(.function)) |func_payload| {
const func = func_payload.data;
const got_addr = blk: {
const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment;
const got = seg.sections.items[macho_file.got_section_index.?];
const got_index = macho_file.got_entries_map.get(.{
.where = .local,
.where_index = func.owner_decl.link.macho.local_sym_index,
}) orelse unreachable;
break :blk got.addr + got_index * @sizeOf(u64);
};
// TODO I'm hacking my way through here by repurposing .memory to store
// the index of the GOT target symbol.
switch (arch) {
.x86_64 => {
try self.genSetReg(Type.initTag(.u64), .rax, .{ .memory = got_addr });
try self.genSetReg(Type.initTag(.u64), .rax, .{
.memory = func.owner_decl.link.macho.local_sym_index,
});
// callq *%rax
try self.code.ensureCapacity(self.code.items.len + 2);
self.code.appendSliceAssumeCapacity(&[2]u8{ 0xff, 0xd0 });
},
.aarch64 => {
try self.genSetReg(Type.initTag(.u64), .x30, .{ .memory = got_addr });
try self.genSetReg(Type.initTag(.u64), .x30, .{
.memory = func.owner_decl.link.macho.local_sym_index,
});
// blr x30
writeInt(u32, try self.code.addManyAsArray(4), Instruction.blr(.x30).toU32());
},
@ -4345,29 +4342,20 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
}).toU32());
if (self.bin_file.cast(link.File.MachO)) |macho_file| {
// TODO this is super awkward. We are reversing the address of the GOT entry here.
// We should probably have it cached or move the reloc adding somewhere else.
const got_addr = blk: {
const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment;
const got = seg.sections.items[macho_file.got_section_index.?];
break :blk got.addr;
};
const where_index = blk: for (macho_file.got_entries.items) |key, id| {
if (got_addr + id * @sizeOf(u64) == addr) break :blk key.where_index;
} else unreachable;
// TODO I think the reloc might be in the wrong place.
const decl = macho_file.active_decl.?;
// Page reloc for adrp instruction.
try decl.link.macho.relocs.append(self.bin_file.allocator, .{
.offset = offset,
.where = .local,
.where_index = where_index,
.where_index = @intCast(u32, addr),
.payload = .{ .page = .{ .kind = .got } },
});
// Pageoff reloc for adrp instruction.
try decl.link.macho.relocs.append(self.bin_file.allocator, .{
.offset = offset + 4,
.where = .local,
.where_index = where_index,
.where_index = @intCast(u32, addr),
.payload = .{ .page_off = .{ .kind = .got } },
});
} else {
@ -4628,22 +4616,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
const offset = @intCast(u32, self.code.items.len);
if (self.bin_file.cast(link.File.MachO)) |macho_file| {
// TODO this is super awkward. We are reversing the address of the GOT entry here.
// We should probably have it cached or move the reloc adding somewhere else.
const got_addr = blk: {
const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment;
const got = seg.sections.items[macho_file.got_section_index.?];
break :blk got.addr;
};
const where_index = blk: for (macho_file.got_entries.items) |key, id| {
if (got_addr + id * @sizeOf(u64) == x) break :blk key.where_index;
} else unreachable;
// TODO I think the reloc might be in the wrong place.
const decl = macho_file.active_decl.?;
// Load reloc for LEA instruction.
try decl.link.macho.relocs.append(self.bin_file.allocator, .{
.offset = offset - 4,
.where = .local,
.where_index = where_index,
.where_index = @intCast(u32, x),
.payload = .{ .load = .{ .kind = .got } },
});
} else {
@ -4869,17 +4848,10 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
const got = &elf_file.program_headers.items[elf_file.phdr_got_index.?];
const got_addr = got.p_vaddr + decl.link.elf.offset_table_index * ptr_bytes;
return MCValue{ .memory = got_addr };
} else if (self.bin_file.cast(link.File.MachO)) |macho_file| {
const got_addr = blk: {
const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment;
const got = seg.sections.items[macho_file.got_section_index.?];
const got_index = macho_file.got_entries_map.get(.{
.where = .local,
.where_index = decl.link.macho.local_sym_index,
}) orelse unreachable;
break :blk got.addr + got_index * ptr_bytes;
};
return MCValue{ .memory = got_addr };
} else if (self.bin_file.cast(link.File.MachO)) |_| {
// TODO I'm hacking my way through here by repurposing .memory to store
// the index of the GOT target symbol.
return MCValue{ .memory = decl.link.macho.local_sym_index };
} else if (self.bin_file.cast(link.File.Coff)) |coff_file| {
const got_addr = coff_file.offset_table_virtual_address + decl.link.coff.offset_table_index * ptr_bytes;
return MCValue{ .memory = got_addr };
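Both MachO branches above now store a symbol index where an absolute GOT address used to go; the linker resolves it later through relocations. A stripped-down illustration with hypothetical values (this MCValue is a stand-in for the backend's real union):

const std = @import("std");

// Stand-in for the backend's MCValue: .memory normally carries an absolute
// address, but for MachO it now temporarily carries a local symbol index.
const MCValue = union(enum) {
    memory: u64,
};

test "repurposed .memory (hypothetical values)" {
    const got_base: u64 = 0x100004000; // hypothetical __got address
    const got_index: u64 = 3;
    const local_sym_index: u64 = 42; // hypothetical Decl symbol index

    const before = MCValue{ .memory = got_base + got_index * @sizeOf(u64) }; // old scheme
    const after = MCValue{ .memory = local_sym_index }; // new scheme
    try std.testing.expect(before.memory != after.memory);
}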

File diff suppressed because it is too large.


@ -1,6 +1,7 @@
const TextBlock = @This();
const Atom = @This();
const std = @import("std");
const build_options = @import("build_options");
const aarch64 = @import("../../codegen/aarch64.zig");
const assert = std.debug.assert;
const commands = @import("commands.zig");
@ -9,6 +10,7 @@ const macho = std.macho;
const math = std.math;
const mem = std.mem;
const meta = std.meta;
const trace = @import("../../tracy.zig").trace;
const Allocator = mem.Allocator;
const Arch = std.Target.Cpu.Arch;
@ -24,54 +26,63 @@ const StringIndexAdapter = std.hash_map.StringIndexAdapter;
/// offset table entry.
local_sym_index: u32,
/// List of symbol aliases pointing to the same block via different nlists
/// List of symbol aliases pointing to the same atom via different nlists
aliases: std.ArrayListUnmanaged(u32) = .{},
/// List of symbols contained within this block
/// List of symbols contained within this atom
contained: std.ArrayListUnmanaged(SymbolAtOffset) = .{},
/// Code (may be non-relocated) this block represents
/// Code (may be non-relocated) this atom represents
code: std.ArrayListUnmanaged(u8) = .{},
/// Size and alignment of this text block
/// Size and alignment of this atom
/// Unlike in Elf, we need to store the size of this symbol as part of
/// the TextBlock since macho.nlist_64 lacks this information.
/// the atom since macho.nlist_64 lacks this information.
size: u64,
/// Alignment of this atom as a power of 2.
/// For instance, an alignment of 0 should be read as 2^0 = 1 byte aligned.
alignment: u32,
/// List of relocations belonging to this atom.
relocs: std.ArrayListUnmanaged(Relocation) = .{},
/// List of offsets contained within this block that need rebasing by the dynamic
/// loader in presence of ASLR
/// List of offsets contained within this atom that need rebasing by the dynamic
/// loader in presence of ASLR.
rebases: std.ArrayListUnmanaged(u64) = .{},
/// List of offsets contained within this block that will be dynamically bound
/// List of offsets contained within this atom that will be dynamically bound
/// by the dynamic loader and contain pointers to resolved (at load time) extern
/// symbols (aka proxies aka imports)
bindings: std.ArrayListUnmanaged(SymbolAtOffset) = .{},
/// List of lazy bindings
lazy_bindings: std.ArrayListUnmanaged(SymbolAtOffset) = .{},
/// List of data-in-code entries. This is currently specific to x86_64 only.
dices: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{},
/// Stab entry for this block. This is currently specific to a binary created
/// Stab entry for this atom. This is currently specific to a binary created
/// by linking object files in a traditional sense - in incremental sense, we
/// bypass stabs altogether to produce dSYM bundle directly with fully relocated
/// DWARF sections.
stab: ?Stab = null,
/// Points to the previous and next neighbours
next: ?*TextBlock,
prev: ?*TextBlock,
next: ?*Atom,
prev: ?*Atom,
/// Previous/next linked list pointers.
/// This is the linked list node for this Decl's corresponding .debug_info tag.
dbg_info_prev: ?*TextBlock,
dbg_info_next: ?*TextBlock,
dbg_info_prev: ?*Atom,
dbg_info_next: ?*Atom,
/// Offset into .debug_info pointing to the tag for this Decl.
dbg_info_off: u32,
/// Size of the .debug_info tag for this Decl, not including padding.
dbg_info_len: u32,
dirty: bool = true,
pub const SymbolAtOffset = struct {
local_sym_index: u32,
offset: u64,
@ -160,7 +171,7 @@ pub const Stab = union(enum) {
};
pub const Relocation = struct {
/// Offset within the `block`s code buffer.
/// Offset within the atom's code buffer.
/// Note that the relocation size can be inferred from the relocation's kind.
offset: u32,
@ -182,7 +193,7 @@ pub const Relocation = struct {
},
const ResolveArgs = struct {
block: *TextBlock,
block: *Atom,
offset: u32,
source_addr: u64,
target_addr: u64,
@ -238,10 +249,21 @@ pub const Relocation = struct {
pub fn resolve(self: Branch, args: ResolveArgs) !void {
switch (self.arch) {
.aarch64 => {
const displacement = try math.cast(
const displacement = math.cast(
i28,
@intCast(i64, args.target_addr) - @intCast(i64, args.source_addr),
);
) catch |err| switch (err) {
error.Overflow => {
log.err("jump too big to encode as i28 displacement value", .{});
log.err(" (target - source) = displacement => 0x{x} - 0x{x} = 0x{x}", .{
args.target_addr,
args.source_addr,
@intCast(i64, args.target_addr) - @intCast(i64, args.source_addr),
});
log.err(" TODO implement branch islands to extend jump distance for arm64", .{});
return error.TODOImplementBranchIslands;
},
};
const code = args.block.code.items[args.offset..][0..4];
var inst = aarch64.Instruction{
.unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload(
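The catch above turns an out-of-range arm64 branch into a diagnostic instead of a bare error. A self-contained sketch of the same i28 range check, with hypothetical addresses (math.cast returned an error union at the time of this commit, as the code above relies on):

const std = @import("std");
const math = std.math;

fn branchDisplacement(source_addr: u64, target_addr: u64) !i28 {
    // AArch64 B/BL immediates cover +/-128 MiB, i.e. a signed 28-bit range.
    return math.cast(i28, @intCast(i64, target_addr) - @intCast(i64, source_addr));
}

test "branch range check (hypothetical addresses)" {
    _ = try branchDisplacement(0x1000, 0x2000); // short forward jump fits
    // A 256 MiB jump overflows i28 and would need a branch island.
    try std.testing.expectError(error.Overflow, branchDisplacement(0, 0x10000000));
}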
@ -474,13 +496,13 @@ pub const Relocation = struct {
pub const Signed = struct {
addend: i64,
correction: i4,
correction: u3,
pub fn resolve(self: Signed, args: ResolveArgs) !void {
const target_addr = @intCast(i64, args.target_addr) + self.addend;
const displacement = try math.cast(
i32,
target_addr - @intCast(i64, args.source_addr) - self.correction - 4,
target_addr - @intCast(i64, args.source_addr + self.correction + 4),
);
mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, displacement));
}
@ -556,7 +578,7 @@ pub const Relocation = struct {
}
};
pub const empty = TextBlock{
pub const empty = Atom{
.local_sym_index = 0,
.size = 0,
.alignment = 0,
@ -568,8 +590,9 @@ pub const empty = TextBlock{
.dbg_info_len = undefined,
};
pub fn deinit(self: *TextBlock, allocator: *Allocator) void {
pub fn deinit(self: *Atom, allocator: *Allocator) void {
self.dices.deinit(allocator);
self.lazy_bindings.deinit(allocator);
self.bindings.deinit(allocator);
self.rebases.deinit(allocator);
self.relocs.deinit(allocator);
@ -578,23 +601,34 @@ pub fn deinit(self: *TextBlock, allocator: *Allocator) void {
self.code.deinit(allocator);
}
pub fn clearRetainingCapacity(self: *Atom) void {
self.dices.clearRetainingCapacity();
self.lazy_bindings.clearRetainingCapacity();
self.bindings.clearRetainingCapacity();
self.rebases.clearRetainingCapacity();
self.relocs.clearRetainingCapacity();
self.contained.clearRetainingCapacity();
self.aliases.clearRetainingCapacity();
self.code.clearRetainingCapacity();
}
/// Returns how much room there is to grow in virtual address space.
/// File offset relocation happens transparently, so it is not included in
/// this calculation.
pub fn capacity(self: TextBlock, macho_file: MachO) u64 {
pub fn capacity(self: Atom, macho_file: MachO) u64 {
const self_sym = macho_file.locals.items[self.local_sym_index];
if (self.next) |next| {
const next_sym = macho_file.locals.items[next.local_sym_index];
return next_sym.n_value - self_sym.n_value;
} else {
// We are the last block.
// We are the last atom.
// The capacity is limited only by virtual address space.
return std.math.maxInt(u64) - self_sym.n_value;
}
}
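An atom's capacity is just the distance from its own symbol address to its successor's; the last atom can grow until the address space runs out. A worked example with hypothetical addresses:

const std = @import("std");

test "atom capacity (hypothetical addresses)" {
    const self_addr: u64 = 0x1000;
    const next_addr: u64 = 0x1040;
    // A middle atom can grow in place by 0x40 bytes before it must move.
    try std.testing.expectEqual(@as(u64, 0x40), next_addr - self_addr);
    // The final atom is bounded only by the remaining virtual address space.
    try std.testing.expect(std.math.maxInt(u64) - self_addr > 0x40);
}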
pub fn freeListEligible(self: TextBlock, macho_file: MachO) bool {
// No need to keep a free list node for the last block.
pub fn freeListEligible(self: Atom, macho_file: MachO) bool {
// No need to keep a free list node for the last atom.
const next = self.next orelse return false;
const self_sym = macho_file.locals.items[self.local_sym_index];
const next_sym = macho_file.locals.items[next.local_sym_index];
@ -607,14 +641,16 @@ pub fn freeListEligible(self: TextBlock, macho_file: MachO) bool {
const RelocContext = struct {
base_addr: u64 = 0,
base_offset: u64 = 0,
allocator: *Allocator,
object: *Object,
macho_file: *MachO,
parsed_atoms: *Object.ParsedAtoms,
};
fn initRelocFromObject(rel: macho.relocation_info, context: RelocContext) !Relocation {
var parsed_rel = Relocation{
.offset = @intCast(u32, @intCast(u64, rel.r_address) - context.base_addr),
.offset = @intCast(u32, @intCast(u64, rel.r_address) - context.base_offset),
.where = undefined,
.where_index = undefined,
.payload = undefined,
@ -640,7 +676,7 @@ fn initRelocFromObject(rel: macho.relocation_info, context: RelocContext) !Reloc
.n_type = macho.N_SECT,
.n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(match).? + 1),
.n_desc = 0,
.n_value = sect.addr,
.n_value = 0,
});
try context.object.sections_as_symbols.putNoClobber(context.allocator, sect_id, local_sym_index);
break :blk local_sym_index;
@ -677,8 +713,11 @@ fn initRelocFromObject(rel: macho.relocation_info, context: RelocContext) !Reloc
return parsed_rel;
}
pub fn parseRelocs(self: *TextBlock, relocs: []macho.relocation_info, context: RelocContext) !void {
const filtered_relocs = filterRelocs(relocs, context.base_addr, context.base_addr + self.size);
pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocContext) !void {
const tracy = trace(@src());
defer tracy.end();
const filtered_relocs = filterRelocs(relocs, context.base_offset, context.base_offset + self.size);
var it = RelocIterator{
.buffer = filtered_relocs,
};
@ -831,9 +870,20 @@ pub fn parseRelocs(self: *TextBlock, relocs: []macho.relocation_info, context: R
};
if (context.macho_file.got_entries_map.contains(key)) break :blk;
const got_index = @intCast(u32, context.macho_file.got_entries.items.len);
try context.macho_file.got_entries.append(context.allocator, key);
try context.macho_file.got_entries_map.putNoClobber(context.allocator, key, got_index);
const atom = try context.macho_file.createGotAtom(key);
try context.macho_file.got_entries_map.putNoClobber(context.macho_file.base.allocator, key, atom);
const match = MachO.MatchingSection{
.seg = context.macho_file.data_const_segment_cmd_index.?,
.sect = context.macho_file.got_section_index.?,
};
if (context.parsed_atoms.getPtr(match)) |last| {
last.*.next = atom;
atom.prev = last.*;
last.* = atom;
} else {
try context.parsed_atoms.putNoClobber(match, atom);
}
} else if (parsed_rel.payload == .unsigned) {
switch (parsed_rel.where) {
.undef => {
@ -889,9 +939,53 @@ pub fn parseRelocs(self: *TextBlock, relocs: []macho.relocation_info, context: R
if (parsed_rel.where != .undef) break :blk;
if (context.macho_file.stubs_map.contains(parsed_rel.where_index)) break :blk;
const stubs_index = @intCast(u32, context.macho_file.stubs.items.len);
try context.macho_file.stubs.append(context.allocator, parsed_rel.where_index);
try context.macho_file.stubs_map.putNoClobber(context.allocator, parsed_rel.where_index, stubs_index);
const stub_helper_atom = try context.macho_file.createStubHelperAtom();
const laptr_atom = try context.macho_file.createLazyPointerAtom(
stub_helper_atom.local_sym_index,
parsed_rel.where_index,
);
const stub_atom = try context.macho_file.createStubAtom(laptr_atom.local_sym_index);
try context.macho_file.stubs_map.putNoClobber(context.allocator, parsed_rel.where_index, stub_atom);
// TODO clean this up!
if (context.parsed_atoms.getPtr(.{
.seg = context.macho_file.text_segment_cmd_index.?,
.sect = context.macho_file.stub_helper_section_index.?,
})) |last| {
last.*.next = stub_helper_atom;
stub_helper_atom.prev = last.*;
last.* = stub_helper_atom;
} else {
try context.parsed_atoms.putNoClobber(.{
.seg = context.macho_file.text_segment_cmd_index.?,
.sect = context.macho_file.stub_helper_section_index.?,
}, stub_helper_atom);
}
if (context.parsed_atoms.getPtr(.{
.seg = context.macho_file.text_segment_cmd_index.?,
.sect = context.macho_file.stubs_section_index.?,
})) |last| {
last.*.next = stub_atom;
stub_atom.prev = last.*;
last.* = stub_atom;
} else {
try context.parsed_atoms.putNoClobber(.{
.seg = context.macho_file.text_segment_cmd_index.?,
.sect = context.macho_file.stubs_section_index.?,
}, stub_atom);
}
if (context.parsed_atoms.getPtr(.{
.seg = context.macho_file.data_segment_cmd_index.?,
.sect = context.macho_file.la_symbol_ptr_section_index.?,
})) |last| {
last.*.next = laptr_atom;
laptr_atom.prev = last.*;
last.* = laptr_atom;
} else {
try context.parsed_atoms.putNoClobber(.{
.seg = context.macho_file.data_segment_cmd_index.?,
.sect = context.macho_file.la_symbol_ptr_section_index.?,
}, laptr_atom);
}
}
}
}
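The three near-identical blocks above (flagged by the TODO) all append a freshly created atom to the tail of a section's list. A hypothetical helper, not part of this commit, that would factor the pattern (ParsedAtoms, MatchingSection, and Atom are the types this diff introduces):

// Hypothetical refactor sketch, assumed to live alongside Atom.zig.
const Atom = @import("Atom.zig");
const MachO = @import("../MachO.zig");
const Object = @import("Object.zig");

fn appendAtomToSection(
    parsed_atoms: *Object.ParsedAtoms,
    match: MachO.MatchingSection,
    atom: *Atom,
) !void {
    if (parsed_atoms.getPtr(match)) |last| {
        // Link behind the current tail, then advance the tail pointer.
        last.*.next = atom;
        atom.prev = last.*;
        last.* = atom;
    } else {
        // The first atom for this section becomes both head and tail.
        try parsed_atoms.putNoClobber(match, atom);
    }
}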
@ -910,7 +1004,7 @@ fn isSubtractor(rel: macho.relocation_info, arch: Arch) bool {
}
fn parseUnsigned(
self: TextBlock,
self: Atom,
rel: macho.relocation_info,
out: *Relocation,
subtractor: ?u32,
@ -930,9 +1024,9 @@ fn parseUnsigned(
mem.readIntLittle(i32, self.code.items[out.offset..][0..4]);
if (rel.r_extern == 0) {
assert(out.where == .local);
const target_sym = context.macho_file.locals.items[out.where_index];
addend -= @intCast(i64, target_sym.n_value);
const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].Segment;
const target_sect_base_addr = seg.sections.items[rel.r_symbolnum - 1].addr;
addend -= @intCast(i64, target_sect_base_addr);
}
out.payload = .{
@ -944,7 +1038,7 @@ fn parseUnsigned(
};
}
fn parseBranch(self: TextBlock, rel: macho.relocation_info, out: *Relocation, context: RelocContext) void {
fn parseBranch(self: Atom, rel: macho.relocation_info, out: *Relocation, context: RelocContext) void {
_ = self;
assert(rel.r_pcrel == 1);
assert(rel.r_length == 2);
@ -956,7 +1050,7 @@ fn parseBranch(self: TextBlock, rel: macho.relocation_info, out: *Relocation, co
};
}
fn parsePage(self: TextBlock, rel: macho.relocation_info, out: *Relocation, addend: u32) void {
fn parsePage(self: Atom, rel: macho.relocation_info, out: *Relocation, addend: u32) void {
_ = self;
assert(rel.r_pcrel == 1);
assert(rel.r_length == 2);
@ -974,7 +1068,7 @@ fn parsePage(self: TextBlock, rel: macho.relocation_info, out: *Relocation, adde
};
}
fn parsePageOff(self: TextBlock, rel: macho.relocation_info, out: *Relocation, addend: u32) void {
fn parsePageOff(self: Atom, rel: macho.relocation_info, out: *Relocation, addend: u32) void {
assert(rel.r_pcrel == 0);
assert(rel.r_length == 2);
@ -1002,7 +1096,7 @@ fn parsePageOff(self: TextBlock, rel: macho.relocation_info, out: *Relocation, a
};
}
fn parsePointerToGot(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void {
fn parsePointerToGot(self: Atom, rel: macho.relocation_info, out: *Relocation) void {
_ = self;
assert(rel.r_pcrel == 1);
assert(rel.r_length == 2);
@ -1012,12 +1106,12 @@ fn parsePointerToGot(self: TextBlock, rel: macho.relocation_info, out: *Relocati
};
}
fn parseSigned(self: TextBlock, rel: macho.relocation_info, out: *Relocation, context: RelocContext) void {
fn parseSigned(self: Atom, rel: macho.relocation_info, out: *Relocation, context: RelocContext) void {
assert(rel.r_pcrel == 1);
assert(rel.r_length == 2);
const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type);
const correction: i4 = switch (rel_type) {
const correction: u3 = switch (rel_type) {
.X86_64_RELOC_SIGNED => 0,
.X86_64_RELOC_SIGNED_1 => 1,
.X86_64_RELOC_SIGNED_2 => 2,
@ -1027,12 +1121,9 @@ fn parseSigned(self: TextBlock, rel: macho.relocation_info, out: *Relocation, co
var addend: i64 = mem.readIntLittle(i32, self.code.items[out.offset..][0..4]) + correction;
if (rel.r_extern == 0) {
const source_sym = context.macho_file.locals.items[self.local_sym_index];
const target_sym = switch (out.where) {
.local => context.macho_file.locals.items[out.where_index],
.undef => context.macho_file.undefs.items[out.where_index],
};
addend = @intCast(i64, source_sym.n_value + out.offset + 4) + addend - @intCast(i64, target_sym.n_value);
const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].Segment;
const target_sect_base_addr = seg.sections.items[rel.r_symbolnum - 1].addr;
addend += @intCast(i64, context.base_addr + out.offset + correction + 4) - @intCast(i64, target_sect_base_addr);
}
out.payload = .{
@ -1043,7 +1134,7 @@ fn parseSigned(self: TextBlock, rel: macho.relocation_info, out: *Relocation, co
};
}
fn parseLoad(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void {
fn parseLoad(self: Atom, rel: macho.relocation_info, out: *Relocation) void {
assert(rel.r_pcrel == 1);
assert(rel.r_length == 2);
@ -1065,7 +1156,10 @@ fn parseLoad(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void
};
}
pub fn resolveRelocs(self: *TextBlock, macho_file: *MachO) !void {
pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void {
const tracy = trace(@src());
defer tracy.end();
for (self.relocs.items) |rel| {
log.debug("relocating {}", .{rel});
@ -1083,9 +1177,7 @@ pub fn resolveRelocs(self: *TextBlock, macho_file: *MachO) !void {
};
if (is_via_got) {
const dc_seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment;
const got = dc_seg.sections.items[macho_file.got_section_index.?];
const got_index = macho_file.got_entries_map.get(.{
const atom = macho_file.got_entries_map.get(.{
.where = switch (rel.where) {
.local => .local,
.undef => .undef,
@ -1100,7 +1192,7 @@ pub fn resolveRelocs(self: *TextBlock, macho_file: *MachO) !void {
log.err(" this is an internal linker error", .{});
return error.FailedToResolveRelocationTarget;
};
break :blk got.addr + got_index * @sizeOf(u64);
break :blk macho_file.locals.items[atom.local_sym_index].n_value;
}
switch (rel.where) {
@ -1138,13 +1230,24 @@ pub fn resolveRelocs(self: *TextBlock, macho_file: *MachO) !void {
break :blk sym.n_value;
},
.undef => {
const stubs_index = macho_file.stubs_map.get(rel.where_index) orelse {
const atom = macho_file.stubs_map.get(rel.where_index) orelse {
// TODO this is required for incremental compilation when we don't have every
// symbol resolved when creating relocations. In this case, we will insert a branch
// reloc to an undef symbol which may happen to be defined within the binary.
// Then, the undef we point at will be a null symbol (free symbol) which we
// should remove/repurpose. To circumvent this (for now), we check if the symbol
// we point to is garbage, and if so we fall back to the symbol resolver to find it by name.
const n_strx = macho_file.undefs.items[rel.where_index].n_strx;
if (macho_file.symbol_resolver.get(n_strx)) |resolv| inner: {
if (resolv.where != .global) break :inner;
break :blk macho_file.globals.items[resolv.where_index].n_value;
}
// TODO verify in Atom that the symbol is indeed dynamically bound.
break :blk 0; // Dynamically bound by dyld.
};
const segment = macho_file.load_commands.items[macho_file.text_segment_cmd_index.?].Segment;
const stubs = segment.sections.items[macho_file.stubs_section_index.?];
break :blk stubs.addr + stubs_index * stubs.reserved2;
break :blk macho_file.locals.items[atom.local_sym_index].n_value;
},
}
};
@ -1162,7 +1265,7 @@ pub fn resolveRelocs(self: *TextBlock, macho_file: *MachO) !void {
}
}
pub fn format(self: TextBlock, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
pub fn format(self: Atom, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
_ = fmt;
_ = options;
try std.fmt.format(writer, "TextBlock {{ ", .{});


@ -5,25 +5,25 @@ const assert = std.debug.assert;
const fs = std.fs;
const log = std.log.scoped(.dsym);
const macho = std.macho;
const math = std.math;
const mem = std.mem;
const DW = std.dwarf;
const leb = std.leb;
const Allocator = mem.Allocator;
const build_options = @import("build_options");
const commands = @import("commands.zig");
const trace = @import("../../tracy.zig").trace;
const LoadCommand = commands.LoadCommand;
const Module = @import("../../Module.zig");
const Type = @import("../../type.zig").Type;
const link = @import("../../link.zig");
const MachO = @import("../MachO.zig");
const SrcFn = MachO.SrcFn;
const TextBlock = MachO.TextBlock;
const padToIdeal = MachO.padToIdeal;
const commands = @import("commands.zig");
const emptyHeader = commands.emptyHeader;
const LoadCommand = commands.LoadCommand;
const SegmentCommand = commands.SegmentCommand;
const SrcFn = MachO.SrcFn;
const makeStaticString = MachO.makeStaticString;
const padToIdeal = MachO.padToIdeal;
const page_size: u16 = 0x1000;
@ -188,107 +188,86 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void
log.debug("found __DWARF segment free space 0x{x} to 0x{x}", .{ off, off + needed_size });
try self.load_commands.append(allocator, .{
.Segment = SegmentCommand.empty("__DWARF", .{
.vmaddr = vmaddr,
.vmsize = needed_size,
.fileoff = off,
.filesize = needed_size,
}),
.Segment = .{
.inner = .{
.segname = makeStaticString("__DWARF"),
.vmaddr = vmaddr,
.vmsize = needed_size,
.fileoff = off,
.filesize = needed_size,
},
},
});
self.load_commands_dirty = true;
}
if (self.debug_str_section_index == null) {
const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment;
self.debug_str_section_index = @intCast(u16, dwarf_segment.sections.items.len);
assert(self.debug_string_table.items.len == 0);
try dwarf_segment.addSection(allocator, "__debug_str", .{
.addr = dwarf_segment.inner.vmaddr,
.size = @intCast(u32, self.debug_string_table.items.len),
.offset = @intCast(u32, dwarf_segment.inner.fileoff),
.@"align" = 1,
});
self.load_commands_dirty = true;
self.debug_str_section_index = try self.allocateSection(
"__debug_str",
@intCast(u32, self.debug_string_table.items.len),
0,
);
self.debug_string_table_dirty = true;
}
if (self.debug_info_section_index == null) {
const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment;
self.debug_info_section_index = @intCast(u16, dwarf_segment.sections.items.len);
const file_size_hint = 200;
const p_align = 1;
const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null);
log.debug("found __debug_info free space 0x{x} to 0x{x}", .{ off, off + file_size_hint });
try dwarf_segment.addSection(allocator, "__debug_info", .{
.addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff,
.size = file_size_hint,
.offset = @intCast(u32, off),
.@"align" = p_align,
});
self.load_commands_dirty = true;
self.debug_info_section_index = try self.allocateSection("__debug_info", 200, 0);
self.debug_info_header_dirty = true;
}
if (self.debug_abbrev_section_index == null) {
const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment;
self.debug_abbrev_section_index = @intCast(u16, dwarf_segment.sections.items.len);
const file_size_hint = 128;
const p_align = 1;
const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null);
log.debug("found __debug_abbrev free space 0x{x} to 0x{x}", .{ off, off + file_size_hint });
try dwarf_segment.addSection(allocator, "__debug_abbrev", .{
.addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff,
.size = file_size_hint,
.offset = @intCast(u32, off),
.@"align" = p_align,
});
self.load_commands_dirty = true;
self.debug_abbrev_section_index = try self.allocateSection("__debug_abbrev", 128, 0);
self.debug_abbrev_section_dirty = true;
}
if (self.debug_aranges_section_index == null) {
const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment;
self.debug_aranges_section_index = @intCast(u16, dwarf_segment.sections.items.len);
const file_size_hint = 160;
const p_align = 16;
const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null);
log.debug("found __debug_aranges free space 0x{x} to 0x{x}", .{ off, off + file_size_hint });
try dwarf_segment.addSection(allocator, "__debug_aranges", .{
.addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff,
.size = file_size_hint,
.offset = @intCast(u32, off),
.@"align" = p_align,
});
self.load_commands_dirty = true;
self.debug_aranges_section_index = try self.allocateSection("__debug_aranges", 160, 4);
self.debug_aranges_section_dirty = true;
}
if (self.debug_line_section_index == null) {
const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment;
self.debug_line_section_index = @intCast(u16, dwarf_segment.sections.items.len);
const file_size_hint = 250;
const p_align = 1;
const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null);
log.debug("found __debug_line free space 0x{x} to 0x{x}", .{ off, off + file_size_hint });
try dwarf_segment.addSection(allocator, "__debug_line", .{
.addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff,
.size = file_size_hint,
.offset = @intCast(u32, off),
.@"align" = p_align,
});
self.load_commands_dirty = true;
self.debug_line_section_index = try self.allocateSection("__debug_line", 250, 0);
self.debug_line_header_dirty = true;
}
}
fn allocateSection(self: *DebugSymbols, sectname: []const u8, size: u64, alignment: u16) !u16 {
const seg = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment;
var sect = macho.section_64{
.sectname = makeStaticString(sectname),
.segname = seg.inner.segname,
.size = @intCast(u32, size),
.@"align" = alignment,
};
const alignment_pow_2 = try math.powi(u32, 2, alignment);
const off = seg.findFreeSpace(size, alignment_pow_2, null);
assert(off + size <= seg.inner.fileoff + seg.inner.filesize); // TODO expand
log.debug("found {s},{s} section free space 0x{x} to 0x{x}", .{
commands.segmentName(sect),
commands.sectionName(sect),
off,
off + size,
});
sect.addr = seg.inner.vmaddr + off - seg.inner.fileoff;
sect.offset = @intCast(u32, off);
const index = @intCast(u16, seg.sections.items.len);
try seg.sections.append(self.base.base.allocator, sect);
seg.inner.cmdsize += @sizeOf(macho.section_64);
seg.inner.nsects += 1;
// TODO
// const match = MatchingSection{
// .seg = segment_id,
// .sect = index,
// };
// _ = try self.section_ordinals.getOrPut(self.base.allocator, match);
// try self.block_free_lists.putNoClobber(self.base.allocator, match, .{});
self.load_commands_dirty = true;
return index;
}
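Note that allocateSection takes its alignment as a power-of-two exponent (hence the math.powi above), matching the Mach-O section align field; that is why the __debug_aranges call now passes 4 where the old code used a byte alignment of 16. The conversion in isolation:

const std = @import("std");

test "alignment exponent to byte alignment" {
    // 2^4 == 16: the exponent 4 reproduces the old 16-byte alignment.
    try std.testing.expectEqual(@as(u32, 16), try std.math.powi(u32, 2, 4));
}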
pub fn flushModule(self: *DebugSymbols, allocator: *Allocator, options: link.Options) !void {
// TODO This linker code currently assumes there is only 1 compilation unit and it corresponds to the
// Zig source code.
@ -614,15 +593,18 @@ pub fn deinit(self: *DebugSymbols, allocator: *Allocator) void {
}
fn copySegmentCommand(self: *DebugSymbols, allocator: *Allocator, base_cmd: SegmentCommand) !SegmentCommand {
var cmd = SegmentCommand.empty("", .{
.cmdsize = base_cmd.inner.cmdsize,
.vmaddr = base_cmd.inner.vmaddr,
.vmsize = base_cmd.inner.vmsize,
.maxprot = base_cmd.inner.maxprot,
.initprot = base_cmd.inner.initprot,
.nsects = base_cmd.inner.nsects,
.flags = base_cmd.inner.flags,
});
var cmd = SegmentCommand{
.inner = .{
.segname = undefined,
.cmdsize = base_cmd.inner.cmdsize,
.vmaddr = base_cmd.inner.vmaddr,
.vmsize = base_cmd.inner.vmsize,
.maxprot = base_cmd.inner.maxprot,
.initprot = base_cmd.inner.initprot,
.nsects = base_cmd.inner.nsects,
.flags = base_cmd.inner.flags,
},
};
mem.copy(u8, &cmd.inner.segname, &base_cmd.inner.segname);
try cmd.sections.ensureCapacity(allocator, cmd.inner.nsects);
@ -692,7 +674,7 @@ fn writeLoadCommands(self: *DebugSymbols, allocator: *Allocator) !void {
}
fn writeHeader(self: *DebugSymbols) !void {
var header = emptyHeader(.{
var header = commands.emptyHeader(.{
.filetype = macho.MH_DSYM,
});


@ -1,6 +1,7 @@
const Object = @This();
const std = @import("std");
const build_options = @import("build_options");
const assert = std.debug.assert;
const dwarf = std.dwarf;
const fs = std.fs;
@ -13,11 +14,12 @@ const sort = std.sort;
const commands = @import("commands.zig");
const segmentName = commands.segmentName;
const sectionName = commands.sectionName;
const trace = @import("../../tracy.zig").trace;
const Allocator = mem.Allocator;
const Atom = @import("Atom.zig");
const LoadCommand = commands.LoadCommand;
const MachO = @import("../MachO.zig");
const TextBlock = @import("TextBlock.zig");
file: fs.File,
name: []const u8,
@ -54,7 +56,7 @@ tu_name: ?[]const u8 = null,
tu_comp_dir: ?[]const u8 = null,
mtime: ?u64 = null,
text_blocks: std.ArrayListUnmanaged(*TextBlock) = .{},
atoms: std.ArrayListUnmanaged(*Atom) = .{},
sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{},
// TODO symbol mapping and its inverse can probably be simple arrays
@ -62,6 +64,8 @@ sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{},
symbol_mapping: std.AutoHashMapUnmanaged(u32, u32) = .{},
reverse_symbol_mapping: std.AutoHashMapUnmanaged(u32, u32) = .{},
analyzed: bool = false,
const DebugInfo = struct {
inner: dwarf.DwarfInfo,
debug_info: []u8,
@ -134,7 +138,7 @@ pub fn deinit(self: *Object, allocator: *Allocator) void {
self.data_in_code_entries.deinit(allocator);
self.symtab.deinit(allocator);
self.strtab.deinit(allocator);
self.text_blocks.deinit(allocator);
self.atoms.deinit(allocator);
self.sections_as_symbols.deinit(allocator);
self.symbol_mapping.deinit(allocator);
self.reverse_symbol_mapping.deinit(allocator);
@ -316,16 +320,17 @@ const Context = struct {
object: *Object,
macho_file: *MachO,
match: MachO.MatchingSection,
parsed_atoms: *ParsedAtoms,
};
const TextBlockParser = struct {
const AtomParser = struct {
section: macho.section_64,
code: []u8,
relocs: []macho.relocation_info,
nlists: []NlistWithIndex,
index: u32 = 0,
fn peek(self: TextBlockParser) ?NlistWithIndex {
fn peek(self: AtomParser) ?NlistWithIndex {
return if (self.index + 1 < self.nlists.len) self.nlists[self.index + 1] else null;
}
@ -339,9 +344,12 @@ const TextBlockParser = struct {
}
}
pub fn next(self: *TextBlockParser, context: Context) !?*TextBlock {
pub fn next(self: *AtomParser, context: Context) !?*Atom {
if (self.index == self.nlists.len) return null;
const tracy = trace(@src());
defer tracy.end();
var aliases = std.ArrayList(NlistWithIndex).init(context.allocator);
defer aliases.deinit();
@ -364,12 +372,12 @@ const TextBlockParser = struct {
}
if (aliases.items.len > 1) {
// Bubble-up senior symbol as the main link to the text block.
// Bubble-up senior symbol as the main link to the atom.
sort.sort(
NlistWithIndex,
aliases.items,
context,
TextBlockParser.lessThanBySeniority,
AtomParser.lessThanBySeniority,
);
}
@ -389,12 +397,12 @@ const TextBlockParser = struct {
else
max_align;
const stab: ?TextBlock.Stab = if (context.object.debug_info) |di| blk: {
const stab: ?Atom.Stab = if (context.object.debug_info) |di| blk: {
// TODO there has to be a better way to handle this.
for (di.inner.func_list.items) |func| {
if (func.pc_range) |range| {
if (senior_nlist.nlist.n_value >= range.start and senior_nlist.nlist.n_value < range.end) {
break :blk TextBlock.Stab{
break :blk Atom.Stab{
.function = range.end - range.start,
};
}
@ -405,28 +413,31 @@ const TextBlockParser = struct {
break :blk .static;
} else null;
const block = try context.allocator.create(TextBlock);
block.* = TextBlock.empty;
block.local_sym_index = senior_nlist.index;
block.stab = stab;
block.size = size;
block.alignment = actual_align;
try context.macho_file.managed_blocks.append(context.allocator, block);
const atom = try context.macho_file.createEmptyAtom(senior_nlist.index, size, actual_align);
atom.stab = stab;
try block.code.appendSlice(context.allocator, code);
const is_zerofill = blk: {
const section_type = commands.sectionType(self.section);
break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL;
};
if (!is_zerofill) {
mem.copy(u8, atom.code.items, code);
}
try block.aliases.ensureTotalCapacity(context.allocator, aliases.items.len);
try atom.aliases.ensureTotalCapacity(context.allocator, aliases.items.len);
for (aliases.items) |alias| {
block.aliases.appendAssumeCapacity(alias.index);
atom.aliases.appendAssumeCapacity(alias.index);
const sym = &context.macho_file.locals.items[alias.index];
sym.n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(context.match).? + 1);
}
try block.parseRelocs(self.relocs, .{
.base_addr = start_addr,
try atom.parseRelocs(self.relocs, .{
.base_addr = self.section.addr,
.base_offset = start_addr,
.allocator = context.allocator,
.object = context.object,
.macho_file = context.macho_file,
.parsed_atoms = context.parsed_atoms,
});
if (context.macho_file.has_dices) {
@ -435,10 +446,10 @@ const TextBlockParser = struct {
senior_nlist.nlist.n_value,
senior_nlist.nlist.n_value + size,
);
try block.dices.ensureTotalCapacity(context.allocator, dices.len);
try atom.dices.ensureTotalCapacity(context.allocator, dices.len);
for (dices) |dice| {
block.dices.appendAssumeCapacity(.{
atom.dices.appendAssumeCapacity(.{
.offset = dice.offset - try math.cast(u32, senior_nlist.nlist.n_value),
.length = dice.length,
.kind = dice.kind,
@ -448,16 +459,22 @@ const TextBlockParser = struct {
self.index += 1;
return block;
return atom;
}
};
pub fn parseTextBlocks(
pub const ParsedAtoms = std.AutoHashMap(MachO.MatchingSection, *Atom);
pub fn parseIntoAtoms(
self: *Object,
allocator: *Allocator,
object_id: u16,
macho_file: *MachO,
) !void {
) !ParsedAtoms {
const tracy = trace(@src());
defer tracy.end();
var parsed_atoms = ParsedAtoms.init(allocator);
const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
log.debug("analysing {s}", .{self.name});
@ -498,7 +515,7 @@ pub fn parseTextBlocks(
for (seg.sections.items) |sect, id| {
const sect_id = @intCast(u8, id);
log.debug("putting section '{s},{s}' as a TextBlock", .{
log.debug("putting section '{s},{s}' as an Atom", .{
segmentName(sect),
sectionName(sect),
});
@ -523,14 +540,17 @@ pub fn parseTextBlocks(
// Symbols within this section only.
const filtered_nlists = NlistWithIndex.filterInSection(sorted_nlists, sect);
// TODO rewrite and re-enable dead-code stripping optimisation. I think it might make sense
// to do this in a standalone pass after we parse the sections as atoms.
// In release mode, if the object file was generated with dead code stripping optimisations,
// note it now and parse sections as atoms.
const is_splittable = blk: {
if (macho_file.base.options.optimize_mode == .Debug) break :blk false;
break :blk self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0;
};
// const is_splittable = blk: {
// if (macho_file.base.options.optimize_mode == .Debug) break :blk false;
// break :blk self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0;
// };
const is_splittable = false;
macho_file.has_dices = blk: {
macho_file.has_dices = macho_file.has_dices or blk: {
if (self.text_section_index) |index| {
if (index != id) break :blk false;
if (self.data_in_code_entries.items.len == 0) break :blk false;
@ -541,12 +561,12 @@ pub fn parseTextBlocks(
macho_file.has_stabs = macho_file.has_stabs or self.debug_info != null;
next: {
if (is_splittable) blocks: {
if (filtered_nlists.len == 0) break :blocks;
if (is_splittable) atoms: {
if (filtered_nlists.len == 0) break :atoms;
// If the first nlist does not match the start of the section,
// then we need to encapsulate the memory range [section start, first symbol)
// as a temporary symbol and insert the matching TextBlock.
// as a temporary symbol and insert the matching Atom.
const first_nlist = filtered_nlists[0].nlist;
if (first_nlist.n_value > sect.addr) {
const sym_name = try std.fmt.allocPrint(allocator, "l_{s}_{s}_{s}", .{
@ -556,44 +576,45 @@ pub fn parseTextBlocks(
});
defer allocator.free(sym_name);
const block_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
const block_local_sym_index = @intCast(u32, macho_file.locals.items.len);
const atom_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
const atom_local_sym_index = @intCast(u32, macho_file.locals.items.len);
try macho_file.locals.append(allocator, .{
.n_strx = try macho_file.makeString(sym_name),
.n_type = macho.N_SECT,
.n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1),
.n_desc = 0,
.n_value = sect.addr,
.n_value = 0,
});
try self.sections_as_symbols.putNoClobber(allocator, sect_id, block_local_sym_index);
break :blk block_local_sym_index;
try self.sections_as_symbols.putNoClobber(allocator, sect_id, atom_local_sym_index);
break :blk atom_local_sym_index;
};
const atom_code = code[0 .. first_nlist.n_value - sect.addr];
const atom_size = atom_code.len;
const atom = try macho_file.createEmptyAtom(atom_local_sym_index, atom_size, sect.@"align");
const block_code = code[0 .. first_nlist.n_value - sect.addr];
const block_size = block_code.len;
const is_zerofill = blk: {
const section_type = commands.sectionType(sect);
break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL;
};
if (!is_zerofill) {
mem.copy(u8, atom.code.items, atom_code);
}
const block = try allocator.create(TextBlock);
block.* = TextBlock.empty;
block.local_sym_index = block_local_sym_index;
block.size = block_size;
block.alignment = sect.@"align";
try macho_file.managed_blocks.append(allocator, block);
try block.code.appendSlice(allocator, block_code);
try block.parseRelocs(relocs, .{
.base_addr = 0,
try atom.parseRelocs(relocs, .{
.base_addr = sect.addr,
.base_offset = 0,
.allocator = allocator,
.object = self,
.macho_file = macho_file,
.parsed_atoms = &parsed_atoms,
});
if (macho_file.has_dices) {
const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + block_size);
try block.dices.ensureTotalCapacity(allocator, dices.len);
const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + atom_size);
try atom.dices.ensureTotalCapacity(allocator, dices.len);
for (dices) |dice| {
block.dices.appendAssumeCapacity(.{
atom.dices.appendAssumeCapacity(.{
.offset = dice.offset - try math.cast(u32, sect.addr),
.length = dice.length,
.kind = dice.kind,
@ -601,29 +622,17 @@ pub fn parseTextBlocks(
}
}
// Update target section's metadata
// TODO should we update segment's size here too?
// How does it tie with incremental space allocs?
const tseg = &macho_file.load_commands.items[match.seg].Segment;
const tsect = &tseg.sections.items[match.sect];
const new_alignment = math.max(tsect.@"align", block.alignment);
const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment);
const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size;
tsect.size = new_size;
tsect.@"align" = new_alignment;
if (macho_file.blocks.getPtr(match)) |last| {
last.*.next = block;
block.prev = last.*;
last.* = block;
if (parsed_atoms.getPtr(match)) |last| {
last.*.next = atom;
atom.prev = last.*;
last.* = atom;
} else {
try macho_file.blocks.putNoClobber(allocator, match, block);
try parsed_atoms.putNoClobber(match, atom);
}
try self.text_blocks.append(allocator, block);
try self.atoms.append(allocator, atom);
}
var parser = TextBlockParser{
var parser = AtomParser{
.section = sect,
.code = code,
.relocs = relocs,
@ -635,10 +644,11 @@ pub fn parseTextBlocks(
.object = self,
.macho_file = macho_file,
.match = match,
})) |block| {
const sym = macho_file.locals.items[block.local_sym_index];
.parsed_atoms = &parsed_atoms,
})) |atom| {
const sym = macho_file.locals.items[atom.local_sym_index];
const is_ext = blk: {
const orig_sym_id = self.reverse_symbol_mapping.get(block.local_sym_index) orelse unreachable;
const orig_sym_id = self.reverse_symbol_mapping.get(atom.local_sym_index) orelse unreachable;
break :blk MachO.symbolIsExt(self.symtab.items[orig_sym_id]);
};
if (is_ext) {
@ -662,38 +672,26 @@ pub fn parseTextBlocks(
// In x86_64 relocs, it can so happen that the compiler refers to the same
// atom by both the actual assigned symbol and the start of the section. In this
// case, we need to link the two together, so we add an alias.
try block.aliases.append(allocator, alias);
try atom.aliases.append(allocator, alias);
}
}
// Update target section's metadata
// TODO should we update segment's size here too?
// How does it tie with incremental space allocs?
const tseg = &macho_file.load_commands.items[match.seg].Segment;
const tsect = &tseg.sections.items[match.sect];
const new_alignment = math.max(tsect.@"align", block.alignment);
const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment);
const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size;
tsect.size = new_size;
tsect.@"align" = new_alignment;
if (macho_file.blocks.getPtr(match)) |last| {
last.*.next = block;
block.prev = last.*;
last.* = block;
if (parsed_atoms.getPtr(match)) |last| {
last.*.next = atom;
atom.prev = last.*;
last.* = atom;
} else {
try macho_file.blocks.putNoClobber(allocator, match, block);
try parsed_atoms.putNoClobber(match, atom);
}
try self.text_blocks.append(allocator, block);
try self.atoms.append(allocator, atom);
}
break :next;
}
// Since there is no symbol to refer to this block, we create
// Since there is no symbol to refer to this atom, we create
// a temp one, unless we already did that when working out the relocations
// of other text blocks.
// of other atoms.
const sym_name = try std.fmt.allocPrint(allocator, "l_{s}_{s}_{s}", .{
self.name,
segmentName(sect),
@ -701,41 +699,43 @@ pub fn parseTextBlocks(
});
defer allocator.free(sym_name);
const block_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
const block_local_sym_index = @intCast(u32, macho_file.locals.items.len);
const atom_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
const atom_local_sym_index = @intCast(u32, macho_file.locals.items.len);
try macho_file.locals.append(allocator, .{
.n_strx = try macho_file.makeString(sym_name),
.n_type = macho.N_SECT,
.n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1),
.n_desc = 0,
.n_value = sect.addr,
.n_value = 0,
});
try self.sections_as_symbols.putNoClobber(allocator, sect_id, block_local_sym_index);
break :blk block_local_sym_index;
try self.sections_as_symbols.putNoClobber(allocator, sect_id, atom_local_sym_index);
break :blk atom_local_sym_index;
};
const atom = try macho_file.createEmptyAtom(atom_local_sym_index, sect.size, sect.@"align");
const block = try allocator.create(TextBlock);
block.* = TextBlock.empty;
block.local_sym_index = block_local_sym_index;
block.size = sect.size;
block.alignment = sect.@"align";
try macho_file.managed_blocks.append(allocator, block);
const is_zerofill = blk: {
const section_type = commands.sectionType(sect);
break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL;
};
if (!is_zerofill) {
mem.copy(u8, atom.code.items, code);
}
try block.code.appendSlice(allocator, code);
try block.parseRelocs(relocs, .{
.base_addr = 0,
try atom.parseRelocs(relocs, .{
.base_addr = sect.addr,
.base_offset = 0,
.allocator = allocator,
.object = self,
.macho_file = macho_file,
.parsed_atoms = &parsed_atoms,
});
if (macho_file.has_dices) {
const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + sect.size);
try block.dices.ensureTotalCapacity(allocator, dices.len);
try atom.dices.ensureTotalCapacity(allocator, dices.len);
for (dices) |dice| {
block.dices.appendAssumeCapacity(.{
atom.dices.appendAssumeCapacity(.{
.offset = dice.offset - try math.cast(u32, sect.addr),
.length = dice.length,
.kind = dice.kind,
@ -743,12 +743,12 @@ pub fn parseTextBlocks(
}
}
// Since this block gets a helper local temporary symbol that didn't exist
// Since this atom gets a helper local temporary symbol that didn't exist
// in the object file which encompasses the entire section, we need to traverse
// the filtered symbols and note which symbols are contained within so that
// we can properly allocate addresses down the line.
// While we're at it, we need to update the segment,section mapping of each symbol too.
try block.contained.ensureTotalCapacity(allocator, filtered_nlists.len);
try atom.contained.ensureTotalCapacity(allocator, filtered_nlists.len);
for (filtered_nlists) |nlist_with_index| {
const nlist = nlist_with_index.nlist;
@ -756,12 +756,12 @@ pub fn parseTextBlocks(
const local = &macho_file.locals.items[local_sym_index];
local.n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1);
const stab: ?TextBlock.Stab = if (self.debug_info) |di| blk: {
const stab: ?Atom.Stab = if (self.debug_info) |di| blk: {
// TODO there has to be a better way to handle this.
for (di.inner.func_list.items) |func| {
if (func.pc_range) |range| {
if (nlist.n_value >= range.start and nlist.n_value < range.end) {
break :blk TextBlock.Stab{
break :blk Atom.Stab{
.function = range.end - range.start,
};
}
@ -772,35 +772,25 @@ pub fn parseTextBlocks(
break :blk .static;
} else null;
block.contained.appendAssumeCapacity(.{
atom.contained.appendAssumeCapacity(.{
.local_sym_index = local_sym_index,
.offset = nlist.n_value - sect.addr,
.stab = stab,
});
}
// Update target section's metadata
// TODO should we update segment's size here too?
// How does it tie with incremental space allocs?
const tseg = &macho_file.load_commands.items[match.seg].Segment;
const tsect = &tseg.sections.items[match.sect];
const new_alignment = math.max(tsect.@"align", block.alignment);
const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment);
const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size;
tsect.size = new_size;
tsect.@"align" = new_alignment;
if (macho_file.blocks.getPtr(match)) |last| {
last.*.next = block;
block.prev = last.*;
last.* = block;
if (parsed_atoms.getPtr(match)) |last| {
last.*.next = atom;
atom.prev = last.*;
last.* = atom;
} else {
try macho_file.blocks.putNoClobber(allocator, match, block);
try parsed_atoms.putNoClobber(match, atom);
}
try self.text_blocks.append(allocator, block);
try self.atoms.append(allocator, atom);
}
}
return parsed_atoms;
}
fn parseSymtab(self: *Object, allocator: *Allocator) !void {
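parseIntoAtoms now hands the per-section atom lists back to the caller instead of appending to macho_file.blocks directly. A hypothetical sketch of how a caller might walk the result; each map value is the tail of that section's list, so traversal follows prev (object, object_id, macho_file, and allocator are assumed in scope as in MachO.zig's object-parsing loop):

var parsed_atoms = try object.parseIntoAtoms(allocator, object_id, macho_file);
defer parsed_atoms.deinit();

var it = parsed_atoms.iterator();
while (it.next()) |entry| {
    var atom = entry.value_ptr.*; // tail of this section's atom list
    while (true) {
        // ... place `atom` within the section keyed by entry.key_ptr.* ...
        atom = atom.prev orelse break;
    }
}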


@ -9,15 +9,6 @@ pub const Pointer = struct {
name: ?[]const u8 = null,
};
pub fn pointerCmp(context: void, a: Pointer, b: Pointer) bool {
_ = context;
if (a.segment_id < b.segment_id) return true;
if (a.segment_id == b.segment_id) {
return a.offset < b.offset;
}
return false;
}
pub fn rebaseInfoSize(pointers: []const Pointer) !u64 {
var stream = std.io.countingWriter(std.io.null_writer);
var writer = stream.writer();


@ -9,6 +9,7 @@ const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const MachO = @import("../MachO.zig");
const makeStaticString = MachO.makeStaticString;
const padToIdeal = MachO.padToIdeal;
pub const HeaderArgs = struct {
@ -217,75 +218,6 @@ pub const SegmentCommand = struct {
inner: macho.segment_command_64,
sections: std.ArrayListUnmanaged(macho.section_64) = .{},
const SegmentOptions = struct {
cmdsize: u32 = @sizeOf(macho.segment_command_64),
vmaddr: u64 = 0,
vmsize: u64 = 0,
fileoff: u64 = 0,
filesize: u64 = 0,
maxprot: macho.vm_prot_t = macho.VM_PROT_NONE,
initprot: macho.vm_prot_t = macho.VM_PROT_NONE,
nsects: u32 = 0,
flags: u32 = 0,
};
pub fn empty(comptime segname: []const u8, opts: SegmentOptions) SegmentCommand {
return .{
.inner = .{
.cmd = macho.LC_SEGMENT_64,
.cmdsize = opts.cmdsize,
.segname = makeStaticString(segname),
.vmaddr = opts.vmaddr,
.vmsize = opts.vmsize,
.fileoff = opts.fileoff,
.filesize = opts.filesize,
.maxprot = opts.maxprot,
.initprot = opts.initprot,
.nsects = opts.nsects,
.flags = opts.flags,
},
};
}
const SectionOptions = struct {
addr: u64 = 0,
size: u64 = 0,
offset: u32 = 0,
@"align": u32 = 0,
reloff: u32 = 0,
nreloc: u32 = 0,
flags: u32 = macho.S_REGULAR,
reserved1: u32 = 0,
reserved2: u32 = 0,
reserved3: u32 = 0,
};
pub fn addSection(
self: *SegmentCommand,
alloc: *Allocator,
comptime sectname: []const u8,
opts: SectionOptions,
) !void {
var section = macho.section_64{
.sectname = makeStaticString(sectname),
.segname = undefined,
.addr = opts.addr,
.size = opts.size,
.offset = opts.offset,
.@"align" = opts.@"align",
.reloff = opts.reloff,
.nreloc = opts.nreloc,
.flags = opts.flags,
.reserved1 = opts.reserved1,
.reserved2 = opts.reserved2,
.reserved3 = opts.reserved3,
};
mem.copy(u8, &section.segname, &self.inner.segname);
try self.sections.append(alloc, section);
self.inner.cmdsize += @sizeOf(macho.section_64);
self.inner.nsects += 1;
}
pub fn read(alloc: *Allocator, reader: anytype) !SegmentCommand {
const inner = try reader.readStruct(macho.segment_command_64);
var segment = SegmentCommand{
@ -314,10 +246,8 @@ pub const SegmentCommand = struct {
}
pub fn allocatedSize(self: SegmentCommand, start: u64) u64 {
assert(start > 0);
if (start == self.inner.fileoff)
return 0;
var min_pos: u64 = std.math.maxInt(u64);
assert(start >= self.inner.fileoff);
var min_pos: u64 = self.inner.fileoff + self.inner.filesize;
for (self.sections.items) |section| {
if (section.offset <= start) continue;
if (section.offset < min_pos) min_pos = section.offset;
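The rewritten scan is bounded by the segment's own file range instead of maxInt(u64), so free space past the last section now ends at the segment boundary. A self-contained sketch of the visible logic under a hypothetical layout:

const std = @import("std");

test "allocatedSize scan (hypothetical layout)" {
    const fileoff: u64 = 0;
    const filesize: u64 = 0x4000;
    const section_offsets = [_]u64{ 0x1000, 0x2000 };
    const start: u64 = 0x1800;

    // Free room at `start` runs to the next section offset, or to the end
    // of the segment's file range if no section follows.
    var min_pos: u64 = fileoff + filesize;
    for (section_offsets) |offset| {
        if (offset <= start) continue;
        if (offset < min_pos) min_pos = offset;
    }
    try std.testing.expectEqual(@as(u64, 0x2000), min_pos);
}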
@ -337,12 +267,12 @@ pub const SegmentCommand = struct {
return null;
}
pub fn findFreeSpace(self: SegmentCommand, object_size: u64, min_alignment: u16, start: ?u64) u64 {
var st: u64 = if (start) |v| v else self.inner.fileoff;
while (self.detectAllocCollision(st, object_size)) |item_end| {
st = mem.alignForwardGeneric(u64, item_end, min_alignment);
pub fn findFreeSpace(self: SegmentCommand, object_size: u64, min_alignment: u64, start: ?u64) u64 {
var offset: u64 = if (start) |v| v else self.inner.fileoff;
while (self.detectAllocCollision(offset, object_size)) |item_end| {
offset = mem.alignForwardGeneric(u64, item_end, min_alignment);
}
return st;
return offset;
}
fn eql(self: SegmentCommand, other: SegmentCommand) bool {
@ -427,13 +357,6 @@ pub fn createLoadDylibCommand(
return dylib_cmd;
}
fn makeStaticString(bytes: []const u8) [16]u8 {
var buf = [_]u8{0} ** 16;
assert(bytes.len <= buf.len);
mem.copy(u8, &buf, bytes);
return buf;
}
fn parseName(name: *const [16]u8) []const u8 {
const len = mem.indexOfScalar(u8, name, @as(u8, 0)) orelse name.len;
return name[0..len];
@ -514,17 +437,14 @@ test "read-write segment command" {
};
var cmd = SegmentCommand{
.inner = .{
.cmd = macho.LC_SEGMENT_64,
.cmdsize = 152,
.segname = makeStaticString("__TEXT"),
.vmaddr = 4294967296,
.vmsize = 294912,
.fileoff = 0,
.filesize = 294912,
.maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE | macho.VM_PROT_EXECUTE,
.initprot = macho.VM_PROT_EXECUTE | macho.VM_PROT_READ,
.nsects = 1,
.flags = 0,
},
};
try cmd.sections.append(gpa, .{
@ -534,12 +454,7 @@ test "read-write segment command" {
.size = 448,
.offset = 16384,
.@"align" = 2,
.reloff = 0,
.nreloc = 0,
.flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS,
.reserved1 = 0,
.reserved2 = 0,
.reserved3 = 0,
});
defer cmd.deinit(gpa);
try testRead(gpa, in_buffer, LoadCommand{ .Segment = cmd });


@ -27,8 +27,8 @@ pub fn addCases(ctx: *TestContext) !void {
// Regular old hello world
case.addCompareOutput(
\\extern "c" fn write(usize, usize, usize) usize;
\\extern "c" fn exit(usize) noreturn;
\\extern fn write(usize, usize, usize) usize;
\\extern fn exit(usize) noreturn;
\\
\\pub export fn main() noreturn {
\\ print();
@ -47,8 +47,8 @@ pub fn addCases(ctx: *TestContext) !void {
// Print it 4 times and force growth and realloc.
case.addCompareOutput(
\\extern "c" fn write(usize, usize, usize) usize;
\\extern "c" fn exit(usize) noreturn;
\\extern fn write(usize, usize, usize) usize;
\\extern fn exit(usize) noreturn;
\\
\\pub export fn main() noreturn {
\\ print();
@ -74,8 +74,8 @@ pub fn addCases(ctx: *TestContext) !void {
// Print it once, and change the message.
case.addCompareOutput(
\\extern "c" fn write(usize, usize, usize) usize;
\\extern "c" fn exit(usize) noreturn;
\\extern fn write(usize, usize, usize) usize;
\\extern fn exit(usize) noreturn;
\\
\\pub export fn main() noreturn {
\\ print();
@ -94,8 +94,8 @@ pub fn addCases(ctx: *TestContext) !void {
// Now we print it twice.
case.addCompareOutput(
\\extern "c" fn write(usize, usize, usize) usize;
\\extern "c" fn exit(usize) noreturn;
\\extern fn write(usize, usize, usize) usize;
\\extern fn exit(usize) noreturn;
\\
\\pub export fn main() noreturn {
\\ print();
@ -121,7 +121,7 @@ pub fn addCases(ctx: *TestContext) !void {
// This test case also covers an infrequent scenario where the string table *may* be relocated
// into the position preceding the symbol table, which results in a dyld error.
case.addCompareOutput(
\\extern "c" fn exit(usize) noreturn;
\\extern fn exit(usize) noreturn;
\\
\\pub export fn main() noreturn {
\\ exit(0);
@ -131,8 +131,8 @@ pub fn addCases(ctx: *TestContext) !void {
);
case.addCompareOutput(
\\extern "c" fn exit(usize) noreturn;
\\extern "c" fn write(usize, usize, usize) usize;
\\extern fn exit(usize) noreturn;
\\extern fn write(usize, usize, usize) usize;
\\
\\pub export fn main() noreturn {
\\ _ = write(1, @ptrToInt("Hey!\n"), 5);