Merge pull request #12140 from ziglang/macho-gc-sections

macho: add support for `-dead_strip` (GC sections) and simplify symbol resolution
Jakub Konka 2022-07-23 00:01:09 -07:00 committed by GitHub
commit a8bfddfaea
22 changed files with 3561 additions and 2922 deletions

@ -757,10 +757,12 @@ set(ZIG_STAGE2_SOURCES
"${CMAKE_SOURCE_DIR}/src/link/MachO/Object.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/Trie.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/bind.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/dead_strip.zig"
"${CMAKE_SOURCE_DIR}/src/link/Plan9.zig"
"${CMAKE_SOURCE_DIR}/src/link/Plan9/aout.zig"
"${CMAKE_SOURCE_DIR}/src/link/Wasm.zig"
"${CMAKE_SOURCE_DIR}/src/link/msdos-stub.bin"
"${CMAKE_SOURCE_DIR}/src/link/strtab.zig"
"${CMAKE_SOURCE_DIR}/src/link/tapi.zig"
"${CMAKE_SOURCE_DIR}/src/link/tapi/Tokenizer.zig"
"${CMAKE_SOURCE_DIR}/src/link/tapi/parse.zig"

@ -1561,6 +1561,10 @@ pub const LibExeObjStep = struct {
/// safely garbage-collected during the linking phase.
link_function_sections: bool = false,
/// Remove functions and data that are unreachable by the entry point or
/// exported symbols.
link_gc_sections: ?bool = null,
linker_allow_shlib_undefined: ?bool = null,
/// Permit read-only relocations in read-only segments. Disallowed by default.
@ -2705,6 +2709,9 @@ pub const LibExeObjStep = struct {
if (self.link_function_sections) {
try zig_args.append("-ffunction-sections");
}
if (self.link_gc_sections) |x| {
try zig_args.append(if (x) "--gc-sections" else "--no-gc-sections");
}
if (self.linker_allow_shlib_undefined) |x| {
try zig_args.append(if (x) "-fallow-shlib-undefined" else "-fno-allow-shlib-undefined");
}
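A minimal build.zig sketch using the new option (the artifact name and root source path are placeholders, not part of this diff):
    const exe = b.addExecutable("app", "src/main.zig");
    // Ask the linker to strip functions and data unreachable from the entry
    // point or exports; on MachO this now lowers to ld64-style `-dead_strip`.
    exe.link_gc_sections = true;
    exe.install();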

@ -50,7 +50,7 @@ pub fn create(builder: *Builder, source: build.FileSource, obj_format: std.Targe
/// For example, if the two extracted values were saved as `vmaddr` and `entryoff` respectively
/// they could then be added with this simple program `vmaddr entryoff +`.
const Action = struct {
tag: enum { match, compute_cmp },
tag: enum { match, not_present, compute_cmp },
phrase: []const u8,
expected: ?ComputeCompareExpected = null,
@ -63,7 +63,7 @@ const Action = struct {
/// name {*}libobjc{*}.dylib => will match `name` followed by a token which contains `libobjc` and `.dylib`
/// in that order with other letters in between
fn match(act: Action, haystack: []const u8, global_vars: anytype) !bool {
assert(act.tag == .match);
assert(act.tag == .match or act.tag == .not_present);
var candidate_var: ?struct { name: []const u8, value: u64 } = null;
var hay_it = mem.tokenize(u8, mem.trim(u8, haystack, " "), " ");
@ -202,6 +202,13 @@ const Check = struct {
}) catch unreachable;
}
fn notPresent(self: *Check, phrase: []const u8) void {
self.actions.append(.{
.tag = .not_present,
.phrase = self.builder.dupe(phrase),
}) catch unreachable;
}
fn computeCmp(self: *Check, phrase: []const u8, expected: ComputeCompareExpected) void {
self.actions.append(.{
.tag = .compute_cmp,
@ -226,6 +233,15 @@ pub fn checkNext(self: *CheckObjectStep, phrase: []const u8) void {
last.match(phrase);
}
/// Adds another searched phrase to the latest created Check with `CheckObjectStep.checkStart(...)`,
/// but expects the phrase to be absent from the output.
/// Asserts that at least one check already exists.
pub fn checkNotPresent(self: *CheckObjectStep, phrase: []const u8) void {
assert(self.checks.items.len > 0);
const last = &self.checks.items[self.checks.items.len - 1];
last.notPresent(phrase);
}
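For context, a minimal usage sketch (hedged: the test wiring and the `checkInSymtab` name are assumptions based on the surrounding API, not part of this diff):
    // Assume `builder` and a compiled executable step `exe` already exist.
    const check = CheckObjectStep.create(builder, exe.getOutputSource(), .macho);
    check.checkInSymtab(); // assumed name of the symbol-table check starter
    // Fails the test if the phrase matches anywhere in the dumped symtab,
    // e.g. to assert a function was dead-stripped.
    check.checkNotPresent("_some_dead_function");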
/// Creates a new check that specifically checks the symbol table parsed and dumped from the object
/// file.
/// Issuing this check will force parsing and dumping of the symbol table.
@ -293,6 +309,21 @@ fn make(step: *Step) !void {
return error.TestFailed;
}
},
.not_present => {
while (it.next()) |line| {
if (try act.match(line, &vars)) {
std.debug.print(
\\
\\========= Expected not to find: ===================
\\{s}
\\========= But parsed file does contain it: ========
\\{s}
\\
, .{ act.phrase, output });
return error.TestFailed;
}
}
},
.compute_cmp => {
const res = act.computeCmp(gpa, vars) catch |err| switch (err) {
error.UnknownVariable => {

@ -3174,7 +3174,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions.
const func = func_payload.data;
const fn_owner_decl = mod.declPtr(func.owner_decl);
try self.genSetReg(Type.initTag(.u64), .x30, .{
.got_load = fn_owner_decl.link.macho.local_sym_index,
.got_load = fn_owner_decl.link.macho.sym_index,
});
// blr x30
_ = try self.addInst(.{
@ -3190,14 +3190,14 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions.
lib_name,
});
}
const n_strx = try macho_file.getGlobalSymbol(mem.sliceTo(decl_name, 0));
const sym_index = try macho_file.getGlobalSymbol(mem.sliceTo(decl_name, 0));
_ = try self.addInst(.{
.tag = .call_extern,
.data = .{
.extern_fn = .{
.atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.local_sym_index,
.sym_name = n_strx,
.relocation = .{
.atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.sym_index,
.sym_index = sym_index,
},
},
});
@ -4157,7 +4157,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro
.data = .{
.payload = try self.addExtra(Mir.LoadMemoryPie{
.register = @enumToInt(src_reg),
.atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.local_sym_index,
.atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.sym_index,
.sym_index = sym_index,
}),
},
@ -4270,7 +4270,7 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
.data = .{
.payload = try self.addExtra(Mir.LoadMemoryPie{
.register = @enumToInt(reg),
.atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.local_sym_index,
.atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.sym_index,
.sym_index = sym_index,
}),
},
@ -4578,8 +4578,8 @@ fn lowerDeclRef(self: *Self, tv: TypedValue, decl_index: Module.Decl.Index) Inne
} else if (self.bin_file.cast(link.File.MachO)) |_| {
// Because MachO is PIE-always-on, we defer memory address resolution until
// the linker has enough info to perform relocations.
assert(decl.link.macho.local_sym_index != 0);
return MCValue{ .got_load = decl.link.macho.local_sym_index };
assert(decl.link.macho.sym_index != 0);
return MCValue{ .got_load = decl.link.macho.sym_index };
} else if (self.bin_file.cast(link.File.Coff)) |coff_file| {
const got_addr = coff_file.offset_table_virtual_address + decl.link.coff.offset_table_index * ptr_bytes;
return MCValue{ .memory = got_addr };

@ -649,7 +649,7 @@ fn mirDebugEpilogueBegin(self: *Emit) !void {
fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) !void {
assert(emit.mir.instructions.items(.tag)[inst] == .call_extern);
const extern_fn = emit.mir.instructions.items(.data)[inst].extern_fn;
const relocation = emit.mir.instructions.items(.data)[inst].relocation;
if (emit.bin_file.cast(link.File.MachO)) |macho_file| {
const offset = blk: {
@ -659,10 +659,13 @@ fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) !void {
break :blk offset;
};
// Add relocation to the decl.
const atom = macho_file.atom_by_index_table.get(extern_fn.atom_index).?;
const atom = macho_file.atom_by_index_table.get(relocation.atom_index).?;
try atom.relocs.append(emit.bin_file.allocator, .{
.offset = offset,
.target = .{ .global = extern_fn.sym_name },
.target = .{
.sym_index = relocation.sym_index,
.file = null,
},
.addend = 0,
.subtractor = null,
.pcrel = true,
@ -864,7 +867,7 @@ fn mirLoadMemoryPie(emit: *Emit, inst: Mir.Inst.Index) !void {
// Page reloc for adrp instruction.
try atom.relocs.append(emit.bin_file.allocator, .{
.offset = offset,
.target = .{ .local = data.sym_index },
.target = .{ .sym_index = data.sym_index, .file = null },
.addend = 0,
.subtractor = null,
.pcrel = true,
@ -882,7 +885,7 @@ fn mirLoadMemoryPie(emit: *Emit, inst: Mir.Inst.Index) !void {
// Pageoff reloc for adrp instruction.
try atom.relocs.append(emit.bin_file.allocator, .{
.offset = offset + 4,
.target = .{ .local = data.sym_index },
.target = .{ .sym_index = data.sym_index, .file = null },
.addend = 0,
.subtractor = null,
.pcrel = false,

@ -225,14 +225,16 @@ pub const Inst = struct {
///
/// Used by e.g. b
inst: Index,
/// An extern function
/// Relocation for the linker where:
/// * `atom_index` is the index of the source
/// * `sym_index` is the index of the target
///
/// Used by e.g. call_extern
extern_fn: struct {
relocation: struct {
/// Index of the containing atom.
atom_index: u32,
/// Index into the linker's string table.
sym_name: u32,
sym_index: u32,
},
/// A 16-bit immediate value.
///

@ -2563,7 +2563,7 @@ fn lowerDeclRef(self: *Self, tv: TypedValue, decl_index: Module.Decl.Index) Inne
} else if (self.bin_file.cast(link.File.MachO)) |_| {
// TODO I'm hacking my way through here by repurposing .memory for storing
// the GOT target symbol index.
return MCValue{ .memory = decl.link.macho.local_sym_index };
return MCValue{ .memory = decl.link.macho.sym_index };
} else if (self.bin_file.cast(link.File.Coff)) |coff_file| {
const got_addr = coff_file.offset_table_virtual_address + decl.link.coff.offset_table_index * ptr_bytes;
return MCValue{ .memory = got_addr };

@ -2644,8 +2644,8 @@ fn loadMemPtrIntoRegister(self: *Self, reg: Register, ptr_ty: Type, ptr: MCValue
.flags = flags,
}),
.data = .{
.load_reloc = .{
.atom_index = fn_owner_decl.link.macho.local_sym_index,
.relocation = .{
.atom_index = fn_owner_decl.link.macho.sym_index,
.sym_index = sym_index,
},
},
@ -3977,7 +3977,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions.
const func = func_payload.data;
const fn_owner_decl = mod.declPtr(func.owner_decl);
try self.genSetReg(Type.initTag(.usize), .rax, .{
.got_load = fn_owner_decl.link.macho.local_sym_index,
.got_load = fn_owner_decl.link.macho.sym_index,
});
// callq *%rax
_ = try self.addInst(.{
@ -3997,14 +3997,14 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions.
lib_name,
});
}
const n_strx = try macho_file.getGlobalSymbol(mem.sliceTo(decl_name, 0));
const sym_index = try macho_file.getGlobalSymbol(mem.sliceTo(decl_name, 0));
_ = try self.addInst(.{
.tag = .call_extern,
.ops = undefined,
.data = .{
.extern_fn = .{
.atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.local_sym_index,
.sym_name = n_strx,
.relocation = .{
.atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.sym_index,
.sym_index = sym_index,
},
},
});
@ -6771,8 +6771,8 @@ fn lowerDeclRef(self: *Self, tv: TypedValue, decl_index: Module.Decl.Index) Inne
} else if (self.bin_file.cast(link.File.MachO)) |_| {
// Because MachO is PIE-always-on, we defer memory address resolution until
// the linker has enough info to perform relocations.
assert(decl.link.macho.local_sym_index != 0);
return MCValue{ .got_load = decl.link.macho.local_sym_index };
assert(decl.link.macho.sym_index != 0);
return MCValue{ .got_load = decl.link.macho.sym_index };
} else if (self.bin_file.cast(link.File.Coff)) |coff_file| {
const got_addr = coff_file.offset_table_virtual_address + decl.link.coff.offset_table_index * ptr_bytes;
return MCValue{ .memory = got_addr };

@ -982,7 +982,7 @@ fn mirLeaPie(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
const tag = emit.mir.instructions.items(.tag)[inst];
assert(tag == .lea_pie);
const ops = emit.mir.instructions.items(.ops)[inst].decode();
const load_reloc = emit.mir.instructions.items(.data)[inst].load_reloc;
const relocation = emit.mir.instructions.items(.data)[inst].relocation;
// lea reg1, [rip + reloc]
// RM
@ -1001,11 +1001,11 @@ fn mirLeaPie(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
0b01 => @enumToInt(std.macho.reloc_type_x86_64.X86_64_RELOC_SIGNED),
else => return emit.fail("TODO unused LEA PIE variants 0b10 and 0b11", .{}),
};
const atom = macho_file.atom_by_index_table.get(load_reloc.atom_index).?;
log.debug("adding reloc of type {} to local @{d}", .{ reloc_type, load_reloc.sym_index });
const atom = macho_file.atom_by_index_table.get(relocation.atom_index).?;
log.debug("adding reloc of type {} to local @{d}", .{ reloc_type, relocation.sym_index });
try atom.relocs.append(emit.bin_file.allocator, .{
.offset = @intCast(u32, end_offset - 4),
.target = .{ .local = load_reloc.sym_index },
.target = .{ .sym_index = relocation.sym_index, .file = null },
.addend = 0,
.subtractor = null,
.pcrel = true,
@ -1116,7 +1116,7 @@ fn mirCmpFloatAvx(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
const tag = emit.mir.instructions.items(.tag)[inst];
assert(tag == .call_extern);
const extern_fn = emit.mir.instructions.items(.data)[inst].extern_fn;
const relocation = emit.mir.instructions.items(.data)[inst].relocation;
const offset = blk: {
// callq
@ -1126,10 +1126,13 @@ fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
if (emit.bin_file.cast(link.File.MachO)) |macho_file| {
// Add relocation to the decl.
const atom = macho_file.atom_by_index_table.get(extern_fn.atom_index).?;
const atom = macho_file.atom_by_index_table.get(relocation.atom_index).?;
try atom.relocs.append(emit.bin_file.allocator, .{
.offset = offset,
.target = .{ .global = extern_fn.sym_name },
.target = .{
.sym_index = relocation.sym_index,
.file = null,
},
.addend = 0,
.subtractor = null,
.pcrel = true,

@ -181,7 +181,7 @@ pub const Inst = struct {
/// 0b00 reg1, [rip + reloc] // via GOT emits X86_64_RELOC_GOT relocation
/// 0b01 reg1, [rip + reloc] // direct load emits X86_64_RELOC_SIGNED relocation
/// Notes:
/// * `Data` contains `load_reloc`
/// * `Data` contains `relocation`
lea_pie,
/// ops flags: form:
@ -368,7 +368,7 @@ pub const Inst = struct {
/// Pseudo-instructions
/// call extern function
/// Notes:
/// * target of the call is stored as `extern_fn` in `Data` union.
/// * target of the call is stored as `relocation` in `Data` union.
call_extern,
/// end of prologue
@ -439,15 +439,10 @@ pub const Inst = struct {
/// A condition code for use with EFLAGS register.
cc: bits.Condition,
},
/// An extern function.
extern_fn: struct {
/// Index of the containing atom.
atom_index: u32,
/// Index into the linker's string table.
sym_name: u32,
},
/// PIE load relocation.
load_reloc: struct {
/// Relocation for the linker where:
/// * `atom_index` is the index of the source
/// * `sym_index` is the index of the target
relocation: struct {
/// Index of the containing atom.
atom_index: u32,
/// Index into the linker's symbol table.

@ -544,12 +544,7 @@ pub const File = struct {
switch (base.tag) {
.coff => return @fieldParentPtr(Coff, "base", base).allocateDeclIndexes(decl_index),
.elf => return @fieldParentPtr(Elf, "base", base).allocateDeclIndexes(decl_index),
.macho => return @fieldParentPtr(MachO, "base", base).allocateDeclIndexes(decl_index) catch |err| switch (err) {
// remap this error code because we are transitioning away from
// `allocateDeclIndexes`.
error.Overflow => return error.OutOfMemory,
error.OutOfMemory => return error.OutOfMemory,
},
.macho => return @fieldParentPtr(MachO, "base", base).allocateDeclIndexes(decl_index),
.wasm => return @fieldParentPtr(Wasm, "base", base).allocateDeclIndexes(decl_index),
.plan9 => return @fieldParentPtr(Plan9, "base", base).allocateDeclIndexes(decl_index),
.c, .spirv, .nvptx => {},

File diff suppressed because it is too large.

@ -16,7 +16,7 @@ const Arch = std.Target.Cpu.Arch;
const Dwarf = @import("../Dwarf.zig");
const MachO = @import("../MachO.zig");
const Object = @import("Object.zig");
const StringIndexAdapter = std.hash_map.StringIndexAdapter;
const SymbolWithLoc = MachO.SymbolWithLoc;
/// Each decl always gets a local symbol with the fully qualified name.
/// The vaddr and size are found here directly.
@ -24,10 +24,10 @@ const StringIndexAdapter = std.hash_map.StringIndexAdapter;
/// the symbol references, and adding that to the file offset of the section.
/// If this field is 0, it means the codegen size = 0 and there is no symbol or
/// offset table entry.
local_sym_index: u32,
sym_index: u32,
/// List of symbol aliases pointing to the same atom via different nlists
aliases: std.ArrayListUnmanaged(u32) = .{},
/// null means the symbol is defined in Zig source.
file: ?u32,
/// List of symbols contained within this atom
contained: std.ArrayListUnmanaged(SymbolAtOffset) = .{},
@ -48,26 +48,17 @@ alignment: u32,
relocs: std.ArrayListUnmanaged(Relocation) = .{},
/// List of offsets contained within this atom that need rebasing by the dynamic
/// loader in presence of ASLR.
/// loader for example in presence of ASLR.
rebases: std.ArrayListUnmanaged(u64) = .{},
/// List of offsets contained within this atom that will be dynamically bound
/// by the dynamic loader and contain pointers to resolved (at load time) extern
/// symbols (aka proxies aka imports)
/// symbols (aka proxies aka imports).
bindings: std.ArrayListUnmanaged(Binding) = .{},
/// List of lazy bindings
/// List of lazy bindings (cf bindings above).
lazy_bindings: std.ArrayListUnmanaged(Binding) = .{},
/// List of data-in-code entries. This is currently specific to x86_64 only.
dices: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{},
/// Stab entry for this atom. This is currently specific to a binary created
/// by linking object files in a traditional sense - in incremental sense, we
/// bypass stabs altogether to produce dSYM bundle directly with fully relocated
/// DWARF sections.
stab: ?Stab = null,
/// Points to the previous and next neighbours
next: ?*Atom,
prev: ?*Atom,
@ -77,107 +68,62 @@ dbg_info_atom: Dwarf.Atom,
dirty: bool = true,
pub const Binding = struct {
n_strx: u32,
target: SymbolWithLoc,
offset: u64,
};
pub const SymbolAtOffset = struct {
local_sym_index: u32,
sym_index: u32,
offset: u64,
stab: ?Stab = null,
};
pub const Stab = union(enum) {
function: u64,
static,
global,
pub fn asNlists(stab: Stab, local_sym_index: u32, macho_file: anytype) ![]macho.nlist_64 {
var nlists = std.ArrayList(macho.nlist_64).init(macho_file.base.allocator);
defer nlists.deinit();
const sym = macho_file.locals.items[local_sym_index];
switch (stab) {
.function => |size| {
try nlists.ensureUnusedCapacity(4);
nlists.appendAssumeCapacity(.{
.n_strx = 0,
.n_type = macho.N_BNSYM,
.n_sect = sym.n_sect,
.n_desc = 0,
.n_value = sym.n_value,
});
nlists.appendAssumeCapacity(.{
.n_strx = sym.n_strx,
.n_type = macho.N_FUN,
.n_sect = sym.n_sect,
.n_desc = 0,
.n_value = sym.n_value,
});
nlists.appendAssumeCapacity(.{
.n_strx = 0,
.n_type = macho.N_FUN,
.n_sect = 0,
.n_desc = 0,
.n_value = size,
});
nlists.appendAssumeCapacity(.{
.n_strx = 0,
.n_type = macho.N_ENSYM,
.n_sect = sym.n_sect,
.n_desc = 0,
.n_value = size,
});
},
.global => {
try nlists.append(.{
.n_strx = sym.n_strx,
.n_type = macho.N_GSYM,
.n_sect = 0,
.n_desc = 0,
.n_value = 0,
});
},
.static => {
try nlists.append(.{
.n_strx = sym.n_strx,
.n_type = macho.N_STSYM,
.n_sect = sym.n_sect,
.n_desc = 0,
.n_value = sym.n_value,
});
},
}
return nlists.toOwnedSlice();
}
};
pub const Relocation = struct {
pub const Target = union(enum) {
local: u32,
global: u32,
};
/// Offset within the atom's code buffer.
/// Note relocation size can be inferred by relocation's kind.
offset: u32,
target: Target,
target: MachO.SymbolWithLoc,
addend: i64,
subtractor: ?u32,
subtractor: ?MachO.SymbolWithLoc,
pcrel: bool,
length: u2,
@"type": u4,
pub fn getTargetAtom(self: Relocation, macho_file: *MachO) ?*Atom {
const is_via_got = got: {
switch (macho_file.base.options.target.cpu.arch) {
.aarch64 => break :got switch (@intToEnum(macho.reloc_type_arm64, self.@"type")) {
.ARM64_RELOC_GOT_LOAD_PAGE21,
.ARM64_RELOC_GOT_LOAD_PAGEOFF12,
.ARM64_RELOC_POINTER_TO_GOT,
=> true,
else => false,
},
.x86_64 => break :got switch (@intToEnum(macho.reloc_type_x86_64, self.@"type")) {
.X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => true,
else => false,
},
else => unreachable,
}
};
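// Resolution order below: prefer the synthetic GOT atom when the reloc type
// requires GOT indirection, then stubs and TLV pointer atoms for imports,
// and finally the atom that defines the target symbol directly.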
if (is_via_got) {
return macho_file.getGotAtomForSymbol(self.target).?; // panic means fatal error
}
if (macho_file.getStubsAtomForSymbol(self.target)) |stubs_atom| return stubs_atom;
if (macho_file.getTlvPtrAtomForSymbol(self.target)) |tlv_ptr_atom| return tlv_ptr_atom;
return macho_file.getAtomForSymbol(self.target);
}
};
pub const empty = Atom{
.local_sym_index = 0,
.sym_index = 0,
.file = null,
.size = 0,
.alignment = 0,
.prev = null,
@ -186,34 +132,66 @@ pub const empty = Atom{
};
pub fn deinit(self: *Atom, allocator: Allocator) void {
self.dices.deinit(allocator);
self.lazy_bindings.deinit(allocator);
self.bindings.deinit(allocator);
self.rebases.deinit(allocator);
self.relocs.deinit(allocator);
self.contained.deinit(allocator);
self.aliases.deinit(allocator);
self.code.deinit(allocator);
}
pub fn clearRetainingCapacity(self: *Atom) void {
self.dices.clearRetainingCapacity();
self.lazy_bindings.clearRetainingCapacity();
self.bindings.clearRetainingCapacity();
self.rebases.clearRetainingCapacity();
self.relocs.clearRetainingCapacity();
self.contained.clearRetainingCapacity();
self.aliases.clearRetainingCapacity();
self.code.clearRetainingCapacity();
}
/// Returns symbol referencing this atom.
pub fn getSymbol(self: Atom, macho_file: *MachO) macho.nlist_64 {
return self.getSymbolPtr(macho_file).*;
}
/// Returns pointer-to-symbol referencing this atom.
pub fn getSymbolPtr(self: Atom, macho_file: *MachO) *macho.nlist_64 {
return macho_file.getSymbolPtr(.{
.sym_index = self.sym_index,
.file = self.file,
});
}
pub fn getSymbolWithLoc(self: Atom) SymbolWithLoc {
return .{ .sym_index = self.sym_index, .file = self.file };
}
/// Returns true if the symbol pointed to by `sym_loc` is contained within this atom.
/// WARNING: this function assumes all atoms have been allocated in virtual memory.
/// Calling it before allocating with `MachO.allocateSymbols` (or equivalent) will
/// give bogus results.
pub fn isSymbolContained(self: Atom, sym_loc: SymbolWithLoc, macho_file: *MachO) bool {
const sym = macho_file.getSymbol(sym_loc);
if (!sym.sect()) return false;
const self_sym = self.getSymbol(macho_file);
return sym.n_value >= self_sym.n_value and sym.n_value < self_sym.n_value + self.size;
}
/// Returns the name of this atom.
pub fn getName(self: Atom, macho_file: *MachO) []const u8 {
return macho_file.getSymbolName(.{
.sym_index = self.sym_index,
.file = self.file,
});
}
/// Returns how much room there is to grow in virtual address space.
/// File offset relocation happens transparently, so it is not included in
/// this calculation.
pub fn capacity(self: Atom, macho_file: MachO) u64 {
const self_sym = macho_file.locals.items[self.local_sym_index];
pub fn capacity(self: Atom, macho_file: *MachO) u64 {
const self_sym = self.getSymbol(macho_file);
if (self.next) |next| {
const next_sym = macho_file.locals.items[next.local_sym_index];
const next_sym = next.getSymbol(macho_file);
return next_sym.n_value - self_sym.n_value;
} else {
// We are the last atom.
@ -222,11 +200,11 @@ pub fn capacity(self: Atom, macho_file: MachO) u64 {
}
}
pub fn freeListEligible(self: Atom, macho_file: MachO) bool {
pub fn freeListEligible(self: Atom, macho_file: *MachO) bool {
// No need to keep a free list node for the last atom.
const next = self.next orelse return false;
const self_sym = macho_file.locals.items[self.local_sym_index];
const next_sym = macho_file.locals.items[next.local_sym_index];
const self_sym = self.getSymbol(macho_file);
const next_sym = next.getSymbol(macho_file);
const cap = next_sym.n_value - self_sym.n_value;
const ideal_cap = MachO.padToIdeal(self.size);
if (cap <= ideal_cap) return false;
@ -235,19 +213,20 @@ pub fn freeListEligible(self: Atom, macho_file: MachO) bool {
}
const RelocContext = struct {
base_addr: u64 = 0,
allocator: Allocator,
object: *Object,
macho_file: *MachO,
base_addr: u64 = 0,
base_offset: i32 = 0,
};
pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocContext) !void {
pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: RelocContext) !void {
const tracy = trace(@src());
defer tracy.end();
const gpa = context.macho_file.base.allocator;
const arch = context.macho_file.base.options.target.cpu.arch;
var addend: i64 = 0;
var subtractor: ?u32 = null;
var subtractor: ?SymbolWithLoc = null;
for (relocs) |rel, i| {
blk: {
@ -284,20 +263,16 @@ pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocC
}
assert(subtractor == null);
const sym = context.object.symtab.items[rel.r_symbolnum];
const sym_loc = MachO.SymbolWithLoc{
.sym_index = rel.r_symbolnum,
.file = self.file,
};
const sym = context.macho_file.getSymbol(sym_loc);
if (sym.sect() and !sym.ext()) {
subtractor = context.object.symbol_mapping.get(rel.r_symbolnum).?;
subtractor = sym_loc;
} else {
const sym_name = context.object.getString(sym.n_strx);
const n_strx = context.macho_file.strtab_dir.getKeyAdapted(
@as([]const u8, sym_name),
StringIndexAdapter{
.bytes = &context.macho_file.strtab,
},
).?;
const resolv = context.macho_file.symbol_resolver.get(n_strx).?;
assert(resolv.where == .global);
subtractor = resolv.local_sym_index;
const sym_name = context.macho_file.getSymbolName(sym_loc);
subtractor = context.macho_file.globals.get(sym_name).?;
}
// Verify that *_SUBTRACTOR is followed by *_UNSIGNED.
if (relocs.len <= i + 1) {
@ -328,45 +303,42 @@ pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocC
continue;
}
const object = &context.macho_file.objects.items[self.file.?];
const target = target: {
if (rel.r_extern == 0) {
const sect_id = @intCast(u16, rel.r_symbolnum - 1);
const local_sym_index = context.object.sections_as_symbols.get(sect_id) orelse blk: {
const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].segment;
const sect = seg.sections.items[sect_id];
const sym_index = object.sections_as_symbols.get(sect_id) orelse blk: {
const sect = object.getSourceSection(sect_id);
const match = (try context.macho_file.getMatchingSection(sect)) orelse
unreachable;
const local_sym_index = @intCast(u32, context.macho_file.locals.items.len);
try context.macho_file.locals.append(context.allocator, .{
const sym_index = @intCast(u32, object.symtab.items.len);
try object.symtab.append(gpa, .{
.n_strx = 0,
.n_type = macho.N_SECT,
.n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(match).? + 1),
.n_sect = context.macho_file.getSectionOrdinal(match),
.n_desc = 0,
.n_value = 0,
.n_value = sect.addr,
});
try context.object.sections_as_symbols.putNoClobber(context.allocator, sect_id, local_sym_index);
break :blk local_sym_index;
try object.sections_as_symbols.putNoClobber(gpa, sect_id, sym_index);
break :blk sym_index;
};
break :target Relocation.Target{ .local = local_sym_index };
break :target MachO.SymbolWithLoc{ .sym_index = sym_index, .file = self.file };
}
const sym = context.object.symtab.items[rel.r_symbolnum];
const sym_name = context.object.getString(sym.n_strx);
const sym_loc = MachO.SymbolWithLoc{
.sym_index = rel.r_symbolnum,
.file = self.file,
};
const sym = context.macho_file.getSymbol(sym_loc);
if (sym.sect() and !sym.ext()) {
const sym_index = context.object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable;
break :target Relocation.Target{ .local = sym_index };
break :target sym_loc;
} else {
const sym_name = context.macho_file.getSymbolName(sym_loc);
break :target context.macho_file.globals.get(sym_name).?;
}
const n_strx = context.macho_file.strtab_dir.getKeyAdapted(
@as([]const u8, sym_name),
StringIndexAdapter{
.bytes = &context.macho_file.strtab,
},
) orelse unreachable;
break :target Relocation.Target{ .global = n_strx };
};
const offset = @intCast(u32, rel.r_address);
const offset = @intCast(u32, rel.r_address - context.base_offset);
switch (arch) {
.aarch64 => {
@ -388,8 +360,7 @@ pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocC
else
mem.readIntLittle(i32, self.code.items[offset..][0..4]);
if (rel.r_extern == 0) {
const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].segment;
const target_sect_base_addr = seg.sections.items[rel.r_symbolnum - 1].addr;
const target_sect_base_addr = object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr;
addend -= @intCast(i64, target_sect_base_addr);
}
try self.addPtrBindingOrRebase(rel, target, context);
@ -397,9 +368,7 @@ pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocC
.ARM64_RELOC_TLVP_LOAD_PAGE21,
.ARM64_RELOC_TLVP_LOAD_PAGEOFF12,
=> {
if (target == .global) {
try addTlvPtrEntry(target, context);
}
try addTlvPtrEntry(target, context);
},
else => {},
}
@ -423,8 +392,7 @@ pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocC
else
mem.readIntLittle(i32, self.code.items[offset..][0..4]);
if (rel.r_extern == 0) {
const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].segment;
const target_sect_base_addr = seg.sections.items[rel.r_symbolnum - 1].addr;
const target_sect_base_addr = object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr;
addend -= @intCast(i64, target_sect_base_addr);
}
try self.addPtrBindingOrRebase(rel, target, context);
@ -445,16 +413,15 @@ pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocC
if (rel.r_extern == 0) {
// Note for the future self: when r_extern == 0, we should subtract correction from the
// addend.
const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].segment;
const target_sect_base_addr = seg.sections.items[rel.r_symbolnum - 1].addr;
const target_sect_base_addr = object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr;
// We need to add base_offset, i.e., the offset of this atom wrt the source
// section. Otherwise, the addend will over-/under-shoot.
addend += @intCast(i64, context.base_addr + offset + 4) -
@intCast(i64, target_sect_base_addr);
@intCast(i64, target_sect_base_addr) + context.base_offset;
}
},
.X86_64_RELOC_TLV => {
if (target == .global) {
try addTlvPtrEntry(target, context);
}
try addTlvPtrEntry(target, context);
},
else => {},
}
@ -462,7 +429,7 @@ pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocC
else => unreachable,
}
try self.relocs.append(context.allocator, .{
try self.relocs.append(gpa, .{
.offset = offset,
.target = target,
.addend = addend,
@ -480,286 +447,182 @@ pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocC
fn addPtrBindingOrRebase(
self: *Atom,
rel: macho.relocation_info,
target: Relocation.Target,
target: MachO.SymbolWithLoc,
context: RelocContext,
) !void {
switch (target) {
.global => |n_strx| {
try self.bindings.append(context.allocator, .{
.n_strx = n_strx,
.offset = @intCast(u32, rel.r_address),
});
},
.local => {
const source_sym = context.macho_file.locals.items[self.local_sym_index];
const match = context.macho_file.section_ordinals.keys()[source_sym.n_sect - 1];
const seg = context.macho_file.load_commands.items[match.seg].segment;
const sect = seg.sections.items[match.sect];
const sect_type = sect.type_();
const gpa = context.macho_file.base.allocator;
const sym = context.macho_file.getSymbol(target);
if (sym.undf()) {
try self.bindings.append(gpa, .{
.target = target,
.offset = @intCast(u32, rel.r_address - context.base_offset),
});
} else {
const source_sym = self.getSymbol(context.macho_file);
const match = context.macho_file.getMatchingSectionFromOrdinal(source_sym.n_sect);
const sect = context.macho_file.getSection(match);
const sect_type = sect.type_();
const should_rebase = rebase: {
if (rel.r_length != 3) break :rebase false;
const should_rebase = rebase: {
if (rel.r_length != 3) break :rebase false;
// TODO actually, a check similar to what dyld is doing, that is, verifying
// that the segment is writable should be enough here.
const is_right_segment = blk: {
if (context.macho_file.data_segment_cmd_index) |idx| {
if (match.seg == idx) {
break :blk true;
}
// TODO actually, a check similar to what dyld is doing, that is, verifying
// that the segment is writable should be enough here.
const is_right_segment = blk: {
if (context.macho_file.data_segment_cmd_index) |idx| {
if (match.seg == idx) {
break :blk true;
}
if (context.macho_file.data_const_segment_cmd_index) |idx| {
if (match.seg == idx) {
break :blk true;
}
}
break :blk false;
};
if (!is_right_segment) break :rebase false;
if (sect_type != macho.S_LITERAL_POINTERS and
sect_type != macho.S_REGULAR and
sect_type != macho.S_MOD_INIT_FUNC_POINTERS and
sect_type != macho.S_MOD_TERM_FUNC_POINTERS)
{
break :rebase false;
}
break :rebase true;
if (context.macho_file.data_const_segment_cmd_index) |idx| {
if (match.seg == idx) {
break :blk true;
}
}
break :blk false;
};
if (should_rebase) {
try self.rebases.append(context.allocator, @intCast(u32, rel.r_address));
if (!is_right_segment) break :rebase false;
if (sect_type != macho.S_LITERAL_POINTERS and
sect_type != macho.S_REGULAR and
sect_type != macho.S_MOD_INIT_FUNC_POINTERS and
sect_type != macho.S_MOD_TERM_FUNC_POINTERS)
{
break :rebase false;
}
},
break :rebase true;
};
if (should_rebase) {
try self.rebases.append(gpa, @intCast(u32, rel.r_address - context.base_offset));
}
}
}
fn addTlvPtrEntry(target: Relocation.Target, context: RelocContext) !void {
fn addTlvPtrEntry(target: MachO.SymbolWithLoc, context: RelocContext) !void {
const target_sym = context.macho_file.getSymbol(target);
if (!target_sym.undf()) return;
if (context.macho_file.tlv_ptr_entries_table.contains(target)) return;
const index = try context.macho_file.allocateTlvPtrEntry(target);
const atom = try context.macho_file.createTlvPtrAtom(target);
context.macho_file.tlv_ptr_entries.items[index].atom = atom;
const match = (try context.macho_file.getMatchingSection(.{
.segname = MachO.makeStaticString("__DATA"),
.sectname = MachO.makeStaticString("__thread_ptrs"),
.flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS,
})).?;
if (!context.object.start_atoms.contains(match)) {
try context.object.start_atoms.putNoClobber(context.allocator, match, atom);
}
if (context.object.end_atoms.getPtr(match)) |last| {
last.*.next = atom;
atom.prev = last.*;
last.* = atom;
} else {
try context.object.end_atoms.putNoClobber(context.allocator, match, atom);
}
context.macho_file.tlv_ptr_entries.items[index].sym_index = atom.sym_index;
}
fn addGotEntry(target: Relocation.Target, context: RelocContext) !void {
fn addGotEntry(target: MachO.SymbolWithLoc, context: RelocContext) !void {
if (context.macho_file.got_entries_table.contains(target)) return;
const index = try context.macho_file.allocateGotEntry(target);
const atom = try context.macho_file.createGotAtom(target);
context.macho_file.got_entries.items[index].atom = atom;
const match = MachO.MatchingSection{
.seg = context.macho_file.data_const_segment_cmd_index.?,
.sect = context.macho_file.got_section_index.?,
};
if (!context.object.start_atoms.contains(match)) {
try context.object.start_atoms.putNoClobber(context.allocator, match, atom);
}
if (context.object.end_atoms.getPtr(match)) |last| {
last.*.next = atom;
atom.prev = last.*;
last.* = atom;
} else {
try context.object.end_atoms.putNoClobber(context.allocator, match, atom);
}
context.macho_file.got_entries.items[index].sym_index = atom.sym_index;
}
fn addStub(target: Relocation.Target, context: RelocContext) !void {
if (target != .global) return;
if (context.macho_file.stubs_table.contains(target.global)) return;
// If the symbol has been resolved as defined globally elsewhere (in a different translation unit),
// then skip creating stub entry.
// TODO Is this correct for the incremental case?
if (context.macho_file.symbol_resolver.get(target.global).?.where == .global) return;
fn addStub(target: MachO.SymbolWithLoc, context: RelocContext) !void {
const target_sym = context.macho_file.getSymbol(target);
if (!target_sym.undf()) return;
if (context.macho_file.stubs_table.contains(target)) return;
const stub_index = try context.macho_file.allocateStubEntry(target.global);
const stub_index = try context.macho_file.allocateStubEntry(target);
const stub_helper_atom = try context.macho_file.createStubHelperAtom();
const laptr_atom = try context.macho_file.createLazyPointerAtom(stub_helper_atom.sym_index, target);
const stub_atom = try context.macho_file.createStubAtom(laptr_atom.sym_index);
// TODO clean this up!
const stub_helper_atom = atom: {
const atom = try context.macho_file.createStubHelperAtom();
const match = MachO.MatchingSection{
.seg = context.macho_file.text_segment_cmd_index.?,
.sect = context.macho_file.stub_helper_section_index.?,
};
if (!context.object.start_atoms.contains(match)) {
try context.object.start_atoms.putNoClobber(context.allocator, match, atom);
}
if (context.object.end_atoms.getPtr(match)) |last| {
last.*.next = atom;
atom.prev = last.*;
last.* = atom;
} else {
try context.object.end_atoms.putNoClobber(context.allocator, match, atom);
}
break :atom atom;
};
const laptr_atom = atom: {
const atom = try context.macho_file.createLazyPointerAtom(
stub_helper_atom.local_sym_index,
target.global,
);
const match = MachO.MatchingSection{
.seg = context.macho_file.data_segment_cmd_index.?,
.sect = context.macho_file.la_symbol_ptr_section_index.?,
};
if (!context.object.start_atoms.contains(match)) {
try context.object.start_atoms.putNoClobber(context.allocator, match, atom);
}
if (context.object.end_atoms.getPtr(match)) |last| {
last.*.next = atom;
atom.prev = last.*;
last.* = atom;
} else {
try context.object.end_atoms.putNoClobber(context.allocator, match, atom);
}
break :atom atom;
};
const atom = try context.macho_file.createStubAtom(laptr_atom.local_sym_index);
const match = MachO.MatchingSection{
.seg = context.macho_file.text_segment_cmd_index.?,
.sect = context.macho_file.stubs_section_index.?,
};
if (!context.object.start_atoms.contains(match)) {
try context.object.start_atoms.putNoClobber(context.allocator, match, atom);
}
if (context.object.end_atoms.getPtr(match)) |last| {
last.*.next = atom;
atom.prev = last.*;
last.* = atom;
} else {
try context.object.end_atoms.putNoClobber(context.allocator, match, atom);
}
context.macho_file.stubs.items[stub_index] = atom;
context.macho_file.stubs.items[stub_index].sym_index = stub_atom.sym_index;
}
pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void {
const tracy = trace(@src());
defer tracy.end();
log.debug("ATOM(%{d}, '{s}')", .{ self.sym_index, self.getName(macho_file) });
for (self.relocs.items) |rel| {
log.debug("relocating {}", .{rel});
const arch = macho_file.base.options.target.cpu.arch;
switch (arch) {
.aarch64 => {
log.debug(" RELA({s}) @ {x} => %{d} in object({d})", .{
@tagName(@intToEnum(macho.reloc_type_arm64, rel.@"type")),
rel.offset,
rel.target.sym_index,
rel.target.file,
});
},
.x86_64 => {
log.debug(" RELA({s}) @ {x} => %{d} in object({d})", .{
@tagName(@intToEnum(macho.reloc_type_x86_64, rel.@"type")),
rel.offset,
rel.target.sym_index,
rel.target.file,
});
},
else => unreachable,
}
const source_addr = blk: {
const sym = macho_file.locals.items[self.local_sym_index];
break :blk sym.n_value + rel.offset;
const source_sym = self.getSymbol(macho_file);
break :blk source_sym.n_value + rel.offset;
};
const is_tlv = is_tlv: {
const source_sym = self.getSymbol(macho_file);
const match = macho_file.getMatchingSectionFromOrdinal(source_sym.n_sect);
const sect = macho_file.getSection(match);
break :is_tlv sect.type_() == macho.S_THREAD_LOCAL_VARIABLES;
};
var is_via_thread_ptrs: bool = false;
const target_addr = blk: {
const is_via_got = got: {
switch (arch) {
.aarch64 => break :got switch (@intToEnum(macho.reloc_type_arm64, rel.@"type")) {
.ARM64_RELOC_GOT_LOAD_PAGE21,
.ARM64_RELOC_GOT_LOAD_PAGEOFF12,
.ARM64_RELOC_POINTER_TO_GOT,
=> true,
else => false,
},
.x86_64 => break :got switch (@intToEnum(macho.reloc_type_x86_64, rel.@"type")) {
.X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => true,
else => false,
},
else => unreachable,
}
const target_atom = rel.getTargetAtom(macho_file) orelse {
// If there is no atom for target, we still need to check for special, atom-less
// symbols such as `___dso_handle`.
const target_name = macho_file.getSymbolName(rel.target);
assert(macho_file.globals.contains(target_name));
const atomless_sym = macho_file.getSymbol(rel.target);
log.debug(" | atomless target '{s}'", .{target_name});
break :blk atomless_sym.n_value;
};
if (is_via_got) {
const got_index = macho_file.got_entries_table.get(rel.target) orelse {
log.err("expected GOT entry for symbol", .{});
switch (rel.target) {
.local => |sym_index| log.err(" local @{d}", .{sym_index}),
.global => |n_strx| log.err(" global @'{s}'", .{macho_file.getString(n_strx)}),
log.debug(" | target ATOM(%{d}, '{s}') in object({d})", .{
target_atom.sym_index,
target_atom.getName(macho_file),
target_atom.file,
});
// If `rel.target` is contained within the target atom, pull its address value.
const target_sym = if (target_atom.isSymbolContained(rel.target, macho_file))
macho_file.getSymbol(rel.target)
else
target_atom.getSymbol(macho_file);
assert(target_sym.n_desc != MachO.N_DESC_GCED);
const base_address: u64 = if (is_tlv) base_address: {
// For TLV relocations, the value specified as a relocation is the displacement from the
// TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first
// defined TLV template init section in the following order:
// * wrt to __thread_data if defined, then
// * wrt to __thread_bss
const sect_id: u16 = sect_id: {
if (macho_file.tlv_data_section_index) |i| {
break :sect_id i;
} else if (macho_file.tlv_bss_section_index) |i| {
break :sect_id i;
} else {
log.err("threadlocal variables present but no initializer sections found", .{});
log.err(" __thread_data not found", .{});
log.err(" __thread_bss not found", .{});
return error.FailedToResolveRelocationTarget;
}
log.err(" this is an internal linker error", .{});
return error.FailedToResolveRelocationTarget;
};
const atom = macho_file.got_entries.items[got_index].atom;
break :blk macho_file.locals.items[atom.local_sym_index].n_value;
}
switch (rel.target) {
.local => |sym_index| {
const sym = macho_file.locals.items[sym_index];
const is_tlv = is_tlv: {
const source_sym = macho_file.locals.items[self.local_sym_index];
const match = macho_file.section_ordinals.keys()[source_sym.n_sect - 1];
const seg = macho_file.load_commands.items[match.seg].segment;
const sect = seg.sections.items[match.sect];
break :is_tlv sect.type_() == macho.S_THREAD_LOCAL_VARIABLES;
};
if (is_tlv) {
// For TLV relocations, the value specified as a relocation is the displacement from the
// TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first
// defined TLV template init section in the following order:
// * wrt to __thread_data if defined, then
// * wrt to __thread_bss
const seg = macho_file.load_commands.items[macho_file.data_segment_cmd_index.?].segment;
const base_address = inner: {
if (macho_file.tlv_data_section_index) |i| {
break :inner seg.sections.items[i].addr;
} else if (macho_file.tlv_bss_section_index) |i| {
break :inner seg.sections.items[i].addr;
} else {
log.err("threadlocal variables present but no initializer sections found", .{});
log.err(" __thread_data not found", .{});
log.err(" __thread_bss not found", .{});
return error.FailedToResolveRelocationTarget;
}
};
break :blk sym.n_value - base_address;
}
break :blk sym.n_value;
},
.global => |n_strx| {
// TODO Still trying to figure out how to possibly use stubs for local symbol indirection with
// branching instructions. If it is not possible, then the best course of action is to
// resurrect the former approach of deferring the creation of synthetic atoms in __got and __la_symbol_ptr
// sections until we resolve the relocations.
const resolv = macho_file.symbol_resolver.get(n_strx).?;
switch (resolv.where) {
.global => break :blk macho_file.globals.items[resolv.where_index].n_value,
.undef => {
if (macho_file.stubs_table.get(n_strx)) |stub_index| {
const atom = macho_file.stubs.items[stub_index];
break :blk macho_file.locals.items[atom.local_sym_index].n_value;
} else {
if (macho_file.tlv_ptr_entries_table.get(rel.target)) |tlv_ptr_index| {
is_via_thread_ptrs = true;
const atom = macho_file.tlv_ptr_entries.items[tlv_ptr_index].atom;
break :blk macho_file.locals.items[atom.local_sym_index].n_value;
}
break :blk 0;
}
},
}
},
}
break :base_address macho_file.getSection(.{
.seg = macho_file.data_segment_cmd_index.?,
.sect = sect_id,
}).addr;
} else 0;
break :blk target_sym.n_value - base_address;
};
log.debug(" | source_addr = 0x{x}", .{source_addr});
log.debug(" | target_addr = 0x{x}", .{target_addr});
log.debug(" | source_addr = 0x{x}", .{source_addr});
switch (arch) {
.aarch64 => {
switch (@intToEnum(macho.reloc_type_arm64, rel.@"type")) {
.ARM64_RELOC_BRANCH26 => {
log.debug(" | target_addr = 0x{x}", .{target_addr});
const displacement = math.cast(
i28,
@intCast(i64, target_addr) - @intCast(i64, source_addr),
@ -788,6 +651,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void {
.ARM64_RELOC_TLVP_LOAD_PAGE21,
=> {
const actual_target_addr = @intCast(i64, target_addr) + rel.addend;
log.debug(" | target_addr = 0x{x}", .{actual_target_addr});
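// adrp operates on 4 KiB pages; the page delta is encoded as a signed
// 21-bit immediate, giving the instruction a +/-4 GiB reach.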
const source_page = @intCast(i32, source_addr >> 12);
const target_page = @intCast(i32, actual_target_addr >> 12);
const pages = @bitCast(u21, @intCast(i21, target_page - source_page));
@ -805,6 +669,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void {
.ARM64_RELOC_PAGEOFF12 => {
const code = self.code.items[rel.offset..][0..4];
const actual_target_addr = @intCast(i64, target_addr) + rel.addend;
log.debug(" | target_addr = 0x{x}", .{actual_target_addr});
const narrowed = @truncate(u12, @intCast(u64, actual_target_addr));
if (isArithmeticOp(self.code.items[rel.offset..][0..4])) {
var inst = aarch64.Instruction{
@ -842,6 +707,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void {
.ARM64_RELOC_GOT_LOAD_PAGEOFF12 => {
const code = self.code.items[rel.offset..][0..4];
const actual_target_addr = @intCast(i64, target_addr) + rel.addend;
log.debug(" | target_addr = 0x{x}", .{actual_target_addr});
const narrowed = @truncate(u12, @intCast(u64, actual_target_addr));
var inst: aarch64.Instruction = .{
.load_store_register = mem.bytesToValue(meta.TagPayload(
@ -856,6 +722,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void {
.ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => {
const code = self.code.items[rel.offset..][0..4];
const actual_target_addr = @intCast(i64, target_addr) + rel.addend;
log.debug(" | target_addr = 0x{x}", .{actual_target_addr});
const RegInfo = struct {
rd: u5,
@ -886,7 +753,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void {
}
};
const narrowed = @truncate(u12, @intCast(u64, actual_target_addr));
var inst = if (is_via_thread_ptrs) blk: {
var inst = if (macho_file.tlv_ptr_entries_table.contains(rel.target)) blk: {
const offset = try math.divExact(u12, narrowed, 8);
break :blk aarch64.Instruction{
.load_store_register = .{
@ -913,18 +780,20 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void {
mem.writeIntLittle(u32, code, inst.toU32());
},
.ARM64_RELOC_POINTER_TO_GOT => {
log.debug(" | target_addr = 0x{x}", .{target_addr});
const result = math.cast(i32, @intCast(i64, target_addr) - @intCast(i64, source_addr)) orelse return error.Overflow;
mem.writeIntLittle(u32, self.code.items[rel.offset..][0..4], @bitCast(u32, result));
},
.ARM64_RELOC_UNSIGNED => {
const result = blk: {
if (rel.subtractor) |subtractor| {
const sym = macho_file.locals.items[subtractor];
const sym = macho_file.getSymbol(subtractor);
break :blk @intCast(i64, target_addr) - @intCast(i64, sym.n_value) + rel.addend;
} else {
break :blk @intCast(i64, target_addr) + rel.addend;
}
};
log.debug(" | target_addr = 0x{x}", .{result});
if (rel.length == 3) {
mem.writeIntLittle(u64, self.code.items[rel.offset..][0..8], @bitCast(u64, result));
@ -943,6 +812,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void {
.x86_64 => {
switch (@intToEnum(macho.reloc_type_x86_64, rel.@"type")) {
.X86_64_RELOC_BRANCH => {
log.debug(" | target_addr = 0x{x}", .{target_addr});
const displacement = math.cast(
i32,
@intCast(i64, target_addr) - @intCast(i64, source_addr) - 4 + rel.addend,
@ -950,6 +820,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void {
mem.writeIntLittle(u32, self.code.items[rel.offset..][0..4], @bitCast(u32, displacement));
},
.X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => {
log.debug(" | target_addr = 0x{x}", .{target_addr});
const displacement = math.cast(
i32,
@intCast(i64, target_addr) - @intCast(i64, source_addr) - 4 + rel.addend,
@ -957,7 +828,8 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void {
mem.writeIntLittle(u32, self.code.items[rel.offset..][0..4], @bitCast(u32, displacement));
},
.X86_64_RELOC_TLV => {
if (!is_via_thread_ptrs) {
log.debug(" | target_addr = 0x{x}", .{target_addr});
if (!macho_file.tlv_ptr_entries_table.contains(rel.target)) {
// We need to rewrite the opcode from movq to leaq.
self.code.items[rel.offset - 2] = 0x8d;
}
@ -980,6 +852,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void {
else => unreachable,
};
const actual_target_addr = @intCast(i64, target_addr) + rel.addend;
log.debug(" | target_addr = 0x{x}", .{actual_target_addr});
const displacement = math.cast(
i32,
actual_target_addr - @intCast(i64, source_addr + correction + 4),
@ -989,12 +862,13 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void {
.X86_64_RELOC_UNSIGNED => {
const result = blk: {
if (rel.subtractor) |subtractor| {
const sym = macho_file.locals.items[subtractor];
const sym = macho_file.getSymbol(subtractor);
break :blk @intCast(i64, target_addr) - @intCast(i64, sym.n_value) + rel.addend;
} else {
break :blk @intCast(i64, target_addr) + rel.addend;
}
};
log.debug(" | target_addr = 0x{x}", .{result});
if (rel.length == 3) {
mem.writeIntLittle(u64, self.code.items[rel.offset..][0..8], @bitCast(u64, result));

@ -5,7 +5,7 @@ const build_options = @import("build_options");
const assert = std.debug.assert;
const fs = std.fs;
const link = @import("../../link.zig");
const log = std.log.scoped(.link);
const log = std.log.scoped(.dsym);
const macho = std.macho;
const makeStaticString = MachO.makeStaticString;
const math = std.math;
@ -17,6 +17,7 @@ const Allocator = mem.Allocator;
const Dwarf = @import("../Dwarf.zig");
const MachO = @import("../MachO.zig");
const Module = @import("../../Module.zig");
const StringTable = @import("../strtab.zig").StringTable;
const TextBlock = MachO.TextBlock;
const Type = @import("../../type.zig").Type;
@ -59,6 +60,8 @@ debug_aranges_section_dirty: bool = false,
debug_info_header_dirty: bool = false,
debug_line_header_dirty: bool = false,
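/// The dSYM bundle now carries its own string table instead of reusing the
/// linker's `base.strtab` (see `writeStringTable` below).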
strtab: StringTable(.strtab) = .{},
relocs: std.ArrayListUnmanaged(Reloc) = .{},
pub const Reloc = struct {
@ -93,6 +96,7 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: Allocator) !void
.strsize = 0,
},
});
try self.strtab.buffer.append(allocator, 0);
self.load_commands_dirty = true;
}
@ -269,22 +273,36 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti
for (self.relocs.items) |*reloc| {
const sym = switch (reloc.@"type") {
.direct_load => self.base.locals.items[reloc.target],
.direct_load => self.base.getSymbol(.{ .sym_index = reloc.target, .file = null }),
.got_load => blk: {
const got_index = self.base.got_entries_table.get(.{ .local = reloc.target }).?;
const got_index = self.base.got_entries_table.get(.{
.sym_index = reloc.target,
.file = null,
}).?;
const got_entry = self.base.got_entries.items[got_index];
break :blk self.base.locals.items[got_entry.atom.local_sym_index];
break :blk got_entry.getSymbol(self.base);
},
};
if (sym.n_value == reloc.prev_vaddr) continue;
const sym_name = switch (reloc.@"type") {
.direct_load => self.base.getSymbolName(.{ .sym_index = reloc.target, .file = null }),
.got_load => blk: {
const got_index = self.base.got_entries_table.get(.{
.sym_index = reloc.target,
.file = null,
}).?;
const got_entry = self.base.got_entries.items[got_index];
break :blk got_entry.getName(self.base);
},
};
const seg = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment;
const sect = &seg.sections.items[self.debug_info_section_index.?];
const file_offset = sect.offset + reloc.offset;
log.debug("resolving relocation: {d}@{x} ('{s}') at offset {x}", .{
reloc.target,
sym.n_value,
self.base.getString(sym.n_strx),
sym_name,
file_offset,
});
try self.file.pwriteAll(mem.asBytes(&sym.n_value), file_offset);
@ -367,6 +385,7 @@ pub fn deinit(self: *DebugSymbols, allocator: Allocator) void {
}
self.load_commands.deinit(allocator);
self.dwarf.deinit();
self.strtab.deinit(allocator);
self.relocs.deinit(allocator);
}
@ -582,21 +601,39 @@ fn writeSymbolTable(self: *DebugSymbols) !void {
const tracy = trace(@src());
defer tracy.end();
const gpa = self.base.base.allocator;
const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment;
const symtab = &self.load_commands.items[self.symtab_cmd_index.?].symtab;
symtab.symoff = @intCast(u32, seg.inner.fileoff);
var locals = std.ArrayList(macho.nlist_64).init(self.base.base.allocator);
var locals = std.ArrayList(macho.nlist_64).init(gpa);
defer locals.deinit();
for (self.base.locals.items) |sym| {
if (sym.n_strx == 0) continue;
if (self.base.symbol_resolver.get(sym.n_strx)) |_| continue;
try locals.append(sym);
for (self.base.locals.items) |sym, sym_id| {
if (sym.n_strx == 0) continue; // no name, skip
if (sym.n_desc == MachO.N_DESC_GCED) continue; // GCed, skip
const sym_loc = MachO.SymbolWithLoc{ .sym_index = @intCast(u32, sym_id), .file = null };
if (self.base.symbolIsTemp(sym_loc)) continue; // local temp symbol, skip
if (self.base.globals.contains(self.base.getSymbolName(sym_loc))) continue; // global symbol is either an export or import, skip
var out_sym = sym;
out_sym.n_strx = try self.strtab.insert(gpa, self.base.getSymbolName(sym_loc));
try locals.append(out_sym);
}
var exports = std.ArrayList(macho.nlist_64).init(gpa);
defer exports.deinit();
for (self.base.globals.values()) |global| {
const sym = self.base.getSymbol(global);
if (sym.undf()) continue; // import, skip
if (sym.n_desc == MachO.N_DESC_GCED) continue; // GCed, skip
var out_sym = sym;
out_sym.n_strx = try self.strtab.insert(gpa, self.base.getSymbolName(global));
try exports.append(out_sym);
}
const nlocals = locals.items.len;
const nexports = self.base.globals.items.len;
const nexports = exports.items.len;
const locals_off = symtab.symoff;
const locals_size = nlocals * @sizeOf(macho.nlist_64);
const exports_off = locals_off + locals_size;
@ -641,7 +678,7 @@ fn writeSymbolTable(self: *DebugSymbols) !void {
try self.file.pwriteAll(mem.sliceAsBytes(locals.items), locals_off);
log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off });
try self.file.pwriteAll(mem.sliceAsBytes(self.base.globals.items), exports_off);
try self.file.pwriteAll(mem.sliceAsBytes(exports.items), exports_off);
self.load_commands_dirty = true;
}
@ -655,7 +692,7 @@ fn writeStringTable(self: *DebugSymbols) !void {
const symtab_size = @intCast(u32, symtab.nsyms * @sizeOf(macho.nlist_64));
symtab.stroff = symtab.symoff + symtab_size;
const needed_size = mem.alignForwardGeneric(u64, self.base.strtab.items.len, @alignOf(u64));
const needed_size = mem.alignForwardGeneric(u64, self.strtab.buffer.items.len, @alignOf(u64));
symtab.strsize = @intCast(u32, needed_size);
if (symtab_size + needed_size > seg.inner.filesize) {
@ -692,7 +729,7 @@ fn writeStringTable(self: *DebugSymbols) !void {
log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize });
try self.file.pwriteAll(self.base.strtab.items, symtab.stroff);
try self.file.pwriteAll(self.strtab.buffer.items, symtab.stroff);
self.load_commands_dirty = true;
}

File diff suppressed because it is too large.

@ -0,0 +1,292 @@
const std = @import("std");
const assert = std.debug.assert;
const log = std.log.scoped(.dead_strip);
const macho = std.macho;
const math = std.math;
const mem = std.mem;
const Allocator = mem.Allocator;
const Atom = @import("Atom.zig");
const MachO = @import("../MachO.zig");
const MatchingSection = MachO.MatchingSection;
pub fn gcAtoms(macho_file: *MachO) !void {
const gpa = macho_file.base.allocator;
var arena_allocator = std.heap.ArenaAllocator.init(gpa);
defer arena_allocator.deinit();
const arena = arena_allocator.allocator();
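// Phase 1: collect the GC roots (the entrypoint or exported symbols, plus
// atoms in sections that must never be dead-stripped).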
var roots = std.AutoHashMap(*Atom, void).init(arena);
try collectRoots(&roots, macho_file);
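// Phase 2: transitively mark every atom reachable from a root via its
// relocations.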
var alive = std.AutoHashMap(*Atom, void).init(arena);
try mark(roots, &alive, macho_file);
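// Phase 3: prune every atom that was never marked alive.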
try prune(arena, alive, macho_file);
}
fn removeAtomFromSection(atom: *Atom, match: MatchingSection, macho_file: *MachO) void {
const sect = macho_file.getSectionPtr(match);
// If we want to enable GC for incremental codepath, we need to take into
// account any padding that might have been left here.
sect.size -= atom.size;
if (atom.prev) |prev| {
prev.next = atom.next;
}
if (atom.next) |next| {
next.prev = atom.prev;
} else {
const last = macho_file.atoms.getPtr(match).?;
if (atom.prev) |prev| {
last.* = prev;
} else {
// The section will be GCed in the next step.
last.* = undefined;
sect.size = 0;
}
}
}
fn collectRoots(roots: *std.AutoHashMap(*Atom, void), macho_file: *MachO) !void {
const output_mode = macho_file.base.options.output_mode;
switch (output_mode) {
.Exe => {
// Add entrypoint as GC root
const global = try macho_file.getEntryPoint();
const atom = macho_file.getAtomForSymbol(global).?; // panic here means fatal error
_ = try roots.getOrPut(atom);
},
else => |other| {
assert(other == .Lib);
// Add exports as GC roots
for (macho_file.globals.values()) |global| {
const sym = macho_file.getSymbol(global);
if (!sym.sect()) continue;
const atom = macho_file.getAtomForSymbol(global) orelse {
log.debug("skipping {s}", .{macho_file.getSymbolName(global)});
continue;
};
_ = try roots.getOrPut(atom);
log.debug("adding root", .{});
macho_file.logAtom(atom);
}
},
}
// TODO just a temp until we learn how to parse unwind records
if (macho_file.globals.get("___gxx_personality_v0")) |global| {
if (macho_file.getAtomForSymbol(global)) |atom| {
_ = try roots.getOrPut(atom);
log.debug("adding root", .{});
macho_file.logAtom(atom);
}
}
for (macho_file.objects.items) |object| {
for (object.managed_atoms.items) |atom| {
const source_sym = object.getSourceSymbol(atom.sym_index) orelse continue;
if (source_sym.tentative()) continue;
const source_sect = object.getSourceSection(source_sym.n_sect - 1);
const is_gc_root = blk: {
if (source_sect.isDontDeadStrip()) break :blk true;
if (mem.eql(u8, "__StaticInit", source_sect.sectName())) break :blk true;
switch (source_sect.type_()) {
macho.S_MOD_INIT_FUNC_POINTERS,
macho.S_MOD_TERM_FUNC_POINTERS,
=> break :blk true,
else => break :blk false,
}
};
if (is_gc_root) {
try roots.putNoClobber(atom, {});
log.debug("adding root", .{});
macho_file.logAtom(atom);
}
}
}
}
fn markLive(atom: *Atom, alive: *std.AutoHashMap(*Atom, void), macho_file: *MachO) anyerror!void {
const gop = try alive.getOrPut(atom);
if (gop.found_existing) return;
log.debug("marking live", .{});
macho_file.logAtom(atom);
for (atom.relocs.items) |rel| {
const target_atom = rel.getTargetAtom(macho_file) orelse continue;
try markLive(target_atom, alive, macho_file);
}
}
fn refersLive(atom: *Atom, alive: std.AutoHashMap(*Atom, void), macho_file: *MachO) bool {
for (atom.relocs.items) |rel| {
const target_atom = rel.getTargetAtom(macho_file) orelse continue;
if (alive.contains(target_atom)) return true;
}
return false;
}
fn refersDead(atom: *Atom, macho_file: *MachO) bool {
for (atom.relocs.items) |rel| {
const target_atom = rel.getTargetAtom(macho_file) orelse continue;
const target_sym = target_atom.getSymbol(macho_file);
if (target_sym.n_desc == MachO.N_DESC_GCED) return true;
}
return false;
}
fn mark(
roots: std.AutoHashMap(*Atom, void),
alive: *std.AutoHashMap(*Atom, void),
macho_file: *MachO,
) !void {
try alive.ensureUnusedCapacity(roots.count());
var it = roots.keyIterator();
while (it.next()) |root| {
try markLive(root.*, alive, macho_file);
}
var loop: bool = true;
while (loop) {
loop = false;
for (macho_file.objects.items) |object| {
for (object.managed_atoms.items) |atom| {
if (alive.contains(atom)) continue;
const source_sym = object.getSourceSymbol(atom.sym_index) orelse continue;
if (source_sym.tentative()) continue;
const source_sect = object.getSourceSection(source_sym.n_sect - 1);
if (source_sect.isDontDeadStripIfReferencesLive() and refersLive(atom, alive.*, macho_file)) {
try markLive(atom, alive, macho_file);
loop = true;
}
}
}
}
}
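`mark` cannot finish in a single pass: an atom in a "don't dead strip if references live" section is kept only if something it references is itself alive, and every newly marked atom can retroactively revive such atoms, so the loop re-scans until a full pass marks nothing new. A toy model of that fixed point (the three parallel arrays are illustrative, not the linker's data structures; atom 0 is a root, atoms 1 and 2 are live-support, and only atom 1 references something alive):

const std = @import("std");

pub fn main() void {
    var alive = [_]bool{ true, false, false };
    const live_support = [_]bool{ false, true, true };
    const refers_to = [_]?usize{ null, 0, null }; // atom 1 -> atom 0

    var marked_any = true;
    while (marked_any) { // iterate to a fixed point
        marked_any = false;
        for (alive) |is_alive, i| {
            if (is_alive or !live_support[i]) continue;
            const target = refers_to[i] orelse continue;
            if (alive[target]) {
                alive[i] = true; // revived; may revive others on the next pass
                marked_any = true;
            }
        }
    }
    std.debug.print("{any}\n", .{alive}); // { true, true, false }
}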
fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *MachO) !void {
// Any section that ends up here will be updated, that is,
// its size and alignment recalculated.
var gc_sections = std.AutoHashMap(MatchingSection, void).init(arena);
var loop: bool = true;
while (loop) {
loop = false;
for (macho_file.objects.items) |object| {
for (object.getSourceSymtab()) |_, source_index| {
const atom = object.getAtomForSymbol(@intCast(u32, source_index)) orelse continue;
if (alive.contains(atom)) continue;
const global = atom.getSymbolWithLoc();
const sym = atom.getSymbolPtr(macho_file);
const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect);
if (sym.n_desc == MachO.N_DESC_GCED) continue;
if (!sym.ext() and !refersDead(atom, macho_file)) continue;
macho_file.logAtom(atom);
sym.n_desc = MachO.N_DESC_GCED;
removeAtomFromSection(atom, match, macho_file);
_ = try gc_sections.put(match, {});
for (atom.contained.items) |sym_off| {
const inner = macho_file.getSymbolPtr(.{
.sym_index = sym_off.sym_index,
.file = atom.file,
});
inner.n_desc = MachO.N_DESC_GCED;
}
if (macho_file.got_entries_table.contains(global)) {
const got_atom = macho_file.getGotAtomForSymbol(global).?;
const got_sym = got_atom.getSymbolPtr(macho_file);
got_sym.n_desc = MachO.N_DESC_GCED;
}
if (macho_file.stubs_table.contains(global)) {
const stubs_atom = macho_file.getStubsAtomForSymbol(global).?;
const stubs_sym = stubs_atom.getSymbolPtr(macho_file);
stubs_sym.n_desc = MachO.N_DESC_GCED;
}
if (macho_file.tlv_ptr_entries_table.contains(global)) {
const tlv_ptr_atom = macho_file.getTlvPtrAtomForSymbol(global).?;
const tlv_ptr_sym = tlv_ptr_atom.getSymbolPtr(macho_file);
tlv_ptr_sym.n_desc = MachO.N_DESC_GCED;
}
loop = true;
}
}
}
for (macho_file.got_entries.items) |entry| {
const sym = entry.getSymbol(macho_file);
if (sym.n_desc != MachO.N_DESC_GCED) continue;
// TODO tombstone
const atom = entry.getAtom(macho_file);
const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect);
removeAtomFromSection(atom, match, macho_file);
_ = try gc_sections.put(match, {});
_ = macho_file.got_entries_table.remove(entry.target);
}
for (macho_file.stubs.items) |entry| {
const sym = entry.getSymbol(macho_file);
if (sym.n_desc != MachO.N_DESC_GCED) continue;
// TODO tombstone
const atom = entry.getAtom(macho_file);
const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect);
removeAtomFromSection(atom, match, macho_file);
_ = try gc_sections.put(match, {});
_ = macho_file.stubs_table.remove(entry.target);
}
for (macho_file.tlv_ptr_entries.items) |entry| {
const sym = entry.getSymbol(macho_file);
if (sym.n_desc != MachO.N_DESC_GCED) continue;
// TODO tombstone
const atom = entry.getAtom(macho_file);
const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect);
removeAtomFromSection(atom, match, macho_file);
_ = try gc_sections.put(match, {});
_ = macho_file.tlv_ptr_entries_table.remove(entry.target);
}
var gc_sections_it = gc_sections.iterator();
while (gc_sections_it.next()) |entry| {
const match = entry.key_ptr.*;
const sect = macho_file.getSectionPtr(match);
if (sect.size == 0) continue; // Pruning happens automatically in next step.
sect.@"align" = 0;
sect.size = 0;
var atom = macho_file.atoms.get(match).?;
while (atom.prev) |prev| {
atom = prev;
}
while (true) {
const atom_alignment = try math.powi(u32, 2, atom.alignment);
const aligned_end_addr = mem.alignForwardGeneric(u64, sect.size, atom_alignment);
const padding = aligned_end_addr - sect.size;
sect.size += padding + atom.size;
sect.@"align" = @maximum(sect.@"align", atom.alignment);
if (atom.next) |next| {
atom = next;
} else break;
}
}
}
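The loop above re-derives a pruned section's size and alignment by walking the surviving atoms in layout order, padding the running size up to each atom's power-of-two alignment before adding the atom itself. A worked example of that arithmetic with two toy atoms (sizes and alignments invented; same `powi` and `alignForwardGeneric` calls as above):

const std = @import("std");

pub fn main() !void {
    const sizes = [_]u64{ 5, 4 };
    const p2_aligns = [_]u64{ 0, 3 }; // 2^0 = 1-byte and 2^3 = 8-byte aligned

    var size: u64 = 0;
    for (sizes) |atom_size, i| {
        const atom_alignment = try std.math.powi(u64, 2, p2_aligns[i]);
        const aligned_end_addr = std.mem.alignForwardGeneric(u64, size, atom_alignment);
        const padding = aligned_end_addr - size;
        size = aligned_end_addr + atom_size;
        std.debug.print("atom {d}: padding={d}, size={d}\n", .{ i, padding, size });
    }
    // 5 bytes, then 3 bytes of padding up to 8, then 4 bytes: total 12.
}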

src/link/strtab.zig Normal file

@@ -0,0 +1,113 @@
const std = @import("std");
const mem = std.mem;
const Allocator = mem.Allocator;
const StringIndexAdapter = std.hash_map.StringIndexAdapter;
const StringIndexContext = std.hash_map.StringIndexContext;
pub fn StringTable(comptime log_scope: @Type(.EnumLiteral)) type {
return struct {
const Self = @This();
const log = std.log.scoped(log_scope);
buffer: std.ArrayListUnmanaged(u8) = .{},
table: std.HashMapUnmanaged(u32, bool, StringIndexContext, std.hash_map.default_max_load_percentage) = .{},
pub fn deinit(self: *Self, gpa: Allocator) void {
self.buffer.deinit(gpa);
self.table.deinit(gpa);
}
pub fn toOwnedSlice(self: *Self, gpa: Allocator) []const u8 {
const result = self.buffer.toOwnedSlice(gpa);
self.table.clearRetainingCapacity();
return result;
}
pub const PrunedResult = struct {
buffer: []const u8,
idx_map: std.AutoHashMap(u32, u32),
};
pub fn toPrunedResult(self: *Self, gpa: Allocator) !PrunedResult {
var buffer = std.ArrayList(u8).init(gpa);
defer buffer.deinit();
try buffer.ensureTotalCapacity(self.buffer.items.len);
buffer.appendAssumeCapacity(0);
var idx_map = std.AutoHashMap(u32, u32).init(gpa);
errdefer idx_map.deinit();
try idx_map.ensureTotalCapacity(self.table.count());
var it = self.table.iterator();
while (it.next()) |entry| {
const off = entry.key_ptr.*;
const save = entry.value_ptr.*;
if (!save) continue;
const new_off = @intCast(u32, buffer.items.len);
buffer.appendSliceAssumeCapacity(self.getAssumeExists(off));
idx_map.putAssumeCapacityNoClobber(off, new_off);
}
self.buffer.clearRetainingCapacity();
self.table.clearRetainingCapacity();
return PrunedResult{
.buffer = buffer.toOwnedSlice(),
.idx_map = idx_map,
};
}
pub fn insert(self: *Self, gpa: Allocator, string: []const u8) !u32 {
const gop = try self.table.getOrPutContextAdapted(gpa, @as([]const u8, string), StringIndexAdapter{
.bytes = &self.buffer,
}, StringIndexContext{
.bytes = &self.buffer,
});
if (gop.found_existing) {
const off = gop.key_ptr.*;
gop.value_ptr.* = true;
log.debug("reusing string '{s}' at offset 0x{x}", .{ string, off });
return off;
}
try self.buffer.ensureUnusedCapacity(gpa, string.len + 1);
const new_off = @intCast(u32, self.buffer.items.len);
log.debug("writing new string '{s}' at offset 0x{x}", .{ string, new_off });
self.buffer.appendSliceAssumeCapacity(string);
self.buffer.appendAssumeCapacity(0);
gop.key_ptr.* = new_off;
gop.value_ptr.* = true;
return new_off;
}
pub fn delete(self: *Self, string: []const u8) void {
const value_ptr = self.table.getPtrAdapted(@as([]const u8, string), StringIndexAdapter{
.bytes = &self.buffer,
}) orelse return;
value_ptr.* = false;
log.debug("marked '{s}' for deletion", .{string});
}
pub fn getOffset(self: *Self, string: []const u8) ?u32 {
return self.table.getKeyAdapted(string, StringIndexAdapter{
.bytes = &self.buffer,
});
}
pub fn get(self: Self, off: u32) ?[]const u8 {
log.debug("getting string at 0x{x}", .{off});
if (off >= self.buffer.items.len) return null;
return mem.sliceTo(@ptrCast([*:0]const u8, self.buffer.items.ptr + off), 0);
}
pub fn getAssumeExists(self: Self, off: u32) []const u8 {
return self.get(off) orelse unreachable;
}
};
}
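A minimal usage sketch of the string table (the relative import path and the test itself are assumptions, not part of this commit). Note that `delete` only unmarks an entry; its bytes survive until `toPrunedResult` rewrites the buffer:

const std = @import("std");
const StringTable = @import("strtab.zig").StringTable;

test "strings are interned; deletion is deferred" {
    const gpa = std.testing.allocator;
    var strtab = StringTable(.strtab){};
    defer strtab.deinit(gpa);

    const off = try strtab.insert(gpa, "_main");
    const again = try strtab.insert(gpa, "_main");
    try std.testing.expectEqual(off, again); // the second insert reuses the slot
    try std.testing.expectEqualStrings("_main", strtab.getAssumeExists(off));

    strtab.delete("_main"); // marks only; the entry is still resolvable
    try std.testing.expect(strtab.getOffset("_main") != null);
}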

src/main.zig

@@ -446,6 +446,8 @@ const usage_build_generic =
\\ --compress-debug-sections=[e] Debug section compression settings
\\ none No compression
\\ zlib Compression with deflate/inflate
\\ --gc-sections Force removal of functions and data that are unreachable by the entry point or exported symbols
\\ --no-gc-sections Don't force removal of unreachable functions and data
\\ --subsystem [subsystem] (Windows) /SUBSYSTEM:<subsystem> to the linker
\\ --stack [size] Override default stack size
\\ --image-base [addr] Set base address for executable image
@@ -463,6 +465,7 @@ const usage_build_generic =
\\ -search_dylibs_first (Darwin) search `libx.dylib` in each dir in library search paths, then `libx.a`
\\ -headerpad [value] (Darwin) set minimum space for future expansion of the load commands in hexadecimal notation
\\ -headerpad_max_install_names (Darwin) set enough space as if all paths were MAXPATHLEN
\\ -dead_strip (Darwin) remove functions and data that are unreachable by the entry point or exported symbols
\\ -dead_strip_dylibs (Darwin) remove dylibs that are unreachable by the entry point or exported symbols
\\ --import-memory (WebAssembly) import memory from the environment
\\ --import-table (WebAssembly) import function table from the host environment
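As a usage sketch (the source file name is hypothetical), both invocations below enable the same behavior when targeting macOS, since `-dead_strip` and `--gc-sections` feed the same `linker_gc_sections` option in the argument parsing further down:

zig build-exe main.zig -dead_strip
zig build-exe main.zig --gc-sections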
@@ -969,6 +972,8 @@ fn buildOutputType(
};
} else if (mem.eql(u8, arg, "-headerpad_max_install_names")) {
headerpad_max_install_names = true;
} else if (mem.eql(u8, arg, "-dead_strip")) {
linker_gc_sections = true;
} else if (mem.eql(u8, arg, "-dead_strip_dylibs")) {
dead_strip_dylibs = true;
} else if (mem.eql(u8, arg, "-T") or mem.eql(u8, arg, "--script")) {
@@ -1311,6 +1316,10 @@ fn buildOutputType(
try linker_export_symbol_names.append(arg["--export=".len..]);
} else if (mem.eql(u8, arg, "-Bsymbolic")) {
linker_bind_global_refs_locally = true;
} else if (mem.eql(u8, arg, "--gc-sections")) {
linker_gc_sections = true;
} else if (mem.eql(u8, arg, "--no-gc-sections")) {
linker_gc_sections = false;
} else if (mem.eql(u8, arg, "--debug-compile-errors")) {
debug_compile_errors = true;
} else if (mem.eql(u8, arg, "--verbose-link")) {
@@ -1764,6 +1773,8 @@ fn buildOutputType(
};
} else if (mem.eql(u8, arg, "-headerpad_max_install_names")) {
headerpad_max_install_names = true;
} else if (mem.eql(u8, arg, "-dead_strip")) {
linker_gc_sections = true;
} else if (mem.eql(u8, arg, "-dead_strip_dylibs")) {
dead_strip_dylibs = true;
} else if (mem.eql(u8, arg, "--gc-sections")) {


@@ -9,5 +9,5 @@ inline fn fibonacci(n: usize) usize {
}
// run
-// target=x86_64-linux,arm-linux,x86_64-macos,wasm32-wasi
+// target=x86_64-linux,arm-linux,wasm32-wasi
//


@@ -60,6 +60,10 @@ pub fn addCases(cases: *tests.StandaloneContext) void {
.build_modes = true,
});
cases.addBuildFile("test/link/macho/dead_strip/build.zig", .{
.build_modes = false,
});
cases.addBuildFile("test/link/macho/dead_strip_dylibs/build.zig", .{
.build_modes = true,
.requires_macos_sdk = true,

test/link/macho/dead_strip/build.zig Normal file

@@ -0,0 +1,49 @@
const std = @import("std");
const Builder = std.build.Builder;
const LibExeObjectStep = std.build.LibExeObjStep;
pub fn build(b: *Builder) void {
const mode = b.standardReleaseOptions();
const test_step = b.step("test", "Test the program");
test_step.dependOn(b.getInstallStep());
{
// Without -dead_strip, we expect `iAmUnused` symbol present
const exe = createScenario(b, mode);
const check = exe.checkObject(.macho);
check.checkInSymtab();
check.checkNext("{*} (__TEXT,__text) external _iAmUnused");
test_step.dependOn(&check.step);
const run_cmd = exe.run();
run_cmd.expectStdOutEqual("Hello!\n");
test_step.dependOn(&run_cmd.step);
}
{
// With -dead_strip, no `iAmUnused` symbol should be present
const exe = createScenario(b, mode);
exe.link_gc_sections = true;
const check = exe.checkObject(.macho);
check.checkInSymtab();
check.checkNotPresent("{*} (__TEXT,__text) external _iAmUnused");
test_step.dependOn(&check.step);
const run_cmd = exe.run();
run_cmd.expectStdOutEqual("Hello!\n");
test_step.dependOn(&run_cmd.step);
}
}
fn createScenario(b: *Builder, mode: std.builtin.Mode) *LibExeObjectStep {
const exe = b.addExecutable("test", null);
exe.addCSourceFile("main.c", &[0][]const u8{});
exe.setBuildMode(mode);
exe.linkLibC();
return exe;
}
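To spot-check by hand what the `CheckObjectStep` asserts, one could dump the symbol table of the built binary with `nm` (the output path assumes the default install layout):

nm zig-out/bin/test | grep iAmUnused

Without `-dead_strip` this prints the `_iAmUnused` entry; with `exe.link_gc_sections = true` it prints nothing.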

test/link/macho/dead_strip/main.c Normal file

@@ -0,0 +1,14 @@
#include <stdio.h>
void printMe() {
printf("Hello!\n");
}
int main(int argc, char* argv[]) {
printMe();
return 0;
}
void iAmUnused() {
printf("YOU SHALL NOT PASS!\n");
}