macho: start separating linking contexts

This commit is contained in:
Jakub Konka 2022-09-13 10:05:21 +02:00
parent 53bd7bd044
commit 79ab46ec91
2 changed files with 714 additions and 325 deletions

View File

@ -269,7 +269,7 @@ pub const SymbolWithLoc = struct {
/// When allocating, the ideal_capacity is calculated by
/// actual_capacity + (actual_capacity / ideal_factor)
const ideal_factor = 4;
const ideal_factor = 3;
/// Default path to dyld
const default_dyld_path: [*:0]const u8 = "/usr/lib/dyld";
@ -4322,7 +4322,7 @@ pub fn getSegmentAllocBase(self: MachO, indices: []const ?u8) struct { vmaddr: u
return .{ .vmaddr = 0, .fileoff = 0 };
}
pub fn writeSegmentHeaders(self: *MachO, ncmds: *u32, writer: anytype) !void {
fn writeSegmentHeaders(self: *MachO, ncmds: *u32, writer: anytype) !void {
for (self.segments.items) |seg, i| {
const indexes = self.getSectionIndexes(@intCast(u8, i));
var out_seg = seg;
@ -4351,20 +4351,18 @@ pub fn writeSegmentHeaders(self: *MachO, ncmds: *u32, writer: anytype) !void {
}
}
pub fn writeLinkeditSegmentData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void {
fn writeLinkeditSegmentData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void {
const seg = &self.segments.items[self.linkedit_segment_cmd_index.?];
seg.filesize = 0;
seg.vmsize = 0;
try self.writeDyldInfoData(ncmds, lc_writer);
try self.writeFunctionStarts(ncmds, lc_writer);
try self.writeDataInCode(ncmds, lc_writer);
try self.writeSymtabs(ncmds, lc_writer);
seg.vmsize = mem.alignForwardGeneric(u64, seg.filesize, self.page_size);
}
pub fn writeDyldInfoData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void {
fn writeDyldInfoData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void {
const tracy = trace(@src());
defer tracy.end();
@ -4680,155 +4678,7 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void {
}
}
const asc_u64 = std.sort.asc(u64);
/// Writes the function-starts table into the linkedit segment and appends the
/// matching `FUNCTION_STARTS` load command to `lc_writer`.
///
/// The table is built from every defined, non-GCed global symbol that lives in
/// the text section: addresses are sorted, turned into deltas relative to the
/// text segment's `vmaddr`, and ULEB128-encoded. Returns without writing
/// anything when no text segment or text section exists.
///
/// On success `ncmds` is incremented by one and the linkedit segment's
/// `filesize` grows to cover the written data.
pub fn writeFunctionStarts(macho_file: *MachO, ncmds: *u32, lc_writer: anytype) !void {
    const tracy = trace(@src());
    defer tracy.end();

    const text_seg_index = macho_file.text_segment_cmd_index orelse return;
    const text_sect_index = macho_file.text_section_index orelse return;
    const text_seg = macho_file.segments.items[text_seg_index];

    const gpa = macho_file.base.allocator;

    // We need to sort by address first
    var addresses = std.ArrayList(u64).init(gpa);
    defer addresses.deinit();
    try addresses.ensureTotalCapacityPrecise(macho_file.globals.items.len);

    for (macho_file.globals.items) |global| {
        const sym = macho_file.getSymbol(global);
        if (sym.undf()) continue;
        if (sym.n_desc == MachO.N_DESC_GCED) continue;
        // `n_sect` is a 1-based section ordinal; 0 means "no section" (e.g. an
        // absolute symbol). Skip it here rather than underflowing in the
        // subtraction below.
        if (sym.n_sect == 0) continue;
        const sect_id = sym.n_sect - 1;
        if (sect_id != text_sect_index) continue;

        addresses.appendAssumeCapacity(sym.n_value);
    }

    std.sort.sort(u64, addresses.items, {}, asc_u64);

    var offsets = std.ArrayList(u32).init(gpa);
    defer offsets.deinit();
    try offsets.ensureTotalCapacityPrecise(addresses.items.len);

    // Each entry is the distance from the previous distinct address; duplicate
    // addresses (delta of zero) are dropped.
    var last_off: u32 = 0;
    for (addresses.items) |addr| {
        const offset = @intCast(u32, addr - text_seg.vmaddr);
        const diff = offset - last_off;

        if (diff == 0) continue;

        offsets.appendAssumeCapacity(diff);
        last_off = offset;
    }

    var buffer = std.ArrayList(u8).init(gpa);
    defer buffer.deinit();

    // Upper bound only: reserves 8 bytes per delta before encoding.
    const max_size = @intCast(usize, offsets.items.len * @sizeOf(u64));
    try buffer.ensureTotalCapacity(max_size);

    for (offsets.items) |offset| {
        try std.leb.writeULEB128(buffer.writer(), offset);
    }

    const link_seg = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?];
    const offset = mem.alignForwardGeneric(u64, link_seg.fileoff + link_seg.filesize, @alignOf(u64));
    const needed_size = buffer.items.len;
    link_seg.filesize = offset + needed_size - link_seg.fileoff;

    log.debug("writing function starts info from 0x{x} to 0x{x}", .{ offset, offset + needed_size });

    try macho_file.base.file.?.pwriteAll(buffer.items, offset);

    try lc_writer.writeStruct(macho.linkedit_data_command{
        .cmd = .FUNCTION_STARTS,
        .cmdsize = @sizeOf(macho.linkedit_data_command),
        .dataoff = @intCast(u32, offset),
        .datasize = @intCast(u32, needed_size),
    });
    ncmds.* += 1;
}
/// Returns the sub-slice of `dices` that starts at the first entry whose
/// `offset` is at or past `start_addr` and stops before the first entry whose
/// `offset` is at or past `end_addr`.
/// NOTE(review): both lookups go through `MachO.findFirst`; presumably `dices`
/// must be ordered by `offset` for the result to be meaningful — confirm.
fn filterDataInCode(
    dices: []align(1) const macho.data_in_code_entry,
    start_addr: u64,
    end_addr: u64,
) []align(1) const macho.data_in_code_entry {
    const AtOrPast = struct {
        addr: u64,

        pub fn predicate(self: @This(), dice: macho.data_in_code_entry) bool {
            return dice.offset >= self.addr;
        }
    };

    const first = MachO.findFirst(macho.data_in_code_entry, dices, 0, AtOrPast{ .addr = start_addr });
    const last = MachO.findFirst(macho.data_in_code_entry, dices, first, AtOrPast{ .addr = end_addr });

    return dices[first..last];
}
/// Collects the data-in-code entries of every object file, rebases them onto
/// the output text section, writes them into the linkedit segment and appends
/// a `DATA_IN_CODE` load command to `lc_writer`.
///
/// Only atoms that are not GCed and that live in the text section contribute.
/// Returns early, writing nothing, when there is no text section. On success
/// `ncmds` is incremented by one.
pub fn writeDataInCode(macho_file: *MachO, ncmds: *u32, lc_writer: anytype) !void {
    const tracy = trace(@src());
    defer tracy.end();

    var entries = std.ArrayList(macho.data_in_code_entry).init(macho_file.base.allocator);
    defer entries.deinit();

    const text_sect_id = macho_file.text_section_index orelse return;
    const text_header = macho_file.sections.items(.header)[text_sect_id];

    for (macho_file.objects.items) |object| {
        const object_dice = object.parseDataInCode() orelse continue;
        // Room for every entry of this object; the appends below rely on it.
        try entries.ensureUnusedCapacity(object_dice.len);

        for (object.managed_atoms.items) |atom| {
            const sym = atom.getSymbol(macho_file);
            if (sym.n_desc == MachO.N_DESC_GCED) continue;
            if (sym.n_sect - 1 != text_sect_id) continue;

            const source_sym = object.getSourceSymbol(atom.sym_index) orelse continue;
            const source_addr = math.cast(u32, source_sym.n_value) orelse return error.Overflow;
            const atom_dice = filterDataInCode(object_dice, source_addr, source_addr + atom.size);
            // File offset of the atom in the output: address within the text
            // section plus the section's file offset.
            const base = math.cast(u32, sym.n_value - text_header.addr + text_header.offset) orelse
                return error.Overflow;

            for (atom_dice) |entry| {
                entries.appendAssumeCapacity(.{
                    .offset = entry.offset - source_addr + base,
                    .length = entry.length,
                    .kind = entry.kind,
                });
            }
        }
    }

    const linkedit = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?];
    const file_offset = mem.alignForwardGeneric(u64, linkedit.fileoff + linkedit.filesize, @alignOf(u64));
    const byte_len = entries.items.len * @sizeOf(macho.data_in_code_entry);
    linkedit.filesize = file_offset + byte_len - linkedit.fileoff;

    log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ file_offset, file_offset + byte_len });

    try macho_file.base.file.?.pwriteAll(mem.sliceAsBytes(entries.items), file_offset);

    try lc_writer.writeStruct(macho.linkedit_data_command{
        .cmd = .DATA_IN_CODE,
        .cmdsize = @sizeOf(macho.linkedit_data_command),
        .dataoff = @intCast(u32, file_offset),
        .datasize = @intCast(u32, byte_len),
    });
    ncmds.* += 1;
}
pub fn writeSymtabs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void {
fn writeSymtabs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void {
var symtab_cmd = macho.symtab_command{
.cmdsize = @sizeOf(macho.symtab_command),
.symoff = 0,
@ -4866,7 +4716,7 @@ pub fn writeSymtabs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void {
ncmds.* += 2;
}
pub fn writeSymtab(self: *MachO, lc: *macho.symtab_command) !SymtabCtx {
fn writeSymtab(self: *MachO, lc: *macho.symtab_command) !SymtabCtx {
const gpa = self.base.allocator;
var locals = std.ArrayList(macho.nlist_64).init(gpa);
@ -4892,10 +4742,6 @@ pub fn writeSymtab(self: *MachO, lc: *macho.symtab_command) !SymtabCtx {
out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(sym_loc));
try locals.append(out_sym);
}
if (!self.base.options.strip) {
try self.generateSymbolStabs(object, &locals);
}
}
var exports = std.ArrayList(macho.nlist_64).init(gpa);
@ -5056,7 +4902,7 @@ fn writeDysymtab(self: *MachO, ctx: SymtabCtx, lc: *macho.dysymtab_command) !voi
lc.nindirectsyms = nindirectsyms;
}
pub fn writeCodeSignaturePadding(
fn writeCodeSignaturePadding(
self: *MachO,
code_sig: *CodeSignature,
ncmds: *u32,
@ -5085,7 +4931,7 @@ pub fn writeCodeSignaturePadding(
return @intCast(u32, offset);
}
pub fn writeCodeSignature(self: *MachO, code_sig: *CodeSignature, offset: u32) !void {
fn writeCodeSignature(self: *MachO, code_sig: *CodeSignature, offset: u32) !void {
const seg = self.segments.items[self.text_segment_cmd_index.?];
var buffer = std.ArrayList(u8).init(self.base.allocator);
@ -5109,7 +4955,7 @@ pub fn writeCodeSignature(self: *MachO, code_sig: *CodeSignature, offset: u32) !
}
/// Writes Mach-O file header.
pub fn writeHeader(self: *MachO, ncmds: u32, sizeofcmds: u32) !void {
fn writeHeader(self: *MachO, ncmds: u32, sizeofcmds: u32) !void {
var header: macho.mach_header_64 = .{};
header.flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE | macho.MH_TWOLEVEL;
@ -5157,6 +5003,45 @@ pub fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) {
std.math.maxInt(@TypeOf(actual_size));
}
/// Checks whether placing `size` bytes (padded via `padToIdeal`) at file
/// offset `start` would collide with the reserved header area or with any
/// existing section's padded file range.
///
/// Returns the end offset of whatever was collided with, or `null` when the
/// range is free.
fn detectAllocCollision(self: *MachO, start: u64, size: u64) ?u64 {
    // TODO: header and load commands have to be part of the __TEXT segment
    if (start < default_headerpad_size) return default_headerpad_size;

    const end = start + padToIdeal(size);

    for (self.sections.items(.header)) |header| {
        // Existing sections are tested against their padded size as well.
        const padded_end = header.offset + padToIdeal(header.size);
        const overlaps = end > header.offset and start < padded_end;
        if (overlaps) return padded_end;
    }

    return null;
}
// fn allocatedSize(self: *MachO, start: u64) u64 {
// if (start == 0)
// return 0;
// var min_pos: u64 = std.math.maxInt(u64);
// for (self.sections.items(.header)) |header| {
// if (header.offset <= start) continue;
// if (header.offset < min_pos) min_pos = header.offset;
// }
// return min_pos - start;
// }
/// Finds a file offset, aligned to `min_alignment`, at which `object_size`
/// bytes do not collide with anything according to `detectAllocCollision`.
/// The search starts at offset 0 and jumps past each collision it hits.
fn findFreeSpace(self: *MachO, object_size: u64, min_alignment: u32) u64 {
    var candidate: u64 = 0;
    while (true) {
        const collision_end = self.detectAllocCollision(candidate, object_size) orelse return candidate;
        candidate = mem.alignForwardGeneric(u64, collision_end, min_alignment);
    }
}
pub fn makeStaticString(bytes: []const u8) [16]u8 {
var buf = [_]u8{0} ** 16;
assert(bytes.len <= buf.len);
@ -5321,161 +5206,6 @@ pub fn findFirst(comptime T: type, haystack: []align(1) const T, start: usize, p
return i;
}
/// Appends debug stab entries for `object` to `locals`.
///
/// Emits an opening scope (two `N_SO` entries for the compile directory and
/// the translation unit name, then an `N_OSO` entry carrying the object's name
/// and mtime), then the stabs of every managed atom and of the symbols
/// contained in it, and finally a closing `N_SO` entry. If the object has no
/// usable debug info, nothing is appended.
///
/// Must not be called when stripping is enabled.
pub fn generateSymbolStabs(
    self: *MachO,
    object: Object,
    locals: *std.ArrayList(macho.nlist_64),
) !void {
    assert(!self.base.options.strip);

    log.debug("parsing debug info in '{s}'", .{object.name});

    const gpa = self.base.allocator;
    var di = try object.parseDwarfInfo();
    defer di.deinit(gpa);
    try dwarf.openDwarfDebugInfo(&di, gpa);

    // We assume there is only one CU.
    const cu = di.findCompileUnit(0x0) catch |err| switch (err) {
        error.MissingDebugInfo => {
            // TODO audit cases with missing debug info and audit our dwarf.zig module.
            log.debug("invalid or missing debug info in {s}; skipping", .{object.name});
            return;
        },
        else => |e| return e,
    };
    const unit_name = try cu.die.getAttrString(&di, dwarf.AT.name, di.debug_str, cu.*);
    const unit_dir = try cu.die.getAttrString(&di, dwarf.AT.comp_dir, di.debug_str, cu.*);

    // Open scope
    try locals.ensureUnusedCapacity(3);
    locals.appendAssumeCapacity(.{
        .n_strx = try self.strtab.insert(gpa, unit_dir),
        .n_type = macho.N_SO,
        .n_sect = 0,
        .n_desc = 0,
        .n_value = 0,
    });
    locals.appendAssumeCapacity(.{
        .n_strx = try self.strtab.insert(gpa, unit_name),
        .n_type = macho.N_SO,
        .n_sect = 0,
        .n_desc = 0,
        .n_value = 0,
    });
    locals.appendAssumeCapacity(.{
        .n_strx = try self.strtab.insert(gpa, object.name),
        .n_type = macho.N_OSO,
        .n_sect = 0,
        .n_desc = 1,
        .n_value = object.mtime,
    });

    // Scratch space reused for every symbol; each result is copied into
    // `locals` before the next call overwrites it.
    var scratch: [4]macho.nlist_64 = undefined;

    for (object.managed_atoms.items) |atom| {
        const atom_stabs = try self.generateSymbolStabsForSymbol(
            atom.getSymbolWithLoc(),
            di,
            &scratch,
        );
        try locals.appendSlice(atom_stabs);

        for (atom.contained.items) |inner| {
            const inner_loc = SymbolWithLoc{
                .sym_index = inner.sym_index,
                .file = atom.file,
            };
            const inner_stabs = try self.generateSymbolStabsForSymbol(
                inner_loc,
                di,
                &scratch,
            );
            try locals.appendSlice(inner_stabs);
        }
    }

    // Close scope
    try locals.append(.{
        .n_strx = 0,
        .n_type = macho.N_SO,
        .n_sect = 0,
        .n_desc = 0,
        .n_value = 0,
    });
}
/// Builds the stab entries for one symbol into `buf` and returns the used
/// prefix of `buf`.
///
/// Returns an empty slice for unnamed, GCed or temporary symbols and for
/// symbols without a source symbol in their object. When the source symbol's
/// address falls inside a function range recorded in `debug_info`, four
/// entries are produced (`N_BNSYM`, `N_FUN` with the name, `N_FUN` with the
/// size, `N_ENSYM`); otherwise a single `N_STSYM` entry is produced.
fn generateSymbolStabsForSymbol(
    self: *MachO,
    sym_loc: SymbolWithLoc,
    debug_info: dwarf.DwarfInfo,
    buf: *[4]macho.nlist_64,
) ![]const macho.nlist_64 {
    const gpa = self.base.allocator;
    const object = self.objects.items[sym_loc.file.?];
    const sym = self.getSymbol(sym_loc);
    const sym_name = self.getSymbolName(sym_loc);

    if (sym.n_strx == 0) return buf[0..0];
    if (sym.n_desc == N_DESC_GCED) return buf[0..0];
    if (self.symbolIsTemp(sym_loc)) return buf[0..0];

    const source_sym = object.getSourceSymbol(sym_loc.sym_index) orelse return buf[0..0];

    // Size of the enclosing function, if the symbol is not tentative and its
    // source address lies within a known function range.
    const func_size: ?u64 = blk: {
        if (source_sym.tentative()) break :blk null;
        for (debug_info.func_list.items) |func| {
            const range = func.pc_range orelse continue;
            if (source_sym.n_value < range.start or source_sym.n_value >= range.end) continue;
            break :blk range.end - range.start;
        }
        break :blk null;
    };

    const ss = func_size orelse {
        buf[0] = .{
            .n_strx = try self.strtab.insert(gpa, sym_name),
            .n_type = macho.N_STSYM,
            .n_sect = sym.n_sect,
            .n_desc = 0,
            .n_value = sym.n_value,
        };
        return buf[0..1];
    };

    buf[0] = .{
        .n_strx = 0,
        .n_type = macho.N_BNSYM,
        .n_sect = sym.n_sect,
        .n_desc = 0,
        .n_value = sym.n_value,
    };
    buf[1] = .{
        .n_strx = try self.strtab.insert(gpa, sym_name),
        .n_type = macho.N_FUN,
        .n_sect = sym.n_sect,
        .n_desc = 0,
        .n_value = sym.n_value,
    };
    buf[2] = .{
        .n_strx = 0,
        .n_type = macho.N_FUN,
        .n_sect = 0,
        .n_desc = 0,
        .n_value = ss,
    };
    buf[3] = .{
        .n_strx = 0,
        .n_type = macho.N_ENSYM,
        .n_sect = sym.n_sect,
        .n_desc = 0,
        .n_value = ss,
    };
    return buf;
}
// fn snapshotState(self: *MachO) !void {
// const emit = self.base.options.emit orelse {
// log.debug("no emit directory found; skipping snapshot...", .{});

View File

@ -1,6 +1,7 @@
const std = @import("std");
const build_options = @import("build_options");
const assert = std.debug.assert;
const dwarf = std.dwarf;
const fs = std.fs;
const log = std.log.scoped(.link);
const macho = std.macho;
@ -18,6 +19,7 @@ const CodeSignature = @import("CodeSignature.zig");
const Compilation = @import("../../Compilation.zig");
const Dylib = @import("Dylib.zig");
const MachO = @import("../MachO.zig");
const Object = @import("Object.zig");
const SymbolWithLoc = MachO.SymbolWithLoc;
const Trie = @import("Trie.zig");
@ -618,20 +620,20 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr
if (macho_file.base.options.entitlements) |path| {
try codesig.addEntitlements(arena, path);
}
codesig_offset = try macho_file.writeCodeSignaturePadding(&codesig, &ncmds, lc_writer);
codesig_offset = try writeCodeSignaturePadding(macho_file, &codesig, &ncmds, lc_writer);
break :blk codesig;
} else null;
var headers_buf = std.ArrayList(u8).init(arena);
try macho_file.writeSegmentHeaders(&ncmds, headers_buf.writer());
try writeSegmentHeaders(macho_file, &ncmds, headers_buf.writer());
try macho_file.base.file.?.pwriteAll(headers_buf.items, @sizeOf(macho.mach_header_64));
try macho_file.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len);
try macho_file.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len + headers_buf.items.len));
try writeHeader(macho_file, ncmds, @intCast(u32, lc_buffer.items.len + headers_buf.items.len));
if (codesig) |*csig| {
try macho_file.writeCodeSignature(csig, codesig_offset.?); // code signing always comes last
try writeCodeSignature(macho_file, csig, codesig_offset.?); // code signing always comes last
}
}
@ -964,9 +966,9 @@ fn writeLinkeditSegmentData(macho_file: *MachO, ncmds: *u32, lc_writer: anytype)
seg.vmsize = 0;
try writeDyldInfoData(macho_file, ncmds, lc_writer);
try macho_file.writeFunctionStarts(ncmds, lc_writer);
try macho_file.writeDataInCode(ncmds, lc_writer);
try macho_file.writeSymtabs(ncmds, lc_writer);
try writeFunctionStarts(macho_file, ncmds, lc_writer);
try writeDataInCode(macho_file, ncmds, lc_writer);
try writeSymtabs(macho_file, ncmds, lc_writer);
seg.vmsize = mem.alignForwardGeneric(u64, seg.filesize, macho_file.page_size);
}
@ -1280,3 +1282,660 @@ fn populateLazyBindOffsetsInStubHelper(macho_file: *MachO, buffer: []const u8) !
try macho_file.base.file.?.pwriteAll(&buf, file_offset);
}
}
const asc_u64 = std.sort.asc(u64);
/// Writes the function-starts table into the linkedit segment and appends the
/// matching `FUNCTION_STARTS` load command to `lc_writer`.
///
/// The table is built from every defined, non-GCed global symbol that lives in
/// the text section: addresses are sorted, turned into deltas relative to the
/// text segment's `vmaddr`, and ULEB128-encoded. Returns without writing
/// anything when no text segment or text section exists.
///
/// On success `ncmds` is incremented by one and the linkedit segment's
/// `filesize` grows to cover the written data.
fn writeFunctionStarts(macho_file: *MachO, ncmds: *u32, lc_writer: anytype) !void {
    const tracy = trace(@src());
    defer tracy.end();

    const text_seg_index = macho_file.text_segment_cmd_index orelse return;
    const text_sect_index = macho_file.text_section_index orelse return;
    const text_seg = macho_file.segments.items[text_seg_index];

    const gpa = macho_file.base.allocator;

    // We need to sort by address first
    var addresses = std.ArrayList(u64).init(gpa);
    defer addresses.deinit();
    try addresses.ensureTotalCapacityPrecise(macho_file.globals.items.len);

    for (macho_file.globals.items) |global| {
        const sym = macho_file.getSymbol(global);
        if (sym.undf()) continue;
        if (sym.n_desc == MachO.N_DESC_GCED) continue;
        // `n_sect` is a 1-based section ordinal; 0 means "no section" (e.g. an
        // absolute symbol). Skip it here rather than underflowing in the
        // subtraction below.
        if (sym.n_sect == 0) continue;
        const sect_id = sym.n_sect - 1;
        if (sect_id != text_sect_index) continue;

        addresses.appendAssumeCapacity(sym.n_value);
    }

    std.sort.sort(u64, addresses.items, {}, asc_u64);

    var offsets = std.ArrayList(u32).init(gpa);
    defer offsets.deinit();
    try offsets.ensureTotalCapacityPrecise(addresses.items.len);

    // Each entry is the distance from the previous distinct address; duplicate
    // addresses (delta of zero) are dropped.
    var last_off: u32 = 0;
    for (addresses.items) |addr| {
        const offset = @intCast(u32, addr - text_seg.vmaddr);
        const diff = offset - last_off;

        if (diff == 0) continue;

        offsets.appendAssumeCapacity(diff);
        last_off = offset;
    }

    var buffer = std.ArrayList(u8).init(gpa);
    defer buffer.deinit();

    // Upper bound only: reserves 8 bytes per delta before encoding.
    const max_size = @intCast(usize, offsets.items.len * @sizeOf(u64));
    try buffer.ensureTotalCapacity(max_size);

    for (offsets.items) |offset| {
        try std.leb.writeULEB128(buffer.writer(), offset);
    }

    const link_seg = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?];
    const offset = mem.alignForwardGeneric(u64, link_seg.fileoff + link_seg.filesize, @alignOf(u64));
    const needed_size = buffer.items.len;
    link_seg.filesize = offset + needed_size - link_seg.fileoff;

    log.debug("writing function starts info from 0x{x} to 0x{x}", .{ offset, offset + needed_size });

    try macho_file.base.file.?.pwriteAll(buffer.items, offset);

    try lc_writer.writeStruct(macho.linkedit_data_command{
        .cmd = .FUNCTION_STARTS,
        .cmdsize = @sizeOf(macho.linkedit_data_command),
        .dataoff = @intCast(u32, offset),
        .datasize = @intCast(u32, needed_size),
    });
    ncmds.* += 1;
}
/// Returns the sub-slice of `dices` that starts at the first entry whose
/// `offset` is at or past `start_addr` and stops before the first entry whose
/// `offset` is at or past `end_addr`.
/// NOTE(review): both lookups go through `MachO.findFirst`; presumably `dices`
/// must be ordered by `offset` for the result to be meaningful — confirm.
fn filterDataInCode(
    dices: []align(1) const macho.data_in_code_entry,
    start_addr: u64,
    end_addr: u64,
) []align(1) const macho.data_in_code_entry {
    const AtOrPast = struct {
        addr: u64,

        pub fn predicate(self: @This(), dice: macho.data_in_code_entry) bool {
            return dice.offset >= self.addr;
        }
    };

    const first = MachO.findFirst(macho.data_in_code_entry, dices, 0, AtOrPast{ .addr = start_addr });
    const last = MachO.findFirst(macho.data_in_code_entry, dices, first, AtOrPast{ .addr = end_addr });

    return dices[first..last];
}
/// Collects the data-in-code entries of every object file, rebases them onto
/// the output text section, writes them into the linkedit segment and appends
/// a `DATA_IN_CODE` load command to `lc_writer`.
///
/// Only atoms that are not GCed and that live in the text section contribute.
/// Returns early, writing nothing, when there is no text section. On success
/// `ncmds` is incremented by one.
fn writeDataInCode(macho_file: *MachO, ncmds: *u32, lc_writer: anytype) !void {
    const tracy = trace(@src());
    defer tracy.end();

    var entries = std.ArrayList(macho.data_in_code_entry).init(macho_file.base.allocator);
    defer entries.deinit();

    const text_sect_id = macho_file.text_section_index orelse return;
    const text_header = macho_file.sections.items(.header)[text_sect_id];

    for (macho_file.objects.items) |object| {
        const object_dice = object.parseDataInCode() orelse continue;
        // Room for every entry of this object; the appends below rely on it.
        try entries.ensureUnusedCapacity(object_dice.len);

        for (object.managed_atoms.items) |atom| {
            const sym = atom.getSymbol(macho_file);
            if (sym.n_desc == MachO.N_DESC_GCED) continue;
            if (sym.n_sect - 1 != text_sect_id) continue;

            const source_sym = object.getSourceSymbol(atom.sym_index) orelse continue;
            const source_addr = math.cast(u32, source_sym.n_value) orelse return error.Overflow;
            const atom_dice = filterDataInCode(object_dice, source_addr, source_addr + atom.size);
            // File offset of the atom in the output: address within the text
            // section plus the section's file offset.
            const base = math.cast(u32, sym.n_value - text_header.addr + text_header.offset) orelse
                return error.Overflow;

            for (atom_dice) |entry| {
                entries.appendAssumeCapacity(.{
                    .offset = entry.offset - source_addr + base,
                    .length = entry.length,
                    .kind = entry.kind,
                });
            }
        }
    }

    const linkedit = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?];
    const file_offset = mem.alignForwardGeneric(u64, linkedit.fileoff + linkedit.filesize, @alignOf(u64));
    const byte_len = entries.items.len * @sizeOf(macho.data_in_code_entry);
    linkedit.filesize = file_offset + byte_len - linkedit.fileoff;

    log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ file_offset, file_offset + byte_len });

    try macho_file.base.file.?.pwriteAll(mem.sliceAsBytes(entries.items), file_offset);

    try lc_writer.writeStruct(macho.linkedit_data_command{
        .cmd = .DATA_IN_CODE,
        .cmdsize = @sizeOf(macho.linkedit_data_command),
        .dataoff = @intCast(u32, file_offset),
        .datasize = @intCast(u32, byte_len),
    });
    ncmds.* += 1;
}
/// Writes the symbol table, the indirect symbol table and the string table (in
/// that order) and then appends the symtab and dysymtab load commands to
/// `lc_writer`. `ncmds` is incremented by two.
fn writeSymtabs(macho_file: *MachO, ncmds: *u32, lc_writer: anytype) !void {
    // Both commands start out zeroed; the write* helpers below fill them in.
    var dysymtab_lc = macho.dysymtab_command{
        .cmdsize = @sizeOf(macho.dysymtab_command),
        .ilocalsym = 0,
        .nlocalsym = 0,
        .iextdefsym = 0,
        .nextdefsym = 0,
        .iundefsym = 0,
        .nundefsym = 0,
        .tocoff = 0,
        .ntoc = 0,
        .modtaboff = 0,
        .nmodtab = 0,
        .extrefsymoff = 0,
        .nextrefsyms = 0,
        .indirectsymoff = 0,
        .nindirectsyms = 0,
        .extreloff = 0,
        .nextrel = 0,
        .locreloff = 0,
        .nlocrel = 0,
    };
    var symtab_lc = macho.symtab_command{
        .cmdsize = @sizeOf(macho.symtab_command),
        .symoff = 0,
        .nsyms = 0,
        .stroff = 0,
        .strsize = 0,
    };

    var symtab_ctx = try writeSymtab(macho_file, &symtab_lc);
    // The context owns the imports lookup table; release it when done.
    defer symtab_ctx.imports_table.deinit();

    try writeDysymtab(macho_file, symtab_ctx, &dysymtab_lc);
    try writeStrtab(macho_file, &symtab_lc);

    try lc_writer.writeStruct(symtab_lc);
    try lc_writer.writeStruct(dysymtab_lc);
    ncmds.* += 2;
}
/// Gathers local, exported and imported symbols, writes them (in that order)
/// into the linkedit segment and records the table's offset and symbol count
/// in `lc`.
///
/// Unless stripping is enabled, debug stabs for each object are appended to
/// the locals. Names are (re)inserted into `macho_file.strtab` as symbols are
/// emitted.
///
/// Returns a `SymtabCtx` with the three counts and a table mapping each import
/// to its index within the imports group. The caller owns
/// `imports_table` and must `deinit` it; if this function fails, the table is
/// released here.
fn writeSymtab(macho_file: *MachO, lc: *macho.symtab_command) !SymtabCtx {
    const gpa = macho_file.base.allocator;

    var locals = std.ArrayList(macho.nlist_64).init(gpa);
    defer locals.deinit();

    for (macho_file.locals.items) |sym, sym_id| {
        if (sym.n_strx == 0) continue; // no name, skip
        if (sym.n_desc == MachO.N_DESC_GCED) continue; // GCed, skip
        const sym_loc = SymbolWithLoc{ .sym_index = @intCast(u32, sym_id), .file = null };
        if (macho_file.symbolIsTemp(sym_loc)) continue; // local temp symbol, skip
        if (macho_file.getGlobal(macho_file.getSymbolName(sym_loc)) != null) continue; // global symbol is either an export or import, skip
        try locals.append(sym);
    }

    for (macho_file.objects.items) |object, object_id| {
        for (object.symtab.items) |sym, sym_id| {
            if (sym.n_strx == 0) continue; // no name, skip
            if (sym.n_desc == MachO.N_DESC_GCED) continue; // GCed, skip
            const sym_loc = SymbolWithLoc{ .sym_index = @intCast(u32, sym_id), .file = @intCast(u32, object_id) };
            if (macho_file.symbolIsTemp(sym_loc)) continue; // local temp symbol, skip
            if (macho_file.getGlobal(macho_file.getSymbolName(sym_loc)) != null) continue; // global symbol is either an export or import, skip
            var out_sym = sym;
            out_sym.n_strx = try macho_file.strtab.insert(gpa, macho_file.getSymbolName(sym_loc));
            try locals.append(out_sym);
        }

        if (!macho_file.base.options.strip) {
            try generateSymbolStabs(macho_file, object, &locals);
        }
    }

    var exports = std.ArrayList(macho.nlist_64).init(gpa);
    defer exports.deinit();

    for (macho_file.globals.items) |global| {
        const sym = macho_file.getSymbol(global);
        if (sym.undf()) continue; // import, skip
        if (sym.n_desc == MachO.N_DESC_GCED) continue; // GCed, skip
        var out_sym = sym;
        out_sym.n_strx = try macho_file.strtab.insert(gpa, macho_file.getSymbolName(global));
        try exports.append(out_sym);
    }

    var imports = std.ArrayList(macho.nlist_64).init(gpa);
    defer imports.deinit();

    var imports_table = std.AutoHashMap(SymbolWithLoc, u32).init(gpa);
    // Ownership passes to the caller only on success; without this the table
    // would leak whenever one of the fallible calls below returns an error.
    errdefer imports_table.deinit();

    for (macho_file.globals.items) |global| {
        const sym = macho_file.getSymbol(global);
        if (sym.n_strx == 0) continue; // no name, skip
        if (!sym.undf()) continue; // not an import, skip
        const new_index = @intCast(u32, imports.items.len);
        var out_sym = sym;
        out_sym.n_strx = try macho_file.strtab.insert(gpa, macho_file.getSymbolName(global));
        try imports.append(out_sym);
        try imports_table.putNoClobber(global, new_index);
    }

    const nlocals = @intCast(u32, locals.items.len);
    const nexports = @intCast(u32, exports.items.len);
    const nimports = @intCast(u32, imports.items.len);
    const nsyms = nlocals + nexports + nimports;

    const seg = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?];
    const offset = mem.alignForwardGeneric(
        u64,
        seg.fileoff + seg.filesize,
        @alignOf(macho.nlist_64),
    );
    const needed_size = nsyms * @sizeOf(macho.nlist_64);
    seg.filesize = offset + needed_size - seg.fileoff;

    // Concatenate the three groups so they land in the file with one write.
    var buffer = std.ArrayList(u8).init(gpa);
    defer buffer.deinit();
    try buffer.ensureTotalCapacityPrecise(needed_size);
    buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(locals.items));
    buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(exports.items));
    buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(imports.items));

    log.debug("writing symtab from 0x{x} to 0x{x}", .{ offset, offset + needed_size });
    try macho_file.base.file.?.pwriteAll(buffer.items, offset);

    lc.symoff = @intCast(u32, offset);
    lc.nsyms = nsyms;

    return SymtabCtx{
        .nlocalsym = nlocals,
        .nextdefsym = nexports,
        .nundefsym = nimports,
        .imports_table = imports_table,
    };
}
/// Writes the contents of `macho_file.strtab` at the next 8-byte aligned
/// offset of the linkedit segment, grows the segment's `filesize` to cover
/// it, and records offset and size in `lc`.
fn writeStrtab(macho_file: *MachO, lc: *macho.symtab_command) !void {
    const linkedit = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?];
    const start = mem.alignForwardGeneric(u64, linkedit.fileoff + linkedit.filesize, @alignOf(u64));
    const strings = macho_file.strtab.buffer.items;
    const end = start + strings.len;

    linkedit.filesize = end - linkedit.fileoff;

    log.debug("writing string table from 0x{x} to 0x{x}", .{ start, end });

    try macho_file.base.file.?.pwriteAll(strings, start);

    lc.stroff = @intCast(u32, start);
    lc.strsize = @intCast(u32, strings.len);
}
/// Appends debug stab entries for `object` to `locals`.
///
/// Emits an opening scope (two `N_SO` entries for the compile directory and
/// the translation unit name, then an `N_OSO` entry carrying the object's name
/// and mtime), then the stabs of every managed atom and of the symbols
/// contained in it, and finally a closing `N_SO` entry. If the object has no
/// usable debug info, nothing is appended.
///
/// Must not be called when stripping is enabled.
pub fn generateSymbolStabs(
    macho_file: *MachO,
    object: Object,
    locals: *std.ArrayList(macho.nlist_64),
) !void {
    assert(!macho_file.base.options.strip);

    log.debug("parsing debug info in '{s}'", .{object.name});

    const gpa = macho_file.base.allocator;
    var di = try object.parseDwarfInfo();
    defer di.deinit(gpa);
    try dwarf.openDwarfDebugInfo(&di, gpa);

    // We assume there is only one CU.
    const cu = di.findCompileUnit(0x0) catch |err| switch (err) {
        error.MissingDebugInfo => {
            // TODO audit cases with missing debug info and audit our dwarf.zig module.
            log.debug("invalid or missing debug info in {s}; skipping", .{object.name});
            return;
        },
        else => |e| return e,
    };
    const unit_name = try cu.die.getAttrString(&di, dwarf.AT.name, di.debug_str, cu.*);
    const unit_dir = try cu.die.getAttrString(&di, dwarf.AT.comp_dir, di.debug_str, cu.*);

    // Open scope
    try locals.ensureUnusedCapacity(3);
    locals.appendAssumeCapacity(.{
        .n_strx = try macho_file.strtab.insert(gpa, unit_dir),
        .n_type = macho.N_SO,
        .n_sect = 0,
        .n_desc = 0,
        .n_value = 0,
    });
    locals.appendAssumeCapacity(.{
        .n_strx = try macho_file.strtab.insert(gpa, unit_name),
        .n_type = macho.N_SO,
        .n_sect = 0,
        .n_desc = 0,
        .n_value = 0,
    });
    locals.appendAssumeCapacity(.{
        .n_strx = try macho_file.strtab.insert(gpa, object.name),
        .n_type = macho.N_OSO,
        .n_sect = 0,
        .n_desc = 1,
        .n_value = object.mtime,
    });

    // Scratch space reused for every symbol; each result is copied into
    // `locals` before the next call overwrites it.
    var scratch: [4]macho.nlist_64 = undefined;

    for (object.managed_atoms.items) |atom| {
        const atom_stabs = try generateSymbolStabsForSymbol(
            macho_file,
            atom.getSymbolWithLoc(),
            di,
            &scratch,
        );
        try locals.appendSlice(atom_stabs);

        for (atom.contained.items) |inner| {
            const inner_loc = SymbolWithLoc{
                .sym_index = inner.sym_index,
                .file = atom.file,
            };
            const inner_stabs = try generateSymbolStabsForSymbol(
                macho_file,
                inner_loc,
                di,
                &scratch,
            );
            try locals.appendSlice(inner_stabs);
        }
    }

    // Close scope
    try locals.append(.{
        .n_strx = 0,
        .n_type = macho.N_SO,
        .n_sect = 0,
        .n_desc = 0,
        .n_value = 0,
    });
}
/// Builds the stab entries for one symbol into `buf` and returns the used
/// prefix of `buf`.
///
/// Returns an empty slice for unnamed, GCed or temporary symbols and for
/// symbols without a source symbol in their object. When the source symbol's
/// address falls inside a function range recorded in `debug_info`, four
/// entries are produced (`N_BNSYM`, `N_FUN` with the name, `N_FUN` with the
/// size, `N_ENSYM`); otherwise a single `N_STSYM` entry is produced.
fn generateSymbolStabsForSymbol(
    macho_file: *MachO,
    sym_loc: SymbolWithLoc,
    debug_info: dwarf.DwarfInfo,
    buf: *[4]macho.nlist_64,
) ![]const macho.nlist_64 {
    const gpa = macho_file.base.allocator;
    const object = macho_file.objects.items[sym_loc.file.?];
    const sym = macho_file.getSymbol(sym_loc);
    const sym_name = macho_file.getSymbolName(sym_loc);

    if (sym.n_strx == 0) return buf[0..0];
    if (sym.n_desc == MachO.N_DESC_GCED) return buf[0..0];
    if (macho_file.symbolIsTemp(sym_loc)) return buf[0..0];

    const source_sym = object.getSourceSymbol(sym_loc.sym_index) orelse return buf[0..0];

    // Size of the enclosing function, if the symbol is not tentative and its
    // source address lies within a known function range.
    const func_size: ?u64 = blk: {
        if (source_sym.tentative()) break :blk null;
        for (debug_info.func_list.items) |func| {
            const range = func.pc_range orelse continue;
            if (source_sym.n_value < range.start or source_sym.n_value >= range.end) continue;
            break :blk range.end - range.start;
        }
        break :blk null;
    };

    const ss = func_size orelse {
        buf[0] = .{
            .n_strx = try macho_file.strtab.insert(gpa, sym_name),
            .n_type = macho.N_STSYM,
            .n_sect = sym.n_sect,
            .n_desc = 0,
            .n_value = sym.n_value,
        };
        return buf[0..1];
    };

    buf[0] = .{
        .n_strx = 0,
        .n_type = macho.N_BNSYM,
        .n_sect = sym.n_sect,
        .n_desc = 0,
        .n_value = sym.n_value,
    };
    buf[1] = .{
        .n_strx = try macho_file.strtab.insert(gpa, sym_name),
        .n_type = macho.N_FUN,
        .n_sect = sym.n_sect,
        .n_desc = 0,
        .n_value = sym.n_value,
    };
    buf[2] = .{
        .n_strx = 0,
        .n_type = macho.N_FUN,
        .n_sect = 0,
        .n_desc = 0,
        .n_value = ss,
    };
    buf[3] = .{
        .n_strx = 0,
        .n_type = macho.N_ENSYM,
        .n_sect = sym.n_sect,
        .n_desc = 0,
        .n_value = ss,
    };
    return buf;
}
/// Result of `writeSymtab`, consumed by `writeDysymtab` to lay out the
/// dynamic symbol table.
const SymtabCtx = struct {
/// Number of local symbols written (including any debug stabs).
nlocalsym: u32,
/// Number of exported (defined global) symbols written.
nextdefsym: u32,
/// Number of imported (undefined global) symbols written.
nundefsym: u32,
/// Maps each imported symbol to its index within the imports group.
/// Owned by whoever holds the context; must be released with `deinit`.
imports_table: std.AutoHashMap(SymbolWithLoc, u32),
};
/// Writes the indirect symbol table into the linkedit segment and fills in
/// the symbol-group and indirect-table fields of `lc`.
///
/// Entries are emitted for the stubs section, then the GOT section, then the
/// lazy symbol pointer section; each section header's `reserved1` is set to
/// the index of its first entry. Imported targets are written as
/// `iundefsym + <index from ctx.imports_table>`; GOT entries whose target is
/// defined are written as `INDIRECT_SYMBOL_LOCAL`.
fn writeDysymtab(macho_file: *MachO, ctx: SymtabCtx, lc: *macho.dysymtab_command) !void {
    const gpa = macho_file.base.allocator;

    const stub_count = @intCast(u32, macho_file.stubs_table.count());
    const got_count = @intCast(u32, macho_file.got_entries_table.count());
    // Stubs are listed twice: once for the stubs section and once for the
    // lazy symbol pointer section.
    const indirect_count = stub_count * 2 + got_count;

    const first_export = ctx.nlocalsym;
    const first_import = first_export + ctx.nextdefsym;

    const linkedit = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?];
    const start = mem.alignForwardGeneric(u64, linkedit.fileoff + linkedit.filesize, @alignOf(u64));
    const byte_len = indirect_count * @sizeOf(u32);
    linkedit.filesize = start + byte_len - linkedit.fileoff;

    log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ start, start + byte_len });

    var table = std.ArrayList(u8).init(gpa);
    defer table.deinit();
    try table.ensureTotalCapacity(byte_len);
    const out = table.writer();

    if (macho_file.stubs_section_index) |sect_id| {
        const header = &macho_file.sections.items(.header)[sect_id];
        header.reserved1 = 0;

        for (macho_file.stubs.items) |stub| {
            if (stub.sym_index == 0) continue;
            if (stub.getSymbol(macho_file).n_desc == MachO.N_DESC_GCED) continue;
            assert(macho_file.getSymbol(stub.target).undf());
            try out.writeIntLittle(u32, first_import + ctx.imports_table.get(stub.target).?);
        }
    }

    if (macho_file.got_section_index) |sect_id| {
        const header = &macho_file.sections.items(.header)[sect_id];
        header.reserved1 = stub_count;

        for (macho_file.got_entries.items) |got_entry| {
            if (got_entry.sym_index == 0) continue;
            if (got_entry.getSymbol(macho_file).n_desc == MachO.N_DESC_GCED) continue;

            if (macho_file.getSymbol(got_entry.target).undf()) {
                try out.writeIntLittle(u32, first_import + ctx.imports_table.get(got_entry.target).?);
            } else {
                try out.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL);
            }
        }
    }

    if (macho_file.la_symbol_ptr_section_index) |sect_id| {
        const header = &macho_file.sections.items(.header)[sect_id];
        header.reserved1 = stub_count + got_count;

        for (macho_file.stubs.items) |stub| {
            if (stub.sym_index == 0) continue;
            if (stub.getSymbol(macho_file).n_desc == MachO.N_DESC_GCED) continue;
            assert(macho_file.getSymbol(stub.target).undf());
            try out.writeIntLittle(u32, first_import + ctx.imports_table.get(stub.target).?);
        }
    }

    assert(table.items.len == byte_len);

    try macho_file.base.file.?.pwriteAll(table.items, start);

    lc.nlocalsym = ctx.nlocalsym;
    lc.iextdefsym = first_export;
    lc.nextdefsym = ctx.nextdefsym;
    lc.iundefsym = first_import;
    lc.nundefsym = ctx.nundefsym;
    lc.indirectsymoff = @intCast(u32, start);
    lc.nindirectsyms = indirect_count;
}
/// Reserves space for the code signature at the end of the linkedit segment and
/// emits the corresponding `CODE_SIGNATURE` linkedit data command via `lc_writer`.
///
/// The linkedit segment's `filesize` and `vmsize` are grown to cover the reserved
/// range, and `ncmds` is incremented by one. Returns the file offset at which the
/// reserved range begins.
fn writeCodeSignaturePadding(
    macho_file: *MachO,
    code_sig: *CodeSignature,
    ncmds: *u32,
    lc_writer: anytype,
) !u32 {
    const linkedit = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?];

    // Apple tools only recognize the file if the code signature data is 16-bytes aligned, see
    // https://github.com/opensource-apple/cctools/blob/fdb4825f303fd5c0751be524babd32958181b3ed/libstuff/checkout.c#L271
    const sig_start = mem.alignForwardGeneric(u64, linkedit.fileoff + linkedit.filesize, 16);
    const sig_len = code_sig.estimateSize(sig_start);
    const sig_end = sig_start + sig_len;

    linkedit.filesize = sig_end - linkedit.fileoff;
    linkedit.vmsize = mem.alignForwardGeneric(u64, linkedit.filesize, macho_file.page_size);

    log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ sig_start, sig_end });

    // Touch the very last byte of the reserved range so that the file is padded out to
    // its final length. This is required to compute valid hashes for everything in the
    // file except for the code signature data itself.
    const zero_byte = [_]u8{0};
    try macho_file.base.file.?.pwriteAll(&zero_byte, sig_end - 1);

    const cmd = macho.linkedit_data_command{
        .cmd = .CODE_SIGNATURE,
        .cmdsize = @sizeOf(macho.linkedit_data_command),
        .dataoff = @intCast(u32, sig_start),
        .datasize = @intCast(u32, sig_len),
    };
    try lc_writer.writeStruct(cmd);
    ncmds.* += 1;

    return @intCast(u32, sig_start);
}
/// Generates the ad-hoc code signature into a temporary buffer and writes that
/// buffer to the output file at `offset`.
///
/// The text segment's `fileoff` and `filesize` are passed as the executable segment
/// base and limit, and `offset` is passed as the file size for the signature.
fn writeCodeSignature(macho_file: *MachO, code_sig: *CodeSignature, offset: u32) !void {
    const text_seg = macho_file.segments.items[macho_file.text_segment_cmd_index.?];
    const gpa = macho_file.base.allocator;

    var sig_bytes = std.ArrayList(u8).init(gpa);
    defer sig_bytes.deinit();
    try sig_bytes.ensureTotalCapacityPrecise(code_sig.size());

    const sig_writer = sig_bytes.writer();
    try code_sig.writeAdhocSignature(gpa, .{
        .file = macho_file.base.file.?,
        .exec_seg_base = text_seg.fileoff,
        .exec_seg_limit = text_seg.filesize,
        .file_size = offset,
        .output_mode = macho_file.base.options.output_mode,
    }, sig_writer);

    // The size is queried again here rather than reusing the value obtained before
    // the signature was generated.
    assert(sig_bytes.items.len == code_sig.size());

    const sig_end = offset + sig_bytes.items.len;
    log.debug("writing code signature from 0x{x} to 0x{x}", .{ offset, sig_end });

    try macho_file.base.file.?.pwriteAll(sig_bytes.items, offset);
}
/// Writes each segment command to `writer`, immediately followed by the headers of
/// its non-empty sections, and increments `ncmds` once per segment written.
///
/// `__DATA_CONST` and `__DATA` segments that end up with no non-empty sections are
/// not written at all.
fn writeSegmentHeaders(macho_file: *MachO, ncmds: *u32, writer: anytype) !void {
    for (macho_file.segments.items) |segment, seg_index| {
        const range = macho_file.getSectionIndexes(@intCast(u8, seg_index));
        const headers = macho_file.sections.items(.header)[range.start..range.end];

        // A section of size 0 has no data in the final binary file, so it is
        // counted neither towards `nsects` nor towards `cmdsize`.
        var nonempty: u32 = 0;
        for (headers) |header| {
            if (header.size != 0) nonempty += 1;
        }

        var cmd = segment;
        cmd.nsects = nonempty;
        cmd.cmdsize = @sizeOf(macho.segment_command_64) + nonempty * @sizeOf(macho.section_64);

        if (nonempty == 0) {
            const name = cmd.segName();
            if (mem.eql(u8, name, "__DATA_CONST") or mem.eql(u8, name, "__DATA")) continue;
        }

        try writer.writeStruct(cmd);
        for (headers) |header| {
            if (header.size != 0) try writer.writeStruct(header);
        }

        ncmds.* += 1;
    }
}
/// Builds the 64-bit Mach-O header and writes it at file offset 0.
///
/// CPU type and subtype come from the target architecture (only aarch64 and x86_64
/// are handled; anything else yields `error.UnsupportedCpuArchitecture`). The file
/// type comes from the output mode. `ncmds` and `sizeofcmds` are stored verbatim in
/// the header.
fn writeHeader(macho_file: *MachO, ncmds: u32, sizeofcmds: u32) !void {
    const options = &macho_file.base.options;

    var header: macho.mach_header_64 = .{};
    header.ncmds = ncmds;
    header.sizeofcmds = sizeofcmds;
    header.flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE | macho.MH_TWOLEVEL;

    switch (options.target.cpu.arch) {
        .x86_64 => {
            header.cputype = macho.CPU_TYPE_X86_64;
            header.cpusubtype = macho.CPU_SUBTYPE_X86_64_ALL;
        },
        .aarch64 => {
            header.cputype = macho.CPU_TYPE_ARM64;
            header.cpusubtype = macho.CPU_SUBTYPE_ARM_ALL;
        },
        else => return error.UnsupportedCpuArchitecture,
    }

    switch (options.output_mode) {
        .Exe => header.filetype = macho.MH_EXECUTE,
        // By this point, a library can only be a dylib.
        .Lib => {
            header.filetype = macho.MH_DYLIB;
            header.flags |= macho.MH_NO_REEXPORTED_DYLIBS;
        },
        else => unreachable,
    }

    // Advertise TLV descriptors only when a non-empty __DATA,__thread_vars section exists.
    const has_thread_vars = if (macho_file.getSectionByName("__DATA", "__thread_vars")) |sect_id|
        macho_file.sections.items(.header)[sect_id].size > 0
    else
        false;
    if (has_thread_vars) header.flags |= macho.MH_HAS_TLV_DESCRIPTORS;

    log.debug("writing Mach-O header {}", .{header});

    try macho_file.base.file.?.pwriteAll(mem.asBytes(&header), 0);
}