Specify path to dyld in Mach-O

This is required because an executable on macOS always has to link against
libSystem.dylib, and therefore must name the dynamic linker (dyld) that loads it.

Signed-off-by: Jakub Konka <kubkon@jakubkonka.com>
This commit is contained in:
Jakub Konka 2020-08-21 08:04:02 +02:00
parent c1ee9efb7c
commit 2516db9645
3 changed files with 342 additions and 48 deletions

View File

@ -83,7 +83,7 @@ pub const symtab_command = extern struct {
/// The linkedit_data_command contains the offsets and sizes of a blob
/// of data in the __LINKEDIT segment.
const linkedit_data_command = extern struct {
pub const linkedit_data_command = extern struct {
/// LC_CODE_SIGNATURE, LC_SEGMENT_SPLIT_INFO, LC_FUNCTION_STARTS, LC_DATA_IN_CODE, LC_DYLIB_CODE_SIGN_DRS or LC_LINKER_OPTIMIZATION_HINT.
cmd: u32,
@ -97,6 +97,28 @@ const linkedit_data_command = extern struct {
datasize: u32,
};
/// A program that uses a dynamic linker contains a dylinker_command to identify
/// the name of the dynamic linker (LC_LOAD_DYLINKER). A dynamic linker itself
/// contains a dylinker_command to identify the dynamic linker (LC_ID_DYLINKER).
/// A file can have at most one of these.
/// This struct is also used for the LC_DYLD_ENVIRONMENT load command, in which
/// case it contains a string for dyld to treat like an environment variable.
pub const dylinker_command = extern struct {
/// One of LC_ID_DYLINKER, LC_LOAD_DYLINKER, or LC_DYLD_ENVIRONMENT.
cmd: u32,
/// Total size of the command in bytes, including the pathname string that
/// follows this fixed-size structure.
cmdsize: u32,
/// Offset of the string, measured from the start of this load command.
/// A variable length string in a load command is represented by an lc_str
/// union. The string is stored just after the load command structure, and
/// its size is reflected in the cmdsize field of the load command.
/// Any bytes used to pad cmdsize to a multiple of 4 bytes must be zero.
name: u32,
};
/// The segment load command indicates that a part of this file is to be
/// mapped into the task's address space. The size of this segment in memory,
/// vmsize, maybe equal to or larger than the amount to map from this file,

View File

@ -1427,7 +1427,62 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
else => return self.fail(inst.base.src, "TODO implement call for {}", .{self.target.cpu.arch}),
}
} else if (self.bin_file.cast(link.File.MachO)) |macho_file| {
return self.fail(inst.base.src, "TODO implement codegen for call when linking with MachO", .{});
switch (arch) {
// .x86_64 => {
// for (info.args) |mc_arg, arg_i| {
// const arg = inst.args[arg_i];
// const arg_mcv = try self.resolveInst(inst.args[arg_i]);
// // Here we do not use setRegOrMem even though the logic is similar, because
// // the function call will move the stack pointer, so the offsets are different.
// switch (mc_arg) {
// .none => continue,
// .register => |reg| {
// try self.genSetReg(arg.src, reg, arg_mcv);
// // TODO interact with the register allocator to mark the instruction as moved.
// },
// .stack_offset => {
// // Here we need to emit instructions like this:
// // mov qword ptr [rsp + stack_offset], x
// return self.fail(inst.base.src, "TODO implement calling with parameters in memory", .{});
// },
// .ptr_stack_offset => {
// return self.fail(inst.base.src, "TODO implement calling with MCValue.ptr_stack_offset arg", .{});
// },
// .ptr_embedded_in_code => {
// return self.fail(inst.base.src, "TODO implement calling with MCValue.ptr_embedded_in_code arg", .{});
// },
// .undef => unreachable,
// .immediate => unreachable,
// .unreach => unreachable,
// .dead => unreachable,
// .embedded_in_code => unreachable,
// .memory => unreachable,
// .compare_flags_signed => unreachable,
// .compare_flags_unsigned => unreachable,
// }
// }
// if (inst.func.cast(ir.Inst.Constant)) |func_inst| {
// if (func_inst.val.cast(Value.Payload.Function)) |func_val| {
// const func = func_val.func;
// const got = &macho_file.segment_cmds.items[macho_file.seg_got_index.?];
// const ptr_bytes: u64 = 8;
// const got_addr = @intCast(u32, got.vmaddrs + func.owner_decl.link.macho.offset_table_index * ptr_bytes);
// // 01 xx xx xx xx call [addr]
// try self.code.ensureCapacity(self.code.items.len + 5);
// self.code.appendSliceAssumeCapacity(&[1]u8{ 0x1 });
// mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), got_addr);
// } else {
// return self.fail(inst.base.src, "TODO implement calling bitcasted functions", .{});
// }
// } else {
// return self.fail(inst.base.src, "TODO implement calling runtime known function pointer", .{});
// }
// },
.x86_64 => return self.fail(inst.base.src, "TODO implement codegen for call when linking with MachO for x86_64 arch", .{}),
.aarch64 => return self.fail(inst.base.src, "TODO implement codegen for call when linking with MachO for aarch64 arch", .{}),
else => unreachable,
}
} else {
unreachable;
}

View File

@ -6,8 +6,11 @@ const assert = std.debug.assert;
const fs = std.fs;
const log = std.log.scoped(.link);
const macho = std.macho;
const codegen = @import("../codegen.zig");
const math = std.math;
const mem = std.mem;
const trace = @import("../tracy.zig").trace;
const Type = @import("../type.zig").Type;
const Module = @import("../Module.zig");
const link = @import("../link.zig");
@ -17,18 +20,35 @@ pub const base_tag: File.Tag = File.Tag.macho;
base: File,
/// Stored in native-endian format, depending on target endianness needs to be bswapped on read/write.
/// Same order as in the file.
segment_cmds: std.ArrayListUnmanaged(macho.segment_command_64) = std.ArrayListUnmanaged(macho.segment_command_64){},
/// List of all load command headers that are in the file.
/// We use it to track number and size of all commands needed by the header.
commands: std.ArrayListUnmanaged(macho.load_command) = std.ArrayListUnmanaged(macho.load_command){},
command_file_offset: ?u64 = null,
/// Stored in native-endian format, depending on target endianness needs to be bswapped on read/write.
/// Same order as in the file.
segments: std.ArrayListUnmanaged(macho.segment_command_64) = std.ArrayListUnmanaged(macho.segment_command_64){},
sections: std.ArrayListUnmanaged(macho.section_64) = std.ArrayListUnmanaged(macho.section_64){},
segment_table_offset: ?u64 = null,
/// Entry point load command
entry_point_cmd: ?macho.entry_point_command = null,
entry_addr: ?u64 = null,
/// Default VM start address set at 4GB
vm_start_address: u64 = 0x100000000,
seg_table_dirty: bool = false,
error_flags: File.ErrorFlags = File.ErrorFlags{},
/// `alloc_num / alloc_den` is the factor of padding when allocating.
const alloc_num = 4;
const alloc_den = 3;
/// Default path to dyld
const DEFAULT_DYLD_PATH: [*:0]const u8 = "/usr/lib/dyld";
/// Currently a field-less placeholder type; `empty` is its default-initialized value.
/// NOTE(review): presumably meant to hold per-declaration linker state — confirm
/// against the other link backends.
pub const TextBlock = struct {
pub const empty = TextBlock{};
};
@ -80,12 +100,6 @@ fn openFile(allocator: *Allocator, file: fs.File, options: link.Options) !MachO
/// Truncates the existing file contents and overwrites the contents.
/// Returns an error if `file` is not already open with +read +write +seek abilities.
fn createFile(allocator: *Allocator, file: fs.File, options: link.Options) !MachO {
switch (options.output_mode) {
.Exe => {},
.Obj => {},
.Lib => return error.TODOImplementWritingLibFiles,
}
var self: MachO = .{
.base = .{
.file = file,
@ -96,31 +110,35 @@ fn createFile(allocator: *Allocator, file: fs.File, options: link.Options) !Mach
};
errdefer self.deinit();
if (options.output_mode == .Exe) {
// The first segment command for executables is always a __PAGEZERO segment.
try self.segment_cmds.append(allocator, .{
.cmd = macho.LC_SEGMENT_64,
.cmdsize = @sizeOf(macho.segment_command_64),
.segname = self.makeString("__PAGEZERO"),
.vmaddr = 0,
.vmsize = 0,
.fileoff = 0,
.filesize = 0,
.maxprot = 0,
.initprot = 0,
.nsects = 0,
.flags = 0,
});
switch (options.output_mode) {
.Exe => {
// The first segment command for executables is always a __PAGEZERO segment.
const pagezero = .{
.cmd = macho.LC_SEGMENT_64,
.cmdsize = commandSize(@sizeOf(macho.segment_command_64)),
.segname = makeString("__PAGEZERO"),
.vmaddr = 0,
.vmsize = self.vm_start_address,
.fileoff = 0,
.filesize = 0,
.maxprot = 0,
.initprot = 0,
.nsects = 0,
.flags = 0,
};
try self.commands.append(allocator, .{
.cmd = pagezero.cmd,
.cmdsize = pagezero.cmdsize,
});
try self.segments.append(allocator, pagezero);
},
.Obj => return error.TODOImplementWritingObjFiles,
.Lib => return error.TODOImplementWritingLibFiles,
}
return self;
}
try self.populateMissingMetadata();
fn makeString(self: *MachO, comptime bytes: []const u8) [16]u8 {
var buf: [16]u8 = undefined;
if (bytes.len > buf.len) @compileError("MachO segment/section name too long");
mem.copy(u8, buf[0..], bytes);
return buf;
return self;
}
fn writeMachOHeader(self: *MachO) !void {
@ -156,10 +174,14 @@ fn writeMachOHeader(self: *MachO) !void {
};
hdr.filetype = filetype;
// TODO consider other commands
const ncmds = try math.cast(u32, self.segment_cmds.items.len);
const ncmds = try math.cast(u32, self.commands.items.len);
hdr.ncmds = ncmds;
hdr.sizeofcmds = ncmds * @sizeOf(macho.segment_command_64);
var sizeof_cmds: u32 = 0;
for (self.commands.items) |cmd| {
sizeof_cmds += cmd.cmdsize;
}
hdr.sizeofcmds = sizeof_cmds;
// TODO should these be set to something else?
hdr.flags = 0;
@ -169,36 +191,117 @@ fn writeMachOHeader(self: *MachO) !void {
}
pub fn flush(self: *MachO, module: *Module) !void {
// TODO implement flush
// Save segments first
{
const buf = try self.base.allocator.alloc(macho.segment_command_64, self.segment_cmds.items.len);
const buf = try self.base.allocator.alloc(macho.segment_command_64, self.segments.items.len);
defer self.base.allocator.free(buf);
self.command_file_offset = @sizeOf(macho.mach_header_64);
for (buf) |*seg, i| {
seg.* = self.segment_cmds.items[i];
seg.* = self.segments.items[i];
self.command_file_offset.? += self.segments.items[i].cmdsize;
}
try self.base.file.?.pwriteAll(mem.sliceAsBytes(buf), @sizeOf(macho.mach_header_64));
}
if (self.entry_addr == null and self.base.options.output_mode == .Exe) {
log.debug("flushing. no_entry_point_found = true\n", .{});
self.error_flags.no_entry_point_found = true;
} else {
log.debug("flushing. no_entry_point_found = false\n", .{});
self.error_flags.no_entry_point_found = false;
try self.writeMachOHeader();
switch (self.base.options.output_mode) {
.Exe => {
{
// We need to add LC_LOAD_DYLINKER and LC_LOAD_DYLIB since we always
// have to link against libSystem.dylib
const cmdsize = commandSize(@intCast(u32, @sizeOf(macho.dylinker_command) + mem.lenZ(DEFAULT_DYLD_PATH)));
const load_dylinker = [1]macho.dylinker_command{
.{
.cmd = macho.LC_LOAD_DYLINKER,
.cmdsize = cmdsize,
.name = @sizeOf(macho.dylinker_command),
},
};
try self.commands.append(self.base.allocator, .{
.cmd = macho.LC_LOAD_DYLINKER,
.cmdsize = cmdsize,
});
try self.base.file.?.pwriteAll(mem.sliceAsBytes(load_dylinker[0..1]), self.command_file_offset.?);
const padded_path = try self.base.allocator.alloc(u8, cmdsize - @sizeOf(macho.dylinker_command));
defer self.base.allocator.free(padded_path);
mem.set(u8, padded_path[0..], 0);
mem.copy(u8, padded_path[0..], mem.spanZ(DEFAULT_DYLD_PATH));
try self.base.file.?.pwriteAll(padded_path, self.command_file_offset.? + @sizeOf(macho.dylinker_command));
self.command_file_offset.? += cmdsize;
}
},
.Obj => return error.TODOImplementWritingObjFiles,
.Lib => return error.TODOImplementWritingLibFiles,
}
// if (self.entry_addr == null and self.base.options.output_mode == .Exe) {
// log.debug("flushing. no_entry_point_found = true\n", .{});
// self.error_flags.no_entry_point_found = true;
// } else {
log.debug("flushing. no_entry_point_found = false\n", .{});
self.error_flags.no_entry_point_found = false;
try self.writeMachOHeader();
// }
}
pub fn deinit(self: *MachO) void {
self.segment_cmds.deinit(self.base.allocator);
self.commands.deinit(self.base.allocator);
self.segments.deinit(self.base.allocator);
self.sections.deinit(self.base.allocator);
}
pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void {}
pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void {}
/// Currently a no-op: the whole body is commented out.
/// The commented-out draft below would generate code for `decl` through
/// `codegen.generateSymbol` and, on failure, mark the decl as
/// `.codegen_failure` and record the error message in `module.failed_decls`.
/// The draft stops after selecting the resulting code bytes.
pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void {
// const tracy = trace(@src());
// defer tracy.end();
// var code_buffer = std.ArrayList(u8).init(self.base.allocator);
// defer code_buffer.deinit();
// var dbg_line_buffer = std.ArrayList(u8).init(self.base.allocator);
// defer dbg_line_buffer.deinit();
// var dbg_info_buffer = std.ArrayList(u8).init(self.base.allocator);
// defer dbg_info_buffer.deinit();
// var dbg_info_type_relocs: File.DbgInfoTypeRelocsTable = .{};
// defer {
// for (dbg_info_type_relocs.items()) |*entry| {
// entry.value.relocs.deinit(self.base.allocator);
// }
// dbg_info_type_relocs.deinit(self.base.allocator);
// }
// const typed_value = decl.typed_value.most_recent.typed_value;
// log.debug("typed_value = {}", .{typed_value});
// const res = try codegen.generateSymbol(
// &self.base,
// decl.src(),
// typed_value,
// &code_buffer,
// &dbg_line_buffer,
// &dbg_info_buffer,
// &dbg_info_type_relocs,
// );
// log.debug("res = {}", .{res});
// const code = switch (res) {
// .externally_managed => |x| x,
// .appended => code_buffer.items,
// .fail => |em| {
// decl.analysis = .codegen_failure;
// try module.failed_decls.put(module.gpa, decl, em);
// return;
// },
// };
}
pub fn updateDeclLineNumber(self: *MachO, module: *Module, decl: *const Module.Decl) !void {}
@ -214,3 +317,117 @@ pub fn freeDecl(self: *MachO, decl: *Module.Decl) void {}
pub fn getDeclVAddr(self: *MachO, decl: *const Module.Decl) u64 {
@panic("TODO implement getDeclVAddr for MachO");
}
/// Currently a no-op: the whole body is commented out.
/// The commented-out draft below would append LC_SEGMENT_64 commands that do
/// not exist yet (one of them named "__TEXT"), placing each at an offset
/// obtained from `findFreeSpace` and marking the segment table dirty.
/// NOTE(review): the draft still refers to `segment_cmds`, `seg_load_re_index`
/// and `seg_got_index`, which are not fields visible in this file — confirm
/// before re-enabling.
pub fn populateMissingMetadata(self: *MachO) !void {
// if (self.seg_load_re_index == null) {
// self.seg_load_re_index = @intCast(u16, self.segment_cmds.items.len);
// const file_size = self.base.options.program_code_size_hint;
// const p_align = 0x1000;
// const off = self.findFreeSpace(file_size, p_align);
// log.debug("found LC_SEGMENT_64 free space 0x{x} to 0x{x}", .{ off, off + file_size });
// try self.segment_cmds.append(self.base.allocator, .{});
// self.entry_addr = null;
// self.seg_table_dirty = true;
// }
// if (self.seg_got_index == null) {
// self.seg_got_index = @intCast(u16, self.segment_cmds.items.len);
// const file_size = 8 * self.base.options.symbol_count_hint;
// // Apple recommends to page align for better performance.
// // TODO This is not necessarily true for MH_OBJECT which means we
// // could potentially shave off a couple of bytes when generating
// // only object files.
// const p_align = 0x1000;
// const off = self.findFreeSpace(file_size, p_align);
// log.debug("found LC_SEGMENT_64 free space 0x{x} to 0x{x}", .{ off, off + file_size });
// const default_vmaddr = 0x4000000;
// try self.segment_cmds.append(self.base.allocator, .{
// .cmd = macho.LC_SEGMENT_64,
// .cmdsize = @sizeOf(macho.segment_command_64),
// .segname = self.makeString("__TEXT"),
// .vmaddr = default_vmaddr,
// .vmsize = file_size,
// .fileoff = off,
// .filesize = file_size,
// .maxprot = 0x5,
// .initprot = 0x5,
// .nsects = 0,
// .flags = 0,
// });
// self.seg_table_dirty = true;
// }
}
/// Checks whether placing `size` bytes at file offset `start` would collide
/// with data that is already laid out in the file.
///
/// The requested range is padded by the `alloc_num / alloc_den` factor before
/// it is compared against:
///   * the Mach-O header at the very start of the file, and
///   * the (equally padded) file range of every entry in `self.segments`.
///
/// Returns the end position of the first collision found, or `null` when the
/// range is free.
fn detectAllocCollision(self: *MachO, start: u64, size: u64) ?u64 {
    // Nothing may overlap the Mach-O header.
    const header_size: u64 = @sizeOf(macho.mach_header_64);
    if (start < header_size)
        return header_size;

    const end = start + satMul(size, alloc_num) / alloc_den;

    // TODO: re-enable once the section/segment table offsets are tracked.
    // if (self.sec_table_offset) |off| {
    //     const section_size: u64 = @sizeOf(macho.section_64);
    //     const tight_size = self.sections.items.len * section_size;
    //     const increased_size = satMul(tight_size, alloc_num) / alloc_den;
    //     const test_end = off + increased_size;
    //     if (end > off and start < test_end) {
    //         return test_end;
    //     }
    // }
    // if (self.seg_table_offset) |off| {
    //     const segment_size: u64 = @sizeOf(macho.segment_command_64);
    //     const tight_size = self.segment_cmds.items.len * segment_size;
    //     const increased_size = satMul(tight_size, alloc_num) / alloc_den;
    //     const test_end = off + increased_size;
    //     if (end > off and start < test_end) {
    //         return test_end;
    //     }
    // }
    // for (self.sections.items) |section| {
    //     const increased_size = satMul(section.size, alloc_num) / alloc_den;
    //     const test_end = section.offset + increased_size;
    //     if (end > section.offset and start < test_end) {
    //         return test_end;
    //     }
    // }

    for (self.segments.items) |segment| {
        // Pad each existing segment by the same factor so that it keeps
        // room to grow in place.
        const increased_size = satMul(segment.filesize, alloc_num) / alloc_den;
        const test_end = segment.fileoff + increased_size;
        if (end > segment.fileoff and start < test_end) {
            return test_end;
        }
    }
    return null;
}
/// Finds a file offset, aligned to `min_alignment`, at which `object_size`
/// bytes can be placed without `detectAllocCollision` reporting an overlap.
/// The search starts at offset 0 and, after every collision, resumes at the
/// reported collision end rounded up to the requested alignment.
fn findFreeSpace(self: *MachO, object_size: u64, min_alignment: u16) u64 {
    var candidate: u64 = 0;
    while (true) {
        const collision_end = self.detectAllocCollision(candidate, object_size) orelse
            return candidate;
        candidate = mem.alignForwardGeneric(u64, collision_end, min_alignment);
    }
}
/// Multiplies `a` by `b` in their common peer type. When the product
/// overflows, the result is clamped to that type's maximum value instead of
/// returning an error.
fn satMul(a: anytype, b: anytype) @TypeOf(a, b) {
    const Result = @TypeOf(a, b);
    if (std.math.mul(Result, a, b)) |product| {
        return product;
    } else |_| {
        return std.math.maxInt(Result);
    }
}
/// Builds the fixed 16-byte name field used by Mach-O segment and section
/// headers from a compile-time string.
///
/// The name is copied to the front of the buffer and every remaining byte is
/// zero, so the bytes written to the output file are fully defined.
/// Names longer than 16 bytes are rejected with a compile error.
fn makeString(comptime bytes: []const u8) [16]u8 {
    // Start from an all-zero buffer: leaving the tail `undefined` would put
    // arbitrary bytes after short names such as "__TEXT".
    var buf = [_]u8{0} ** 16;
    if (bytes.len > buf.len) @compileError("MachO segment/section name too long");
    // Copy byte-by-byte; the length was already validated at compile time.
    var i: usize = 0;
    while (i < bytes.len) : (i += 1) {
        buf[i] = bytes[i];
    }
    return buf;
}
/// Rounds `min_size` up to the nearest multiple of `@sizeOf(u64)` (8 bytes).
/// A size that is already a multiple of 8 is returned unchanged.
fn commandSize(min_size: u32) u32 {
    const alignment: u32 = @sizeOf(u64);
    const remainder = min_size % alignment;
    return if (remainder == 0) min_size else min_size + (alignment - remainder);
}