diff --git a/lib/std/macho.zig b/lib/std/macho.zig
index 7eb22c7179..935802a45f 100644
--- a/lib/std/macho.zig
+++ b/lib/std/macho.zig
@@ -83,7 +83,7 @@ pub const symtab_command = extern struct {
 
 /// The linkedit_data_command contains the offsets and sizes of a blob
 /// of data in the __LINKEDIT segment.
-const linkedit_data_command = extern struct {
+pub const linkedit_data_command = extern struct {
     /// LC_CODE_SIGNATURE, LC_SEGMENT_SPLIT_INFO, LC_FUNCTION_STARTS, LC_DATA_IN_CODE, LC_DYLIB_CODE_SIGN_DRS or LC_LINKER_OPTIMIZATION_HINT.
     cmd: u32,
 
@@ -97,6 +97,28 @@ const linkedit_data_command = extern struct {
     datasize: u32,
 };
 
+/// A program that uses a dynamic linker contains a dylinker_command to identify
+/// the name of the dynamic linker (LC_LOAD_DYLINKER). And a dynamic linker
+/// contains a dylinker_command to identify the dynamic linker (LC_ID_DYLINKER).
+/// A file can have at most one of these.
+/// This struct is also used for the LC_DYLD_ENVIRONMENT load command and contains
+/// a string for dyld to treat like an environment variable.
+pub const dylinker_command = extern struct {
+    /// LC_ID_DYLINKER, LC_LOAD_DYLINKER, or LC_DYLD_ENVIRONMENT
+    cmd: u32,
+
+    /// Size of the whole command in bytes; includes the pathname string.
+    cmdsize: u32,
+
+    /// A variable length string in a load command is represented by an lc_str
+    /// union. The strings are stored just after the load command structure and
+    /// the offset is from the start of the load command structure. The size
+    /// of the string is reflected in the cmdsize field of the load command.
+    /// Once again any padded bytes to bring the cmdsize field to a multiple
+    /// of 4 bytes must be zero.
+    name: u32,
+};
+
 /// The segment load command indicates that a part of this file is to be
 /// mapped into the task's address space. The size of this segment in memory,
 /// vmsize, maybe equal to or larger than the amount to map from this file,
diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig
index e0caaba38f..4481adf021 100644
--- a/src-self-hosted/codegen.zig
+++ b/src-self-hosted/codegen.zig
@@ -1427,7 +1427,62 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                         else => return self.fail(inst.base.src, "TODO implement call for {}", .{self.target.cpu.arch}),
                     }
                 } else if (self.bin_file.cast(link.File.MachO)) |macho_file| {
-                    return self.fail(inst.base.src, "TODO implement codegen for call when linking with MachO", .{});
+                    switch (arch) {
+                        // .x86_64 => {
+                        //     for (info.args) |mc_arg, arg_i| {
+                        //         const arg = inst.args[arg_i];
+                        //         const arg_mcv = try self.resolveInst(inst.args[arg_i]);
+                        //         // Here we do not use setRegOrMem even though the logic is similar, because
+                        //         // the function call will move the stack pointer, so the offsets are different.
+                        //         switch (mc_arg) {
+                        //             .none => continue,
+                        //             .register => |reg| {
+                        //                 try self.genSetReg(arg.src, reg, arg_mcv);
+                        //                 // TODO interact with the register allocator to mark the instruction as moved.
+                        //             },
+                        //             .stack_offset => {
+                        //                 // Here we need to emit instructions like this:
+                        //                 // mov qword ptr [rsp + stack_offset], x
+                        //                 return self.fail(inst.base.src, "TODO implement calling with parameters in memory", .{});
+                        //             },
+                        //             .ptr_stack_offset => {
+                        //                 return self.fail(inst.base.src, "TODO implement calling with MCValue.ptr_stack_offset arg", .{});
+                        //             },
+                        //             .ptr_embedded_in_code => {
+                        //                 return self.fail(inst.base.src, "TODO implement calling with MCValue.ptr_embedded_in_code arg", .{});
+                        //             },
+                        //             .undef => unreachable,
+                        //             .immediate => unreachable,
+                        //             .unreach => unreachable,
+                        //             .dead => unreachable,
+                        //             .embedded_in_code => unreachable,
+                        //             .memory => unreachable,
+                        //             .compare_flags_signed => unreachable,
+                        //             .compare_flags_unsigned => unreachable,
+                        //         }
+                        //     }
+
+                        //     if (inst.func.cast(ir.Inst.Constant)) |func_inst| {
+                        //         if (func_inst.val.cast(Value.Payload.Function)) |func_val| {
+                        //             const func = func_val.func;
+                        //             const got = &macho_file.segment_cmds.items[macho_file.seg_got_index.?];
+                        //             const ptr_bytes: u64 = 8;
+                        //             const got_addr = @intCast(u32, got.vmaddrs + func.owner_decl.link.macho.offset_table_index * ptr_bytes);
+                        //             // 01 xx xx xx xx call [addr]
+                        //             try self.code.ensureCapacity(self.code.items.len + 5);
+                        //             self.code.appendSliceAssumeCapacity(&[1]u8{ 0x1 });
+                        //             mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), got_addr);
+                        //         } else {
+                        //             return self.fail(inst.base.src, "TODO implement calling bitcasted functions", .{});
+                        //         }
+                        //     } else {
+                        //         return self.fail(inst.base.src, "TODO implement calling runtime known function pointer", .{});
+                        //     }
+                        // },
+                        .x86_64 => return self.fail(inst.base.src, "TODO implement codegen for call when linking with MachO for x86_64 arch", .{}),
+                        .aarch64 => return self.fail(inst.base.src, "TODO implement codegen for call when linking with MachO for aarch64 arch", .{}),
+                        else => unreachable, // NOTE(review): assumes only x86_64/aarch64 ever reach the MachO path — confirm
+                    }
                 } else {
                     unreachable;
                 }
diff --git a/src-self-hosted/link/MachO.zig b/src-self-hosted/link/MachO.zig
index 4bcea9cfa8..e2e93624f1 100644
--- a/src-self-hosted/link/MachO.zig
+++ b/src-self-hosted/link/MachO.zig
@@ -6,8 +6,11 @@ const assert = std.debug.assert;
 const fs = std.fs;
 const log = std.log.scoped(.link);
 const macho = std.macho;
+const codegen = @import("../codegen.zig");
 const math = std.math;
 const mem = std.mem;
+const trace = @import("../tracy.zig").trace;
+const Type = @import("../type.zig").Type;
 
 const Module = @import("../Module.zig");
 const link = @import("../link.zig");
@@ -17,18 +20,35 @@ pub const base_tag: File.Tag = File.Tag.macho;
 
 base: File,
 
-/// Stored in native-endian format, depending on target endianness needs to be bswapped on read/write.
-/// Same order as in the file.
-segment_cmds: std.ArrayListUnmanaged(macho.segment_command_64) = std.ArrayListUnmanaged(macho.segment_command_64){},
+/// List of the headers (cmd, cmdsize) of all load commands that are in the file.
+/// We use it to track the number and total size of all commands needed by the Mach-O header.
+commands: std.ArrayListUnmanaged(macho.load_command) = std.ArrayListUnmanaged(macho.load_command){},
+command_file_offset: ?u64 = null,
 
 /// Stored in native-endian format, depending on target endianness needs to be bswapped on read/write.
 /// Same order as in the file.
+segments: std.ArrayListUnmanaged(macho.segment_command_64) = std.ArrayListUnmanaged(macho.segment_command_64){},
 sections: std.ArrayListUnmanaged(macho.section_64) = std.ArrayListUnmanaged(macho.section_64){},
+segment_table_offset: ?u64 = null,
 
+/// Entry point load command
+entry_point_cmd: ?macho.entry_point_command = null,
 entry_addr: ?u64 = null,
 
+/// Default VM start address set at 4GB; createFile uses it as the vmsize of __PAGEZERO.
+vm_start_address: u64 = 0x100000000,
+
+seg_table_dirty: bool = false,
+
 error_flags: File.ErrorFlags = File.ErrorFlags{},
 
+/// `alloc_num / alloc_den` is the factor of padding when allocating.
+const alloc_num = 4;
+const alloc_den = 3;
+
+/// Default path to dyld, written as the path string of the LC_LOAD_DYLINKER command.
+const DEFAULT_DYLD_PATH: [*:0]const u8 = "/usr/lib/dyld";
+
 pub const TextBlock = struct {
     pub const empty = TextBlock{};
 };
@@ -80,12 +100,6 @@ fn openFile(allocator: *Allocator, file: fs.File, options: link.Options) !MachO
 /// Truncates the existing file contents and overwrites the contents.
 /// Returns an error if `file` is not already open with +read +write +seek abilities.
 fn createFile(allocator: *Allocator, file: fs.File, options: link.Options) !MachO {
-    switch (options.output_mode) {
-        .Exe => {},
-        .Obj => {},
-        .Lib => return error.TODOImplementWritingLibFiles,
-    }
-
     var self: MachO = .{
         .base = .{
             .file = file,
@@ -96,31 +110,35 @@ fn createFile(allocator: *Allocator, file: fs.File, options: link.Options) !Mach
     };
     errdefer self.deinit();
 
-    if (options.output_mode == .Exe) {
-        // The first segment command for executables is always a __PAGEZERO segment.
-        try self.segment_cmds.append(allocator, .{
-            .cmd = macho.LC_SEGMENT_64,
-            .cmdsize = @sizeOf(macho.segment_command_64),
-            .segname = self.makeString("__PAGEZERO"),
-            .vmaddr = 0,
-            .vmsize = 0,
-            .fileoff = 0,
-            .filesize = 0,
-            .maxprot = 0,
-            .initprot = 0,
-            .nsects = 0,
-            .flags = 0,
-        });
+    switch (options.output_mode) {
+        .Exe => {
+            // The first segment command for executables is always a __PAGEZERO segment.
+            const pagezero = .{
+                .cmd = macho.LC_SEGMENT_64,
+                .cmdsize = commandSize(@sizeOf(macho.segment_command_64)), // rounded up to a multiple of 8 bytes
+                .segname = makeString("__PAGEZERO"),
+                .vmaddr = 0,
+                .vmsize = self.vm_start_address,
+                .fileoff = 0,
+                .filesize = 0,
+                .maxprot = 0,
+                .initprot = 0,
+                .nsects = 0,
+                .flags = 0,
+            };
+            try self.commands.append(allocator, .{ // record the generic header so writeMachOHeader can count it
+                .cmd = pagezero.cmd,
+                .cmdsize = pagezero.cmdsize,
+            });
+            try self.segments.append(allocator, pagezero);
+        },
+        .Obj => return error.TODOImplementWritingObjFiles,
+        .Lib => return error.TODOImplementWritingLibFiles,
     }
 
-    return self;
-}
+    try self.populateMissingMetadata();
 
-fn makeString(self: *MachO, comptime bytes: []const u8) [16]u8 {
-    var buf: [16]u8 = undefined;
-    if (bytes.len > buf.len) @compileError("MachO segment/section name too long");
-    mem.copy(u8, buf[0..], bytes);
-    return buf;
+    return self;
 }
 
 fn writeMachOHeader(self: *MachO) !void {
@@ -156,10 +174,14 @@ fn writeMachOHeader(self: *MachO) !void {
     };
     hdr.filetype = filetype;
 
-    // TODO consider other commands
-    const ncmds = try math.cast(u32, self.segment_cmds.items.len);
+    const ncmds = try math.cast(u32, self.commands.items.len);
     hdr.ncmds = ncmds;
-    hdr.sizeofcmds = ncmds * @sizeOf(macho.segment_command_64);
+
+    var sizeof_cmds: u32 = 0; // sum of cmdsize over every recorded load command
+    for (self.commands.items) |cmd| {
+        sizeof_cmds += cmd.cmdsize;
+    }
+    hdr.sizeofcmds = sizeof_cmds;
 
     // TODO should these be set to something else?
     hdr.flags = 0;
@@ -169,36 +191,117 @@ fn writeMachOHeader(self: *MachO) !void {
 }
 
 pub fn flush(self: *MachO, module: *Module) !void {
-    // TODO implement flush
+    // Write all segment commands right after the Mach-O header, tracking where the next command goes.
     {
-        const buf = try self.base.allocator.alloc(macho.segment_command_64, self.segment_cmds.items.len);
+        const buf = try self.base.allocator.alloc(macho.segment_command_64, self.segments.items.len);
         defer self.base.allocator.free(buf);
 
+        self.command_file_offset = @sizeOf(macho.mach_header_64);
+
         for (buf) |*seg, i| {
-            seg.* = self.segment_cmds.items[i];
+            seg.* = self.segments.items[i];
+            self.command_file_offset.? += self.segments.items[i].cmdsize;
         }
 
         try self.base.file.?.pwriteAll(mem.sliceAsBytes(buf), @sizeOf(macho.mach_header_64));
     }
 
-    if (self.entry_addr == null and self.base.options.output_mode == .Exe) {
-        log.debug("flushing. no_entry_point_found = true\n", .{});
-        self.error_flags.no_entry_point_found = true;
-    } else {
-        log.debug("flushing. no_entry_point_found = false\n", .{});
-        self.error_flags.no_entry_point_found = false;
-        try self.writeMachOHeader();
+    switch (self.base.options.output_mode) {
+        .Exe => {
+            {
+                // We need to add LC_LOAD_DYLINKER and LC_LOAD_DYLIB since we always
+                // have to link against libSystem.dylib. NOTE(review): only LC_LOAD_DYLINKER is emitted below.
+                const cmdsize = commandSize(@intCast(u32, @sizeOf(macho.dylinker_command) + mem.lenZ(DEFAULT_DYLD_PATH)));
+                const load_dylinker = [1]macho.dylinker_command{
+                    .{
+                        .cmd = macho.LC_LOAD_DYLINKER,
+                        .cmdsize = cmdsize,
+                        .name = @sizeOf(macho.dylinker_command), // path string starts right after the fixed-size struct
+                    },
+                };
+                try self.commands.append(self.base.allocator, .{ // NOTE(review): appended on every flush() call; repeated flushes would grow `commands` — confirm
+                    .cmd = macho.LC_LOAD_DYLINKER,
+                    .cmdsize = cmdsize,
+                });
+
+                try self.base.file.?.pwriteAll(mem.sliceAsBytes(load_dylinker[0..1]), self.command_file_offset.?);
+
+                const padded_path = try self.base.allocator.alloc(u8, cmdsize - @sizeOf(macho.dylinker_command));
+                defer self.base.allocator.free(padded_path);
+                mem.set(u8, padded_path[0..], 0); // zero-fill so bytes after the path are NUL padding
+                mem.copy(u8, padded_path[0..], mem.spanZ(DEFAULT_DYLD_PATH));
+
+                try self.base.file.?.pwriteAll(padded_path, self.command_file_offset.? + @sizeOf(macho.dylinker_command));
+                self.command_file_offset.? += cmdsize;
+            }
+        },
+        .Obj => return error.TODOImplementWritingObjFiles,
+        .Lib => return error.TODOImplementWritingLibFiles,
     }
+
+    // if (self.entry_addr == null and self.base.options.output_mode == .Exe) {
+    //     log.debug("flushing. no_entry_point_found = true\n", .{});
+    //     self.error_flags.no_entry_point_found = true;
+    // } else {
+    log.debug("flushing. no_entry_point_found = false\n", .{});
+    self.error_flags.no_entry_point_found = false;
+    try self.writeMachOHeader();
+    // }
 }
 
 pub fn deinit(self: *MachO) void {
-    self.segment_cmds.deinit(self.base.allocator);
+    self.commands.deinit(self.base.allocator);
+    self.segments.deinit(self.base.allocator);
     self.sections.deinit(self.base.allocator);
 }
 
 pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void {}
 
-pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void {}
+pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { // currently a no-op: the whole body is commented out
+    // const tracy = trace(@src());
+    // defer tracy.end();
+
+    // var code_buffer = std.ArrayList(u8).init(self.base.allocator);
+    // defer code_buffer.deinit();
+
+    // var dbg_line_buffer = std.ArrayList(u8).init(self.base.allocator);
+    // defer dbg_line_buffer.deinit();
+
+    // var dbg_info_buffer = std.ArrayList(u8).init(self.base.allocator);
+    // defer dbg_info_buffer.deinit();
+
+    // var dbg_info_type_relocs: File.DbgInfoTypeRelocsTable = .{};
+    // defer {
+    //     for (dbg_info_type_relocs.items()) |*entry| {
+    //         entry.value.relocs.deinit(self.base.allocator);
+    //     }
+    //     dbg_info_type_relocs.deinit(self.base.allocator);
+    // }
+
+    // const typed_value = decl.typed_value.most_recent.typed_value;
+    // log.debug("typed_value = {}", .{typed_value});
+
+    // const res = try codegen.generateSymbol(
+    //     &self.base,
+    //     decl.src(),
+    //     typed_value,
+    //     &code_buffer,
+    //     &dbg_line_buffer,
+    //     &dbg_info_buffer,
+    //     &dbg_info_type_relocs,
+    // );
+    // log.debug("res = {}", .{res});
+
+    // const code = switch (res) {
+    //     .externally_managed => |x| x,
+    //     .appended => code_buffer.items,
+    //     .fail => |em| {
+    //         decl.analysis = .codegen_failure;
+    //         try module.failed_decls.put(module.gpa, decl, em);
+    //         return;
+    //     },
+    // };
+}
 
 pub fn updateDeclLineNumber(self: *MachO, module: *Module, decl: *const Module.Decl) !void {}
 
@@ -214,3 +317,117 @@ pub fn freeDecl(self: *MachO, decl: *Module.Decl) void {}
 pub fn getDeclVAddr(self: *MachO, decl: *const Module.Decl) u64 {
     @panic("TODO implement getDeclVAddr for MachO");
 }
+
+pub fn populateMissingMetadata(self: *MachO) !void {
+    // if (self.seg_load_re_index == null) {
+    //     self.seg_load_re_index = @intCast(u16, self.segment_cmds.items.len);
+    //     const file_size = self.base.options.program_code_size_hint;
+    //     const p_align = 0x1000;
+    //     const off = self.findFreeSpace(file_size, p_align);
+    //     log.debug("found LC_SEGMENT_64 free space 0x{x} to 0x{x}", .{ off, off + file_size });
+    //     try self.segment_cmds.append(self.base.allocator, .{});
+    //     self.entry_addr = null;
+    //     self.seg_table_dirty = true;
+    // }
+    // if (self.seg_got_index == null) {
+    //     self.seg_got_index = @intCast(u16, self.segment_cmds.items.len);
+    //     const file_size = 8 * self.base.options.symbol_count_hint;
+    //     // Apple recommends to page align for better performance.
+    //     // TODO This is not necessarily true for MH_OBJECT which means we
+    //     // could potentially shave off a couple of bytes when generating
+    //     // only object files.
+    //     const p_align = 0x1000;
+    //     const off = self.findFreeSpace(file_size, p_align);
+    //     log.debug("found LC_SEGMENT_64 free space 0x{x} to 0x{x}", .{ off, off + file_size });
+    //     const default_vmaddr = 0x4000000;
+    //     try self.segment_cmds.append(self.base.allocator, .{
+    //         .cmd = macho.LC_SEGMENT_64,
+    //         .cmdsize = @sizeOf(macho.segment_command_64),
+    //         .segname = self.makeString("__TEXT"),
+    //         .vmaddr = default_vmaddr,
+    //         .vmsize = file_size,
+    //         .fileoff = off,
+    //         .filesize = file_size,
+    //         .maxprot = 0x5,
+    //         .initprot = 0x5,
+    //         .nsects = 0,
+    //         .flags = 0,
+    //     });
+    //     self.seg_table_dirty = true;
+    // }
+}
+
+/// Returns the end offset of a region that overlaps the padded range starting at `start`, or null if nothing collides.
+fn detectAllocCollision(self: *MachO, start: u64, size: u64) ?u64 {
+    const header_size: u64 = @sizeOf(macho.mach_header_64);
+    if (start < header_size)
+        return header_size;
+
+    const end = start + satMul(size, alloc_num) / alloc_den;
+
+    // if (self.sec_table_offset) |off| {
+    //     const section_size: u64 = @sizeOf(macho.section_64);
+    //     const tight_size = self.sections.items.len * section_size;
+    //     const increased_size = satMul(tight_size, alloc_num) / alloc_den;
+    //     const test_end = off + increased_size;
+    //     if (end > off and start < test_end) {
+    //         return test_end;
+    //     }
+    // }
+
+    // if (self.seg_table_offset) |off| {
+    //     const segment_size: u64 = @sizeOf(macho.segment_command_64);
+    //     const tight_size = self.segment_cmds.items.len * segment_size;
+    //     const increased_size = satMul(tight_size, alloc_num) / alloc_den;
+    //     const test_end = off + increased_size;
+    //     if (end > off and start < test_end) {
+    //         return test_end;
+    //     }
+    // }
+
+    // for (self.sections.items) |section| {
+    //     const increased_size = satMul(section.size, alloc_num) / alloc_den;
+    //     const test_end = section.offset + increased_size;
+    //     if (end > section.offset and start < test_end) {
+    //         return test_end;
+    //     }
+    // }
+
+    for (self.segments.items) |segment| {
+        const increased_size = satMul(segment.filesize, alloc_num) / alloc_den;
+        const test_end = segment.fileoff + increased_size;
+        if (end > segment.fileoff and start < test_end) {
+            return test_end;
+        }
+    }
+
+    return null;
+}
+
+fn findFreeSpace(self: *MachO, object_size: u64, min_alignment: u16) u64 {
+    var start: u64 = 0;
+    while (self.detectAllocCollision(start, object_size)) |item_end| {
+        start = mem.alignForwardGeneric(u64, item_end, min_alignment); // retry just past the colliding region
+    }
+    return start;
+}
+
+/// Saturating multiplication: yields `a * b`, or the maximum value of the result type on overflow.
+fn satMul(a: anytype, b: anytype) @TypeOf(a, b) {
+    const T = @TypeOf(a, b);
+    return std.math.mul(T, a, b) catch std.math.maxInt(T);
+}
+
+fn makeString(comptime bytes: []const u8) [16]u8 {
+    var buf = [_]u8{0} ** 16; // zero-filled so the unused tail of the 16-byte name is NUL padding
+    if (bytes.len > buf.len) @compileError("MachO segment/section name too long");
+    mem.copy(u8, buf[0..], bytes);
+    return buf;
+}
+
+fn commandSize(min_size: u32) u32 {
+    if (min_size % @sizeOf(u64) == 0) return min_size;
+
+    const div = min_size / @sizeOf(u64);
+    return (div + 1) * @sizeOf(u64); // round up to the next multiple of 8 bytes
+}