From db2052bc3588c1c52494eef32ef66c5cf3e09d96 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 14 Dec 2022 11:46:46 +0100 Subject: [PATCH 01/17] macho: dedup LC emitting logic Fix path written to `LC_ID_DYLIB` to include the current CWD (if any). --- CMakeLists.txt | 3 + src/link/MachO.zig | 332 ++----------------------------- src/link/MachO/load_commands.zig | 325 ++++++++++++++++++++++++++++++ src/link/MachO/zld.zig | 332 +++---------------------------- 4 files changed, 371 insertions(+), 621 deletions(-) create mode 100644 src/link/MachO/load_commands.zig diff --git a/CMakeLists.txt b/CMakeLists.txt index fa900dbe93..8e8c66d374 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -585,10 +585,13 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/link/MachO/DwarfInfo.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Dylib.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Object.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/Relocation.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Trie.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/ZldAtom.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/bind.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/dead_strip.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/fat.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/load_commands.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/thunks.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/zld.zig" "${CMAKE_SOURCE_DIR}/src/link/Plan9.zig" diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 4a1ca9a357..f83338f48b 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -20,6 +20,7 @@ const dead_strip = @import("MachO/dead_strip.zig"); const fat = @import("MachO/fat.zig"); const link = @import("../link.zig"); const llvm_backend = @import("../codegen/llvm.zig"); +const load_commands = @import("MachO/load_commands.zig"); const target_util = @import("../target.zig"); const trace = @import("../tracy.zig").trace; const zld = @import("MachO/zld.zig"); @@ -265,9 +266,6 @@ pub const SymbolWithLoc = struct { /// actual_capacity + (actual_capacity / 
ideal_factor) const ideal_factor = 3; -/// Default path to dyld -pub const default_dyld_path: [*:0]const u8 = "/usr/lib/dyld"; - /// In order for a slice of bytes to be considered eligible to keep metadata pointing at /// it as a possible place to put new symbols, it must have enough room for this many bytes /// (plus extra for reserved capacity). @@ -561,17 +559,24 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No var ncmds: u32 = 0; try self.writeLinkeditSegmentData(&ncmds, lc_writer); - try writeDylinkerLC(&ncmds, lc_writer); + try load_commands.writeDylinkerLC(&ncmds, lc_writer); - self.writeMainLC(&ncmds, lc_writer) catch |err| switch (err) { - error.MissingMainEntrypoint => { - self.error_flags.no_entry_point_found = true; - }, - else => |e| return e, - }; + if (self.base.options.output_mode == .Exe) blk: { + const seg_id = self.header_segment_cmd_index.?; + const seg = self.segments.items[seg_id]; + const global = self.getEntryPoint() catch |err| switch (err) { + error.MissingMainEntrypoint => { + self.error_flags.no_entry_point_found = true; + break :blk; + }, + else => |e| return e, + }; + const sym = self.getSymbol(global); + try load_commands.writeMainLC(@intCast(u32, sym.n_value - seg.vmaddr), &self.base.options, &ncmds, lc_writer); + } - try self.writeDylibIdLC(&ncmds, lc_writer); - try self.writeRpathLCs(&ncmds, lc_writer); + if (self.base.options.output_mode == .Lib and self.base.options.link_mode == .Dynamic) try load_commands.writeDylibIdLC(self.base.allocator, &self.base.options, &ncmds, lc_writer); + try load_commands.writeRpathLCs(self.base.allocator, &self.base.options, &ncmds, lc_writer); { try lc_writer.writeStruct(macho.source_version_command{ @@ -581,7 +586,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No ncmds += 1; } - try self.writeBuildVersionLC(&ncmds, lc_writer); + try load_commands.writeBuildVersionLC(&self.base.options, &ncmds, lc_writer); { std.crypto.random.bytes(&self.uuid.uuid); @@ -589,7 +594,7 @@ pub fn flushModule(self: *MachO, comp: 
*Compilation, prog_node: *std.Progress.No ncmds += 1; } - try self.writeLoadDylibLCs(&ncmds, lc_writer); + try load_commands.writeLoadDylibLCs(self.dylibs.items, self.referenced_dylibs.keys(), &ncmds, lc_writer); const target = self.base.options.target; const requires_codesig = blk: { @@ -1702,195 +1707,6 @@ pub fn resolveDyldStubBinder(self: *MachO) !void { try self.writePtrWidthAtom(got_atom); } -pub fn writeDylinkerLC(ncmds: *u32, lc_writer: anytype) !void { - const name_len = mem.sliceTo(default_dyld_path, 0).len; - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.dylinker_command) + name_len, - @sizeOf(u64), - )); - try lc_writer.writeStruct(macho.dylinker_command{ - .cmd = .LOAD_DYLINKER, - .cmdsize = cmdsize, - .name = @sizeOf(macho.dylinker_command), - }); - try lc_writer.writeAll(mem.sliceTo(default_dyld_path, 0)); - const padding = cmdsize - @sizeOf(macho.dylinker_command) - name_len; - if (padding > 0) { - try lc_writer.writeByteNTimes(0, padding); - } - ncmds.* += 1; -} - -pub fn writeMainLC(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { - if (self.base.options.output_mode != .Exe) return; - const seg_id = self.header_segment_cmd_index.?; - const seg = self.segments.items[seg_id]; - const global = try self.getEntryPoint(); - const sym = self.getSymbol(global); - try lc_writer.writeStruct(macho.entry_point_command{ - .cmd = .MAIN, - .cmdsize = @sizeOf(macho.entry_point_command), - .entryoff = @intCast(u32, sym.n_value - seg.vmaddr), - .stacksize = self.base.options.stack_size_override orelse 0, - }); - ncmds.* += 1; -} - -const WriteDylibLCCtx = struct { - cmd: macho.LC, - name: []const u8, - timestamp: u32 = 2, - current_version: u32 = 0x10000, - compatibility_version: u32 = 0x10000, -}; - -pub fn writeDylibLC(ctx: WriteDylibLCCtx, ncmds: *u32, lc_writer: anytype) !void { - const name_len = ctx.name.len + 1; - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.dylib_command) + name_len, - 
@sizeOf(u64), - )); - try lc_writer.writeStruct(macho.dylib_command{ - .cmd = ctx.cmd, - .cmdsize = cmdsize, - .dylib = .{ - .name = @sizeOf(macho.dylib_command), - .timestamp = ctx.timestamp, - .current_version = ctx.current_version, - .compatibility_version = ctx.compatibility_version, - }, - }); - try lc_writer.writeAll(ctx.name); - try lc_writer.writeByte(0); - const padding = cmdsize - @sizeOf(macho.dylib_command) - name_len; - if (padding > 0) { - try lc_writer.writeByteNTimes(0, padding); - } - ncmds.* += 1; -} - -pub fn writeDylibIdLC(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { - if (self.base.options.output_mode != .Lib) return; - const install_name = self.base.options.install_name orelse self.base.options.emit.?.sub_path; - const curr = self.base.options.version orelse std.builtin.Version{ - .major = 1, - .minor = 0, - .patch = 0, - }; - const compat = self.base.options.compatibility_version orelse std.builtin.Version{ - .major = 1, - .minor = 0, - .patch = 0, - }; - try writeDylibLC(.{ - .cmd = .ID_DYLIB, - .name = install_name, - .current_version = curr.major << 16 | curr.minor << 8 | curr.patch, - .compatibility_version = compat.major << 16 | compat.minor << 8 | compat.patch, - }, ncmds, lc_writer); -} - -const RpathIterator = struct { - buffer: []const []const u8, - table: std.StringHashMap(void), - count: usize = 0, - - fn init(gpa: Allocator, rpaths: []const []const u8) RpathIterator { - return .{ .buffer = rpaths, .table = std.StringHashMap(void).init(gpa) }; - } - - fn deinit(it: *RpathIterator) void { - it.table.deinit(); - } - - fn next(it: *RpathIterator) !?[]const u8 { - while (true) { - if (it.count >= it.buffer.len) return null; - const rpath = it.buffer[it.count]; - it.count += 1; - const gop = try it.table.getOrPut(rpath); - if (gop.found_existing) continue; - return rpath; - } - } -}; - -pub fn writeRpathLCs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { - const gpa = self.base.allocator; - - var it = 
RpathIterator.init(gpa, self.base.options.rpath_list); - defer it.deinit(); - - while (try it.next()) |rpath| { - const rpath_len = rpath.len + 1; - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.rpath_command) + rpath_len, - @sizeOf(u64), - )); - try lc_writer.writeStruct(macho.rpath_command{ - .cmdsize = cmdsize, - .path = @sizeOf(macho.rpath_command), - }); - try lc_writer.writeAll(rpath); - try lc_writer.writeByte(0); - const padding = cmdsize - @sizeOf(macho.rpath_command) - rpath_len; - if (padding > 0) { - try lc_writer.writeByteNTimes(0, padding); - } - ncmds.* += 1; - } -} - -pub fn writeBuildVersionLC(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { - const cmdsize = @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); - const platform_version = blk: { - const ver = self.base.options.target.os.version_range.semver.min; - const platform_version = ver.major << 16 | ver.minor << 8; - break :blk platform_version; - }; - const sdk_version = if (self.base.options.native_darwin_sdk) |sdk| blk: { - const ver = sdk.version; - const sdk_version = ver.major << 16 | ver.minor << 8; - break :blk sdk_version; - } else platform_version; - const is_simulator_abi = self.base.options.target.abi == .simulator; - try lc_writer.writeStruct(macho.build_version_command{ - .cmdsize = cmdsize, - .platform = switch (self.base.options.target.os.tag) { - .macos => .MACOS, - .ios => if (is_simulator_abi) macho.PLATFORM.IOSSIMULATOR else macho.PLATFORM.IOS, - .watchos => if (is_simulator_abi) macho.PLATFORM.WATCHOSSIMULATOR else macho.PLATFORM.WATCHOS, - .tvos => if (is_simulator_abi) macho.PLATFORM.TVOSSIMULATOR else macho.PLATFORM.TVOS, - else => unreachable, - }, - .minos = platform_version, - .sdk = sdk_version, - .ntools = 1, - }); - try lc_writer.writeAll(mem.asBytes(&macho.build_tool_version{ - .tool = .LD, - .version = 0x0, - })); - ncmds.* += 1; -} - -pub fn writeLoadDylibLCs(self: *MachO, ncmds: *u32, lc_writer: 
anytype) !void { - for (self.referenced_dylibs.keys()) |id| { - const dylib = self.dylibs.items[id]; - const dylib_id = dylib.id orelse unreachable; - try writeDylibLC(.{ - .cmd = if (dylib.weak) .LOAD_WEAK_DYLIB else .LOAD_DYLIB, - .name = dylib_id.name, - .timestamp = dylib_id.timestamp, - .current_version = dylib_id.current_version, - .compatibility_version = dylib_id.compatibility_version, - }, ncmds, lc_writer); - } -} - pub fn deinit(self: *MachO) void { const gpa = self.base.allocator; @@ -2976,98 +2792,7 @@ pub fn populateMissingMetadata(self: *MachO) !void { } } -pub inline fn calcInstallNameLen(cmd_size: u64, name: []const u8, assume_max_path_len: bool) u64 { - const darwin_path_max = 1024; - const name_len = if (assume_max_path_len) darwin_path_max else std.mem.len(name) + 1; - return mem.alignForwardGeneric(u64, cmd_size + name_len, @alignOf(u64)); -} - -fn calcLCsSize(self: *MachO, assume_max_path_len: bool) !u32 { - const gpa = self.base.allocator; - var sizeofcmds: u64 = 0; - for (self.segments.items) |seg| { - sizeofcmds += seg.nsects * @sizeOf(macho.section_64) + @sizeOf(macho.segment_command_64); - } - - // LC_DYLD_INFO_ONLY - sizeofcmds += @sizeOf(macho.dyld_info_command); - // LC_FUNCTION_STARTS - if (self.text_section_index != null) { - sizeofcmds += @sizeOf(macho.linkedit_data_command); - } - // LC_DATA_IN_CODE - sizeofcmds += @sizeOf(macho.linkedit_data_command); - // LC_SYMTAB - sizeofcmds += @sizeOf(macho.symtab_command); - // LC_DYSYMTAB - sizeofcmds += @sizeOf(macho.dysymtab_command); - // LC_LOAD_DYLINKER - sizeofcmds += calcInstallNameLen( - @sizeOf(macho.dylinker_command), - mem.sliceTo(default_dyld_path, 0), - false, - ); - // LC_MAIN - if (self.base.options.output_mode == .Exe) { - sizeofcmds += @sizeOf(macho.entry_point_command); - } - // LC_ID_DYLIB - if (self.base.options.output_mode == .Lib) { - sizeofcmds += blk: { - const install_name = self.base.options.install_name orelse self.base.options.emit.?.sub_path; - break :blk 
calcInstallNameLen( - @sizeOf(macho.dylib_command), - install_name, - assume_max_path_len, - ); - }; - } - // LC_RPATH - { - var it = RpathIterator.init(gpa, self.base.options.rpath_list); - defer it.deinit(); - while (try it.next()) |rpath| { - sizeofcmds += calcInstallNameLen( - @sizeOf(macho.rpath_command), - rpath, - assume_max_path_len, - ); - } - } - // LC_SOURCE_VERSION - sizeofcmds += @sizeOf(macho.source_version_command); - // LC_BUILD_VERSION - sizeofcmds += @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); - // LC_UUID - sizeofcmds += @sizeOf(macho.uuid_command); - // LC_LOAD_DYLIB - for (self.referenced_dylibs.keys()) |id| { - const dylib = self.dylibs.items[id]; - const dylib_id = dylib.id orelse unreachable; - sizeofcmds += calcInstallNameLen( - @sizeOf(macho.dylib_command), - dylib_id.name, - assume_max_path_len, - ); - } - // LC_CODE_SIGNATURE - { - const target = self.base.options.target; - const requires_codesig = blk: { - if (self.base.options.entitlements) |_| break :blk true; - if (target.cpu.arch == .aarch64 and (target.os.tag == .macos or target.abi == .simulator)) - break :blk true; - break :blk false; - }; - if (requires_codesig) { - sizeofcmds += @sizeOf(macho.linkedit_data_command); - } - } - - return @intCast(u32, sizeofcmds); -} - -pub fn calcPagezeroSize(self: *MachO) u64 { +fn calcPagezeroSize(self: *MachO) u64 { const pagezero_vmsize = self.base.options.pagezero_size orelse default_pagezero_vmsize; const aligned_pagezero_vmsize = mem.alignBackwardGeneric(u64, pagezero_vmsize, self.page_size); if (self.base.options.output_mode == .Lib) return 0; @@ -3079,23 +2804,6 @@ pub fn calcPagezeroSize(self: *MachO) u64 { return aligned_pagezero_vmsize; } -pub fn calcMinHeaderPad(self: *MachO) !u64 { - var padding: u32 = (try self.calcLCsSize(false)) + (self.base.options.headerpad_size orelse 0); - log.debug("minimum requested headerpad size 0x{x}", .{padding + @sizeOf(macho.mach_header_64)}); - - if 
(self.base.options.headerpad_max_install_names) { - var min_headerpad_size: u32 = try self.calcLCsSize(true); - log.debug("headerpad_max_install_names minimum headerpad size 0x{x}", .{ - min_headerpad_size + @sizeOf(macho.mach_header_64), - }); - padding = @max(padding, min_headerpad_size); - } - const offset = @sizeOf(macho.mach_header_64) + padding; - log.debug("actual headerpad size 0x{x}", .{offset}); - - return offset; -} - fn allocateSection(self: *MachO, segname: []const u8, sectname: []const u8, opts: struct { size: u64 = 0, alignment: u32 = 0, diff --git a/src/link/MachO/load_commands.zig b/src/link/MachO/load_commands.zig new file mode 100644 index 0000000000..9b0e61e919 --- /dev/null +++ b/src/link/MachO/load_commands.zig @@ -0,0 +1,325 @@ +const std = @import("std"); +const assert = std.debug.assert; +const link = @import("../../link.zig"); +const log = std.log.scoped(.link); +const macho = std.macho; +const mem = std.mem; + +const Allocator = mem.Allocator; +const Dylib = @import("Dylib.zig"); + +pub const default_dyld_path: [*:0]const u8 = "/usr/lib/dyld"; + +fn calcInstallNameLen(cmd_size: u64, name: []const u8, assume_max_path_len: bool) u64 { + const darwin_path_max = 1024; + const name_len = if (assume_max_path_len) darwin_path_max else std.mem.len(name) + 1; + return mem.alignForwardGeneric(u64, cmd_size + name_len, @alignOf(u64)); +} + +const CalcLCsSizeCtx = struct { + segments: []const macho.segment_command_64, + dylibs: []const Dylib, + referenced_dylibs: []u16, + wants_function_starts: bool = true, +}; + +fn calcLCsSize(gpa: Allocator, options: *const link.Options, ctx: CalcLCsSizeCtx, assume_max_path_len: bool) !u32 { + var has_text_segment: bool = false; + var sizeofcmds: u64 = 0; + for (ctx.segments) |seg| { + sizeofcmds += seg.nsects * @sizeOf(macho.section_64) + @sizeOf(macho.segment_command_64); + if (mem.eql(u8, seg.segName(), "__TEXT")) { + has_text_segment = true; + } + } + + // LC_DYLD_INFO_ONLY + sizeofcmds += 
@sizeOf(macho.dyld_info_command); + // LC_FUNCTION_STARTS (only with a __TEXT segment and when the caller wants it) + if (has_text_segment and ctx.wants_function_starts) { + sizeofcmds += @sizeOf(macho.linkedit_data_command); + } + // LC_DATA_IN_CODE + sizeofcmds += @sizeOf(macho.linkedit_data_command); + // LC_SYMTAB + sizeofcmds += @sizeOf(macho.symtab_command); + // LC_DYSYMTAB + sizeofcmds += @sizeOf(macho.dysymtab_command); + // LC_LOAD_DYLINKER + sizeofcmds += calcInstallNameLen( + @sizeOf(macho.dylinker_command), + mem.sliceTo(default_dyld_path, 0), + false, + ); + // LC_MAIN + if (options.output_mode == .Exe) { + sizeofcmds += @sizeOf(macho.entry_point_command); + } + // LC_ID_DYLIB + if (options.output_mode == .Lib and options.link_mode == .Dynamic) { + sizeofcmds += blk: { + const emit = options.emit.?; + const install_name = options.install_name orelse try emit.directory.join(gpa, &.{emit.sub_path}); + defer if (options.install_name == null) gpa.free(install_name); + break :blk calcInstallNameLen( + @sizeOf(macho.dylib_command), + install_name, + assume_max_path_len, + ); + }; + } + // LC_RPATH + { + var it = RpathIterator.init(gpa, options.rpath_list); + defer it.deinit(); + while (try it.next()) |rpath| { + sizeofcmds += calcInstallNameLen( + @sizeOf(macho.rpath_command), + rpath, + assume_max_path_len, + ); + } + } + // LC_SOURCE_VERSION + sizeofcmds += @sizeOf(macho.source_version_command); + // LC_BUILD_VERSION + sizeofcmds += @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); + // LC_UUID + sizeofcmds += @sizeOf(macho.uuid_command); + // LC_LOAD_DYLIB + for (ctx.referenced_dylibs) |id| { + const dylib = ctx.dylibs[id]; + const dylib_id = dylib.id orelse unreachable; + sizeofcmds += calcInstallNameLen( + @sizeOf(macho.dylib_command), + dylib_id.name, + assume_max_path_len, + ); + } + // LC_CODE_SIGNATURE + { + const target = options.target; + const requires_codesig = blk: { + if (options.entitlements) |_| break :blk true; + if (target.cpu.arch == .aarch64 and (target.os.tag == 
.macos or target.abi == .simulator)) + break :blk true; + break :blk false; + }; + if (requires_codesig) { + sizeofcmds += @sizeOf(macho.linkedit_data_command); + } + } + + return @intCast(u32, sizeofcmds); +} + +pub fn calcMinHeaderPad(gpa: Allocator, options: *const link.Options, ctx: CalcLCsSizeCtx) !u64 { + var padding: u32 = (try calcLCsSize(gpa, options, ctx, false)) + (options.headerpad_size orelse 0); + log.debug("minimum requested headerpad size 0x{x}", .{padding + @sizeOf(macho.mach_header_64)}); + + if (options.headerpad_max_install_names) { + var min_headerpad_size: u32 = try calcLCsSize(gpa, options, ctx, true); + log.debug("headerpad_max_install_names minimum headerpad size 0x{x}", .{ + min_headerpad_size + @sizeOf(macho.mach_header_64), + }); + padding = @max(padding, min_headerpad_size); + } + + const offset = @sizeOf(macho.mach_header_64) + padding; + log.debug("actual headerpad size 0x{x}", .{offset}); + + return offset; +} + +pub fn writeDylinkerLC(ncmds: *u32, lc_writer: anytype) !void { + const name_len = mem.sliceTo(default_dyld_path, 0).len; + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.dylinker_command) + name_len, + @sizeOf(u64), + )); + try lc_writer.writeStruct(macho.dylinker_command{ + .cmd = .LOAD_DYLINKER, + .cmdsize = cmdsize, + .name = @sizeOf(macho.dylinker_command), + }); + try lc_writer.writeAll(mem.sliceTo(default_dyld_path, 0)); + const padding = cmdsize - @sizeOf(macho.dylinker_command) - name_len; + if (padding > 0) { + try lc_writer.writeByteNTimes(0, padding); + } + ncmds.* += 1; +} + +const WriteDylibLCCtx = struct { + cmd: macho.LC, + name: []const u8, + timestamp: u32 = 2, + current_version: u32 = 0x10000, + compatibility_version: u32 = 0x10000, +}; + +fn writeDylibLC(ctx: WriteDylibLCCtx, ncmds: *u32, lc_writer: anytype) !void { + const name_len = ctx.name.len + 1; + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.dylib_command) + name_len, + @sizeOf(u64), + 
)); + try lc_writer.writeStruct(macho.dylib_command{ + .cmd = ctx.cmd, + .cmdsize = cmdsize, + .dylib = .{ + .name = @sizeOf(macho.dylib_command), + .timestamp = ctx.timestamp, + .current_version = ctx.current_version, + .compatibility_version = ctx.compatibility_version, + }, + }); + try lc_writer.writeAll(ctx.name); + try lc_writer.writeByte(0); + const padding = cmdsize - @sizeOf(macho.dylib_command) - name_len; + if (padding > 0) { + try lc_writer.writeByteNTimes(0, padding); + } + ncmds.* += 1; +} + +pub fn writeDylibIdLC(gpa: Allocator, options: *const link.Options, ncmds: *u32, lc_writer: anytype) !void { + assert(options.output_mode == .Lib and options.link_mode == .Dynamic); + const emit = options.emit.?; + const install_name = options.install_name orelse try emit.directory.join(gpa, &.{emit.sub_path}); + defer if (options.install_name == null) gpa.free(install_name); + const curr = options.version orelse std.builtin.Version{ + .major = 1, + .minor = 0, + .patch = 0, + }; + const compat = options.compatibility_version orelse std.builtin.Version{ + .major = 1, + .minor = 0, + .patch = 0, + }; + try writeDylibLC(.{ + .cmd = .ID_DYLIB, + .name = install_name, + .current_version = curr.major << 16 | curr.minor << 8 | curr.patch, + .compatibility_version = compat.major << 16 | compat.minor << 8 | compat.patch, + }, ncmds, lc_writer); +} + +pub fn writeMainLC(entryoff: u32, options: *const link.Options, ncmds: *u32, lc_writer: anytype) !void { + assert(options.output_mode == .Exe); + try lc_writer.writeStruct(macho.entry_point_command{ + .cmd = .MAIN, + .cmdsize = @sizeOf(macho.entry_point_command), + .entryoff = entryoff, + .stacksize = options.stack_size_override orelse 0, + }); + ncmds.* += 1; +} + +const RpathIterator = struct { + buffer: []const []const u8, + table: std.StringHashMap(void), + count: usize = 0, + + fn init(gpa: Allocator, rpaths: []const []const u8) RpathIterator { + return .{ .buffer = rpaths, .table = std.StringHashMap(void).init(gpa) }; + 
} + + fn deinit(it: *RpathIterator) void { + it.table.deinit(); + } + + fn next(it: *RpathIterator) !?[]const u8 { + while (true) { + if (it.count >= it.buffer.len) return null; + const rpath = it.buffer[it.count]; + it.count += 1; + const gop = try it.table.getOrPut(rpath); + if (gop.found_existing) continue; + return rpath; + } + } +}; + +pub fn writeRpathLCs(gpa: Allocator, options: *const link.Options, ncmds: *u32, lc_writer: anytype) !void { + var it = RpathIterator.init(gpa, options.rpath_list); + defer it.deinit(); + + while (try it.next()) |rpath| { + const rpath_len = rpath.len + 1; + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.rpath_command) + rpath_len, + @sizeOf(u64), + )); + try lc_writer.writeStruct(macho.rpath_command{ + .cmdsize = cmdsize, + .path = @sizeOf(macho.rpath_command), + }); + try lc_writer.writeAll(rpath); + try lc_writer.writeByte(0); + const padding = cmdsize - @sizeOf(macho.rpath_command) - rpath_len; + if (padding > 0) { + try lc_writer.writeByteNTimes(0, padding); + } + ncmds.* += 1; + } +} + +pub fn writeBuildVersionLC(options: *const link.Options, ncmds: *u32, lc_writer: anytype) !void { + const cmdsize = @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); + const platform_version = blk: { + const ver = options.target.os.version_range.semver.min; + const platform_version = ver.major << 16 | ver.minor << 8; + break :blk platform_version; + }; + const sdk_version = if (options.native_darwin_sdk) |sdk| blk: { + const ver = sdk.version; + const sdk_version = ver.major << 16 | ver.minor << 8; + break :blk sdk_version; + } else platform_version; + const is_simulator_abi = options.target.abi == .simulator; + try lc_writer.writeStruct(macho.build_version_command{ + .cmdsize = cmdsize, + .platform = switch (options.target.os.tag) { + .macos => .MACOS, + .ios => if (is_simulator_abi) macho.PLATFORM.IOSSIMULATOR else macho.PLATFORM.IOS, + .watchos => if (is_simulator_abi) 
macho.PLATFORM.WATCHOSSIMULATOR else macho.PLATFORM.WATCHOS, + .tvos => if (is_simulator_abi) macho.PLATFORM.TVOSSIMULATOR else macho.PLATFORM.TVOS, + else => unreachable, + }, + .minos = platform_version, + .sdk = sdk_version, + .ntools = 1, + }); + try lc_writer.writeAll(mem.asBytes(&macho.build_tool_version{ + .tool = .LD, + .version = 0x0, + })); + ncmds.* += 1; +} + +pub fn writeLoadDylibLCs(dylibs: []const Dylib, referenced: []u16, ncmds: *u32, lc_writer: anytype) !void { + for (referenced) |index| { + const dylib = dylibs[index]; + const dylib_id = dylib.id orelse unreachable; + try writeDylibLC(.{ + .cmd = if (dylib.weak) .LOAD_WEAK_DYLIB else .LOAD_DYLIB, + .name = dylib_id.name, + .timestamp = dylib_id.timestamp, + .current_version = dylib_id.current_version, + .compatibility_version = dylib_id.compatibility_version, + }, ncmds, lc_writer); + } +} + +pub fn writeSourceVersionLC(ncmds: *u32, lc_writer: anytype) !void { + try lc_writer.writeStruct(macho.source_version_command{ + .cmdsize = @sizeOf(macho.source_version_command), + .version = 0x0, + }); + ncmds.* += 1; +} diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 9baecd326a..7bffc10e85 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -13,6 +13,7 @@ const bind = @import("bind.zig"); const dead_strip = @import("dead_strip.zig"); const fat = @import("fat.zig"); const link = @import("../../link.zig"); +const load_commands = @import("load_commands.zig"); const thunks = @import("thunks.zig"); const trace = @import("../../tracy.zig").trace; @@ -34,7 +35,7 @@ pub const Zld = struct { gpa: Allocator, file: fs.File, page_size: u16, - options: link.Options, + options: *const link.Options, objects: std.ArrayListUnmanaged(Object) = .{}, archives: std.ArrayListUnmanaged(Archive) = .{}, @@ -1227,195 +1228,6 @@ pub const Zld = struct { } } - fn writeDylinkerLC(ncmds: *u32, lc_writer: anytype) !void { - const name_len = mem.sliceTo(MachO.default_dyld_path, 0).len; - const cmdsize = 
@intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.dylinker_command) + name_len, - @sizeOf(u64), - )); - try lc_writer.writeStruct(macho.dylinker_command{ - .cmd = .LOAD_DYLINKER, - .cmdsize = cmdsize, - .name = @sizeOf(macho.dylinker_command), - }); - try lc_writer.writeAll(mem.sliceTo(MachO.default_dyld_path, 0)); - const padding = cmdsize - @sizeOf(macho.dylinker_command) - name_len; - if (padding > 0) { - try lc_writer.writeByteNTimes(0, padding); - } - ncmds.* += 1; - } - - fn writeMainLC(self: *Zld, ncmds: *u32, lc_writer: anytype) !void { - if (self.options.output_mode != .Exe) return; - const seg_id = self.getSegmentByName("__TEXT").?; - const seg = self.segments.items[seg_id]; - const global = self.getEntryPoint(); - const sym = self.getSymbol(global); - try lc_writer.writeStruct(macho.entry_point_command{ - .cmd = .MAIN, - .cmdsize = @sizeOf(macho.entry_point_command), - .entryoff = @intCast(u32, sym.n_value - seg.vmaddr), - .stacksize = self.options.stack_size_override orelse 0, - }); - ncmds.* += 1; - } - - const WriteDylibLCCtx = struct { - cmd: macho.LC, - name: []const u8, - timestamp: u32 = 2, - current_version: u32 = 0x10000, - compatibility_version: u32 = 0x10000, - }; - - fn writeDylibLC(ctx: WriteDylibLCCtx, ncmds: *u32, lc_writer: anytype) !void { - const name_len = ctx.name.len + 1; - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.dylib_command) + name_len, - @sizeOf(u64), - )); - try lc_writer.writeStruct(macho.dylib_command{ - .cmd = ctx.cmd, - .cmdsize = cmdsize, - .dylib = .{ - .name = @sizeOf(macho.dylib_command), - .timestamp = ctx.timestamp, - .current_version = ctx.current_version, - .compatibility_version = ctx.compatibility_version, - }, - }); - try lc_writer.writeAll(ctx.name); - try lc_writer.writeByte(0); - const padding = cmdsize - @sizeOf(macho.dylib_command) - name_len; - if (padding > 0) { - try lc_writer.writeByteNTimes(0, padding); - } - ncmds.* += 1; - } - - fn writeDylibIdLC(self: 
*Zld, ncmds: *u32, lc_writer: anytype) !void { - if (self.options.output_mode != .Lib) return; - const install_name = self.options.install_name orelse self.options.emit.?.sub_path; - const curr = self.options.version orelse std.builtin.Version{ - .major = 1, - .minor = 0, - .patch = 0, - }; - const compat = self.options.compatibility_version orelse std.builtin.Version{ - .major = 1, - .minor = 0, - .patch = 0, - }; - try writeDylibLC(.{ - .cmd = .ID_DYLIB, - .name = install_name, - .current_version = curr.major << 16 | curr.minor << 8 | curr.patch, - .compatibility_version = compat.major << 16 | compat.minor << 8 | compat.patch, - }, ncmds, lc_writer); - } - - const RpathIterator = struct { - buffer: []const []const u8, - table: std.StringHashMap(void), - count: usize = 0, - - fn init(gpa: Allocator, rpaths: []const []const u8) RpathIterator { - return .{ .buffer = rpaths, .table = std.StringHashMap(void).init(gpa) }; - } - - fn deinit(it: *RpathIterator) void { - it.table.deinit(); - } - - fn next(it: *RpathIterator) !?[]const u8 { - while (true) { - if (it.count >= it.buffer.len) return null; - const rpath = it.buffer[it.count]; - it.count += 1; - const gop = try it.table.getOrPut(rpath); - if (gop.found_existing) continue; - return rpath; - } - } - }; - - fn writeRpathLCs(self: *Zld, ncmds: *u32, lc_writer: anytype) !void { - const gpa = self.gpa; - - var it = RpathIterator.init(gpa, self.options.rpath_list); - defer it.deinit(); - - while (try it.next()) |rpath| { - const rpath_len = rpath.len + 1; - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.rpath_command) + rpath_len, - @sizeOf(u64), - )); - try lc_writer.writeStruct(macho.rpath_command{ - .cmdsize = cmdsize, - .path = @sizeOf(macho.rpath_command), - }); - try lc_writer.writeAll(rpath); - try lc_writer.writeByte(0); - const padding = cmdsize - @sizeOf(macho.rpath_command) - rpath_len; - if (padding > 0) { - try lc_writer.writeByteNTimes(0, padding); - } - ncmds.* += 1; - } - 
} - - fn writeBuildVersionLC(self: *Zld, ncmds: *u32, lc_writer: anytype) !void { - const cmdsize = @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); - const platform_version = blk: { - const ver = self.options.target.os.version_range.semver.min; - const platform_version = ver.major << 16 | ver.minor << 8; - break :blk platform_version; - }; - const sdk_version = if (self.options.native_darwin_sdk) |sdk| blk: { - const ver = sdk.version; - const sdk_version = ver.major << 16 | ver.minor << 8; - break :blk sdk_version; - } else platform_version; - const is_simulator_abi = self.options.target.abi == .simulator; - try lc_writer.writeStruct(macho.build_version_command{ - .cmdsize = cmdsize, - .platform = switch (self.options.target.os.tag) { - .macos => .MACOS, - .ios => if (is_simulator_abi) macho.PLATFORM.IOSSIMULATOR else macho.PLATFORM.IOS, - .watchos => if (is_simulator_abi) macho.PLATFORM.WATCHOSSIMULATOR else macho.PLATFORM.WATCHOS, - .tvos => if (is_simulator_abi) macho.PLATFORM.TVOSSIMULATOR else macho.PLATFORM.TVOS, - else => unreachable, - }, - .minos = platform_version, - .sdk = sdk_version, - .ntools = 1, - }); - try lc_writer.writeAll(mem.asBytes(&macho.build_tool_version{ - .tool = .LD, - .version = 0x0, - })); - ncmds.* += 1; - } - - fn writeLoadDylibLCs(self: *Zld, ncmds: *u32, lc_writer: anytype) !void { - for (self.referenced_dylibs.keys()) |id| { - const dylib = self.dylibs.items[id]; - const dylib_id = dylib.id orelse unreachable; - try writeDylibLC(.{ - .cmd = if (dylib.weak) .LOAD_WEAK_DYLIB else .LOAD_DYLIB, - .name = dylib_id.name, - .timestamp = dylib_id.timestamp, - .current_version = dylib_id.current_version, - .compatibility_version = dylib_id.compatibility_version, - }, ncmds, lc_writer); - } - } - pub fn deinit(self: *Zld) void { const gpa = self.gpa; @@ -1516,110 +1328,6 @@ pub const Zld = struct { } } - fn calcLCsSize(self: *Zld, assume_max_path_len: bool) !u32 { - const gpa = self.gpa; - - var sizeofcmds: u64 = 
0; - for (self.segments.items) |seg| { - sizeofcmds += seg.nsects * @sizeOf(macho.section_64) + @sizeOf(macho.segment_command_64); - } - - // LC_DYLD_INFO_ONLY - sizeofcmds += @sizeOf(macho.dyld_info_command); - // LC_FUNCTION_STARTS - if (self.getSectionByName("__TEXT", "__text")) |_| { - sizeofcmds += @sizeOf(macho.linkedit_data_command); - } - // LC_DATA_IN_CODE - sizeofcmds += @sizeOf(macho.linkedit_data_command); - // LC_SYMTAB - sizeofcmds += @sizeOf(macho.symtab_command); - // LC_DYSYMTAB - sizeofcmds += @sizeOf(macho.dysymtab_command); - // LC_LOAD_DYLINKER - sizeofcmds += MachO.calcInstallNameLen( - @sizeOf(macho.dylinker_command), - mem.sliceTo(MachO.default_dyld_path, 0), - false, - ); - // LC_MAIN - if (self.options.output_mode == .Exe) { - sizeofcmds += @sizeOf(macho.entry_point_command); - } - // LC_ID_DYLIB - if (self.options.output_mode == .Lib) { - sizeofcmds += blk: { - const install_name = self.options.install_name orelse self.options.emit.?.sub_path; - break :blk MachO.calcInstallNameLen( - @sizeOf(macho.dylib_command), - install_name, - assume_max_path_len, - ); - }; - } - // LC_RPATH - { - var it = RpathIterator.init(gpa, self.options.rpath_list); - defer it.deinit(); - while (try it.next()) |rpath| { - sizeofcmds += MachO.calcInstallNameLen( - @sizeOf(macho.rpath_command), - rpath, - assume_max_path_len, - ); - } - } - // LC_SOURCE_VERSION - sizeofcmds += @sizeOf(macho.source_version_command); - // LC_BUILD_VERSION - sizeofcmds += @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); - // LC_UUID - sizeofcmds += @sizeOf(macho.uuid_command); - // LC_LOAD_DYLIB - for (self.referenced_dylibs.keys()) |id| { - const dylib = self.dylibs.items[id]; - const dylib_id = dylib.id orelse unreachable; - sizeofcmds += MachO.calcInstallNameLen( - @sizeOf(macho.dylib_command), - dylib_id.name, - assume_max_path_len, - ); - } - // LC_CODE_SIGNATURE - { - const target = self.options.target; - const requires_codesig = blk: { - if 
(self.options.entitlements) |_| break :blk true; - if (target.cpu.arch == .aarch64 and (target.os.tag == .macos or target.abi == .simulator)) - break :blk true; - break :blk false; - }; - if (requires_codesig) { - sizeofcmds += @sizeOf(macho.linkedit_data_command); - } - } - - return @intCast(u32, sizeofcmds); - } - - fn calcMinHeaderPad(self: *Zld) !u64 { - var padding: u32 = (try self.calcLCsSize(false)) + (self.options.headerpad_size orelse 0); - log.debug("minimum requested headerpad size 0x{x}", .{padding + @sizeOf(macho.mach_header_64)}); - - if (self.options.headerpad_max_install_names) { - var min_headerpad_size: u32 = try self.calcLCsSize(true); - log.debug("headerpad_max_install_names minimum headerpad size 0x{x}", .{ - min_headerpad_size + @sizeOf(macho.mach_header_64), - }); - padding = @max(padding, min_headerpad_size); - } - - const offset = @sizeOf(macho.mach_header_64) + padding; - log.debug("actual headerpad size 0x{x}", .{offset}); - - return offset; - } - pub fn allocateSymbol(self: *Zld) !u32 { try self.locals.ensureUnusedCapacity(self.gpa, 1); log.debug(" (allocating symbol index {d})", .{self.locals.items.len}); @@ -1842,7 +1550,11 @@ pub const Zld = struct { fn allocateSegments(self: *Zld) !void { for (self.segments.items) |*segment, segment_index| { const is_text_segment = mem.eql(u8, segment.segName(), "__TEXT"); - const base_size = if (is_text_segment) try self.calcMinHeaderPad() else 0; + const base_size = if (is_text_segment) try load_commands.calcMinHeaderPad(self.gpa, self.options, .{ + .segments = self.segments.items, + .dylibs = self.dylibs.items, + .referenced_dylibs = self.referenced_dylibs.keys(), + }) else 0; try self.allocateSegment(@intCast(u8, segment_index), base_size); } } @@ -3734,7 +3446,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr defer tracy.end(); const gpa = macho_file.base.allocator; - const options = macho_file.base.options; + const options = &macho_file.base.options; const 
target = options.target; var arena_allocator = std.heap.ArenaAllocator.init(gpa); @@ -3884,7 +3596,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr macho_file.base.file = try directory.handle.createFile(sub_path, .{ .truncate = true, .read = true, - .mode = link.determineMode(options), + .mode = link.determineMode(options.*), }); } var zld = Zld{ @@ -4301,20 +4013,22 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr } } - try Zld.writeDylinkerLC(&ncmds, lc_writer); - try zld.writeMainLC(&ncmds, lc_writer); - try zld.writeDylibIdLC(&ncmds, lc_writer); - try zld.writeRpathLCs(&ncmds, lc_writer); + try load_commands.writeDylinkerLC(&ncmds, lc_writer); - { - try lc_writer.writeStruct(macho.source_version_command{ - .cmdsize = @sizeOf(macho.source_version_command), - .version = 0x0, - }); - ncmds += 1; + if (zld.options.output_mode == .Exe) { + const seg_id = zld.getSegmentByName("__TEXT").?; + const seg = zld.segments.items[seg_id]; + const global = zld.getEntryPoint(); + const sym = zld.getSymbol(global); + try load_commands.writeMainLC(@intCast(u32, sym.n_value - seg.vmaddr), options, &ncmds, lc_writer); + } else { + assert(zld.options.output_mode == .Lib); + try load_commands.writeDylibIdLC(zld.gpa, zld.options, &ncmds, lc_writer); } - try zld.writeBuildVersionLC(&ncmds, lc_writer); + try load_commands.writeRpathLCs(zld.gpa, zld.options, &ncmds, lc_writer); + try load_commands.writeSourceVersionLC(&ncmds, lc_writer); + try load_commands.writeBuildVersionLC(zld.options, &ncmds, lc_writer); { var uuid_lc = macho.uuid_command{ @@ -4326,7 +4040,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr ncmds += 1; } - try zld.writeLoadDylibLCs(&ncmds, lc_writer); + try load_commands.writeLoadDylibLCs(zld.dylibs.items, zld.referenced_dylibs.keys(), &ncmds, lc_writer); const requires_codesig = blk: { if (options.entitlements) |_| break :blk true; From 
ec40c6b28fb1612f401db3c43b68aba670327e2e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 14 Dec 2022 13:56:52 +0100 Subject: [PATCH 02/17] macho: calculate UUID as a streaming MD5 hash of the file contents --- CMakeLists.txt | 1 + src/link/MachO/load_commands.zig | 9 +++++++++ src/link/MachO/zld.zig | 28 +++++++++++++++++++--------- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8e8c66d374..31d72a34d6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -593,6 +593,7 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/link/MachO/fat.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/load_commands.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/thunks.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/uuid.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/zld.zig" "${CMAKE_SOURCE_DIR}/src/link/Plan9.zig" "${CMAKE_SOURCE_DIR}/src/link/Plan9/aout.zig" diff --git a/src/link/MachO/load_commands.zig b/src/link/MachO/load_commands.zig index 9b0e61e919..c4e565928b 100644 --- a/src/link/MachO/load_commands.zig +++ b/src/link/MachO/load_commands.zig @@ -323,3 +323,12 @@ pub fn writeSourceVersionLC(ncmds: *u32, lc_writer: anytype) !void { }); ncmds.* += 1; } + +pub fn writeUuidLC(uuid: *const [16]u8, ncmds: *u32, lc_writer: anytype) !void { + var uuid_lc = macho.uuid_command{ + .cmdsize = @sizeOf(macho.uuid_command), + .uuid = uuid.*, + }; + try lc_writer.writeStruct(uuid_lc); + ncmds.* += 1; +} diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 7bffc10e85..9a66d76e1a 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -16,6 +16,7 @@ const link = @import("../../link.zig"); const load_commands = @import("load_commands.zig"); const thunks = @import("thunks.zig"); const trace = @import("../../tracy.zig").trace; +const uuid = @import("uuid.zig"); const Allocator = mem.Allocator; const Archive = @import("Archive.zig"); @@ -3986,6 +3987,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr var 
lc_buffer = std.ArrayList(u8).init(arena); const lc_writer = lc_buffer.writer(); + var ncmds: u32 = 0; try zld.writeLinkeditSegmentData(&ncmds, lc_writer, reverse_lookups); @@ -4030,15 +4032,14 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr try load_commands.writeSourceVersionLC(&ncmds, lc_writer); try load_commands.writeBuildVersionLC(zld.options, &ncmds, lc_writer); - { - var uuid_lc = macho.uuid_command{ - .cmdsize = @sizeOf(macho.uuid_command), - .uuid = undefined, - }; - std.crypto.random.bytes(&uuid_lc.uuid); - try lc_writer.writeStruct(uuid_lc); - ncmds += 1; - } + // Looking forward into the future, we will want to offer `-no_uuid` support in which case + // there will be nothing to backpatch. + const uuid_offset_backpatch: ?usize = blk: { + const index = lc_buffer.items.len; + var uuid_buf: [16]u8 = [_]u8{0} ** 16; + try load_commands.writeUuidLC(&uuid_buf, &ncmds, lc_writer); + break :blk index; + }; try load_commands.writeLoadDylibLCs(zld.dylibs.items, zld.referenced_dylibs.keys(), &ncmds, lc_writer); @@ -4071,6 +4072,15 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr try zld.file.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len); try zld.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len + headers_buf.items.len)); + if (uuid_offset_backpatch) |backpatch| { + const seg = zld.getLinkeditSegmentPtr(); + const file_size = seg.fileoff + seg.filesize; + var uuid_buf: [16]u8 = undefined; + try uuid.calcMd5Hash(zld.gpa, zld.file, file_size, &uuid_buf); + const offset = @sizeOf(macho.mach_header_64) + headers_buf.items.len + backpatch + @sizeOf(macho.load_command); + try zld.file.pwriteAll(&uuid_buf, offset); + } + if (codesig) |*csig| { try zld.writeCodeSignature(comp, csig, codesig_offset.?); // code signing always comes last } From 79457fc76a61695560e6314246b0a8c21a7e2d2c Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 14 Dec 2022 15:15:20 +0100 
Subject: [PATCH 03/17] macho: generalize parallel hasher; impl parallel MD5-like hash By pulling out the parallel hashing setup from `CodeSignature.zig`, we can now reuse it in different places across the MachO linker (for now; I can totally see its usefulness beyond MachO, e.g. in COFF or ELF too). The parallel hasher is generic over the actual hasher, such as Sha256 or MD5. The implementation is kept as it was. For UUID calculation, depending on the linking mode: * incremental - since it only supports debug mode, we don't bother with MD5 hashing of the contents, and populate it with random data but only once per sequence of in-place binary patches * traditional - in debug, we use a random string (for speed); in release, we calculate the hash, however we use LLVM/LLD's trick in that we calculate a series of MD5 hashes in parallel and then compute one final MD5 hash over those MD5 hashes to generate the digest. --- CMakeLists.txt | 1 + src/link/MachO.zig | 16 ++++---- src/link/MachO/CodeSignature.zig | 56 +++++--------------------- src/link/MachO/DebugSymbols.zig | 6 +-- src/link/MachO/hasher.zig | 60 +++++++++++++++++++++++++++ src/link/MachO/uuid.zig | 69 ++++++++++++++++++++++++++++++++ src/link/MachO/zld.zig | 11 ++++- 7 files changed, 160 insertions(+), 59 deletions(-) create mode 100644 src/link/MachO/hasher.zig create mode 100644 src/link/MachO/uuid.zig diff --git a/CMakeLists.txt b/CMakeLists.txt index 31d72a34d6..ae8c0e05d1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -591,6 +591,7 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/link/MachO/bind.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/dead_strip.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/fat.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/hasher.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/load_commands.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/thunks.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/uuid.zig" diff --git a/src/link/MachO.zig b/src/link/MachO.zig index f83338f48b..8a8525ab19 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@
-99,10 +99,10 @@ page_size: u16, /// fashion (default for LLVM backend). mode: enum { incremental, one_shot }, -uuid: macho.uuid_command = .{ - .cmdsize = @sizeOf(macho.uuid_command), - .uuid = undefined, -}, +uuid: struct { + buf: [16]u8 = undefined, + final: bool = false, +} = .{}, dylibs: std.ArrayListUnmanaged(Dylib) = .{}, dylibs_map: std.StringHashMapUnmanaged(u16) = .{}, @@ -588,11 +588,11 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try load_commands.writeBuildVersionLC(&self.base.options, &ncmds, lc_writer); - { - std.crypto.random.bytes(&self.uuid.uuid); - try lc_writer.writeStruct(self.uuid); - ncmds += 1; + if (!self.uuid.final) { + std.crypto.random.bytes(&self.uuid.buf); + self.uuid.final = true; } + try load_commands.writeUuidLC(&self.uuid.buf, &ncmds, lc_writer); try load_commands.writeLoadDylibLCs(self.dylibs.items, self.referenced_dylibs.keys(), &ncmds, lc_writer); diff --git a/src/link/MachO/CodeSignature.zig b/src/link/MachO/CodeSignature.zig index e3c362e941..391ac28efa 100644 --- a/src/link/MachO/CodeSignature.zig +++ b/src/link/MachO/CodeSignature.zig @@ -1,6 +1,4 @@ const CodeSignature = @This(); -const Compilation = @import("../../Compilation.zig"); -const WaitGroup = @import("../../WaitGroup.zig"); const std = @import("std"); const assert = std.debug.assert; @@ -9,10 +7,13 @@ const log = std.log.scoped(.link); const macho = std.macho; const mem = std.mem; const testing = std.testing; + const Allocator = mem.Allocator; +const Compilation = @import("../../Compilation.zig"); +const Hasher = @import("hasher.zig").ParallelHasher; const Sha256 = std.crypto.hash.sha2.Sha256; -const hash_size: u8 = 32; +const hash_size = Sha256.digest_length; const Blob = union(enum) { code_directory: *CodeDirectory, @@ -109,7 +110,7 @@ const CodeDirectory = struct { fn size(self: CodeDirectory) u32 { const code_slots = self.inner.nCodeSlots * hash_size; const special_slots = self.inner.nSpecialSlots * hash_size; - return 
@sizeOf(macho.CodeDirectory) + @intCast(u32, self.ident.len + 1) + special_slots + code_slots; + return @sizeOf(macho.CodeDirectory) + @intCast(u32, self.ident.len + 1 + special_slots + code_slots); } fn write(self: CodeDirectory, writer: anytype) !void { @@ -287,33 +288,11 @@ pub fn writeAdhocSignature( self.code_directory.inner.nCodeSlots = total_pages; // Calculate hash for each page (in file) and write it to the buffer - var wg: WaitGroup = .{}; - { - const buffer = try gpa.alloc(u8, self.page_size * total_pages); - defer gpa.free(buffer); - - const results = try gpa.alloc(fs.File.PReadError!usize, total_pages); - defer gpa.free(results); - { - wg.reset(); - defer wg.wait(); - - var i: usize = 0; - while (i < total_pages) : (i += 1) { - const fstart = i * self.page_size; - const fsize = if (fstart + self.page_size > opts.file_size) - opts.file_size - fstart - else - self.page_size; - const out_hash = &self.code_directory.code_slots.items[i]; - wg.start(); - try comp.thread_pool.spawn(workerSha256Hash, .{ - opts.file, fstart, buffer[fstart..][0..fsize], out_hash, &results[i], &wg, - }); - } - } - for (results) |result| _ = try result; - } + var hasher = Hasher(Sha256){}; + try hasher.hash(gpa, comp.thread_pool, opts.file, self.code_directory.code_slots.items, .{ + .chunk_size = self.page_size, + .max_file_size = opts.file_size, + }); try blobs.append(.{ .code_directory = &self.code_directory }); header.length += @sizeOf(macho.BlobIndex); @@ -352,7 +331,7 @@ pub fn writeAdhocSignature( } self.code_directory.inner.hashOffset = - @sizeOf(macho.CodeDirectory) + @intCast(u32, self.code_directory.ident.len + 1) + self.code_directory.inner.nSpecialSlots * hash_size; + @sizeOf(macho.CodeDirectory) + @intCast(u32, self.code_directory.ident.len + 1 + self.code_directory.inner.nSpecialSlots * hash_size); self.code_directory.inner.length = self.code_directory.size(); header.length += self.code_directory.size(); @@ -372,19 +351,6 @@ pub fn writeAdhocSignature( } } -fn 
workerSha256Hash( - file: fs.File, - fstart: usize, - buffer: []u8, - hash: *[hash_size]u8, - err: *fs.File.PReadError!usize, - wg: *WaitGroup, -) void { - defer wg.finish(); - err.* = file.preadAll(buffer, fstart); - Sha256.hash(buffer, hash, .{}); -} - pub fn size(self: CodeSignature) u32 { var ssize: u32 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) + self.code_directory.size(); if (self.requirements) |req| { diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index 655ba7162f..22905a520a 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -5,6 +5,7 @@ const build_options = @import("build_options"); const assert = std.debug.assert; const fs = std.fs; const link = @import("../../link.zig"); +const load_commands = @import("load_commands.zig"); const log = std.log.scoped(.dsym); const macho = std.macho; const makeStaticString = MachO.makeStaticString; @@ -303,10 +304,7 @@ pub fn flushModule(self: *DebugSymbols, macho_file: *MachO) !void { self.finalizeDwarfSegment(macho_file); try self.writeLinkeditSegmentData(macho_file, &ncmds, lc_writer); - { - try lc_writer.writeStruct(macho_file.uuid); - ncmds += 1; - } + try load_commands.writeUuidLC(&macho_file.uuid.buf, &ncmds, lc_writer); var headers_buf = std.ArrayList(u8).init(self.allocator); defer headers_buf.deinit(); diff --git a/src/link/MachO/hasher.zig b/src/link/MachO/hasher.zig new file mode 100644 index 0000000000..29099ad2d9 --- /dev/null +++ b/src/link/MachO/hasher.zig @@ -0,0 +1,60 @@ +const std = @import("std"); +const assert = std.debug.assert; +const fs = std.fs; +const mem = std.mem; + +const Allocator = mem.Allocator; +const ThreadPool = @import("../../ThreadPool.zig"); +const WaitGroup = @import("../../WaitGroup.zig"); + +pub fn ParallelHasher(comptime Hasher: type) type { + const hash_size = Hasher.digest_length; + + return struct { + pub fn hash(self: @This(), gpa: Allocator, pool: *ThreadPool, file: fs.File, out: [][hash_size]u8, 
opts: struct { + chunk_size: u16 = 0x4000, + max_file_size: ?u64 = null, + }) !void { + _ = self; + + var wg: WaitGroup = .{}; + + const file_size = opts.max_file_size orelse try file.getEndPos(); + const total_num_chunks = mem.alignForward(file_size, opts.chunk_size) / opts.chunk_size; + assert(out.len >= total_num_chunks); + + const buffer = try gpa.alloc(u8, opts.chunk_size * total_num_chunks); + defer gpa.free(buffer); + + const results = try gpa.alloc(fs.File.PReadError!usize, total_num_chunks); + defer gpa.free(results); + + { + wg.reset(); + defer wg.wait(); + + var i: usize = 0; + while (i < total_num_chunks) : (i += 1) { + const fstart = i * opts.chunk_size; + const fsize = if (fstart + opts.chunk_size > file_size) file_size - fstart else opts.chunk_size; + wg.start(); + try pool.spawn(worker, .{ file, fstart, buffer[fstart..][0..fsize], &out[i], &results[i], &wg }); + } + } + for (results) |result| _ = try result; + } + + fn worker( + file: fs.File, + fstart: usize, + buffer: []u8, + out: *[hash_size]u8, + err: *fs.File.PReadError!usize, + wg: *WaitGroup, + ) void { + defer wg.finish(); + err.* = file.preadAll(buffer, fstart); + Hasher.hash(buffer, out, .{}); + } + }; +} diff --git a/src/link/MachO/uuid.zig b/src/link/MachO/uuid.zig new file mode 100644 index 0000000000..987b156a4b --- /dev/null +++ b/src/link/MachO/uuid.zig @@ -0,0 +1,69 @@ +const std = @import("std"); +const fs = std.fs; +const mem = std.mem; + +const Allocator = mem.Allocator; +const Compilation = @import("../../Compilation.zig"); +const Md5 = std.crypto.hash.Md5; +const Hasher = @import("hasher.zig").ParallelHasher; + +/// Somewhat random chunk size for MD5 hash calculation. +pub const chunk_size = 0x4000; + +/// Calculates Md5 hash of the file contents. +/// Hash is calculated in a streaming manner which may be slow. 
+pub fn calcUuidStreaming(file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void { + const total_num_chunks = mem.alignForward(file_size, chunk_size) / chunk_size; + + var hasher = Md5.init(.{}); + var buffer: [chunk_size]u8 = undefined; + + var i: usize = 0; + while (i < total_num_chunks) : (i += 1) { + const start = i * chunk_size; + const size = if (start + chunk_size > file_size) + file_size - start + else + chunk_size; + const amt = try file.preadAll(&buffer, start); + if (amt != size) return error.InputOutput; + + hasher.update(buffer[0..size]); + } + + hasher.final(out); + conform(out); +} + +/// Calculates Md5 hash of each chunk in parallel and then hashes all Md5 hashes to produce +/// the final digest. +/// While this is NOT a correct MD5 hash of the contents, this methodology is used by LLVM/LLD +/// and we will use it too as it seems accepted by Apple OSes. +pub fn calcUuidParallel(comp: *const Compilation, file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void { + const total_hashes = mem.alignForward(file_size, chunk_size) / chunk_size; + + const hashes = try comp.gpa.alloc([Md5.digest_length]u8, total_hashes); + defer comp.gpa.free(hashes); + + var hasher = Hasher(Md5){}; + try hasher.hash(comp.gpa, comp.thread_pool, file, hashes, .{ + .chunk_size = chunk_size, + .max_file_size = file_size, + }); + + const final_buffer = try comp.gpa.alloc(u8, total_hashes * Md5.digest_length); + defer comp.gpa.free(final_buffer); + + for (hashes) |hash, i| { + mem.copy(u8, final_buffer[i * Md5.digest_length ..][0..Md5.digest_length], &hash); + } + + Md5.hash(final_buffer, out, .{}); + conform(out); +} + +inline fn conform(out: *[Md5.digest_length]u8) void { + // LC_UUID uuids should conform to RFC 4122 UUID version 4 & UUID version 5 formats + out[6] = (out[6] & 0x0F) | (3 << 4); + out[8] = (out[8] & 0x3F) | 0x80; +} diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 9a66d76e1a..022167e223 100644 --- a/src/link/MachO/zld.zig +++ 
b/src/link/MachO/zld.zig @@ -4037,8 +4037,15 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr const uuid_offset_backpatch: ?usize = blk: { const index = lc_buffer.items.len; var uuid_buf: [16]u8 = [_]u8{0} ** 16; + + if (zld.options.optimize_mode == .Debug) { + // In Debug we don't really care about reproducibility, so put in a random value + // and be done with it. + std.crypto.random.bytes(&uuid_buf); + } + try load_commands.writeUuidLC(&uuid_buf, &ncmds, lc_writer); - break :blk index; + break :blk if (zld.options.optimize_mode == .Debug) null else index; }; try load_commands.writeLoadDylibLCs(zld.dylibs.items, zld.referenced_dylibs.keys(), &ncmds, lc_writer); @@ -4076,7 +4083,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr const seg = zld.getLinkeditSegmentPtr(); const file_size = seg.fileoff + seg.filesize; var uuid_buf: [16]u8 = undefined; - try uuid.calcMd5Hash(zld.gpa, zld.file, file_size, &uuid_buf); + try uuid.calcUuidParallel(comp, zld.file, file_size, &uuid_buf); const offset = @sizeOf(macho.mach_header_64) + headers_buf.items.len + backpatch + @sizeOf(macho.load_command); try zld.file.pwriteAll(&uuid_buf, offset); } From 6b99aab0eb25821d91c1708575cabdadc9079024 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 14 Dec 2022 15:31:56 +0100 Subject: [PATCH 04/17] macho: do not assume exe or dylib when flushing module --- src/link/MachO.zig | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 8a8525ab19..0422a983dc 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -561,21 +561,26 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try self.writeLinkeditSegmentData(&ncmds, lc_writer); try load_commands.writeDylinkerLC(&ncmds, lc_writer); - if (self.base.options.output_mode == .Exe) blk: { - const seg_id = self.header_segment_cmd_index.?; - const seg 
= self.segments.items[seg_id]; - const global = self.getEntryPoint() catch |err| switch (err) { - error.MissingMainEntrypoint => { - self.error_flags.no_entry_point_found = true; - break :blk; - }, - else => |e| return e, - }; - const sym = self.getSymbol(global); - try load_commands.writeMainLC(@intCast(u32, sym.n_value - seg.vmaddr), &self.base.options, &ncmds, lc_writer); + switch (self.base.options.output_mode) { + .Exe => blk: { + const seg_id = self.header_segment_cmd_index.?; + const seg = self.segments.items[seg_id]; + const global = self.getEntryPoint() catch |err| switch (err) { + error.MissingMainEntrypoint => { + self.error_flags.no_entry_point_found = true; + break :blk; + }, + else => |e| return e, + }; + const sym = self.getSymbol(global); + try load_commands.writeMainLC(@intCast(u32, sym.n_value - seg.vmaddr), &self.base.options, &ncmds, lc_writer); + }, + .Lib => if (self.base.options.link_mode == .Dynamic) { + try load_commands.writeDylibIdLC(self.base.allocator, &self.base.options, &ncmds, lc_writer); + }, + else => {}, } - try load_commands.writeDylibIdLC(self.base.allocator, &self.base.options, &ncmds, lc_writer); try load_commands.writeRpathLCs(self.base.allocator, &self.base.options, &ncmds, lc_writer); { From 3af6a4e887bb3cd986a52ea94202a0079f442e9b Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 15 Dec 2022 11:25:54 +0100 Subject: [PATCH 05/17] macho: generate stabs last to aid in deterministic uuid calculation --- src/link/MachO/zld.zig | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 022167e223..60d0f853ab 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -2556,12 +2556,6 @@ pub const Zld = struct { } } - if (!self.options.strip) { - for (self.objects.items) |object| { - try self.generateSymbolStabs(object, &locals); - } - } - var exports = std.ArrayList(macho.nlist_64).init(gpa); defer exports.deinit(); @@ -2592,6 +2586,14 @@ pub 
const Zld = struct { try imports_table.putNoClobber(global, new_index); } + // We generate stabs last in order to ensure that the strtab always has debug info + // strings trailing + if (!self.options.strip) { + for (self.objects.items) |object| { + try self.generateSymbolStabs(object, &locals); + } + } + const nlocals = @intCast(u32, locals.items.len); const nexports = @intCast(u32, exports.items.len); const nimports = @intCast(u32, imports.items.len); From 09dee744145fc423feb2b74ffa22cc1679a2749e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 15 Dec 2022 13:32:51 +0100 Subject: [PATCH 06/17] macho: store LC headers to often updated LINKEDIT sections --- lib/std/macho.zig | 92 +++++------ src/link/MachO.zig | 189 ++++++++-------------- src/link/MachO/DebugSymbols.zig | 78 ++++----- src/link/MachO/load_commands.zig | 60 +++---- src/link/MachO/zld.zig | 265 ++++++++++++------------------- 5 files changed, 268 insertions(+), 416 deletions(-) diff --git a/lib/std/macho.zig b/lib/std/macho.zig index cb1fca20b2..24dd1749ea 100644 --- a/lib/std/macho.zig +++ b/lib/std/macho.zig @@ -58,10 +58,10 @@ pub const uuid_command = extern struct { cmd: LC = .UUID, /// sizeof(struct uuid_command) - cmdsize: u32, + cmdsize: u32 = @sizeOf(uuid_command), /// the 128-bit uuid - uuid: [16]u8, + uuid: [16]u8 = undefined, }; /// The version_min_command contains the min OS version on which this @@ -71,7 +71,7 @@ pub const version_min_command = extern struct { cmd: LC, /// sizeof(struct version_min_command) - cmdsize: u32, + cmdsize: u32 = @sizeOf(version_min_command), /// X.Y.Z is encoded in nibbles xxxx.yy.zz version: u32, @@ -87,7 +87,7 @@ pub const source_version_command = extern struct { cmd: LC = .SOURCE_VERSION, /// sizeof(source_version_command) - cmdsize: u32, + cmdsize: u32 = @sizeOf(source_version_command), /// A.B.C.D.E packed as a24.b10.c10.d10.e10 version: u64, @@ -155,13 +155,13 @@ pub const entry_point_command = extern struct { cmd: LC = .MAIN, /// sizeof(struct 
entry_point_command) - cmdsize: u32, + cmdsize: u32 = @sizeOf(entry_point_command), /// file (__TEXT) offset of main() - entryoff: u64, + entryoff: u64 = 0, /// if not zero, initial stack size - stacksize: u64, + stacksize: u64 = 0, }; /// The symtab_command contains the offsets and sizes of the link-edit 4.3BSD @@ -172,19 +172,19 @@ pub const symtab_command = extern struct { cmd: LC = .SYMTAB, /// sizeof(struct symtab_command) - cmdsize: u32, + cmdsize: u32 = @sizeOf(symtab_command), /// symbol table offset - symoff: u32, + symoff: u32 = 0, /// number of symbol table entries - nsyms: u32, + nsyms: u32 = 0, /// string table offset - stroff: u32, + stroff: u32 = 0, /// string table size in bytes - strsize: u32, + strsize: u32 = 0, }; /// This is the second set of the symbolic information which is used to support @@ -230,7 +230,7 @@ pub const dysymtab_command = extern struct { cmd: LC = .DYSYMTAB, /// sizeof(struct dysymtab_command) - cmdsize: u32, + cmdsize: u32 = @sizeOf(dysymtab_command), // The symbols indicated by symoff and nsyms of the LC_SYMTAB load command // are grouped into the following three groups: @@ -247,22 +247,22 @@ pub const dysymtab_command = extern struct { // table when this is a dynamically linked shared library file). /// index of local symbols - ilocalsym: u32, + ilocalsym: u32 = 0, /// number of local symbols - nlocalsym: u32, + nlocalsym: u32 = 0, /// index to externally defined symbols - iextdefsym: u32, + iextdefsym: u32 = 0, /// number of externally defined symbols - nextdefsym: u32, + nextdefsym: u32 = 0, /// index to undefined symbols - iundefsym: u32, + iundefsym: u32 = 0, /// number of undefined symbols - nundefsym: u32, + nundefsym: u32 = 0, // For the for the dynamic binding process to find which module a symbol // is defined in the table of contents is used (analogous to the ranlib @@ -272,10 +272,10 @@ pub const dysymtab_command = extern struct { // symbols are sorted by name and is use as the table of contents. 
/// file offset to table of contents - tocoff: u32, + tocoff: u32 = 0, /// number of entries in table of contents - ntoc: u32, + ntoc: u32 = 0, // To support dynamic binding of "modules" (whole object files) the symbol // table must reflect the modules that the file was created from. This is @@ -286,10 +286,10 @@ pub const dysymtab_command = extern struct { // contains one module so everything in the file belongs to the module. /// file offset to module table - modtaboff: u32, + modtaboff: u32 = 0, /// number of module table entries - nmodtab: u32, + nmodtab: u32 = 0, // To support dynamic module binding the module structure for each module // indicates the external references (defined and undefined) each module @@ -300,10 +300,10 @@ pub const dysymtab_command = extern struct { // undefined external symbols indicates the external references. /// offset to referenced symbol table - extrefsymoff: u32, + extrefsymoff: u32 = 0, /// number of referenced symbol table entries - nextrefsyms: u32, + nextrefsyms: u32 = 0, // The sections that contain "symbol pointers" and "routine stubs" have // indexes and (implied counts based on the size of the section and fixed @@ -315,10 +315,10 @@ pub const dysymtab_command = extern struct { // The indirect symbol table is ordered to match the entries in the section. /// file offset to the indirect symbol table - indirectsymoff: u32, + indirectsymoff: u32 = 0, /// number of indirect symbol table entries - nindirectsyms: u32, + nindirectsyms: u32 = 0, // To support relocating an individual module in a library file quickly the // external relocation entries for each module in the library need to be @@ -347,20 +347,20 @@ pub const dysymtab_command = extern struct { // remaining relocation entries must be local). 
/// offset to external relocation entries - extreloff: u32, + extreloff: u32 = 0, /// number of external relocation entries - nextrel: u32, + nextrel: u32 = 0, // All the local relocation entries are grouped together (they are not // grouped by their module since they are only used if the object is moved // from it staticly link edited address). /// offset to local relocation entries - locreloff: u32, + locreloff: u32 = 0, /// number of local relocation entries - nlocrel: u32, + nlocrel: u32 = 0, }; /// The linkedit_data_command contains the offsets and sizes of a blob @@ -370,13 +370,13 @@ pub const linkedit_data_command = extern struct { cmd: LC, /// sizeof(struct linkedit_data_command) - cmdsize: u32, + cmdsize: u32 = @sizeOf(linkedit_data_command), /// file offset of data in __LINKEDIT segment - dataoff: u32, + dataoff: u32 = 0, /// file size of data in __LINKEDIT segment - datasize: u32, + datasize: u32 = 0, }; /// The dyld_info_command contains the file offsets and sizes of @@ -387,10 +387,10 @@ pub const linkedit_data_command = extern struct { /// to interpret it. pub const dyld_info_command = extern struct { /// LC_DYLD_INFO or LC_DYLD_INFO_ONLY - cmd: LC, + cmd: LC = .DYLD_INFO_ONLY, /// sizeof(struct dyld_info_command) - cmdsize: u32, + cmdsize: u32 = @sizeOf(dyld_info_command), // Dyld rebases an image whenever dyld loads it at an address different // from its preferred address. The rebase information is a stream @@ -403,10 +403,10 @@ pub const dyld_info_command = extern struct { // bytes. /// file offset to rebase info - rebase_off: u32, + rebase_off: u32 = 0, /// size of rebase info - rebase_size: u32, + rebase_size: u32 = 0, // Dyld binds an image during the loading process, if the image // requires any pointers to be initialized to symbols in other images. @@ -420,10 +420,10 @@ pub const dyld_info_command = extern struct { // encoded in a few bytes. 
/// file offset to binding info - bind_off: u32, + bind_off: u32 = 0, /// size of binding info - bind_size: u32, + bind_size: u32 = 0, // Some C++ programs require dyld to unique symbols so that all // images in the process use the same copy of some code/data. @@ -440,10 +440,10 @@ pub const dyld_info_command = extern struct { // and the call to operator new is then rebound. /// file offset to weak binding info - weak_bind_off: u32, + weak_bind_off: u32 = 0, /// size of weak binding info - weak_bind_size: u32, + weak_bind_size: u32 = 0, // Some uses of external symbols do not need to be bound immediately. // Instead they can be lazily bound on first use. The lazy_bind @@ -457,10 +457,10 @@ pub const dyld_info_command = extern struct { // to bind. /// file offset to lazy binding info - lazy_bind_off: u32, + lazy_bind_off: u32 = 0, /// size of lazy binding info - lazy_bind_size: u32, + lazy_bind_size: u32 = 0, // The symbols exported by a dylib are encoded in a trie. This // is a compact representation that factors out common prefixes. @@ -494,10 +494,10 @@ pub const dyld_info_command = extern struct { // edge points to. /// file offset to lazy binding info - export_off: u32, + export_off: u32 = 0, /// size of lazy binding info - export_size: u32, + export_size: u32 = 0, }; /// A program that uses a dynamic linker contains a dylinker_command to identify diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 0422a983dc..b06552bc2a 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -99,10 +99,11 @@ page_size: u16, /// fashion (default for LLVM backend). 
mode: enum { incremental, one_shot }, -uuid: struct { - buf: [16]u8 = undefined, - final: bool = false, -} = .{}, +dyld_info_cmd: macho.dyld_info_command = .{}, +symtab_cmd: macho.symtab_command = .{}, +dysymtab_cmd: macho.dysymtab_command = .{}, +uuid_cmd: macho.uuid_command = .{}, +codesig_cmd: macho.linkedit_data_command = .{ .cmd = .CODE_SIGNATURE }, dylibs: std.ArrayListUnmanaged(Dylib) = .{}, dylibs_map: std.StringHashMapUnmanaged(u16) = .{}, @@ -554,12 +555,17 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No self.logAtoms(); } + try self.writeLinkeditSegmentData(); + + // Write load commands var lc_buffer = std.ArrayList(u8).init(arena); const lc_writer = lc_buffer.writer(); - var ncmds: u32 = 0; - try self.writeLinkeditSegmentData(&ncmds, lc_writer); - try load_commands.writeDylinkerLC(&ncmds, lc_writer); + try self.writeSegmentHeaders(lc_writer); + try lc_writer.writeStruct(self.dyld_info_cmd); + try lc_writer.writeStruct(self.symtab_cmd); + try lc_writer.writeStruct(self.dysymtab_cmd); + try load_commands.writeDylinkerLC(lc_writer); switch (self.base.options.output_mode) { .Exe => blk: { @@ -573,33 +579,29 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No else => |e| return e, }; const sym = self.getSymbol(global); - try load_commands.writeMainLC(@intCast(u32, sym.n_value - seg.vmaddr), &self.base.options, &ncmds, lc_writer); + try lc_writer.writeStruct(macho.entry_point_command{ + .entryoff = @intCast(u32, sym.n_value - seg.vmaddr), + .stacksize = self.base.options.stack_size_override orelse 0, + }); }, .Lib => if (self.base.options.link_mode == .Dynamic) { - try load_commands.writeDylibIdLC(self.base.allocator, &self.base.options, &ncmds, lc_writer); + try load_commands.writeDylibIdLC(self.base.allocator, &self.base.options, lc_writer); }, else => {}, } - try load_commands.writeRpathLCs(self.base.allocator, &self.base.options, &ncmds, lc_writer); + try 
load_commands.writeRpathLCs(self.base.allocator, &self.base.options, lc_writer); + try lc_writer.writeStruct(macho.source_version_command{ + .version = 0, + }); + try load_commands.writeBuildVersionLC(&self.base.options, lc_writer); - { - try lc_writer.writeStruct(macho.source_version_command{ - .cmdsize = @sizeOf(macho.source_version_command), - .version = 0x0, - }); - ncmds += 1; + if (self.cold_start) { + std.crypto.random.bytes(&self.uuid_cmd.uuid); } + try lc_writer.writeStruct(self.uuid_cmd); - try load_commands.writeBuildVersionLC(&self.base.options, &ncmds, lc_writer); - - if (!self.uuid.final) { - std.crypto.random.bytes(&self.uuid.buf); - self.uuid.final = true; - } - try load_commands.writeUuidLC(&self.uuid.buf, &ncmds, lc_writer); - - try load_commands.writeLoadDylibLCs(self.dylibs.items, self.referenced_dylibs.keys(), &ncmds, lc_writer); + try load_commands.writeLoadDylibLCs(self.dylibs.items, self.referenced_dylibs.keys(), lc_writer); const target = self.base.options.target; const requires_codesig = blk: { @@ -608,7 +610,6 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No break :blk true; break :blk false; }; - var codesig_offset: ?u32 = null; var codesig: ?CodeSignature = if (requires_codesig) blk: { // Preallocate space for the code signature. 
// We need to do this at this stage so that we have the load commands with proper values @@ -620,20 +621,18 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No if (self.base.options.entitlements) |path| { try codesig.addEntitlements(arena, path); } - codesig_offset = try self.writeCodeSignaturePadding(&codesig, &ncmds, lc_writer); + try self.writeCodeSignaturePadding(&codesig); + try lc_writer.writeStruct(self.codesig_cmd); break :blk codesig; } else null; - var headers_buf = std.ArrayList(u8).init(arena); - try self.writeSegmentHeaders(&ncmds, headers_buf.writer()); + try self.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64)); - try self.base.file.?.pwriteAll(headers_buf.items, @sizeOf(macho.mach_header_64)); - try self.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len); - - try self.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len + headers_buf.items.len)); + const ncmds = load_commands.calcNumOfLCs(lc_buffer.items); + try self.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len)); if (codesig) |*csig| { - try self.writeCodeSignature(comp, csig, codesig_offset.?); // code signing always comes last + try self.writeCodeSignature(comp, csig); // code signing always comes last } if (self.d_sym) |*d_sym| { @@ -3146,18 +3145,17 @@ pub fn getGlobalSymbol(self: *MachO, name: []const u8) !u32 { return global_index; } -fn writeSegmentHeaders(self: *MachO, ncmds: *u32, writer: anytype) !void { +fn writeSegmentHeaders(self: *MachO, writer: anytype) !void { for (self.segments.items) |seg, i| { const indexes = self.getSectionIndexes(@intCast(u8, i)); try writer.writeStruct(seg); for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { try writer.writeStruct(header); } - ncmds.* += 1; } } -fn writeLinkeditSegmentData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { +fn writeLinkeditSegmentData(self: *MachO) !void { const seg = self.getLinkeditSegmentPtr(); 
seg.filesize = 0; seg.vmsize = 0; @@ -3172,8 +3170,8 @@ fn writeLinkeditSegmentData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void } } - try self.writeDyldInfoData(ncmds, lc_writer); - try self.writeSymtabs(ncmds, lc_writer); + try self.writeDyldInfoData(); + try self.writeSymtabs(); seg.vmsize = mem.alignForwardGeneric(u64, seg.filesize, self.page_size); } @@ -3325,7 +3323,7 @@ fn collectExportData(self: *MachO, trie: *Trie) !void { try trie.finalize(gpa); } -fn writeDyldInfoData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { +fn writeDyldInfoData(self: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); @@ -3396,21 +3394,14 @@ fn writeDyldInfoData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { const end = start + (math.cast(usize, lazy_bind_size) orelse return error.Overflow); try self.populateLazyBindOffsetsInStubHelper(buffer[start..end]); - try lc_writer.writeStruct(macho.dyld_info_command{ - .cmd = .DYLD_INFO_ONLY, - .cmdsize = @sizeOf(macho.dyld_info_command), - .rebase_off = @intCast(u32, rebase_off), - .rebase_size = @intCast(u32, rebase_size), - .bind_off = @intCast(u32, bind_off), - .bind_size = @intCast(u32, bind_size), - .weak_bind_off = 0, - .weak_bind_size = 0, - .lazy_bind_off = @intCast(u32, lazy_bind_off), - .lazy_bind_size = @intCast(u32, lazy_bind_size), - .export_off = @intCast(u32, export_off), - .export_size = @intCast(u32, export_size), - }); - ncmds.* += 1; + self.dyld_info_cmd.rebase_off = @intCast(u32, rebase_off); + self.dyld_info_cmd.rebase_size = @intCast(u32, rebase_size); + self.dyld_info_cmd.bind_off = @intCast(u32, bind_off); + self.dyld_info_cmd.bind_size = @intCast(u32, bind_size); + self.dyld_info_cmd.lazy_bind_off = @intCast(u32, lazy_bind_off); + self.dyld_info_cmd.lazy_bind_size = @intCast(u32, lazy_bind_size); + self.dyld_info_cmd.export_off = @intCast(u32, export_off); + self.dyld_info_cmd.export_size = @intCast(u32, export_size); } fn populateLazyBindOffsetsInStubHelper(self: *MachO, 
buffer: []const u8) !void { @@ -3512,45 +3503,14 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { } } -fn writeSymtabs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { - var symtab_cmd = macho.symtab_command{ - .cmdsize = @sizeOf(macho.symtab_command), - .symoff = 0, - .nsyms = 0, - .stroff = 0, - .strsize = 0, - }; - var dysymtab_cmd = macho.dysymtab_command{ - .cmdsize = @sizeOf(macho.dysymtab_command), - .ilocalsym = 0, - .nlocalsym = 0, - .iextdefsym = 0, - .nextdefsym = 0, - .iundefsym = 0, - .nundefsym = 0, - .tocoff = 0, - .ntoc = 0, - .modtaboff = 0, - .nmodtab = 0, - .extrefsymoff = 0, - .nextrefsyms = 0, - .indirectsymoff = 0, - .nindirectsyms = 0, - .extreloff = 0, - .nextrel = 0, - .locreloff = 0, - .nlocrel = 0, - }; - var ctx = try self.writeSymtab(&symtab_cmd); +fn writeSymtabs(self: *MachO) !void { + var ctx = try self.writeSymtab(); defer ctx.imports_table.deinit(); - try self.writeDysymtab(ctx, &dysymtab_cmd); - try self.writeStrtab(&symtab_cmd); - try lc_writer.writeStruct(symtab_cmd); - try lc_writer.writeStruct(dysymtab_cmd); - ncmds.* += 2; + try self.writeDysymtab(ctx); + try self.writeStrtab(); } -fn writeSymtab(self: *MachO, lc: *macho.symtab_command) !SymtabCtx { +fn writeSymtab(self: *MachO) !SymtabCtx { const gpa = self.base.allocator; var locals = std.ArrayList(macho.nlist_64).init(gpa); @@ -3615,8 +3575,8 @@ fn writeSymtab(self: *MachO, lc: *macho.symtab_command) !SymtabCtx { log.debug("writing symtab from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); try self.base.file.?.pwriteAll(buffer.items, offset); - lc.symoff = @intCast(u32, offset); - lc.nsyms = nsyms; + self.symtab_cmd.symoff = @intCast(u32, offset); + self.symtab_cmd.nsyms = nsyms; return SymtabCtx{ .nlocalsym = nlocals, @@ -3626,7 +3586,7 @@ fn writeSymtab(self: *MachO, lc: *macho.symtab_command) !SymtabCtx { }; } -fn writeStrtab(self: *MachO, lc: *macho.symtab_command) !void { +fn writeStrtab(self: *MachO) !void { const seg = 
self.getLinkeditSegmentPtr(); const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64)); const needed_size = self.strtab.buffer.items.len; @@ -3636,8 +3596,8 @@ fn writeStrtab(self: *MachO, lc: *macho.symtab_command) !void { try self.base.file.?.pwriteAll(self.strtab.buffer.items, offset); - lc.stroff = @intCast(u32, offset); - lc.strsize = @intCast(u32, needed_size); + self.symtab_cmd.stroff = @intCast(u32, offset); + self.symtab_cmd.strsize = @intCast(u32, needed_size); } const SymtabCtx = struct { @@ -3647,7 +3607,7 @@ const SymtabCtx = struct { imports_table: std.AutoHashMap(SymbolWithLoc, u32), }; -fn writeDysymtab(self: *MachO, ctx: SymtabCtx, lc: *macho.dysymtab_command) !void { +fn writeDysymtab(self: *MachO, ctx: SymtabCtx) !void { const gpa = self.base.allocator; const nstubs = @intCast(u32, self.stubs_table.count()); const ngot_entries = @intCast(u32, self.got_entries_table.count()); @@ -3706,21 +3666,16 @@ fn writeDysymtab(self: *MachO, ctx: SymtabCtx, lc: *macho.dysymtab_command) !voi assert(buf.items.len == needed_size); try self.base.file.?.pwriteAll(buf.items, offset); - lc.nlocalsym = ctx.nlocalsym; - lc.iextdefsym = iextdefsym; - lc.nextdefsym = ctx.nextdefsym; - lc.iundefsym = iundefsym; - lc.nundefsym = ctx.nundefsym; - lc.indirectsymoff = @intCast(u32, offset); - lc.nindirectsyms = nindirectsyms; + self.dysymtab_cmd.nlocalsym = ctx.nlocalsym; + self.dysymtab_cmd.iextdefsym = iextdefsym; + self.dysymtab_cmd.nextdefsym = ctx.nextdefsym; + self.dysymtab_cmd.iundefsym = iundefsym; + self.dysymtab_cmd.nundefsym = ctx.nundefsym; + self.dysymtab_cmd.indirectsymoff = @intCast(u32, offset); + self.dysymtab_cmd.nindirectsyms = nindirectsyms; } -fn writeCodeSignaturePadding( - self: *MachO, - code_sig: *CodeSignature, - ncmds: *u32, - lc_writer: anytype, -) !u32 { +fn writeCodeSignaturePadding(self: *MachO, code_sig: *CodeSignature) !void { const seg = self.getLinkeditSegmentPtr(); // Code signature data has to be 16-bytes 
aligned for Apple tools to recognize the file // https://github.com/opensource-apple/cctools/blob/fdb4825f303fd5c0751be524babd32958181b3ed/libstuff/checkout.c#L271 @@ -3733,19 +3688,13 @@ fn writeCodeSignaturePadding( // except for code signature data. try self.base.file.?.pwriteAll(&[_]u8{0}, offset + needed_size - 1); - try lc_writer.writeStruct(macho.linkedit_data_command{ - .cmd = .CODE_SIGNATURE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = @intCast(u32, offset), - .datasize = @intCast(u32, needed_size), - }); - ncmds.* += 1; - - return @intCast(u32, offset); + self.codesig_cmd.dataoff = @intCast(u32, offset); + self.codesig_cmd.datasize = @intCast(u32, needed_size); } -fn writeCodeSignature(self: *MachO, comp: *const Compilation, code_sig: *CodeSignature, offset: u32) !void { +fn writeCodeSignature(self: *MachO, comp: *const Compilation, code_sig: *CodeSignature) !void { const seg = self.getSegment(self.text_section_index.?); + const offset = self.codesig_cmd.dataoff; var buffer = std.ArrayList(u8).init(self.base.allocator); defer buffer.deinit(); diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index 22905a520a..a13ad9c9f4 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -26,6 +26,8 @@ dwarf: Dwarf, file: fs.File, page_size: u16, +symtab_cmd: macho.symtab_command = .{}, + segments: std.ArrayListUnmanaged(macho.segment_command_64) = .{}, sections: std.ArrayListUnmanaged(macho.section_64) = .{}, @@ -296,28 +298,21 @@ pub fn flushModule(self: *DebugSymbols, macho_file: *MachO) !void { } } + self.finalizeDwarfSegment(macho_file); + try self.writeLinkeditSegmentData(macho_file); + + // Write load commands var lc_buffer = std.ArrayList(u8).init(self.allocator); defer lc_buffer.deinit(); const lc_writer = lc_buffer.writer(); - var ncmds: u32 = 0; - self.finalizeDwarfSegment(macho_file); - try self.writeLinkeditSegmentData(macho_file, &ncmds, lc_writer); + try 
self.writeSegmentHeaders(macho_file, lc_writer); + try lc_writer.writeStruct(self.symtab_cmd); + try lc_writer.writeStruct(macho_file.uuid_cmd); - try load_commands.writeUuidLC(&macho_file.uuid.buf, &ncmds, lc_writer); - - var headers_buf = std.ArrayList(u8).init(self.allocator); - defer headers_buf.deinit(); - try self.writeSegmentHeaders(macho_file, &ncmds, headers_buf.writer()); - - try self.file.pwriteAll(headers_buf.items, @sizeOf(macho.mach_header_64)); - try self.file.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len); - - try self.writeHeader( - macho_file, - ncmds, - @intCast(u32, lc_buffer.items.len + headers_buf.items.len), - ); + const ncmds = load_commands.calcNumOfLCs(lc_buffer.items); + try self.file.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64)); + try self.writeHeader(macho_file, ncmds, @intCast(u32, lc_buffer.items.len)); assert(!self.debug_abbrev_section_dirty); assert(!self.debug_aranges_section_dirty); @@ -384,7 +379,7 @@ fn finalizeDwarfSegment(self: *DebugSymbols, macho_file: *MachO) void { log.debug("found __LINKEDIT segment free space at 0x{x}", .{linkedit.fileoff}); } -fn writeSegmentHeaders(self: *DebugSymbols, macho_file: *MachO, ncmds: *u32, writer: anytype) !void { +fn writeSegmentHeaders(self: *DebugSymbols, macho_file: *MachO, writer: anytype) !void { // Write segment/section headers from the binary file first. const end = macho_file.linkedit_segment_cmd_index.?; for (macho_file.segments.items[0..end]) |seg, i| { @@ -414,8 +409,6 @@ fn writeSegmentHeaders(self: *DebugSymbols, macho_file: *MachO, ncmds: *u32, wri out_header.offset = 0; try writer.writeStruct(out_header); } - - ncmds.* += 1; } // Next, commit DSYM's __LINKEDIT and __DWARF segments headers. 
for (self.segments.items) |seg, i| { @@ -424,7 +417,6 @@ fn writeSegmentHeaders(self: *DebugSymbols, macho_file: *MachO, ncmds: *u32, wri for (self.sections.items[indexes.start..indexes.end]) |header| { try writer.writeStruct(header); } - ncmds.* += 1; } } @@ -463,33 +455,19 @@ fn allocatedSize(self: *DebugSymbols, start: u64) u64 { return min_pos - start; } -fn writeLinkeditSegmentData( - self: *DebugSymbols, - macho_file: *MachO, - ncmds: *u32, - lc_writer: anytype, -) !void { +fn writeLinkeditSegmentData(self: *DebugSymbols, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - var symtab_cmd = macho.symtab_command{ - .cmdsize = @sizeOf(macho.symtab_command), - .symoff = 0, - .nsyms = 0, - .stroff = 0, - .strsize = 0, - }; - try self.writeSymtab(macho_file, &symtab_cmd); - try self.writeStrtab(&symtab_cmd); - try lc_writer.writeStruct(symtab_cmd); - ncmds.* += 1; + try self.writeSymtab(macho_file); + try self.writeStrtab(); const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; const aligned_size = mem.alignForwardGeneric(u64, seg.filesize, self.page_size); seg.vmsize = aligned_size; } -fn writeSymtab(self: *DebugSymbols, macho_file: *MachO, lc: *macho.symtab_command) !void { +fn writeSymtab(self: *DebugSymbols, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); @@ -528,10 +506,10 @@ fn writeSymtab(self: *DebugSymbols, macho_file: *MachO, lc: *macho.symtab_comman const needed_size = nsyms * @sizeOf(macho.nlist_64); seg.filesize = offset + needed_size - seg.fileoff; - lc.symoff = @intCast(u32, offset); - lc.nsyms = @intCast(u32, nsyms); + self.symtab_cmd.symoff = @intCast(u32, offset); + self.symtab_cmd.nsyms = @intCast(u32, nsyms); - const locals_off = lc.symoff; + const locals_off = @intCast(u32, offset); const locals_size = nlocals * @sizeOf(macho.nlist_64); const exports_off = locals_off + locals_size; const exports_size = nexports * @sizeOf(macho.nlist_64); @@ -543,26 +521,26 @@ fn 
writeSymtab(self: *DebugSymbols, macho_file: *MachO, lc: *macho.symtab_comman try self.file.pwriteAll(mem.sliceAsBytes(exports.items), exports_off); } -fn writeStrtab(self: *DebugSymbols, lc: *macho.symtab_command) !void { +fn writeStrtab(self: *DebugSymbols) !void { const tracy = trace(@src()); defer tracy.end(); const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; - const symtab_size = @intCast(u32, lc.nsyms * @sizeOf(macho.nlist_64)); - const offset = mem.alignForwardGeneric(u64, lc.symoff + symtab_size, @alignOf(u64)); + const symtab_size = @intCast(u32, self.symtab_cmd.nsyms * @sizeOf(macho.nlist_64)); + const offset = mem.alignForwardGeneric(u64, self.symtab_cmd.symoff + symtab_size, @alignOf(u64)); const needed_size = mem.alignForwardGeneric(u64, self.strtab.buffer.items.len, @alignOf(u64)); seg.filesize = offset + needed_size - seg.fileoff; - lc.stroff = @intCast(u32, offset); - lc.strsize = @intCast(u32, needed_size); + self.symtab_cmd.stroff = @intCast(u32, offset); + self.symtab_cmd.strsize = @intCast(u32, needed_size); - log.debug("writing string table from 0x{x} to 0x{x}", .{ lc.stroff, lc.stroff + lc.strsize }); + log.debug("writing string table from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); - try self.file.pwriteAll(self.strtab.buffer.items, lc.stroff); + try self.file.pwriteAll(self.strtab.buffer.items, offset); if (self.strtab.buffer.items.len < needed_size) { // Ensure we are always padded to the actual length of the file. 
- try self.file.pwriteAll(&[_]u8{0}, lc.stroff + lc.strsize); + try self.file.pwriteAll(&[_]u8{0}, offset + needed_size); } } diff --git a/src/link/MachO/load_commands.zig b/src/link/MachO/load_commands.zig index c4e565928b..73c28965ff 100644 --- a/src/link/MachO/load_commands.zig +++ b/src/link/MachO/load_commands.zig @@ -131,7 +131,19 @@ pub fn calcMinHeaderPad(gpa: Allocator, options: *const link.Options, ctx: CalcL return offset; } -pub fn writeDylinkerLC(ncmds: *u32, lc_writer: anytype) !void { +pub fn calcNumOfLCs(lc_buffer: []const u8) u32 { + var ncmds: u32 = 0; + var pos: usize = 0; + while (true) { + if (pos >= lc_buffer.len) break; + const cmd = @ptrCast(*align(1) const macho.load_command, lc_buffer.ptr + pos).*; + ncmds += 1; + pos += cmd.cmdsize; + } + return ncmds; +} + +pub fn writeDylinkerLC(lc_writer: anytype) !void { const name_len = mem.sliceTo(default_dyld_path, 0).len; const cmdsize = @intCast(u32, mem.alignForwardGeneric( u64, @@ -148,7 +160,6 @@ pub fn writeDylinkerLC(ncmds: *u32, lc_writer: anytype) !void { if (padding > 0) { try lc_writer.writeByteNTimes(0, padding); } - ncmds.* += 1; } const WriteDylibLCCtx = struct { @@ -159,7 +170,7 @@ const WriteDylibLCCtx = struct { compatibility_version: u32 = 0x10000, }; -fn writeDylibLC(ctx: WriteDylibLCCtx, ncmds: *u32, lc_writer: anytype) !void { +fn writeDylibLC(ctx: WriteDylibLCCtx, lc_writer: anytype) !void { const name_len = ctx.name.len + 1; const cmdsize = @intCast(u32, mem.alignForwardGeneric( u64, @@ -182,10 +193,9 @@ fn writeDylibLC(ctx: WriteDylibLCCtx, ncmds: *u32, lc_writer: anytype) !void { if (padding > 0) { try lc_writer.writeByteNTimes(0, padding); } - ncmds.* += 1; } -pub fn writeDylibIdLC(gpa: Allocator, options: *const link.Options, ncmds: *u32, lc_writer: anytype) !void { +pub fn writeDylibIdLC(gpa: Allocator, options: *const link.Options, lc_writer: anytype) !void { assert(options.output_mode == .Lib and options.link_mode == .Dynamic); const emit = options.emit.?; const 
install_name = options.install_name orelse try emit.directory.join(gpa, &.{emit.sub_path}); @@ -205,18 +215,7 @@ pub fn writeDylibIdLC(gpa: Allocator, options: *const link.Options, ncmds: *u32, .name = install_name, .current_version = curr.major << 16 | curr.minor << 8 | curr.patch, .compatibility_version = compat.major << 16 | compat.minor << 8 | compat.patch, - }, ncmds, lc_writer); -} - -pub fn writeMainLC(entryoff: u32, options: *const link.Options, ncmds: *u32, lc_writer: anytype) !void { - assert(options.output_mode == .Exe); - try lc_writer.writeStruct(macho.entry_point_command{ - .cmd = .MAIN, - .cmdsize = @sizeOf(macho.entry_point_command), - .entryoff = entryoff, - .stacksize = options.stack_size_override orelse 0, - }); - ncmds.* += 1; + }, lc_writer); } const RpathIterator = struct { @@ -244,7 +243,7 @@ const RpathIterator = struct { } }; -pub fn writeRpathLCs(gpa: Allocator, options: *const link.Options, ncmds: *u32, lc_writer: anytype) !void { +pub fn writeRpathLCs(gpa: Allocator, options: *const link.Options, lc_writer: anytype) !void { var it = RpathIterator.init(gpa, options.rpath_list); defer it.deinit(); @@ -265,11 +264,10 @@ pub fn writeRpathLCs(gpa: Allocator, options: *const link.Options, ncmds: *u32, if (padding > 0) { try lc_writer.writeByteNTimes(0, padding); } - ncmds.* += 1; } } -pub fn writeBuildVersionLC(options: *const link.Options, ncmds: *u32, lc_writer: anytype) !void { +pub fn writeBuildVersionLC(options: *const link.Options, lc_writer: anytype) !void { const cmdsize = @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); const platform_version = blk: { const ver = options.target.os.version_range.semver.min; @@ -299,10 +297,9 @@ pub fn writeBuildVersionLC(options: *const link.Options, ncmds: *u32, lc_writer: .tool = .LD, .version = 0x0, })); - ncmds.* += 1; } -pub fn writeLoadDylibLCs(dylibs: []const Dylib, referenced: []u16, ncmds: *u32, lc_writer: anytype) !void { +pub fn writeLoadDylibLCs(dylibs: []const 
Dylib, referenced: []u16, lc_writer: anytype) !void { for (referenced) |index| { const dylib = dylibs[index]; const dylib_id = dylib.id orelse unreachable; @@ -312,23 +309,6 @@ pub fn writeLoadDylibLCs(dylibs: []const Dylib, referenced: []u16, ncmds: *u32, .timestamp = dylib_id.timestamp, .current_version = dylib_id.current_version, .compatibility_version = dylib_id.compatibility_version, - }, ncmds, lc_writer); + }, lc_writer); } } - -pub fn writeSourceVersionLC(ncmds: *u32, lc_writer: anytype) !void { - try lc_writer.writeStruct(macho.source_version_command{ - .cmdsize = @sizeOf(macho.source_version_command), - .version = 0x0, - }); - ncmds.* += 1; -} - -pub fn writeUuidLC(uuid: *const [16]u8, ncmds: *u32, lc_writer: anytype) !void { - var uuid_lc = macho.uuid_command{ - .cmdsize = @sizeOf(macho.uuid_command), - .uuid = uuid.*, - }; - try lc_writer.writeStruct(uuid_lc); - ncmds.* += 1; -} diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 60d0f853ab..5ca1afd98c 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -38,6 +38,14 @@ pub const Zld = struct { page_size: u16, options: *const link.Options, + dyld_info_cmd: macho.dyld_info_command = .{}, + symtab_cmd: macho.symtab_command = .{}, + dysymtab_cmd: macho.dysymtab_command = .{}, + function_starts_cmd: macho.linkedit_data_command = .{ .cmd = .FUNCTION_STARTS }, + data_in_code_cmd: macho.linkedit_data_command = .{ .cmd = .DATA_IN_CODE }, + uuid_cmd: macho.uuid_command = .{}, + codesig_cmd: macho.linkedit_data_command = .{ .cmd = .CODE_SIGNATURE }, + objects: std.ArrayListUnmanaged(Object) = .{}, archives: std.ArrayListUnmanaged(Archive) = .{}, dylibs: std.ArrayListUnmanaged(Dylib) = .{}, @@ -1728,7 +1736,7 @@ pub const Zld = struct { return (@intCast(u8, segment_precedence) << 4) + section_precedence; } - fn writeSegmentHeaders(self: *Zld, ncmds: *u32, writer: anytype) !void { + fn writeSegmentHeaders(self: *Zld, writer: anytype) !void { for (self.segments.items) |seg, i| { const 
indexes = self.getSectionIndexes(@intCast(u8, i)); var out_seg = seg; @@ -1752,16 +1760,14 @@ pub const Zld = struct { if (header.size == 0) continue; try writer.writeStruct(header); } - - ncmds.* += 1; } } - fn writeLinkeditSegmentData(self: *Zld, ncmds: *u32, lc_writer: anytype, reverse_lookups: [][]u32) !void { - try self.writeDyldInfoData(ncmds, lc_writer, reverse_lookups); - try self.writeFunctionStarts(ncmds, lc_writer); - try self.writeDataInCode(ncmds, lc_writer); - try self.writeSymtabs(ncmds, lc_writer); + fn writeLinkeditSegmentData(self: *Zld, reverse_lookups: [][]u32) !void { + try self.writeDyldInfoData(reverse_lookups); + try self.writeFunctionStarts(); + try self.writeDataInCode(); + try self.writeSymtabs(); const seg = self.getLinkeditSegmentPtr(); seg.vmsize = mem.alignForwardGeneric(u64, seg.filesize, self.page_size); @@ -2150,7 +2156,7 @@ pub const Zld = struct { try trie.finalize(gpa); } - fn writeDyldInfoData(self: *Zld, ncmds: *u32, lc_writer: anytype, reverse_lookups: [][]u32) !void { + fn writeDyldInfoData(self: *Zld, reverse_lookups: [][]u32) !void { const gpa = self.gpa; var rebase_pointers = std.ArrayList(bind.Pointer).init(gpa); @@ -2219,21 +2225,14 @@ pub const Zld = struct { const size = math.cast(usize, lazy_bind_size) orelse return error.Overflow; try self.populateLazyBindOffsetsInStubHelper(buffer[offset..][0..size]); - try lc_writer.writeStruct(macho.dyld_info_command{ - .cmd = .DYLD_INFO_ONLY, - .cmdsize = @sizeOf(macho.dyld_info_command), - .rebase_off = @intCast(u32, rebase_off), - .rebase_size = @intCast(u32, rebase_size), - .bind_off = @intCast(u32, bind_off), - .bind_size = @intCast(u32, bind_size), - .weak_bind_off = 0, - .weak_bind_size = 0, - .lazy_bind_off = @intCast(u32, lazy_bind_off), - .lazy_bind_size = @intCast(u32, lazy_bind_size), - .export_off = @intCast(u32, export_off), - .export_size = @intCast(u32, export_size), - }); - ncmds.* += 1; + self.dyld_info_cmd.rebase_off = @intCast(u32, rebase_off); + 
self.dyld_info_cmd.rebase_size = @intCast(u32, rebase_size); + self.dyld_info_cmd.bind_off = @intCast(u32, bind_off); + self.dyld_info_cmd.bind_size = @intCast(u32, bind_size); + self.dyld_info_cmd.lazy_bind_off = @intCast(u32, lazy_bind_off); + self.dyld_info_cmd.lazy_bind_size = @intCast(u32, lazy_bind_size); + self.dyld_info_cmd.export_off = @intCast(u32, export_off); + self.dyld_info_cmd.export_size = @intCast(u32, export_size); } fn populateLazyBindOffsetsInStubHelper(self: *Zld, buffer: []const u8) !void { @@ -2351,7 +2350,7 @@ pub const Zld = struct { const asc_u64 = std.sort.asc(u64); - fn writeFunctionStarts(self: *Zld, ncmds: *u32, lc_writer: anytype) !void { + fn writeFunctionStarts(self: *Zld) !void { const text_seg_index = self.getSegmentByName("__TEXT") orelse return; const text_sect_index = self.getSectionByName("__TEXT", "__text") orelse return; const text_seg = self.segments.items[text_seg_index]; @@ -2410,13 +2409,8 @@ pub const Zld = struct { try self.file.pwriteAll(buffer.items, offset); - try lc_writer.writeStruct(macho.linkedit_data_command{ - .cmd = .FUNCTION_STARTS, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = @intCast(u32, offset), - .datasize = @intCast(u32, needed_size), - }); - ncmds.* += 1; + self.function_starts_cmd.dataoff = @intCast(u32, offset); + self.function_starts_cmd.datasize = @intCast(u32, needed_size); } fn filterDataInCode( @@ -2438,7 +2432,7 @@ pub const Zld = struct { return dices[start..end]; } - fn writeDataInCode(self: *Zld, ncmds: *u32, lc_writer: anytype) !void { + fn writeDataInCode(self: *Zld) !void { var out_dice = std.ArrayList(macho.data_in_code_entry).init(self.gpa); defer out_dice.deinit(); @@ -2488,54 +2482,19 @@ pub const Zld = struct { log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); try self.file.pwriteAll(mem.sliceAsBytes(out_dice.items), offset); - try lc_writer.writeStruct(macho.linkedit_data_command{ - .cmd = .DATA_IN_CODE, - .cmdsize = 
@sizeOf(macho.linkedit_data_command), - .dataoff = @intCast(u32, offset), - .datasize = @intCast(u32, needed_size), - }); - ncmds.* += 1; + + self.data_in_code_cmd.dataoff = @intCast(u32, offset); + self.data_in_code_cmd.datasize = @intCast(u32, needed_size); } - fn writeSymtabs(self: *Zld, ncmds: *u32, lc_writer: anytype) !void { - var symtab_cmd = macho.symtab_command{ - .cmdsize = @sizeOf(macho.symtab_command), - .symoff = 0, - .nsyms = 0, - .stroff = 0, - .strsize = 0, - }; - var dysymtab_cmd = macho.dysymtab_command{ - .cmdsize = @sizeOf(macho.dysymtab_command), - .ilocalsym = 0, - .nlocalsym = 0, - .iextdefsym = 0, - .nextdefsym = 0, - .iundefsym = 0, - .nundefsym = 0, - .tocoff = 0, - .ntoc = 0, - .modtaboff = 0, - .nmodtab = 0, - .extrefsymoff = 0, - .nextrefsyms = 0, - .indirectsymoff = 0, - .nindirectsyms = 0, - .extreloff = 0, - .nextrel = 0, - .locreloff = 0, - .nlocrel = 0, - }; - var ctx = try self.writeSymtab(&symtab_cmd); + fn writeSymtabs(self: *Zld) !void { + var ctx = try self.writeSymtab(); defer ctx.imports_table.deinit(); - try self.writeDysymtab(ctx, &dysymtab_cmd); - try self.writeStrtab(&symtab_cmd); - try lc_writer.writeStruct(symtab_cmd); - try lc_writer.writeStruct(dysymtab_cmd); - ncmds.* += 2; + try self.writeDysymtab(ctx); + try self.writeStrtab(); } - fn writeSymtab(self: *Zld, lc: *macho.symtab_command) !SymtabCtx { + fn writeSymtab(self: *Zld) !SymtabCtx { const gpa = self.gpa; var locals = std.ArrayList(macho.nlist_64).init(gpa); @@ -2618,8 +2577,8 @@ pub const Zld = struct { log.debug("writing symtab from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); try self.file.pwriteAll(buffer.items, offset); - lc.symoff = @intCast(u32, offset); - lc.nsyms = nsyms; + self.symtab_cmd.symoff = @intCast(u32, offset); + self.symtab_cmd.nsyms = nsyms; return SymtabCtx{ .nlocalsym = nlocals, @@ -2629,7 +2588,7 @@ pub const Zld = struct { }; } - fn writeStrtab(self: *Zld, lc: *macho.symtab_command) !void { + fn writeStrtab(self: *Zld) !void { 
const seg = self.getLinkeditSegmentPtr(); const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64)); const needed_size = self.strtab.buffer.items.len; @@ -2639,8 +2598,8 @@ pub const Zld = struct { try self.file.pwriteAll(self.strtab.buffer.items, offset); - lc.stroff = @intCast(u32, offset); - lc.strsize = @intCast(u32, needed_size); + self.symtab_cmd.stroff = @intCast(u32, offset); + self.symtab_cmd.strsize = @intCast(u32, needed_size); } const SymtabCtx = struct { @@ -2650,7 +2609,7 @@ pub const Zld = struct { imports_table: std.AutoHashMap(SymbolWithLoc, u32), }; - fn writeDysymtab(self: *Zld, ctx: SymtabCtx, lc: *macho.dysymtab_command) !void { + fn writeDysymtab(self: *Zld, ctx: SymtabCtx) !void { const gpa = self.gpa; const nstubs = @intCast(u32, self.stubs.items.len); const ngot_entries = @intCast(u32, self.got_entries.items.len); @@ -2706,21 +2665,33 @@ pub const Zld = struct { assert(buf.items.len == needed_size); try self.file.pwriteAll(buf.items, offset); - lc.nlocalsym = ctx.nlocalsym; - lc.iextdefsym = iextdefsym; - lc.nextdefsym = ctx.nextdefsym; - lc.iundefsym = iundefsym; - lc.nundefsym = ctx.nundefsym; - lc.indirectsymoff = @intCast(u32, offset); - lc.nindirectsyms = nindirectsyms; + self.dysymtab_cmd.nlocalsym = ctx.nlocalsym; + self.dysymtab_cmd.iextdefsym = iextdefsym; + self.dysymtab_cmd.nextdefsym = ctx.nextdefsym; + self.dysymtab_cmd.iundefsym = iundefsym; + self.dysymtab_cmd.nundefsym = ctx.nundefsym; + self.dysymtab_cmd.indirectsymoff = @intCast(u32, offset); + self.dysymtab_cmd.nindirectsyms = nindirectsyms; } - fn writeCodeSignaturePadding( - self: *Zld, - code_sig: *CodeSignature, - ncmds: *u32, - lc_writer: anytype, - ) !u32 { + fn writeUuid(self: *Zld, comp: *const Compilation, offset: u32) !void { + switch (self.options.optimize_mode) { + .Debug => { + // In Debug we don't really care about reproducibility, so put in a random value + // and be done with it. 
+ std.crypto.random.bytes(&self.uuid_cmd.uuid); + }, + else => { + const seg = self.getLinkeditSegmentPtr(); + const file_size = seg.fileoff + seg.filesize; + try uuid.calcUuidParallel(comp, self.file, file_size, &self.uuid_cmd.uuid); + }, + } + const in_file = @sizeOf(macho.mach_header_64) + offset + @sizeOf(macho.load_command); + try self.file.pwriteAll(&self.uuid_cmd.uuid, in_file); + } + + fn writeCodeSignaturePadding(self: *Zld, code_sig: *CodeSignature) !void { const seg = self.getLinkeditSegmentPtr(); // Code signature data has to be 16-bytes aligned for Apple tools to recognize the file // https://github.com/opensource-apple/cctools/blob/fdb4825f303fd5c0751be524babd32958181b3ed/libstuff/checkout.c#L271 @@ -2733,23 +2704,11 @@ pub const Zld = struct { // except for code signature data. try self.file.pwriteAll(&[_]u8{0}, offset + needed_size - 1); - try lc_writer.writeStruct(macho.linkedit_data_command{ - .cmd = .CODE_SIGNATURE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = @intCast(u32, offset), - .datasize = @intCast(u32, needed_size), - }); - ncmds.* += 1; - - return @intCast(u32, offset); + self.codesig_cmd.dataoff = @intCast(u32, offset); + self.codesig_cmd.datasize = @intCast(u32, needed_size); } - fn writeCodeSignature( - self: *Zld, - comp: *const Compilation, - code_sig: *CodeSignature, - offset: u32, - ) !void { + fn writeCodeSignature(self: *Zld, comp: *const Compilation, code_sig: *CodeSignature) !void { const seg_id = self.getSegmentByName("__TEXT").?; const seg = self.segments.items[seg_id]; @@ -2760,17 +2719,17 @@ pub const Zld = struct { .file = self.file, .exec_seg_base = seg.fileoff, .exec_seg_limit = seg.filesize, - .file_size = offset, + .file_size = self.codesig_cmd.dataoff, .output_mode = self.options.output_mode, }, buffer.writer()); assert(buffer.items.len == code_sig.size()); log.debug("writing code signature from 0x{x} to 0x{x}", .{ - offset, - offset + buffer.items.len, + self.codesig_cmd.dataoff, + 
self.codesig_cmd.dataoff + buffer.items.len, }); - try self.file.pwriteAll(buffer.items, offset); + try self.file.pwriteAll(buffer.items, self.codesig_cmd.dataoff); } /// Writes Mach-O file header. @@ -3986,13 +3945,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr } try zld.writeAtoms(reverse_lookups); - - var lc_buffer = std.ArrayList(u8).init(arena); - const lc_writer = lc_buffer.writer(); - - var ncmds: u32 = 0; - - try zld.writeLinkeditSegmentData(&ncmds, lc_writer, reverse_lookups); + try zld.writeLinkeditSegmentData(reverse_lookups); // If the last section of __DATA segment is zerofill section, we need to ensure // that the free space between the end of the last non-zerofill section of __DATA @@ -4017,47 +3970,48 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr } } - try load_commands.writeDylinkerLC(&ncmds, lc_writer); + // Write load commands + var lc_buffer = std.ArrayList(u8).init(arena); + const lc_writer = lc_buffer.writer(); + + try zld.writeSegmentHeaders(lc_writer); + try lc_writer.writeStruct(zld.dyld_info_cmd); + try lc_writer.writeStruct(zld.function_starts_cmd); + try lc_writer.writeStruct(zld.data_in_code_cmd); + try lc_writer.writeStruct(zld.symtab_cmd); + try lc_writer.writeStruct(zld.dysymtab_cmd); + try load_commands.writeDylinkerLC(lc_writer); if (zld.options.output_mode == .Exe) { const seg_id = zld.getSegmentByName("__TEXT").?; const seg = zld.segments.items[seg_id]; const global = zld.getEntryPoint(); const sym = zld.getSymbol(global); - try load_commands.writeMainLC(@intCast(u32, sym.n_value - seg.vmaddr), options, &ncmds, lc_writer); + try lc_writer.writeStruct(macho.entry_point_command{ + .entryoff = @intCast(u32, sym.n_value - seg.vmaddr), + .stacksize = options.stack_size_override orelse 0, + }); } else { assert(zld.options.output_mode == .Lib); - try load_commands.writeDylibIdLC(zld.gpa, zld.options, &ncmds, lc_writer); + try 
load_commands.writeDylibIdLC(zld.gpa, zld.options, lc_writer); } - try load_commands.writeRpathLCs(zld.gpa, zld.options, &ncmds, lc_writer); - try load_commands.writeSourceVersionLC(&ncmds, lc_writer); - try load_commands.writeBuildVersionLC(zld.options, &ncmds, lc_writer); + try load_commands.writeRpathLCs(zld.gpa, zld.options, lc_writer); + try lc_writer.writeStruct(macho.source_version_command{ + .version = 0, + }); + try load_commands.writeBuildVersionLC(zld.options, lc_writer); - // Looking forward into the future, we will want to offer `-no_uuid` support in which case - // there will be nothing to backpatch. - const uuid_offset_backpatch: ?usize = blk: { - const index = lc_buffer.items.len; - var uuid_buf: [16]u8 = [_]u8{0} ** 16; + const uuid_offset = @intCast(u32, lc_buffer.items.len); + try lc_writer.writeStruct(zld.uuid_cmd); - if (zld.options.optimize_mode == .Debug) { - // In Debug we don't really care about reproducibility, so put in a random value - // and be done with it. - std.crypto.random.bytes(&uuid_buf); - } - - try load_commands.writeUuidLC(&uuid_buf, &ncmds, lc_writer); - break :blk if (zld.options.optimize_mode == .Debug) null else index; - }; - - try load_commands.writeLoadDylibLCs(zld.dylibs.items, zld.referenced_dylibs.keys(), &ncmds, lc_writer); + try load_commands.writeLoadDylibLCs(zld.dylibs.items, zld.referenced_dylibs.keys(), lc_writer); const requires_codesig = blk: { if (options.entitlements) |_| break :blk true; if (cpu_arch == .aarch64 and (os_tag == .macos or abi == .simulator)) break :blk true; break :blk false; }; - var codesig_offset: ?u32 = null; var codesig: ?CodeSignature = if (requires_codesig) blk: { // Preallocate space for the code signature. 
// We need to do this at this stage so that we have the load commands with proper values @@ -4069,29 +4023,20 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr if (options.entitlements) |path| { try codesig.addEntitlements(gpa, path); } - codesig_offset = try zld.writeCodeSignaturePadding(&codesig, &ncmds, lc_writer); + try zld.writeCodeSignaturePadding(&codesig); + try lc_writer.writeStruct(zld.codesig_cmd); break :blk codesig; } else null; defer if (codesig) |*csig| csig.deinit(gpa); - var headers_buf = std.ArrayList(u8).init(arena); - try zld.writeSegmentHeaders(&ncmds, headers_buf.writer()); + const ncmds = load_commands.calcNumOfLCs(lc_buffer.items); + try zld.file.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64)); + try zld.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len)); - try zld.file.pwriteAll(headers_buf.items, @sizeOf(macho.mach_header_64)); - try zld.file.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len); - try zld.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len + headers_buf.items.len)); - - if (uuid_offset_backpatch) |backpatch| { - const seg = zld.getLinkeditSegmentPtr(); - const file_size = seg.fileoff + seg.filesize; - var uuid_buf: [16]u8 = undefined; - try uuid.calcUuidParallel(comp, zld.file, file_size, &uuid_buf); - const offset = @sizeOf(macho.mach_header_64) + headers_buf.items.len + backpatch + @sizeOf(macho.load_command); - try zld.file.pwriteAll(&uuid_buf, offset); - } + try zld.writeUuid(comp, uuid_offset); if (codesig) |*csig| { - try zld.writeCodeSignature(comp, csig, codesig_offset.?); // code signing always comes last + try zld.writeCodeSignature(comp, csig); // code signing always comes last } } From 660270b7a9c492dbd7c0b76a823bcba5a13da71c Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 15 Dec 2022 15:10:35 +0100 Subject: [PATCH 07/17] macho: calculate UUID excluding stabs and part of contributing strtab --- CMakeLists.txt | 1 - 
src/link/MachO.zig | 8 ++++ src/link/MachO/hasher.zig | 10 ++++- src/link/MachO/uuid.zig | 69 ------------------------------ src/link/MachO/zld.zig | 88 +++++++++++++++++++++++++++++++++++++-- 5 files changed, 102 insertions(+), 74 deletions(-) delete mode 100644 src/link/MachO/uuid.zig diff --git a/CMakeLists.txt b/CMakeLists.txt index ae8c0e05d1..91f68376bc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -594,7 +594,6 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/link/MachO/hasher.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/load_commands.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/thunks.zig" - "${CMAKE_SOURCE_DIR}/src/link/MachO/uuid.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/zld.zig" "${CMAKE_SOURCE_DIR}/src/link/Plan9.zig" "${CMAKE_SOURCE_DIR}/src/link/Plan9/aout.zig" diff --git a/src/link/MachO.zig b/src/link/MachO.zig index b06552bc2a..72a24b0ac6 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -39,6 +39,7 @@ const Object = @import("MachO/Object.zig"); const LibStub = @import("tapi.zig").LibStub; const Liveness = @import("../Liveness.zig"); const LlvmObject = @import("../codegen/llvm.zig").Object; +const Md5 = std.crypto.hash.Md5; const Module = @import("../Module.zig"); const Relocation = @import("MachO/Relocation.zig"); const StringTable = @import("strtab.zig").StringTable; @@ -598,6 +599,8 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No if (self.cold_start) { std.crypto.random.bytes(&self.uuid_cmd.uuid); + Md5.hash(&self.uuid_cmd.uuid, &self.uuid_cmd.uuid, .{}); + conformUuid(&self.uuid_cmd.uuid); } try lc_writer.writeStruct(self.uuid_cmd); @@ -662,6 +665,11 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No self.cold_start = false; } +inline fn conformUuid(out: *[Md5.digest_length]u8) void { + // LC_UUID uuids should conform to RFC 4122 UUID version 4 & UUID version 5 formats + out[6] = (out[6] & 0x0F) | (3 << 4); + out[8] = (out[8] & 0x3F) | 0x80; +} pub fn 
resolveLibSystem( arena: Allocator, diff --git a/src/link/MachO/hasher.zig b/src/link/MachO/hasher.zig index 29099ad2d9..bb0531286c 100644 --- a/src/link/MachO/hasher.zig +++ b/src/link/MachO/hasher.zig @@ -13,6 +13,7 @@ pub fn ParallelHasher(comptime Hasher: type) type { return struct { pub fn hash(self: @This(), gpa: Allocator, pool: *ThreadPool, file: fs.File, out: [][hash_size]u8, opts: struct { chunk_size: u16 = 0x4000, + file_pos: u64 = 0, max_file_size: ?u64 = null, }) !void { _ = self; @@ -38,7 +39,14 @@ pub fn ParallelHasher(comptime Hasher: type) type { const fstart = i * opts.chunk_size; const fsize = if (fstart + opts.chunk_size > file_size) file_size - fstart else opts.chunk_size; wg.start(); - try pool.spawn(worker, .{ file, fstart, buffer[fstart..][0..fsize], &out[i], &results[i], &wg }); + try pool.spawn(worker, .{ + file, + fstart + opts.file_pos, + buffer[fstart..][0..fsize], + &out[i], + &results[i], + &wg, + }); } } for (results) |result| _ = try result; diff --git a/src/link/MachO/uuid.zig b/src/link/MachO/uuid.zig deleted file mode 100644 index 987b156a4b..0000000000 --- a/src/link/MachO/uuid.zig +++ /dev/null @@ -1,69 +0,0 @@ -const std = @import("std"); -const fs = std.fs; -const mem = std.mem; - -const Allocator = mem.Allocator; -const Compilation = @import("../../Compilation.zig"); -const Md5 = std.crypto.hash.Md5; -const Hasher = @import("hasher.zig").ParallelHasher; - -/// Somewhat random chunk size for MD5 hash calculation. -pub const chunk_size = 0x4000; - -/// Calculates Md5 hash of the file contents. -/// Hash is calculated in a streaming manner which may be slow. 
-pub fn calcUuidStreaming(file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void { - const total_num_chunks = mem.alignForward(file_size, chunk_size) / chunk_size; - - var hasher = Md5.init(.{}); - var buffer: [chunk_size]u8 = undefined; - - var i: usize = 0; - while (i < total_num_chunks) : (i += 1) { - const start = i * chunk_size; - const size = if (start + chunk_size > file_size) - file_size - start - else - chunk_size; - const amt = try file.preadAll(&buffer, start); - if (amt != size) return error.InputOutput; - - hasher.update(buffer[0..size]); - } - - hasher.final(out); - conform(out); -} - -/// Calculates Md5 hash of each chunk in parallel and then hashes all Md5 hashes to produce -/// the final digest. -/// While this is NOT a correct MD5 hash of the contents, this methodology is used by LLVM/LLD -/// and we will use it too as it seems accepted by Apple OSes. -pub fn calcUuidParallel(comp: *const Compilation, file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void { - const total_hashes = mem.alignForward(file_size, chunk_size) / chunk_size; - - const hashes = try comp.gpa.alloc([Md5.digest_length]u8, total_hashes); - defer comp.gpa.free(hashes); - - var hasher = Hasher(Md5){}; - try hasher.hash(comp.gpa, comp.thread_pool, file, hashes, .{ - .chunk_size = chunk_size, - .max_file_size = file_size, - }); - - const final_buffer = try comp.gpa.alloc(u8, total_hashes * Md5.digest_length); - defer comp.gpa.free(final_buffer); - - for (hashes) |hash, i| { - mem.copy(u8, final_buffer[i * Md5.digest_length ..][0..Md5.digest_length], &hash); - } - - Md5.hash(final_buffer, out, .{}); - conform(out); -} - -inline fn conform(out: *[Md5.digest_length]u8) void { - // LC_UUID uuids should conform to RFC 4122 UUID version 4 & UUID version 5 formats - out[6] = (out[6] & 0x0F) | (3 << 4); - out[8] = (out[8] & 0x3F) | 0x80; -} diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 5ca1afd98c..e8646f5dd9 100644 --- a/src/link/MachO/zld.zig +++ 
b/src/link/MachO/zld.zig @@ -16,7 +16,6 @@ const link = @import("../../link.zig"); const load_commands = @import("load_commands.zig"); const thunks = @import("thunks.zig"); const trace = @import("../../tracy.zig").trace; -const uuid = @import("uuid.zig"); const Allocator = mem.Allocator; const Archive = @import("Archive.zig"); @@ -26,7 +25,9 @@ const CodeSignature = @import("CodeSignature.zig"); const Compilation = @import("../../Compilation.zig"); const DwarfInfo = @import("DwarfInfo.zig"); const Dylib = @import("Dylib.zig"); +const Hasher = @import("hasher.zig").ParallelHasher; const MachO = @import("../MachO.zig"); +const Md5 = std.crypto.hash.Md5; const LibStub = @import("../tapi.zig").LibStub; const Object = @import("Object.zig"); const StringTable = @import("../strtab.zig").StringTable; @@ -2680,17 +2681,98 @@ pub const Zld = struct { // In Debug we don't really care about reproducibility, so put in a random value // and be done with it. std.crypto.random.bytes(&self.uuid_cmd.uuid); + Md5.hash(&self.uuid_cmd.uuid, &self.uuid_cmd.uuid, .{}); + conformUuid(&self.uuid_cmd.uuid); }, else => { const seg = self.getLinkeditSegmentPtr(); - const file_size = seg.fileoff + seg.filesize; - try uuid.calcUuidParallel(comp, self.file, file_size, &self.uuid_cmd.uuid); + const max_file_size = @intCast(u32, seg.fileoff + seg.filesize); + + var hashes = std.ArrayList([Md5.digest_length]u8).init(self.gpa); + defer hashes.deinit(); + + if (!self.options.strip) { + // First exclusion region will comprise all symbol stabs. 
+ const nlocals = self.dysymtab_cmd.nlocalsym; + + const locals_buf = try self.gpa.alloc(u8, nlocals * @sizeOf(macho.nlist_64)); + defer self.gpa.free(locals_buf); + + const amt = try self.file.preadAll(locals_buf, self.symtab_cmd.symoff); + if (amt != locals_buf.len) return error.InputOutput; + const locals = @ptrCast([*]macho.nlist_64, @alignCast(@alignOf(macho.nlist_64), locals_buf))[0..nlocals]; + + const istab: usize = for (locals) |local, i| { + if (local.stab()) break i; + } else locals.len; + const nstabs = locals.len - istab; + + // Next, a subsection of the strtab. + // We do not care about anything succeeding strtab as it is the code signature data which is + // not part of the UUID calculation anyway. + const stab_stroff = locals[istab].n_strx; + + const first_cut = FileSubsection{ + .start = 0, + .end = @intCast(u32, self.symtab_cmd.symoff + istab * @sizeOf(macho.nlist_64)), + }; + const second_cut = FileSubsection{ + .start = first_cut.end + @intCast(u32, nstabs * @sizeOf(macho.nlist_64)), + .end = self.symtab_cmd.stroff + stab_stroff, + }; + + for (&[_]FileSubsection{ first_cut, second_cut }) |cut| { + try self.calcUuidHashes(comp, cut, &hashes); + } + } else { + try self.calcUuidHashes(comp, .{ .start = 0, .end = max_file_size }, &hashes); + } + + const final_buffer = try self.gpa.alloc(u8, hashes.items.len * Md5.digest_length); + defer self.gpa.free(final_buffer); + + for (hashes.items) |hash, i| { + mem.copy(u8, final_buffer[i * Md5.digest_length ..][0..Md5.digest_length], &hash); + } + + Md5.hash(final_buffer, &self.uuid_cmd.uuid, .{}); + conformUuid(&self.uuid_cmd.uuid); }, } + const in_file = @sizeOf(macho.mach_header_64) + offset + @sizeOf(macho.load_command); try self.file.pwriteAll(&self.uuid_cmd.uuid, in_file); } + inline fn conformUuid(out: *[Md5.digest_length]u8) void { + // LC_UUID uuids should conform to RFC 4122 UUID version 4 & UUID version 5 formats + out[6] = (out[6] & 0x0F) | (3 << 4); + out[8] = (out[8] & 0x3F) | 0x80; + } + + 
const FileSubsection = struct { + start: u32, + end: u32, + }; + + fn calcUuidHashes( + self: *Zld, + comp: *const Compilation, + cut: FileSubsection, + hashes: *std.ArrayList([Md5.digest_length]u8), + ) !void { + const chunk_size = 0x4000; + const total_hashes = mem.alignForward(cut.end - cut.start, chunk_size) / chunk_size; + try hashes.resize(hashes.items.len + total_hashes); + + var hasher = Hasher(Md5){}; + try hasher.hash(self.gpa, comp.thread_pool, self.file, hashes.items, .{ + .chunk_size = chunk_size, + .file_pos = cut.start, + .max_file_size = cut.end - cut.start, + }); + } + fn writeCodeSignaturePadding(self: *Zld, code_sig: *CodeSignature) !void { const seg = self.getLinkeditSegmentPtr(); // Code signature data has to be 16-bytes aligned for Apple tools to recognize the file From b323e14b1c50d731b643180972361552f8e5f5ec Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 15 Dec 2022 16:04:10 +0100 Subject: [PATCH 08/17] macho: exclude linkedit and symtab/dysymtab load commands from the uuid calc --- src/link/MachO/zld.zig | 85 +++++++++++++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 21 deletions(-) diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index e8646f5dd9..0863ec3fc1 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -2675,7 +2675,11 @@ pub const Zld = struct { self.dysymtab_cmd.nindirectsyms = nindirectsyms; } - fn writeUuid(self: *Zld, comp: *const Compilation, offset: u32) !void { + fn writeUuid(self: *Zld, comp: *const Compilation, args: struct { + linkedit_cmd_offset: u32, + symtab_cmd_offset: u32, + uuid_cmd_offset: u32, + }) !void { switch (self.options.optimize_mode) { .Debug => { // In Debug we don't really care about reproducibility, so put in a random value @@ -2691,8 +2695,24 @@ pub const Zld = struct { var hashes = std.ArrayList([Md5.digest_length]u8).init(self.gpa); defer hashes.deinit(); + var subsections: [4]FileSubsection = undefined; + var count: usize = 2; + + // Exclude 
LINKEDIT segment command as it contains file size that includes stabs contribution + // and code signature. + subsections[0] = .{ + .start = 0, + .end = args.linkedit_cmd_offset, + }; + + // Exclude SYMTAB and DYSYMTAB commands for the same reason. + subsections[1] = .{ + .start = args.linkedit_cmd_offset + @sizeOf(macho.segment_command_64), + .end = args.symtab_cmd_offset, + }; + if (!self.options.strip) { - // First exclusion region will comprise all symbol stabs. + // Exclude region comprising all symbol stabs. const nlocals = self.dysymtab_cmd.nlocalsym; const locals_buf = try self.gpa.alloc(u8, nlocals * @sizeOf(macho.nlist_64)); @@ -2706,26 +2726,40 @@ pub const Zld = struct { if (local.stab()) break i; } else locals.len; const nstabs = locals.len - istab; + if (nstabs == 0) { + subsections[2] = .{ + .start = args.symtab_cmd_offset + @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command), + .end = max_file_size, + }; + count += 1; + } else { + // Exclude a subsection of the strtab with names of the stabs. + // We do not care about anything succeeding strtab as it is the code signature data which is + // not part of the UUID calculation anyway. + const stab_stroff = locals[istab].n_strx; - // Next, a subsection of the strtab. - // We do not care about anything succeeding strtab as it is the code signature data which is - // not part of the UUID calculation anyway. 
- const stab_stroff = locals[istab].n_strx; + subsections[2] = .{ + .start = args.symtab_cmd_offset + @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command), + .end = @intCast(u32, self.symtab_cmd.symoff + istab * @sizeOf(macho.nlist_64)), + }; + subsections[3] = .{ + .start = subsections[2].end + @intCast(u32, nstabs * @sizeOf(macho.nlist_64)), + .end = self.symtab_cmd.stroff + stab_stroff, + }; - const first_cut = FileSubsection{ - .start = 0, - .end = @intCast(u32, self.symtab_cmd.symoff + istab * @sizeOf(macho.nlist_64)), - }; - const second_cut = FileSubsection{ - .start = first_cut.end + @intCast(u32, nstabs * @sizeOf(macho.nlist_64)), - .end = self.symtab_cmd.stroff + stab_stroff, - }; - - for (&[_]FileSubsection{ first_cut, second_cut }) |cut| { - try self.calcUuidHashes(comp, cut, &hashes); + count += 2; } } else { - try self.calcUuidHashes(comp, .{ .start = 0, .end = max_file_size }, &hashes); + subsections[2] = .{ + .start = args.symtab_cmd_offset + @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command), + .end = max_file_size, + }; + count += 1; + } + + for (subsections[0..count]) |cut| { + std.debug.print("{x} - {x}\n", .{ cut.start, cut.end }); + try self.calcUuidHashes(comp, cut, &hashes); } const final_buffer = try self.gpa.alloc(u8, hashes.items.len * Md5.digest_length); @@ -2740,7 +2774,7 @@ pub const Zld = struct { }, } - const in_file = @sizeOf(macho.mach_header_64) + offset + @sizeOf(macho.load_command); + const in_file = args.uuid_cmd_offset + @sizeOf(macho.load_command); try self.file.pwriteAll(&self.uuid_cmd.uuid, in_file); } @@ -4057,11 +4091,16 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr const lc_writer = lc_buffer.writer(); try zld.writeSegmentHeaders(lc_writer); + const linkedit_cmd_offset = @sizeOf(macho.mach_header_64) + @intCast(u32, lc_buffer.items.len - @sizeOf(macho.segment_command_64)); + try lc_writer.writeStruct(zld.dyld_info_cmd); try 
lc_writer.writeStruct(zld.function_starts_cmd); try lc_writer.writeStruct(zld.data_in_code_cmd); + + const symtab_cmd_offset = @sizeOf(macho.mach_header_64) + @intCast(u32, lc_buffer.items.len); try lc_writer.writeStruct(zld.symtab_cmd); try lc_writer.writeStruct(zld.dysymtab_cmd); + try load_commands.writeDylinkerLC(lc_writer); if (zld.options.output_mode == .Exe) { @@ -4084,7 +4123,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr }); try load_commands.writeBuildVersionLC(zld.options, lc_writer); - const uuid_offset = @intCast(u32, lc_buffer.items.len); + const uuid_cmd_offset = @sizeOf(macho.mach_header_64) + @intCast(u32, lc_buffer.items.len); try lc_writer.writeStruct(zld.uuid_cmd); try load_commands.writeLoadDylibLCs(zld.dylibs.items, zld.referenced_dylibs.keys(), lc_writer); @@ -4115,7 +4154,11 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr try zld.file.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64)); try zld.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len)); - try zld.writeUuid(comp, uuid_offset); + try zld.writeUuid(comp, .{ + .linkedit_cmd_offset = linkedit_cmd_offset, + .symtab_cmd_offset = symtab_cmd_offset, + .uuid_cmd_offset = uuid_cmd_offset, + }); if (codesig) |*csig| { try zld.writeCodeSignature(comp, csig); // code signing always comes last From d00094dd459f37d30b3297939bed6c320470fba8 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 15 Dec 2022 22:12:29 +0100 Subject: [PATCH 09/17] macho: exclude all content of the binary that could cause non-deterministic UUID --- src/link/MachO/zld.zig | 224 ++++++++++++++++++++++++++++++++--------- 1 file changed, 178 insertions(+), 46 deletions(-) diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 0863ec3fc1..28f1926e31 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -25,7 +25,6 @@ const CodeSignature = @import("CodeSignature.zig"); const Compilation = 
@import("../../Compilation.zig"); const DwarfInfo = @import("DwarfInfo.zig"); const Dylib = @import("Dylib.zig"); -const Hasher = @import("hasher.zig").ParallelHasher; const MachO = @import("../MachO.zig"); const Md5 = std.crypto.hash.Md5; const LibStub = @import("../tapi.zig").LibStub; @@ -44,7 +43,9 @@ pub const Zld = struct { dysymtab_cmd: macho.dysymtab_command = .{}, function_starts_cmd: macho.linkedit_data_command = .{ .cmd = .FUNCTION_STARTS }, data_in_code_cmd: macho.linkedit_data_command = .{ .cmd = .DATA_IN_CODE }, - uuid_cmd: macho.uuid_command = .{}, + uuid_cmd: macho.uuid_command = .{ + .uuid = [_]u8{0} ** 16, + }, codesig_cmd: macho.linkedit_data_command = .{ .cmd = .CODE_SIGNATURE }, objects: std.ArrayListUnmanaged(Object) = .{}, @@ -2679,7 +2680,9 @@ pub const Zld = struct { linkedit_cmd_offset: u32, symtab_cmd_offset: u32, uuid_cmd_offset: u32, + codesig_cmd_offset: ?u32, }) !void { + _ = comp; switch (self.options.optimize_mode) { .Debug => { // In Debug we don't really care about reproducibility, so put in a random value @@ -2689,27 +2692,34 @@ pub const Zld = struct { conformUuid(&self.uuid_cmd.uuid); }, else => { - const seg = self.getLinkeditSegmentPtr(); - const max_file_size = @intCast(u32, seg.fileoff + seg.filesize); + const max_file_size = self.symtab_cmd.stroff + self.symtab_cmd.strsize; - var hashes = std.ArrayList([Md5.digest_length]u8).init(self.gpa); - defer hashes.deinit(); - - var subsections: [4]FileSubsection = undefined; - var count: usize = 2; + var subsections: [5]FileSubsection = undefined; + var count: usize = 0; // Exclude LINKEDIT segment command as it contains file size that includes stabs contribution // and code signature. - subsections[0] = .{ + subsections[count] = .{ .start = 0, .end = args.linkedit_cmd_offset, }; + count += 1; // Exclude SYMTAB and DYSYMTAB commands for the same reason. 
- subsections[1] = .{ - .start = args.linkedit_cmd_offset + @sizeOf(macho.segment_command_64), + subsections[count] = .{ + .start = subsections[count - 1].end + @sizeOf(macho.segment_command_64), .end = args.symtab_cmd_offset, }; + count += 1; + + // Exclude CODE_SIGNATURE command (if present). + if (args.codesig_cmd_offset) |offset| { + subsections[count] = .{ + .start = subsections[count - 1].end + @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command), + .end = offset, + }; + count += 1; + } if (!self.options.strip) { // Exclude region comprising all symbol stabs. @@ -2726,9 +2736,13 @@ pub const Zld = struct { if (local.stab()) break i; } else locals.len; const nstabs = locals.len - istab; + if (nstabs == 0) { - subsections[2] = .{ - .start = args.symtab_cmd_offset + @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command), + subsections[count] = .{ + .start = subsections[count - 1].end + if (args.codesig_cmd_offset == null) + @as(u32, @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command)) + else + @sizeOf(macho.linkedit_data_command), .end = max_file_size, }; count += 1; @@ -2738,38 +2752,80 @@ pub const Zld = struct { // not part of the UUID calculation anyway. 
const stab_stroff = locals[istab].n_strx; - subsections[2] = .{ - .start = args.symtab_cmd_offset + @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command), + subsections[count] = .{ + .start = subsections[count - 1].end + if (args.codesig_cmd_offset == null) + @as(u32, @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command)) + else + @sizeOf(macho.linkedit_data_command), .end = @intCast(u32, self.symtab_cmd.symoff + istab * @sizeOf(macho.nlist_64)), }; - subsections[3] = .{ - .start = subsections[2].end + @intCast(u32, nstabs * @sizeOf(macho.nlist_64)), + count += 1; + + subsections[count] = .{ + .start = subsections[count - 1].end + @intCast(u32, nstabs * @sizeOf(macho.nlist_64)), .end = self.symtab_cmd.stroff + stab_stroff, }; - - count += 2; + count += 1; } } else { - subsections[2] = .{ - .start = args.symtab_cmd_offset + @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command), + subsections[count] = .{ + .start = subsections[count - 1].end + if (args.codesig_cmd_offset == null) + @as(u32, @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command)) + else + @sizeOf(macho.linkedit_data_command), .end = max_file_size, }; count += 1; } + const chunk_size = 0x4000; + + var rb = RingBuffer{}; + var hasher = Md5.init(.{}); + var buffer: [chunk_size]u8 = undefined; + var hashed: usize = 0; + for (subsections[0..count]) |cut| { - std.debug.print("{x} - {x}\n", .{ cut.start, cut.end }); - try self.calcUuidHashes(comp, cut, &hashes); + // std.debug.print("{x} - {x}, {x}\n", .{ cut.start, cut.end, cut.end - cut.start }); + + const size = cut.end - cut.start; + const num_chunks = mem.alignForward(size, chunk_size) / chunk_size; + + var i: usize = 0; + while (i < num_chunks) : (i += 1) { + const fstart = cut.start + i * chunk_size; + const fsize = if (fstart + chunk_size > cut.end) + cut.end - fstart + else + chunk_size; + // std.debug.print("fstart {x}, fsize {x}\n", .{ fstart, fsize }); + const amt = try self.file.preadAll(buffer[0..fsize], 
fstart); + if (amt != fsize) return error.InputOutput; + + // try formatBinaryBlob(buffer[0..fsize], .{ .fmt_as_str = false }, std.io.getStdOut().writer()); + + var leftover = rb.append(buffer[0..fsize]); + while (leftover > 0) { + if (rb.full()) { + hasher.update(rb.getBuffer()); + hashed += rb.getBuffer().len; + rb.clear(); + } + leftover = rb.append(buffer[fsize - leftover ..]); + } + } } - const final_buffer = try self.gpa.alloc(u8, hashes.items.len * Md5.digest_length); - defer self.gpa.free(final_buffer); - - for (hashes.items) |hash, i| { - mem.copy(u8, final_buffer[i * Md5.digest_length ..][0..Md5.digest_length], &hash); + if (!rb.empty()) { + // try formatBinaryBlob(rb.getBuffer(), .{ .fmt_as_str = false }, std.io.getStdOut().writer()); + hasher.update(rb.getBuffer()); + hashed += rb.getBuffer().len; + rb.clear(); } - Md5.hash(final_buffer, &self.uuid_cmd.uuid, .{}); + // std.debug.print("hashed {x}\n", .{hashed}); + + hasher.final(&self.uuid_cmd.uuid); conformUuid(&self.uuid_cmd.uuid); }, } @@ -2778,6 +2834,79 @@ pub const Zld = struct { try self.file.pwriteAll(&self.uuid_cmd.uuid, in_file); } + const FmtBinaryBlobOpts = struct { + fmt_as_str: bool = true, + escape_str: bool = false, + }; + + fn formatBinaryBlob(blob: []const u8, opts: FmtBinaryBlobOpts, writer: anytype) !void { + // Format as 16-by-16-by-8 with two left column in hex, and right in ascii: + // xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxx + var i: usize = 0; + const step = 16; + var tmp_buf: [step]u8 = undefined; + while (i < blob.len) : (i += step) { + const end = if (blob[i..].len >= step) step else blob[i..].len; + const padding = step - blob[i .. i + end].len; + if (padding > 0) { + mem.set(u8, &tmp_buf, 0); + } + mem.copy(u8, &tmp_buf, blob[i .. i + end]); + try writer.print("{x} {x:<016} {x:<016}", .{ + i, std.fmt.fmtSliceHexLower(tmp_buf[0 .. step / 2]), std.fmt.fmtSliceHexLower(tmp_buf[step / 2 .. 
step]), + }); + if (opts.fmt_as_str) { + if (opts.escape_str) { + try writer.print(" {s}", .{std.fmt.fmtSliceEscapeLower(tmp_buf[0..step])}); + } else { + try writer.print(" {s}", .{tmp_buf[0..step]}); + } + } + try writer.writeByte('\n'); + } + } + + const RingBuffer = struct { + buffer: [chunk_size]u8 = undefined, + pos: usize = 0, + + const chunk_size = 0x4000; + + fn append(rb: *RingBuffer, data: []u8) usize { + const cpy_size = if (data.len > rb.available()) + data.len - rb.available() + else + data.len; + // std.debug.print(" appending {x} of {x} (pos {x})\n", .{ cpy_size, data.len, rb.pos }); + mem.copy(u8, rb.buffer[rb.pos..], data[0..cpy_size]); + rb.pos += cpy_size; + const leftover = data.len - cpy_size; + // std.debug.print(" leftover {x}\n", .{leftover}); + // std.debug.print(" buffer {x} full\n", .{rb.pos}); + return leftover; + } + + fn available(rb: RingBuffer) usize { + return rb.buffer.len - rb.pos; + } + + fn clear(rb: *RingBuffer) void { + rb.pos = 0; + } + + fn full(rb: RingBuffer) bool { + return rb.buffer.len == rb.pos; + } + + fn empty(rb: RingBuffer) bool { + return rb.pos == 0; + } + + fn getBuffer(rb: *const RingBuffer) []const u8 { + return rb.buffer[0..rb.pos]; + } + }; + inline fn conformUuid(out: *[Md5.digest_length]u8) void { // LC_UUID uuids should conform to RFC 4122 UUID version 4 & UUID version 5 formats out[6] = (out[6] & 0x0F) | (3 << 4); @@ -2789,23 +2918,23 @@ pub const Zld = struct { end: u32, }; - fn calcUuidHashes( - self: *Zld, - comp: *const Compilation, - cut: FileSubsection, - hashes: *std.ArrayList([Md5.digest_length]u8), - ) !void { - const chunk_size = 0x4000; - const total_hashes = mem.alignForward(cut.end - cut.start, chunk_size) / chunk_size; - try hashes.resize(hashes.items.len + total_hashes); + // fn calcUuidHashes( + // self: *Zld, + // comp: *const Compilation, + // cut: FileSubsection, + // hashes: *std.ArrayList([Md5.digest_length]u8), + // ) !void { + // const chunk_size = 0x4000; + // const total_hashes 
= mem.alignForward(cut.end - cut.start, chunk_size) / chunk_size; + // try hashes.resize(hashes.items.len + total_hashes); - var hasher = Hasher(Md5){}; - try hasher.hash(self.gpa, comp.thread_pool, self.file, hashes.items, .{ - .chunk_size = chunk_size, - .file_pos = cut.start, - .max_file_size = cut.end - cut.start, - }); - } + // var hasher = Hasher(Md5){}; + // try hasher.hash(self.gpa, comp.thread_pool, self.file, hashes.items, .{ + // .chunk_size = chunk_size, + // .file_pos = cut.start, + // .max_file_size = cut.end - cut.start, + // }); + // } fn writeCodeSignaturePadding(self: *Zld, code_sig: *CodeSignature) !void { const seg = self.getLinkeditSegmentPtr(); @@ -4133,6 +4262,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr if (cpu_arch == .aarch64 and (os_tag == .macos or abi == .simulator)) break :blk true; break :blk false; }; + var codesig_cmd_offset: ?u32 = null; var codesig: ?CodeSignature = if (requires_codesig) blk: { // Preallocate space for the code signature. 
// We need to do this at this stage so that we have the load commands with proper values @@ -4145,6 +4275,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr try codesig.addEntitlements(gpa, path); } try zld.writeCodeSignaturePadding(&codesig); + codesig_cmd_offset = @sizeOf(macho.mach_header_64) + @intCast(u32, lc_buffer.items.len); try lc_writer.writeStruct(zld.codesig_cmd); break :blk codesig; } else null; @@ -4158,6 +4289,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr .linkedit_cmd_offset = linkedit_cmd_offset, .symtab_cmd_offset = symtab_cmd_offset, .uuid_cmd_offset = uuid_cmd_offset, + .codesig_cmd_offset = codesig_cmd_offset, }); if (codesig) |*csig| { From 79b92f9eb850a1b4da08990d7b58f52391a802fb Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 15 Dec 2022 23:01:25 +0100 Subject: [PATCH 10/17] macho: remove temp debugging routines --- src/link/MachO/zld.zig | 43 ------------------------------------------ 1 file changed, 43 deletions(-) diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 28f1926e31..f86ded39e8 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -2786,8 +2786,6 @@ pub const Zld = struct { var hashed: usize = 0; for (subsections[0..count]) |cut| { - // std.debug.print("{x} - {x}, {x}\n", .{ cut.start, cut.end, cut.end - cut.start }); - const size = cut.end - cut.start; const num_chunks = mem.alignForward(size, chunk_size) / chunk_size; @@ -2798,12 +2796,9 @@ pub const Zld = struct { cut.end - fstart else chunk_size; - // std.debug.print("fstart {x}, fsize {x}\n", .{ fstart, fsize }); const amt = try self.file.preadAll(buffer[0..fsize], fstart); if (amt != fsize) return error.InputOutput; - // try formatBinaryBlob(buffer[0..fsize], .{ .fmt_as_str = false }, std.io.getStdOut().writer()); - var leftover = rb.append(buffer[0..fsize]); while (leftover > 0) { if (rb.full()) { @@ -2817,14 +2812,11 @@ pub const Zld = struct { } if (!rb.empty()) 
{ - // try formatBinaryBlob(rb.getBuffer(), .{ .fmt_as_str = false }, std.io.getStdOut().writer()); hasher.update(rb.getBuffer()); hashed += rb.getBuffer().len; rb.clear(); } - // std.debug.print("hashed {x}\n", .{hashed}); - hasher.final(&self.uuid_cmd.uuid); conformUuid(&self.uuid_cmd.uuid); }, @@ -2834,38 +2826,6 @@ pub const Zld = struct { try self.file.pwriteAll(&self.uuid_cmd.uuid, in_file); } - const FmtBinaryBlobOpts = struct { - fmt_as_str: bool = true, - escape_str: bool = false, - }; - - fn formatBinaryBlob(blob: []const u8, opts: FmtBinaryBlobOpts, writer: anytype) !void { - // Format as 16-by-16-by-8 with two left column in hex, and right in ascii: - // xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxx - var i: usize = 0; - const step = 16; - var tmp_buf: [step]u8 = undefined; - while (i < blob.len) : (i += step) { - const end = if (blob[i..].len >= step) step else blob[i..].len; - const padding = step - blob[i .. i + end].len; - if (padding > 0) { - mem.set(u8, &tmp_buf, 0); - } - mem.copy(u8, &tmp_buf, blob[i .. i + end]); - try writer.print("{x} {x:<016} {x:<016}", .{ - i, std.fmt.fmtSliceHexLower(tmp_buf[0 .. step / 2]), std.fmt.fmtSliceHexLower(tmp_buf[step / 2 .. 
step]), - }); - if (opts.fmt_as_str) { - if (opts.escape_str) { - try writer.print(" {s}", .{std.fmt.fmtSliceEscapeLower(tmp_buf[0..step])}); - } else { - try writer.print(" {s}", .{tmp_buf[0..step]}); - } - } - try writer.writeByte('\n'); - } - } - const RingBuffer = struct { buffer: [chunk_size]u8 = undefined, pos: usize = 0, @@ -2877,12 +2837,9 @@ pub const Zld = struct { data.len - rb.available() else data.len; - // std.debug.print(" appending {x} of {x} (pos {x})\n", .{ cpy_size, data.len, rb.pos }); mem.copy(u8, rb.buffer[rb.pos..], data[0..cpy_size]); rb.pos += cpy_size; const leftover = data.len - cpy_size; - // std.debug.print(" leftover {x}\n", .{leftover}); - // std.debug.print(" buffer {x} full\n", .{rb.pos}); return leftover; } From 1928ed7dab3949db964d96dad82beb776e69554f Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 15 Dec 2022 23:08:04 +0100 Subject: [PATCH 11/17] macho: RingBuffer experiment to crack down the bug --- src/link/MachO/zld.zig | 56 +----------------------------------------- 1 file changed, 1 insertion(+), 55 deletions(-) diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index f86ded39e8..23a7963cc0 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -2780,10 +2780,8 @@ pub const Zld = struct { const chunk_size = 0x4000; - var rb = RingBuffer{}; var hasher = Md5.init(.{}); var buffer: [chunk_size]u8 = undefined; - var hashed: usize = 0; for (subsections[0..count]) |cut| { const size = cut.end - cut.start; @@ -2799,24 +2797,10 @@ pub const Zld = struct { const amt = try self.file.preadAll(buffer[0..fsize], fstart); if (amt != fsize) return error.InputOutput; - var leftover = rb.append(buffer[0..fsize]); - while (leftover > 0) { - if (rb.full()) { - hasher.update(rb.getBuffer()); - hashed += rb.getBuffer().len; - rb.clear(); - } - leftover = rb.append(buffer[fsize - leftover ..]); - } + hasher.update(buffer[0..fsize]); } } - if (!rb.empty()) { - hasher.update(rb.getBuffer()); - hashed += 
rb.getBuffer().len; - rb.clear(); - } - hasher.final(&self.uuid_cmd.uuid); conformUuid(&self.uuid_cmd.uuid); }, @@ -2826,44 +2810,6 @@ pub const Zld = struct { try self.file.pwriteAll(&self.uuid_cmd.uuid, in_file); } - const RingBuffer = struct { - buffer: [chunk_size]u8 = undefined, - pos: usize = 0, - - const chunk_size = 0x4000; - - fn append(rb: *RingBuffer, data: []u8) usize { - const cpy_size = if (data.len > rb.available()) - data.len - rb.available() - else - data.len; - mem.copy(u8, rb.buffer[rb.pos..], data[0..cpy_size]); - rb.pos += cpy_size; - const leftover = data.len - cpy_size; - return leftover; - } - - fn available(rb: RingBuffer) usize { - return rb.buffer.len - rb.pos; - } - - fn clear(rb: *RingBuffer) void { - rb.pos = 0; - } - - fn full(rb: RingBuffer) bool { - return rb.buffer.len == rb.pos; - } - - fn empty(rb: RingBuffer) bool { - return rb.pos == 0; - } - - fn getBuffer(rb: *const RingBuffer) []const u8 { - return rb.buffer[0..rb.pos]; - } - }; - inline fn conformUuid(out: *[Md5.digest_length]u8) void { // LC_UUID uuids should conform to RFC 4122 UUID version 4 & UUID version 5 formats out[6] = (out[6] & 0x0F) | (3 << 4); From 585c21e54d339f207028d871a45546da2d1b6871 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 15 Dec 2022 23:43:50 +0100 Subject: [PATCH 12/17] macho: move parallel file hashing back to CodeSignature I need to think some more how to calculate UUID in parallel, if it is even possible, to preserve UUID's determinism. 
--- CMakeLists.txt | 1 - src/link/MachO/CodeSignature.zig | 65 +++++++++++++++++++++++++++--- src/link/MachO/hasher.zig | 68 -------------------------------- src/link/MachO/zld.zig | 34 ++++------------ 4 files changed, 67 insertions(+), 101 deletions(-) delete mode 100644 src/link/MachO/hasher.zig diff --git a/CMakeLists.txt b/CMakeLists.txt index 91f68376bc..8e8c66d374 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -591,7 +591,6 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/link/MachO/bind.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/dead_strip.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/fat.zig" - "${CMAKE_SOURCE_DIR}/src/link/MachO/hasher.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/load_commands.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/thunks.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/zld.zig" diff --git a/src/link/MachO/CodeSignature.zig b/src/link/MachO/CodeSignature.zig index 391ac28efa..116ed254f8 100644 --- a/src/link/MachO/CodeSignature.zig +++ b/src/link/MachO/CodeSignature.zig @@ -10,8 +10,9 @@ const testing = std.testing; const Allocator = mem.Allocator; const Compilation = @import("../../Compilation.zig"); -const Hasher = @import("hasher.zig").ParallelHasher; const Sha256 = std.crypto.hash.sha2.Sha256; +const ThreadPool = @import("../../ThreadPool.zig"); +const WaitGroup = @import("../../WaitGroup.zig"); const hash_size = Sha256.digest_length; @@ -288,11 +289,7 @@ pub fn writeAdhocSignature( self.code_directory.inner.nCodeSlots = total_pages; // Calculate hash for each page (in file) and write it to the buffer - var hasher = Hasher(Sha256){}; - try hasher.hash(gpa, comp.thread_pool, opts.file, self.code_directory.code_slots.items, .{ - .chunk_size = self.page_size, - .max_file_size = opts.file_size, - }); + try self.parallelHash(gpa, comp.thread_pool, opts.file, opts.file_size); try blobs.append(.{ .code_directory = &self.code_directory }); header.length += @sizeOf(macho.BlobIndex); @@ -351,6 +348,62 @@ pub fn writeAdhocSignature( } } +fn parallelHash( 
+ self: *CodeSignature, + gpa: Allocator, + pool: *ThreadPool, + file: fs.File, + file_size: u64, +) !void { + var wg: WaitGroup = .{}; + + const total_num_chunks = mem.alignForward(file_size, self.page_size) / self.page_size; + assert(self.code_directory.code_slots.items.len >= total_num_chunks); + + const buffer = try gpa.alloc(u8, self.page_size * total_num_chunks); + defer gpa.free(buffer); + + const results = try gpa.alloc(fs.File.PReadError!usize, total_num_chunks); + defer gpa.free(results); + + { + wg.reset(); + defer wg.wait(); + + var i: usize = 0; + while (i < total_num_chunks) : (i += 1) { + const fstart = i * self.page_size; + const fsize = if (fstart + self.page_size > file_size) + file_size - fstart + else + self.page_size; + wg.start(); + try pool.spawn(worker, .{ + file, + fstart, + buffer[fstart..][0..fsize], + &self.code_directory.code_slots.items[i], + &results[i], + &wg, + }); + } + } + for (results) |result| _ = try result; +} + +fn worker( + file: fs.File, + fstart: usize, + buffer: []u8, + out: *[hash_size]u8, + err: *fs.File.PReadError!usize, + wg: *WaitGroup, +) void { + defer wg.finish(); + err.* = file.preadAll(buffer, fstart); + Sha256.hash(buffer, out, .{}); +} + pub fn size(self: CodeSignature) u32 { var ssize: u32 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) + self.code_directory.size(); if (self.requirements) |req| { diff --git a/src/link/MachO/hasher.zig b/src/link/MachO/hasher.zig deleted file mode 100644 index bb0531286c..0000000000 --- a/src/link/MachO/hasher.zig +++ /dev/null @@ -1,68 +0,0 @@ -const std = @import("std"); -const assert = std.debug.assert; -const fs = std.fs; -const mem = std.mem; - -const Allocator = mem.Allocator; -const ThreadPool = @import("../../ThreadPool.zig"); -const WaitGroup = @import("../../WaitGroup.zig"); - -pub fn ParallelHasher(comptime Hasher: type) type { - const hash_size = Hasher.digest_length; - - return struct { - pub fn hash(self: @This(), gpa: Allocator, pool: *ThreadPool, file: 
fs.File, out: [][hash_size]u8, opts: struct { - chunk_size: u16 = 0x4000, - file_pos: u64 = 0, - max_file_size: ?u64 = null, - }) !void { - _ = self; - - var wg: WaitGroup = .{}; - - const file_size = opts.max_file_size orelse try file.getEndPos(); - const total_num_chunks = mem.alignForward(file_size, opts.chunk_size) / opts.chunk_size; - assert(out.len >= total_num_chunks); - - const buffer = try gpa.alloc(u8, opts.chunk_size * total_num_chunks); - defer gpa.free(buffer); - - const results = try gpa.alloc(fs.File.PReadError!usize, total_num_chunks); - defer gpa.free(results); - - { - wg.reset(); - defer wg.wait(); - - var i: usize = 0; - while (i < total_num_chunks) : (i += 1) { - const fstart = i * opts.chunk_size; - const fsize = if (fstart + opts.chunk_size > file_size) file_size - fstart else opts.chunk_size; - wg.start(); - try pool.spawn(worker, .{ - file, - fstart + opts.file_pos, - buffer[fstart..][0..fsize], - &out[i], - &results[i], - &wg, - }); - } - } - for (results) |result| _ = try result; - } - - fn worker( - file: fs.File, - fstart: usize, - buffer: []u8, - out: *[hash_size]u8, - err: *fs.File.PReadError!usize, - wg: *WaitGroup, - ) void { - defer wg.finish(); - err.* = file.preadAll(buffer, fstart); - Hasher.hash(buffer, out, .{}); - } - }; -} diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 23a7963cc0..e7e78b9aef 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -2692,7 +2692,12 @@ pub const Zld = struct { conformUuid(&self.uuid_cmd.uuid); }, else => { - const max_file_size = self.symtab_cmd.stroff + self.symtab_cmd.strsize; + const max_file_end = self.symtab_cmd.stroff + self.symtab_cmd.strsize; + + const FileSubsection = struct { + start: u32, + end: u32, + }; var subsections: [5]FileSubsection = undefined; var count: usize = 0; @@ -2743,7 +2748,7 @@ pub const Zld = struct { @as(u32, @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command)) else @sizeOf(macho.linkedit_data_command), - .end = 
max_file_size, + .end = max_file_end, }; count += 1; } else { @@ -2773,7 +2778,7 @@ pub const Zld = struct { @as(u32, @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command)) else @sizeOf(macho.linkedit_data_command), - .end = max_file_size, + .end = max_file_end, }; count += 1; } @@ -2816,29 +2821,6 @@ pub const Zld = struct { out[8] = (out[8] & 0x3F) | 0x80; } - const FileSubsection = struct { - start: u32, - end: u32, - }; - - // fn calcUuidHashes( - // self: *Zld, - // comp: *const Compilation, - // cut: FileSubsection, - // hashes: *std.ArrayList([Md5.digest_length]u8), - // ) !void { - // const chunk_size = 0x4000; - // const total_hashes = mem.alignForward(cut.end - cut.start, chunk_size) / chunk_size; - // try hashes.resize(hashes.items.len + total_hashes); - - // var hasher = Hasher(Md5){}; - // try hasher.hash(self.gpa, comp.thread_pool, self.file, hashes.items, .{ - // .chunk_size = chunk_size, - // .file_pos = cut.start, - // .max_file_size = cut.end - cut.start, - // }); - // } - fn writeCodeSignaturePadding(self: *Zld, code_sig: *CodeSignature) !void { const seg = self.getLinkeditSegmentPtr(); // Code signature data has to be 16-bytes aligned for Apple tools to recognize the file From b1f874c78a847d591d9f14e67e181d5f3e5e7105 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 16 Dec 2022 00:42:21 +0100 Subject: [PATCH 13/17] enable testing for stage3 and stage4 being byte-for-byte identical --- ci/aarch64-macos.sh | 16 ++++++++++++++++ ci/x86_64-macos.sh | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/ci/aarch64-macos.sh b/ci/aarch64-macos.sh index 337c2bfa8e..2fe38f21a4 100755 --- a/ci/aarch64-macos.sh +++ b/ci/aarch64-macos.sh @@ -52,3 +52,19 @@ stage3-release/bin/zig build test docs \ # Produce the experimental std lib documentation. stage3-release/bin/zig test ../lib/std/std.zig -femit-docs -fno-emit-bin --zig-lib-dir ../lib + +# Ensure that stage3 and stage4 are byte-for-byte identical. 
+stage3-release/bin/zig build \ + --prefix stage4-release \ + -Denable-llvm \ + -Dno-lib \ + -Drelease \ + -Dstrip \ + -Dtarget=$TARGET \ + -Duse-zig-libcxx \ + -Dversion-string="$(stage3-release/bin/zig version)" + +# diff returns an error code if the files differ. +echo "If the following command fails, it means nondeterminism has been" +echo "introduced, making stage3 and stage4 no longer byte-for-byte identical." +diff stage3-release/bin/zig stage4-release/bin/zig diff --git a/ci/x86_64-macos.sh b/ci/x86_64-macos.sh index f09121ccd0..9d9cdb9d6f 100755 --- a/ci/x86_64-macos.sh +++ b/ci/x86_64-macos.sh @@ -60,3 +60,19 @@ stage3-release/bin/zig build test docs \ # Produce the experimental std lib documentation. stage3-release/bin/zig test ../lib/std/std.zig -femit-docs -fno-emit-bin --zig-lib-dir ../lib + +# Ensure that stage3 and stage4 are byte-for-byte identical. +stage3-release/bin/zig build \ + --prefix stage4-release \ + -Denable-llvm \ + -Dno-lib \ + -Drelease \ + -Dstrip \ + -Dtarget=$TARGET \ + -Duse-zig-libcxx \ + -Dversion-string="$(stage3-release/bin/zig version)" + +# diff returns an error code if the files differ. +echo "If the following command fails, it means nondeterminism has been" +echo "introduced, making stage3 and stage4 no longer byte-for-byte identical." 
+diff stage3-release/bin/zig stage4-release/bin/zig From bd6d951f63b1dc35ded127d7b7b13355f11421bb Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 16 Dec 2022 12:49:51 +0100 Subject: [PATCH 14/17] macho: fix 32bit build --- src/link/MachO/CodeSignature.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/link/MachO/CodeSignature.zig b/src/link/MachO/CodeSignature.zig index 116ed254f8..8bc00d9181 100644 --- a/src/link/MachO/CodeSignature.zig +++ b/src/link/MachO/CodeSignature.zig @@ -353,7 +353,7 @@ fn parallelHash( gpa: Allocator, pool: *ThreadPool, file: fs.File, - file_size: u64, + file_size: u32, ) !void { var wg: WaitGroup = .{}; From f7266e03a8a9dda93b7118ea10c8f97adf21b3b0 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 16 Dec 2022 14:32:19 +0100 Subject: [PATCH 15/17] macho: identifier string in code signature should be just basename --- src/link/MachO/zld.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index e7e78b9aef..272c8ad094 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -4155,7 +4155,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr // The most important here is to have the correct vm and filesize of the __LINKEDIT segment // where the code signature goes into. 
var codesig = CodeSignature.init(page_size); - codesig.code_directory.ident = options.emit.?.sub_path; + codesig.code_directory.ident = fs.path.basename(full_out_path); if (options.entitlements) |path| { try codesig.addEntitlements(gpa, path); } From 9ad24a4aee839437a10d1038bb2f99a2abdbf1c2 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 16 Dec 2022 18:31:48 +0100 Subject: [PATCH 16/17] macho: add uuid link test --- lib/std/build/CheckObjectStep.zig | 6 ++++ test/link.zig | 5 +++ test/link/macho/uuid/build.zig | 52 +++++++++++++++++++++++++++++++ test/link/macho/uuid/test.c | 2 ++ 4 files changed, 65 insertions(+) create mode 100644 test/link/macho/uuid/build.zig create mode 100644 test/link/macho/uuid/test.c diff --git a/lib/std/build/CheckObjectStep.zig b/lib/std/build/CheckObjectStep.zig index 7bebea54a0..41a275c38f 100644 --- a/lib/std/build/CheckObjectStep.zig +++ b/lib/std/build/CheckObjectStep.zig @@ -571,6 +571,12 @@ const MachODumper = struct { }); }, + .UUID => { + const uuid = lc.cast(macho.uuid_command).?; + try writer.writeByte('\n'); + try writer.print("uuid {x}", .{std.fmt.fmtSliceHexLower(&uuid.uuid)}); + }, + else => {}, } } diff --git a/test/link.zig b/test/link.zig index 7eec02e53a..5620ac95a0 100644 --- a/test/link.zig +++ b/test/link.zig @@ -170,6 +170,11 @@ fn addMachOCases(cases: *tests.StandaloneContext) void { .requires_symlinks = true, }); + cases.addBuildFile("test/link/macho/uuid/build.zig", .{ + .build_modes = false, + .requires_symlinks = true, + }); + cases.addBuildFile("test/link/macho/weak_library/build.zig", .{ .build_modes = true, .requires_symlinks = true, diff --git a/test/link/macho/uuid/build.zig b/test/link/macho/uuid/build.zig new file mode 100644 index 0000000000..620af04d04 --- /dev/null +++ b/test/link/macho/uuid/build.zig @@ -0,0 +1,52 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const Builder = std.build.Builder; +const LibExeObjectStep = std.build.LibExeObjStep; + +pub fn build(b: 
*Builder) void { + const test_step = b.step("test", "Test"); + test_step.dependOn(b.getInstallStep()); + + switch (builtin.cpu.arch) { + .aarch64 => { + testUuid(b, test_step, .ReleaseSafe, "eb1203019e453d808d4f1e71053af9af"); + testUuid(b, test_step, .ReleaseFast, "eb1203019e453d808d4f1e71053af9af"); + testUuid(b, test_step, .ReleaseSmall, "eb1203019e453d808d4f1e71053af9af"); + }, + .x86_64 => { + testUuid(b, test_step, .ReleaseSafe, "b3598e7c42dc38b0bd2975ead6e4ae85"); + testUuid(b, test_step, .ReleaseFast, "b3598e7c42dc38b0bd2975ead6e4ae85"); + testUuid(b, test_step, .ReleaseSmall, "1064b25eef4e3e6391866188b3dd7156"); + }, + else => unreachable, + } +} + +fn testUuid(b: *Builder, test_step: *std.build.Step, mode: std.builtin.Mode, comptime exp: []const u8) void { + // The calculated UUID value is independent of debug info and so it should + // stay the same across builds. + { + const dylib = simpleDylib(b, mode); + const check_dylib = dylib.checkObject(.macho); + check_dylib.checkStart("cmd UUID"); + check_dylib.checkNext("uuid " ++ exp); + test_step.dependOn(&check_dylib.step); + } + { + const dylib = simpleDylib(b, mode); + dylib.strip = true; + const check_dylib = dylib.checkObject(.macho); + check_dylib.checkStart("cmd UUID"); + check_dylib.checkNext("uuid " ++ exp); + test_step.dependOn(&check_dylib.step); + } +} + +fn simpleDylib(b: *Builder, mode: std.builtin.Mode) *LibExeObjectStep { + const dylib = b.addSharedLibrary("test", null, b.version(1, 0, 0)); + dylib.setBuildMode(mode); + dylib.setTarget(.{ .os_tag = .macos }); + dylib.addCSourceFile("test.c", &.{}); + dylib.linkLibC(); + return dylib; +} diff --git a/test/link/macho/uuid/test.c b/test/link/macho/uuid/test.c new file mode 100644 index 0000000000..6f23a1a926 --- /dev/null +++ b/test/link/macho/uuid/test.c @@ -0,0 +1,2 @@ +void test() {} + From b20a610f03b0c281958802770b927cde5f47b99c Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 17 Dec 2022 00:53:47 +0100 Subject: [PATCH 17/17] 
link-tests: force cross-comp to exclude host differences --- test/link/macho/uuid/build.zig | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/test/link/macho/uuid/build.zig b/test/link/macho/uuid/build.zig index 620af04d04..9276d8a401 100644 --- a/test/link/macho/uuid/build.zig +++ b/test/link/macho/uuid/build.zig @@ -1,25 +1,13 @@ const std = @import("std"); -const builtin = @import("builtin"); const Builder = std.build.Builder; const LibExeObjectStep = std.build.LibExeObjStep; pub fn build(b: *Builder) void { const test_step = b.step("test", "Test"); test_step.dependOn(b.getInstallStep()); - - switch (builtin.cpu.arch) { - .aarch64 => { - testUuid(b, test_step, .ReleaseSafe, "eb1203019e453d808d4f1e71053af9af"); - testUuid(b, test_step, .ReleaseFast, "eb1203019e453d808d4f1e71053af9af"); - testUuid(b, test_step, .ReleaseSmall, "eb1203019e453d808d4f1e71053af9af"); - }, - .x86_64 => { - testUuid(b, test_step, .ReleaseSafe, "b3598e7c42dc38b0bd2975ead6e4ae85"); - testUuid(b, test_step, .ReleaseFast, "b3598e7c42dc38b0bd2975ead6e4ae85"); - testUuid(b, test_step, .ReleaseSmall, "1064b25eef4e3e6391866188b3dd7156"); - }, - else => unreachable, - } + testUuid(b, test_step, .ReleaseSafe, "eb1203019e453d808d4f1e71053af9af"); + testUuid(b, test_step, .ReleaseFast, "eb1203019e453d808d4f1e71053af9af"); + testUuid(b, test_step, .ReleaseSmall, "eb1203019e453d808d4f1e71053af9af"); } fn testUuid(b: *Builder, test_step: *std.build.Step, mode: std.builtin.Mode, comptime exp: []const u8) void { @@ -45,7 +33,7 @@ fn testUuid(b: *Builder, test_step: *std.build.Step, mode: std.builtin.Mode, com fn simpleDylib(b: *Builder, mode: std.builtin.Mode) *LibExeObjectStep { const dylib = b.addSharedLibrary("test", null, b.version(1, 0, 0)); dylib.setBuildMode(mode); - dylib.setTarget(.{ .os_tag = .macos }); + dylib.setTarget(.{ .cpu_arch = .aarch64, .os_tag = .macos }); dylib.addCSourceFile("test.c", &.{}); dylib.linkLibC(); return dylib;