diff --git a/CMakeLists.txt b/CMakeLists.txt
index 31d72a34d6..ae8c0e05d1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -591,6 +591,7 @@ set(ZIG_STAGE2_SOURCES
     "${CMAKE_SOURCE_DIR}/src/link/MachO/bind.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/dead_strip.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/fat.zig"
+    "${CMAKE_SOURCE_DIR}/src/link/MachO/hasher.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/load_commands.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/thunks.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/uuid.zig"
diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index f83338f48b..8a8525ab19 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -99,10 +99,10 @@ page_size: u16,
 /// fashion (default for LLVM backend).
 mode: enum { incremental, one_shot },
 
-uuid: macho.uuid_command = .{
-    .cmdsize = @sizeOf(macho.uuid_command),
-    .uuid = undefined,
-},
+uuid: struct {
+    buf: [16]u8 = undefined,
+    final: bool = false,
+} = .{},
 
 dylibs: std.ArrayListUnmanaged(Dylib) = .{},
 dylibs_map: std.StringHashMapUnmanaged(u16) = .{},
@@ -588,11 +588,11 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No
 
     try load_commands.writeBuildVersionLC(&self.base.options, &ncmds, lc_writer);
 
-    {
-        std.crypto.random.bytes(&self.uuid.uuid);
-        try lc_writer.writeStruct(self.uuid);
-        ncmds += 1;
+    if (!self.uuid.final) {
+        std.crypto.random.bytes(&self.uuid.buf);
+        self.uuid.final = true;
     }
+    try load_commands.writeUuidLC(&self.uuid.buf, &ncmds, lc_writer);
 
     try load_commands.writeLoadDylibLCs(self.dylibs.items, self.referenced_dylibs.keys(), &ncmds, lc_writer);
 
diff --git a/src/link/MachO/CodeSignature.zig b/src/link/MachO/CodeSignature.zig
index e3c362e941..391ac28efa 100644
--- a/src/link/MachO/CodeSignature.zig
+++ b/src/link/MachO/CodeSignature.zig
@@ -1,6 +1,4 @@
 const CodeSignature = @This();
-const Compilation = @import("../../Compilation.zig");
-const WaitGroup = @import("../../WaitGroup.zig");
 
 const std = @import("std");
 const assert = std.debug.assert;
@@ -9,10 +7,13 @@ const log = std.log.scoped(.link);
 const macho = std.macho;
 const mem = std.mem;
 const testing = std.testing;
+
 const Allocator = mem.Allocator;
+const Compilation = @import("../../Compilation.zig");
+const Hasher = @import("hasher.zig").ParallelHasher;
 const Sha256 = std.crypto.hash.sha2.Sha256;
 
-const hash_size: u8 = 32;
+const hash_size = Sha256.digest_length;
 
 const Blob = union(enum) {
     code_directory: *CodeDirectory,
@@ -109,7 +110,7 @@ const CodeDirectory = struct {
     fn size(self: CodeDirectory) u32 {
         const code_slots = self.inner.nCodeSlots * hash_size;
         const special_slots = self.inner.nSpecialSlots * hash_size;
-        return @sizeOf(macho.CodeDirectory) + @intCast(u32, self.ident.len + 1) + special_slots + code_slots;
+        return @sizeOf(macho.CodeDirectory) + @intCast(u32, self.ident.len + 1 + special_slots + code_slots);
     }
 
     fn write(self: CodeDirectory, writer: anytype) !void {
@@ -287,33 +288,11 @@ pub fn writeAdhocSignature(
     self.code_directory.inner.nCodeSlots = total_pages;
 
     // Calculate hash for each page (in file) and write it to the buffer
-    var wg: WaitGroup = .{};
-    {
-        const buffer = try gpa.alloc(u8, self.page_size * total_pages);
-        defer gpa.free(buffer);
-
-        const results = try gpa.alloc(fs.File.PReadError!usize, total_pages);
-        defer gpa.free(results);
-        {
-            wg.reset();
-            defer wg.wait();
-
-            var i: usize = 0;
-            while (i < total_pages) : (i += 1) {
-                const fstart = i * self.page_size;
-                const fsize = if (fstart + self.page_size > opts.file_size)
-                    opts.file_size - fstart
-                else
-                    self.page_size;
-                const out_hash = &self.code_directory.code_slots.items[i];
-                wg.start();
-                try comp.thread_pool.spawn(workerSha256Hash, .{
-                    opts.file, fstart, buffer[fstart..][0..fsize], out_hash, &results[i], &wg,
-                });
-            }
-        }
-        for (results) |result| _ = try result;
-    }
+    var hasher = Hasher(Sha256){};
+    try hasher.hash(gpa, comp.thread_pool, opts.file, self.code_directory.code_slots.items, .{
+        .chunk_size = self.page_size,
+        .max_file_size = opts.file_size,
+    });
 
     try blobs.append(.{ .code_directory = &self.code_directory });
     header.length += @sizeOf(macho.BlobIndex);
@@ -352,7 +331,7 @@
     }
 
     self.code_directory.inner.hashOffset =
-        @sizeOf(macho.CodeDirectory) + @intCast(u32, self.code_directory.ident.len + 1) + self.code_directory.inner.nSpecialSlots * hash_size;
+        @sizeOf(macho.CodeDirectory) + @intCast(u32, self.code_directory.ident.len + 1 + self.code_directory.inner.nSpecialSlots * hash_size);
     self.code_directory.inner.length = self.code_directory.size();
     header.length += self.code_directory.size();
 
@@ -372,19 +351,6 @@
     }
 }
 
-fn workerSha256Hash(
-    file: fs.File,
-    fstart: usize,
-    buffer: []u8,
-    hash: *[hash_size]u8,
-    err: *fs.File.PReadError!usize,
-    wg: *WaitGroup,
-) void {
-    defer wg.finish();
-    err.* = file.preadAll(buffer, fstart);
-    Sha256.hash(buffer, hash, .{});
-}
-
 pub fn size(self: CodeSignature) u32 {
     var ssize: u32 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) + self.code_directory.size();
     if (self.requirements) |req| {
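A note on the two `@intCast` changes in CodeSignature.zig above: moving the slot byte counts inside the cast does not change the result, it only keeps the addition in `usize` with a single checked narrowing to `u32` at the end. The layout that arithmetic encodes is: the fixed `macho.CodeDirectory` header, the NUL-terminated identifier, `nSpecialSlots` hashes, then `nCodeSlots` page hashes, so `hashOffset` points at the first code slot. A minimal sketch of that arithmetic; the identifier and slot counts here are made up for illustration, not taken from the patch:

```zig
const std = @import("std");

test "CodeDirectory blob layout" {
    const hash_size: u32 = 32; // Sha256.digest_length
    const base: u32 = @sizeOf(std.macho.CodeDirectory);
    const ident_len: u32 = "a.out".len + 1; // identifier plus NUL terminator
    const n_special: u32 = 2; // special slots sit just below hashOffset
    const n_code: u32 = 3; // one slot per page of the output file

    // hashOffset skips the header, identifier, and special slots...
    const hash_offset = base + ident_len + n_special * hash_size;
    // ...and size() additionally covers the code slots.
    const total = hash_offset + n_code * hash_size;

    try std.testing.expectEqual(base + 70, hash_offset);
    try std.testing.expectEqual(base + 166, total);
}
```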
diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig
index 655ba7162f..22905a520a 100644
--- a/src/link/MachO/DebugSymbols.zig
+++ b/src/link/MachO/DebugSymbols.zig
@@ -5,6 +5,7 @@ const build_options = @import("build_options");
 const assert = std.debug.assert;
 const fs = std.fs;
 const link = @import("../../link.zig");
+const load_commands = @import("load_commands.zig");
 const log = std.log.scoped(.dsym);
 const macho = std.macho;
 const makeStaticString = MachO.makeStaticString;
@@ -303,10 +304,7 @@ pub fn flushModule(self: *DebugSymbols, macho_file: *MachO) !void {
     self.finalizeDwarfSegment(macho_file);
     try self.writeLinkeditSegmentData(macho_file, &ncmds, lc_writer);
 
-    {
-        try lc_writer.writeStruct(macho_file.uuid);
-        ncmds += 1;
-    }
+    try load_commands.writeUuidLC(&macho_file.uuid.buf, &ncmds, lc_writer);
 
     var headers_buf = std.ArrayList(u8).init(self.allocator);
     defer headers_buf.deinit();
diff --git a/src/link/MachO/hasher.zig b/src/link/MachO/hasher.zig
new file mode 100644
index 0000000000..29099ad2d9
--- /dev/null
+++ b/src/link/MachO/hasher.zig
@@ -0,0 +1,60 @@
+const std = @import("std");
+const assert = std.debug.assert;
+const fs = std.fs;
+const mem = std.mem;
+
+const Allocator = mem.Allocator;
+const ThreadPool = @import("../../ThreadPool.zig");
+const WaitGroup = @import("../../WaitGroup.zig");
+
+pub fn ParallelHasher(comptime Hasher: type) type {
+    const hash_size = Hasher.digest_length;
+
+    return struct {
+        pub fn hash(self: @This(), gpa: Allocator, pool: *ThreadPool, file: fs.File, out: [][hash_size]u8, opts: struct {
+            chunk_size: u16 = 0x4000,
+            max_file_size: ?u64 = null,
+        }) !void {
+            _ = self;
+
+            var wg: WaitGroup = .{};
+
+            const file_size = opts.max_file_size orelse try file.getEndPos();
+            const total_num_chunks = mem.alignForward(file_size, opts.chunk_size) / opts.chunk_size;
+            assert(out.len >= total_num_chunks);
+
+            const buffer = try gpa.alloc(u8, opts.chunk_size * total_num_chunks);
+            defer gpa.free(buffer);
+
+            const results = try gpa.alloc(fs.File.PReadError!usize, total_num_chunks);
+            defer gpa.free(results);
+
+            {
+                wg.reset();
+                defer wg.wait();
+
+                var i: usize = 0;
+                while (i < total_num_chunks) : (i += 1) {
+                    const fstart = i * opts.chunk_size;
+                    const fsize = if (fstart + opts.chunk_size > file_size) file_size - fstart else opts.chunk_size;
+                    wg.start();
+                    try pool.spawn(worker, .{ file, fstart, buffer[fstart..][0..fsize], &out[i], &results[i], &wg });
+                }
+            }
+            for (results) |result| _ = try result;
+        }
+
+        fn worker(
+            file: fs.File,
+            fstart: usize,
+            buffer: []u8,
+            out: *[hash_size]u8,
+            err: *fs.File.PReadError!usize,
+            wg: *WaitGroup,
+        ) void {
+            defer wg.finish();
+            err.* = file.preadAll(buffer, fstart);
+            Hasher.hash(buffer, out, .{});
+        }
+    };
+}
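hasher.zig generalizes the ad-hoc SHA-256 worker that CodeSignature.zig used to carry: the file is split into chunk_size slices, each chunk is pread and hashed on the thread pool, and per-chunk read errors are collected and only checked once all workers have finished. A rough usage sketch follows; the file path is hypothetical, and it assumes the compiler-internal ThreadPool.zig of this era with init(allocator)/deinit, so treat it as pseudocode outside the zig tree:

```zig
const std = @import("std");
const Md5 = std.crypto.hash.Md5;
const ThreadPool = @import("ThreadPool.zig"); // compiler-internal; import path is illustrative
const ParallelHasher = @import("link/MachO/hasher.zig").ParallelHasher;

pub fn main() !void {
    var gpa_state = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa_state.deinit();
    const gpa = gpa_state.allocator();

    var pool: ThreadPool = undefined;
    try pool.init(gpa);
    defer pool.deinit();

    const file = try std.fs.cwd().openFile("zig-out/bin/hello", .{});
    defer file.close();

    // One digest per 0x4000-byte chunk; the final chunk may be short.
    const file_size = try file.getEndPos();
    const total = std.mem.alignForward(file_size, 0x4000) / 0x4000;
    const digests = try gpa.alloc([Md5.digest_length]u8, total);
    defer gpa.free(digests);

    // max_file_size is omitted, so hash() falls back to file.getEndPos().
    var hasher = ParallelHasher(Md5){};
    try hasher.hash(gpa, &pool, file, digests, .{ .chunk_size = 0x4000 });
}
```

Note that `out.len` may exceed the chunk count; the assert only guards the lower bound, which is what lets CodeSignature pass in its pre-sized code_slots array.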
diff --git a/src/link/MachO/uuid.zig b/src/link/MachO/uuid.zig
new file mode 100644
index 0000000000..987b156a4b
--- /dev/null
+++ b/src/link/MachO/uuid.zig
@@ -0,0 +1,69 @@
+const std = @import("std");
+const fs = std.fs;
+const mem = std.mem;
+
+const Allocator = mem.Allocator;
+const Compilation = @import("../../Compilation.zig");
+const Md5 = std.crypto.hash.Md5;
+const Hasher = @import("hasher.zig").ParallelHasher;
+
+/// Somewhat random chunk size for MD5 hash calculation.
+pub const chunk_size = 0x4000;
+
+/// Calculates Md5 hash of the file contents.
+/// Hash is calculated in a streaming manner which may be slow.
+pub fn calcUuidStreaming(file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void {
+    const total_num_chunks = mem.alignForward(file_size, chunk_size) / chunk_size;
+
+    var hasher = Md5.init(.{});
+    var buffer: [chunk_size]u8 = undefined;
+
+    var i: usize = 0;
+    while (i < total_num_chunks) : (i += 1) {
+        const start = i * chunk_size;
+        const size = if (start + chunk_size > file_size)
+            file_size - start
+        else
+            chunk_size;
+        const amt = try file.preadAll(&buffer, start);
+        if (amt != size) return error.InputOutput;
+
+        hasher.update(buffer[0..size]);
+    }
+
+    hasher.final(out);
+    conform(out);
+}
+
+/// Calculates Md5 hash of each chunk in parallel and then hashes all Md5 hashes to produce
+/// the final digest.
+/// While this is NOT a correct MD5 hash of the contents, this methodology is used by LLVM/LLD
+/// and we will use it too as it seems accepted by Apple OSes.
+pub fn calcUuidParallel(comp: *const Compilation, file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void {
+    const total_hashes = mem.alignForward(file_size, chunk_size) / chunk_size;
+
+    const hashes = try comp.gpa.alloc([Md5.digest_length]u8, total_hashes);
+    defer comp.gpa.free(hashes);
+
+    var hasher = Hasher(Md5){};
+    try hasher.hash(comp.gpa, comp.thread_pool, file, hashes, .{
+        .chunk_size = chunk_size,
+        .max_file_size = file_size,
+    });
+
+    const final_buffer = try comp.gpa.alloc(u8, total_hashes * Md5.digest_length);
+    defer comp.gpa.free(final_buffer);
+
+    for (hashes) |hash, i| {
+        mem.copy(u8, final_buffer[i * Md5.digest_length ..][0..Md5.digest_length], &hash);
+    }
+
+    Md5.hash(final_buffer, out, .{});
+    conform(out);
+}
+
+inline fn conform(out: *[Md5.digest_length]u8) void {
+    // Conform to RFC 4122: set the version nibble to 3 (name-based, MD5) and the variant bits to 0b10, as LLD does for LC_UUID.
+    out[6] = (out[6] & 0x0F) | (3 << 4);
+    out[8] = (out[8] & 0x3F) | 0x80;
+}
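To make the LLVM/LLD "hash of hashes" scheme concrete: each 0x4000-byte chunk gets its own MD5 digest, and the final UUID bytes are the MD5 of those digests concatenated in order. A sequential, in-memory sketch of what calcUuidParallel computes; hashOfHashes is illustrative rather than part of the patch, and conform() is left out:

```zig
const std = @import("std");
const Md5 = std.crypto.hash.Md5;

/// Sequential equivalent of calcUuidParallel's digest-of-digests, minus conform().
fn hashOfHashes(gpa: std.mem.Allocator, data: []const u8, out: *[Md5.digest_length]u8) !void {
    const chunk_size: usize = 0x4000;
    const n = std.mem.alignForward(data.len, chunk_size) / chunk_size;
    const digests = try gpa.alloc([Md5.digest_length]u8, n);
    defer gpa.free(digests);

    // Hash each chunk on its own (the patch farms this step out to the thread pool)...
    var i: usize = 0;
    while (i < n) : (i += 1) {
        const start = i * chunk_size;
        const end = std.math.min(start + chunk_size, data.len);
        Md5.hash(data[start..end], &digests[i], .{});
    }

    // ...then hash the concatenated per-chunk digests into the final 16 bytes.
    Md5.hash(std.mem.sliceAsBytes(digests), out, .{});
}
```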
diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig
index 9a66d76e1a..022167e223 100644
--- a/src/link/MachO/zld.zig
+++ b/src/link/MachO/zld.zig
@@ -4037,8 +4037,15 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr
     const uuid_offset_backpatch: ?usize = blk: {
         const index = lc_buffer.items.len;
         var uuid_buf: [16]u8 = [_]u8{0} ** 16;
+
+        if (zld.options.optimize_mode == .Debug) {
+            // In Debug we don't really care about reproducibility, so put in a random value
+            // and be done with it.
+            std.crypto.random.bytes(&uuid_buf);
+        }
+
         try load_commands.writeUuidLC(&uuid_buf, &ncmds, lc_writer);
-        break :blk index;
+        break :blk if (zld.options.optimize_mode == .Debug) null else index;
     };
 
     try load_commands.writeLoadDylibLCs(zld.dylibs.items, zld.referenced_dylibs.keys(), &ncmds, lc_writer);
@@ -4076,7 +4083,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr
         const seg = zld.getLinkeditSegmentPtr();
         const file_size = seg.fileoff + seg.filesize;
         var uuid_buf: [16]u8 = undefined;
-        try uuid.calcMd5Hash(zld.gpa, zld.file, file_size, &uuid_buf);
+        try uuid.calcUuidParallel(comp, zld.file, file_size, &uuid_buf);
        const offset = @sizeOf(macho.mach_header_64) + headers_buf.items.len + backpatch + @sizeOf(macho.load_command);
         try zld.file.pwriteAll(&uuid_buf, offset);
     }
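A closing note on the zld.zig change: in Debug the UUID load command is filled with random bytes up front and uuid_offset_backpatch becomes null, so the post-link pwriteAll is skipped entirely; in release modes the zeroed placeholder is backpatched with the content-derived digest once the linkedit data is on disk, keeping release builds reproducible. The bit-twiddling conform() applies to that digest can be checked in isolation; a small standalone test with arbitrary input bytes:

```zig
const std = @import("std");

test "LC_UUID version and variant bits" {
    var uuid: [16]u8 = [_]u8{0xFF} ** 16;
    // Same two operations as conform() in uuid.zig:
    uuid[6] = (uuid[6] & 0x0F) | (3 << 4); // version nibble -> 3 (name-based, MD5)
    uuid[8] = (uuid[8] & 0x3F) | 0x80; // variant bits -> 0b10 (RFC 4122)
    try std.testing.expectEqual(@as(u8, 0x3F), uuid[6]);
    try std.testing.expectEqual(@as(u8, 0xBF), uuid[8]);
}
```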