mirror of
https://github.com/ziglang/zig.git
synced 2026-02-09 19:10:48 +00:00
macho: hash the entire file contents for UUID but calc in parallel
This commit is contained in:
parent
b3a2ab3fed
commit
10aaf2983d
@ -13,6 +13,7 @@ const mem = std.mem;
|
||||
const meta = std.meta;
|
||||
|
||||
const aarch64 = @import("../arch/aarch64/bits.zig");
|
||||
const calcUuid = @import("MachO/uuid.zig").calcUuid;
|
||||
const codegen = @import("../codegen.zig");
|
||||
const dead_strip = @import("MachO/dead_strip.zig");
|
||||
const fat = @import("MachO/fat.zig");
|
||||
@ -756,11 +757,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No
|
||||
});
|
||||
try load_commands.writeBuildVersionLC(&self.base.options, lc_writer);
|
||||
|
||||
if (self.cold_start) {
|
||||
std.crypto.random.bytes(&self.uuid_cmd.uuid);
|
||||
Md5.hash(&self.uuid_cmd.uuid, &self.uuid_cmd.uuid, .{});
|
||||
conformUuid(&self.uuid_cmd.uuid);
|
||||
}
|
||||
const uuid_cmd_offset = @sizeOf(macho.mach_header_64) + @intCast(u32, lc_buffer.items.len);
|
||||
try lc_writer.writeStruct(self.uuid_cmd);
|
||||
|
||||
try load_commands.writeLoadDylibLCs(self.dylibs.items, self.referenced_dylibs.keys(), lc_writer);
|
||||
@ -769,10 +766,10 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No
|
||||
try lc_writer.writeStruct(self.codesig_cmd);
|
||||
}
|
||||
|
||||
try self.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64));
|
||||
|
||||
const ncmds = load_commands.calcNumOfLCs(lc_buffer.items);
|
||||
try self.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64));
|
||||
try self.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len));
|
||||
try self.writeUuid(comp, uuid_cmd_offset);
|
||||
|
||||
if (codesig) |*csig| {
|
||||
try self.writeCodeSignature(comp, csig); // code signing always comes last
|
||||
@ -3510,6 +3507,14 @@ fn writeDysymtab(self: *MachO, ctx: SymtabCtx) !void {
|
||||
self.dysymtab_cmd.nindirectsyms = nindirectsyms;
|
||||
}
|
||||
|
||||
/// Computes the UUID of the output file via `calcUuid` and patches the result into the
/// UUID load command that has already been written to disk.
/// `uuid_cmd_offset` is the file offset at which the UUID load command starts.
fn writeUuid(self: *MachO, comp: *const Compilation, uuid_cmd_offset: u32) !void {
    const linkedit = self.getLinkeditSegmentPtr();
    const out_file = self.base.file.?;

    // Hash everything from the start of the file up to the end of the segment returned above.
    const hashed_len = linkedit.fileoff + linkedit.filesize;
    try calcUuid(comp, out_file, hashed_len, &self.uuid_cmd.uuid);

    // Step over the leading `macho.load_command` part of the command so that only the
    // uuid bytes themselves are overwritten.
    const uuid_bytes_offset = uuid_cmd_offset + @sizeOf(macho.load_command);
    try out_file.pwriteAll(&self.uuid_cmd.uuid, uuid_bytes_offset);
}
|
||||
|
||||
fn writeCodeSignaturePadding(self: *MachO, code_sig: *CodeSignature) !void {
|
||||
const seg = self.getLinkeditSegmentPtr();
|
||||
// Code signature data has to be 16-bytes aligned for Apple tools to recognize the file
|
||||
|
||||
46
src/link/MachO/uuid.zig
Normal file
46
src/link/MachO/uuid.zig
Normal file
@ -0,0 +1,46 @@
|
||||
const std = @import("std");
|
||||
const fs = std.fs;
|
||||
const mem = std.mem;
|
||||
|
||||
const Allocator = mem.Allocator;
|
||||
const Compilation = @import("../../Compilation.zig");
|
||||
const Md5 = std.crypto.hash.Md5;
|
||||
const Hasher = @import("hasher.zig").ParallelHasher;
|
||||
|
||||
/// Somewhat random chunk size for MD5 hash calculation.
|
||||
pub const chunk_size = 0x4000;
|
||||
|
||||
/// Calculates Md5 hash of each `chunk_size`-sized chunk of `file` in parallel and then hashes
/// all Md5 hashes, in chunk order, to produce the final digest written to `out`.
/// `file_size` determines the number of chunks and is forwarded to the hasher as
/// `max_file_size`.
/// While this is NOT a correct MD5 hash of the contents, this methodology is used by LLVM/LLD
/// and we will use it too as it seems accepted by Apple OSes.
/// TODO LLD also hashes the output filename to disambiguate between same builds with different
/// output files. Should we also do that?
pub fn calcUuid(comp: *const Compilation, file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void {
    // One digest per chunk; a trailing partial chunk still gets its own digest.
    const total_hashes_u64 = mem.alignForward(u64, file_size, chunk_size) / chunk_size;
    // Allocation lengths are `usize`, which is narrower than `u64` on 32-bit hosts. A count
    // that does not fit could never be allocated anyway, so report it as out of memory.
    const total_hashes = std.math.cast(usize, total_hashes_u64) orelse return error.OutOfMemory;

    const hashes = try comp.gpa.alloc([Md5.digest_length]u8, total_hashes);
    defer comp.gpa.free(hashes);

    var hasher = Hasher(Md5){};
    try hasher.hash(comp.gpa, comp.thread_pool, file, hashes, .{
        .chunk_size = chunk_size,
        .max_file_size = file_size,
    });

    // Stream the per-chunk digests into a single Md5 state. This yields the same digest as
    // hashing their concatenation, without allocating and filling a second buffer.
    var final_hasher = Md5.init(.{});
    for (hashes) |hash| final_hasher.update(&hash);
    final_hasher.final(out);

    conform(out);
}
|
||||
|
||||
/// Stamps the RFC 4122 version and variant fields onto a raw Md5 digest so that it can be
/// used as an LC_UUID value. All other bits of `out` are left untouched.
inline fn conform(out: *[Md5.digest_length]u8) void {
    // High nibble of byte 6 is the UUID version field: 3 marks an MD5-based UUID.
    const version_bits: u8 = 3 << 4;
    // Top two bits of byte 8 are the variant field: binary 10 is the RFC 4122 variant.
    const variant_bits: u8 = 0x80;

    const low_nibble = out[6] & 0x0F;
    const low_six_bits = out[8] & 0x3F;

    out[6] = version_bits | low_nibble;
    out[8] = variant_bits | low_six_bits;
}
|
||||
@ -9,14 +9,15 @@ const math = std.math;
|
||||
const mem = std.mem;
|
||||
|
||||
const aarch64 = @import("../../arch/aarch64/bits.zig");
|
||||
const calcUuid = @import("uuid.zig").calcUuid;
|
||||
const dead_strip = @import("dead_strip.zig");
|
||||
const eh_frame = @import("eh_frame.zig");
|
||||
const fat = @import("fat.zig");
|
||||
const link = @import("../../link.zig");
|
||||
const load_commands = @import("load_commands.zig");
|
||||
const stub_helpers = @import("stubs.zig");
|
||||
const thunks = @import("thunks.zig");
|
||||
const trace = @import("../../tracy.zig").trace;
|
||||
const stub_helpers = @import("stubs.zig");
|
||||
|
||||
const Allocator = mem.Allocator;
|
||||
const Archive = @import("Archive.zig");
|
||||
@ -2575,150 +2576,12 @@ pub const Zld = struct {
|
||||
self.dysymtab_cmd.nindirectsyms = nindirectsyms;
|
||||
}
|
||||
|
||||
fn writeUuid(self: *Zld, comp: *const Compilation, args: struct {
|
||||
linkedit_cmd_offset: u32,
|
||||
symtab_cmd_offset: u32,
|
||||
uuid_cmd_offset: u32,
|
||||
codesig_cmd_offset: ?u32,
|
||||
}) !void {
|
||||
_ = comp;
|
||||
switch (self.options.optimize_mode) {
|
||||
.Debug => {
|
||||
// In Debug we don't really care about reproducibility, so put in a random value
|
||||
// and be done with it.
|
||||
std.crypto.random.bytes(&self.uuid_cmd.uuid);
|
||||
Md5.hash(&self.uuid_cmd.uuid, &self.uuid_cmd.uuid, .{});
|
||||
conformUuid(&self.uuid_cmd.uuid);
|
||||
},
|
||||
else => {
|
||||
// We set the max file size to the actual strtab buffer length to exclude any strtab padding.
|
||||
const max_file_end = @intCast(u32, self.symtab_cmd.stroff + self.strtab.buffer.items.len);
|
||||
|
||||
const FileSubsection = struct {
|
||||
start: u32,
|
||||
end: u32,
|
||||
};
|
||||
|
||||
var subsections: [5]FileSubsection = undefined;
|
||||
var count: usize = 0;
|
||||
|
||||
// Exclude LINKEDIT segment command as it contains file size that includes stabs contribution
|
||||
// and code signature.
|
||||
subsections[count] = .{
|
||||
.start = 0,
|
||||
.end = args.linkedit_cmd_offset,
|
||||
};
|
||||
count += 1;
|
||||
|
||||
// Exclude SYMTAB and DYSYMTAB commands for the same reason.
|
||||
subsections[count] = .{
|
||||
.start = subsections[count - 1].end + @sizeOf(macho.segment_command_64),
|
||||
.end = args.symtab_cmd_offset,
|
||||
};
|
||||
count += 1;
|
||||
|
||||
// Exclude CODE_SIGNATURE command (if present).
|
||||
if (args.codesig_cmd_offset) |offset| {
|
||||
subsections[count] = .{
|
||||
.start = subsections[count - 1].end + @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command),
|
||||
.end = offset,
|
||||
};
|
||||
count += 1;
|
||||
}
|
||||
|
||||
if (!self.options.strip) {
|
||||
// Exclude region comprising all symbol stabs.
|
||||
const nlocals = self.dysymtab_cmd.nlocalsym;
|
||||
|
||||
const locals = try self.gpa.alloc(macho.nlist_64, nlocals);
|
||||
defer self.gpa.free(locals);
|
||||
|
||||
const locals_buf = @ptrCast([*]u8, locals.ptr)[0 .. @sizeOf(macho.nlist_64) * nlocals];
|
||||
const amt = try self.file.preadAll(locals_buf, self.symtab_cmd.symoff);
|
||||
if (amt != locals_buf.len) return error.InputOutput;
|
||||
|
||||
const istab: usize = for (locals, 0..) |local, i| {
|
||||
if (local.stab()) break i;
|
||||
} else locals.len;
|
||||
const nstabs = locals.len - istab;
|
||||
|
||||
if (nstabs == 0) {
|
||||
subsections[count] = .{
|
||||
.start = subsections[count - 1].end + if (args.codesig_cmd_offset == null)
|
||||
@as(u32, @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command))
|
||||
else
|
||||
@sizeOf(macho.linkedit_data_command),
|
||||
.end = max_file_end,
|
||||
};
|
||||
count += 1;
|
||||
} else {
|
||||
// Exclude a subsection of the strtab with names of the stabs.
|
||||
// We do not care about anything succeeding strtab as it is the code signature data which is
|
||||
// not part of the UUID calculation anyway.
|
||||
const stab_stroff = locals[istab].n_strx;
|
||||
|
||||
subsections[count] = .{
|
||||
.start = subsections[count - 1].end + if (args.codesig_cmd_offset == null)
|
||||
@as(u32, @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command))
|
||||
else
|
||||
@sizeOf(macho.linkedit_data_command),
|
||||
.end = @intCast(u32, self.symtab_cmd.symoff + istab * @sizeOf(macho.nlist_64)),
|
||||
};
|
||||
count += 1;
|
||||
|
||||
subsections[count] = .{
|
||||
.start = subsections[count - 1].end + @intCast(u32, nstabs * @sizeOf(macho.nlist_64)),
|
||||
.end = self.symtab_cmd.stroff + stab_stroff,
|
||||
};
|
||||
count += 1;
|
||||
}
|
||||
} else {
|
||||
subsections[count] = .{
|
||||
.start = subsections[count - 1].end + if (args.codesig_cmd_offset == null)
|
||||
@as(u32, @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command))
|
||||
else
|
||||
@sizeOf(macho.linkedit_data_command),
|
||||
.end = max_file_end,
|
||||
};
|
||||
count += 1;
|
||||
}
|
||||
|
||||
const chunk_size = 0x4000;
|
||||
|
||||
var hasher = Md5.init(.{});
|
||||
var buffer: [chunk_size]u8 = undefined;
|
||||
|
||||
for (subsections[0..count]) |cut| {
|
||||
const size = cut.end - cut.start;
|
||||
const num_chunks = mem.alignForward(usize, size, chunk_size) / chunk_size;
|
||||
|
||||
var i: usize = 0;
|
||||
while (i < num_chunks) : (i += 1) {
|
||||
const fstart = cut.start + i * chunk_size;
|
||||
const fsize = if (fstart + chunk_size > cut.end)
|
||||
cut.end - fstart
|
||||
else
|
||||
chunk_size;
|
||||
const amt = try self.file.preadAll(buffer[0..fsize], fstart);
|
||||
if (amt != fsize) return error.InputOutput;
|
||||
|
||||
hasher.update(buffer[0..fsize]);
|
||||
}
|
||||
}
|
||||
|
||||
hasher.final(&self.uuid_cmd.uuid);
|
||||
conformUuid(&self.uuid_cmd.uuid);
|
||||
},
|
||||
}
|
||||
|
||||
const in_file = args.uuid_cmd_offset + @sizeOf(macho.load_command);
|
||||
try self.file.pwriteAll(&self.uuid_cmd.uuid, in_file);
|
||||
}
|
||||
|
||||
inline fn conformUuid(out: *[Md5.digest_length]u8) void {
|
||||
// LC_UUID uuids should conform to RFC 4122 UUID version 4 & UUID version 5 formats
|
||||
out[6] = (out[6] & 0x0F) | (3 << 4);
|
||||
out[8] = (out[8] & 0x3F) | 0x80;
|
||||
/// Computes the UUID of the output file via `calcUuid` and writes the result into the file
/// at the position of the UUID load command's uuid bytes.
/// NOTE(review): `uuid_cmd_offset` is presumably the file offset of the start of the UUID
/// load command; its computation is not visible here, so confirm against the caller.
fn writeUuid(self: *Zld, comp: *const Compilation, uuid_cmd_offset: u32) !void {
    const linkedit = self.getLinkeditSegmentPtr();

    // Hash everything from the start of the file up to the end of the segment returned above.
    const hashed_len = linkedit.fileoff + linkedit.filesize;
    try calcUuid(comp, self.file, hashed_len, &self.uuid_cmd.uuid);

    // Step over the leading `macho.load_command` part of the command so that only the
    // uuid bytes themselves are overwritten.
    const uuid_bytes_offset = uuid_cmd_offset + @sizeOf(macho.load_command);
    try self.file.pwriteAll(&self.uuid_cmd.uuid, uuid_bytes_offset);
}
|
||||
|
||||
fn writeCodeSignaturePadding(self: *Zld, code_sig: *CodeSignature) !void {
|
||||
@ -4041,16 +3904,11 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr
|
||||
const lc_writer = lc_buffer.writer();
|
||||
|
||||
try zld.writeSegmentHeaders(lc_writer);
|
||||
const linkedit_cmd_offset = @sizeOf(macho.mach_header_64) + @intCast(u32, lc_buffer.items.len - @sizeOf(macho.segment_command_64));
|
||||
|
||||
try lc_writer.writeStruct(zld.dyld_info_cmd);
|
||||
try lc_writer.writeStruct(zld.function_starts_cmd);
|
||||
try lc_writer.writeStruct(zld.data_in_code_cmd);
|
||||
|
||||
const symtab_cmd_offset = @sizeOf(macho.mach_header_64) + @intCast(u32, lc_buffer.items.len);
|
||||
try lc_writer.writeStruct(zld.symtab_cmd);
|
||||
try lc_writer.writeStruct(zld.dysymtab_cmd);
|
||||
|
||||
try load_commands.writeDylinkerLC(lc_writer);
|
||||
|
||||
if (zld.options.output_mode == .Exe) {
|
||||
@ -4088,22 +3946,14 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr
|
||||
|
||||
try load_commands.writeLoadDylibLCs(zld.dylibs.items, zld.referenced_dylibs.keys(), lc_writer);
|
||||
|
||||
var codesig_cmd_offset: ?u32 = null;
|
||||
if (requires_codesig) {
|
||||
codesig_cmd_offset = @sizeOf(macho.mach_header_64) + @intCast(u32, lc_buffer.items.len);
|
||||
try lc_writer.writeStruct(zld.codesig_cmd);
|
||||
}
|
||||
|
||||
const ncmds = load_commands.calcNumOfLCs(lc_buffer.items);
|
||||
try zld.file.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64));
|
||||
try zld.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len));
|
||||
|
||||
try zld.writeUuid(comp, .{
|
||||
.linkedit_cmd_offset = linkedit_cmd_offset,
|
||||
.symtab_cmd_offset = symtab_cmd_offset,
|
||||
.uuid_cmd_offset = uuid_cmd_offset,
|
||||
.codesig_cmd_offset = codesig_cmd_offset,
|
||||
});
|
||||
try zld.writeUuid(comp, uuid_cmd_offset);
|
||||
|
||||
if (codesig) |*csig| {
|
||||
try zld.writeCodeSignature(comp, csig); // code signing always comes last
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user