macho: move parallel file hashing back to CodeSignature

I need to think some more about how to calculate the UUID in
parallel, if it is even possible, while preserving its determinism.
Jakub Konka 2022-12-15 23:43:50 +01:00
parent 1928ed7dab
commit 585c21e54d
4 changed files with 67 additions and 101 deletions
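
For context on the determinism concern in the commit message: one common way to hash in parallel without making the output depend on thread scheduling is to hash fixed-size chunks concurrently and then digest the per-chunk hashes in file order on a single thread. The sketch below is not code from this commit; it is a hypothetical helper (the name and signature are made up) that assumes the per-chunk MD5 digests have already been collected, for example by the kind of worker used elsewhere in this commit, and it reuses the version/variant stamping that conformUuid applies in zld.zig.

const std = @import("std");
const Md5 = std.crypto.hash.Md5;

/// Hypothetical helper, not part of this commit: folds per-chunk MD5
/// digests (ordered by file offset) into a single UUID. Because the
/// digests are consumed in file order, the result does not depend on
/// how many threads produced them or in what order they finished.
fn uuidFromChunkHashes(chunk_hashes: []const [Md5.digest_length]u8, out: *[Md5.digest_length]u8) void {
    var final = Md5.init(.{});
    for (chunk_hashes) |digest| {
        final.update(&digest);
    }
    final.final(out);
    // Same version/variant stamping as conformUuid in zld.zig.
    out[6] = (out[6] & 0x0F) | (3 << 4);
    out[8] = (out[8] & 0x3F) | 0x80;
}

Note the trade-off: a two-level digest like this is deterministic, but it differs from a plain single-pass MD5 of the same bytes, so adopting it changes the UUID value rather than merely speeding up its computation.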

CMakeLists.txt

@@ -591,7 +591,6 @@ set(ZIG_STAGE2_SOURCES
"${CMAKE_SOURCE_DIR}/src/link/MachO/bind.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/dead_strip.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/fat.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/hasher.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/load_commands.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/thunks.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/zld.zig"

src/link/MachO/CodeSignature.zig

@@ -10,8 +10,9 @@ const testing = std.testing;
const Allocator = mem.Allocator;
const Compilation = @import("../../Compilation.zig");
const Hasher = @import("hasher.zig").ParallelHasher;
const Sha256 = std.crypto.hash.sha2.Sha256;
const ThreadPool = @import("../../ThreadPool.zig");
const WaitGroup = @import("../../WaitGroup.zig");
const hash_size = Sha256.digest_length;
@@ -288,11 +289,7 @@ pub fn writeAdhocSignature(
    self.code_directory.inner.nCodeSlots = total_pages;

    // Calculate hash for each page (in file) and write it to the buffer
    var hasher = Hasher(Sha256){};
    try hasher.hash(gpa, comp.thread_pool, opts.file, self.code_directory.code_slots.items, .{
        .chunk_size = self.page_size,
        .max_file_size = opts.file_size,
    });
    try self.parallelHash(gpa, comp.thread_pool, opts.file, opts.file_size);

    try blobs.append(.{ .code_directory = &self.code_directory });
    header.length += @sizeOf(macho.BlobIndex);
@@ -351,6 +348,62 @@ pub fn writeAdhocSignature(
}
}
fn parallelHash(
    self: *CodeSignature,
    gpa: Allocator,
    pool: *ThreadPool,
    file: fs.File,
    file_size: u64,
) !void {
    var wg: WaitGroup = .{};

    const total_num_chunks = mem.alignForward(file_size, self.page_size) / self.page_size;
    assert(self.code_directory.code_slots.items.len >= total_num_chunks);

    const buffer = try gpa.alloc(u8, self.page_size * total_num_chunks);
    defer gpa.free(buffer);

    const results = try gpa.alloc(fs.File.PReadError!usize, total_num_chunks);
    defer gpa.free(results);

    {
        wg.reset();
        defer wg.wait();

        var i: usize = 0;
        while (i < total_num_chunks) : (i += 1) {
            const fstart = i * self.page_size;
            const fsize = if (fstart + self.page_size > file_size)
                file_size - fstart
            else
                self.page_size;
            wg.start();
            try pool.spawn(worker, .{
                file,
                fstart,
                buffer[fstart..][0..fsize],
                &self.code_directory.code_slots.items[i],
                &results[i],
                &wg,
            });
        }
    }

    for (results) |result| _ = try result;
}

fn worker(
    file: fs.File,
    fstart: usize,
    buffer: []u8,
    out: *[hash_size]u8,
    err: *fs.File.PReadError!usize,
    wg: *WaitGroup,
) void {
    defer wg.finish();
    err.* = file.preadAll(buffer, fstart);
    Sha256.hash(buffer, out, .{});
}

pub fn size(self: CodeSignature) u32 {
    var ssize: u32 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) + self.code_directory.size();
    if (self.requirements) |req| {

src/link/MachO/hasher.zig

@@ -1,68 +0,0 @@
const std = @import("std");
const assert = std.debug.assert;
const fs = std.fs;
const mem = std.mem;
const Allocator = mem.Allocator;
const ThreadPool = @import("../../ThreadPool.zig");
const WaitGroup = @import("../../WaitGroup.zig");
pub fn ParallelHasher(comptime Hasher: type) type {
    const hash_size = Hasher.digest_length;

    return struct {
        pub fn hash(self: @This(), gpa: Allocator, pool: *ThreadPool, file: fs.File, out: [][hash_size]u8, opts: struct {
            chunk_size: u16 = 0x4000,
            file_pos: u64 = 0,
            max_file_size: ?u64 = null,
        }) !void {
            _ = self;

            var wg: WaitGroup = .{};

            const file_size = opts.max_file_size orelse try file.getEndPos();
            const total_num_chunks = mem.alignForward(file_size, opts.chunk_size) / opts.chunk_size;
            assert(out.len >= total_num_chunks);

            const buffer = try gpa.alloc(u8, opts.chunk_size * total_num_chunks);
            defer gpa.free(buffer);

            const results = try gpa.alloc(fs.File.PReadError!usize, total_num_chunks);
            defer gpa.free(results);

            {
                wg.reset();
                defer wg.wait();

                var i: usize = 0;
                while (i < total_num_chunks) : (i += 1) {
                    const fstart = i * opts.chunk_size;
                    const fsize = if (fstart + opts.chunk_size > file_size) file_size - fstart else opts.chunk_size;
                    wg.start();
                    try pool.spawn(worker, .{
                        file,
                        fstart + opts.file_pos,
                        buffer[fstart..][0..fsize],
                        &out[i],
                        &results[i],
                        &wg,
                    });
                }
            }

            for (results) |result| _ = try result;
        }

        fn worker(
            file: fs.File,
            fstart: usize,
            buffer: []u8,
            out: *[hash_size]u8,
            err: *fs.File.PReadError!usize,
            wg: *WaitGroup,
        ) void {
            defer wg.finish();
            err.* = file.preadAll(buffer, fstart);
            Hasher.hash(buffer, out, .{});
        }
    };
}

src/link/MachO/zld.zig

@@ -2692,7 +2692,12 @@ pub const Zld = struct {
conformUuid(&self.uuid_cmd.uuid);
},
else => {
const max_file_size = self.symtab_cmd.stroff + self.symtab_cmd.strsize;
const max_file_end = self.symtab_cmd.stroff + self.symtab_cmd.strsize;
const FileSubsection = struct {
start: u32,
end: u32,
};
var subsections: [5]FileSubsection = undefined;
var count: usize = 0;
@@ -2743,7 +2748,7 @@ pub const Zld = struct {
@as(u32, @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command))
else
@sizeOf(macho.linkedit_data_command),
.end = max_file_size,
.end = max_file_end,
};
count += 1;
} else {
@@ -2773,7 +2778,7 @@ pub const Zld = struct {
@as(u32, @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command))
else
@sizeOf(macho.linkedit_data_command),
.end = max_file_size,
.end = max_file_end,
};
count += 1;
}
@@ -2816,29 +2821,6 @@ pub const Zld = struct {
out[8] = (out[8] & 0x3F) | 0x80;
}
const FileSubsection = struct {
start: u32,
end: u32,
};
// fn calcUuidHashes(
// self: *Zld,
// comp: *const Compilation,
// cut: FileSubsection,
// hashes: *std.ArrayList([Md5.digest_length]u8),
// ) !void {
// const chunk_size = 0x4000;
// const total_hashes = mem.alignForward(cut.end - cut.start, chunk_size) / chunk_size;
// try hashes.resize(hashes.items.len + total_hashes);
// var hasher = Hasher(Md5){};
// try hasher.hash(self.gpa, comp.thread_pool, self.file, hashes.items, .{
// .chunk_size = chunk_size,
// .file_pos = cut.start,
// .max_file_size = cut.end - cut.start,
// });
// }
fn writeCodeSignaturePadding(self: *Zld, code_sig: *CodeSignature) !void {
const seg = self.getLinkeditSegmentPtr();
// Code signature data has to be 16-bytes aligned for Apple tools to recognize the file