diff --git a/tools/fetch_them_macos_headers.zig b/tools/fetch_them_macos_headers.zig index f436990072..3d17681ee0 100644 --- a/tools/fetch_them_macos_headers.zig +++ b/tools/fetch_them_macos_headers.zig @@ -7,22 +7,19 @@ const assert = std.debug.assert; const tmpDir = std.testing.tmpDir; const Allocator = mem.Allocator; -const Blake3 = std.crypto.hash.Blake3; const OsTag = std.Target.Os.Tag; var general_purpose_allocator = std.heap.GeneralPurposeAllocator(.{}){}; const gpa = general_purpose_allocator.allocator(); const Arch = enum { - any, aarch64, x86_64, }; -const Abi = enum { any, none }; +const Abi = enum { none }; const OsVer = enum(u32) { - any = 0, catalina = 10, big_sur = 11, monterey = 12, @@ -37,22 +34,6 @@ const Target = struct { os_ver: OsVer, abi: Abi = .none, - fn hash(a: Target) u32 { - var hasher = std.hash.Wyhash.init(0); - std.hash.autoHash(&hasher, a.arch); - std.hash.autoHash(&hasher, a.os); - std.hash.autoHash(&hasher, a.os_ver); - std.hash.autoHash(&hasher, a.abi); - return @as(u32, @truncate(hasher.final())); - } - - fn eql(a: Target, b: Target) bool { - return a.arch == b.arch and - a.os == b.os and - a.os_ver == b.os_ver and - a.abi == b.abi; - } - fn name(self: Target, allocator: Allocator) ![]const u8 { return std.fmt.allocPrint(allocator, "{s}-{s}-{s}", .{ @tagName(self.arch), @@ -62,7 +43,6 @@ const Target = struct { } fn fullName(self: Target, allocator: Allocator) ![]const u8 { - if (self.os_ver == .any) return self.name(allocator); return std.fmt.allocPrint(allocator, "{s}-{s}.{d}-{s}", .{ @tagName(self.arch), @tagName(self.os), @@ -72,179 +52,10 @@ const Target = struct { } }; -const targets = [_]Target{ - Target{ - .arch = .any, - .abi = .any, - .os_ver = .any, - }, - Target{ - .arch = .aarch64, - .os_ver = .any, - }, - Target{ - .arch = .x86_64, - .os_ver = .any, - }, - Target{ - .arch = .x86_64, - .os_ver = .catalina, - }, - Target{ - .arch = .x86_64, - .os_ver = .big_sur, - }, - Target{ - .arch = .x86_64, - .os_ver = .monterey, - }, - Target{ - .arch = .x86_64, - .os_ver = .ventura, - }, - Target{ - .arch = .x86_64, - .os_ver = .sonoma, - }, - Target{ - .arch = .x86_64, - .os_ver = .sequoia, - }, - Target{ - .arch = .aarch64, - .os_ver = .big_sur, - }, - Target{ - .arch = .aarch64, - .os_ver = .monterey, - }, - Target{ - .arch = .aarch64, - .os_ver = .ventura, - }, - Target{ - .arch = .aarch64, - .os_ver = .sonoma, - }, - Target{ - .arch = .aarch64, - .os_ver = .sequoia, - }, -}; - const headers_source_prefix: []const u8 = "headers"; -const Contents = struct { - bytes: []const u8, - hit_count: usize, - hash: []const u8, - is_generic: bool, - - fn hitCountLessThan(context: void, lhs: *const Contents, rhs: *const Contents) bool { - _ = context; - return lhs.hit_count < rhs.hit_count; - } -}; - -const TargetToHashContext = struct { - pub fn hash(self: @This(), target: Target) u32 { - _ = self; - return target.hash(); - } - pub fn eql(self: @This(), a: Target, b: Target, b_index: usize) bool { - _ = self; - _ = b_index; - return a.eql(b); - } -}; -const TargetToHash = std.ArrayHashMap(Target, []const u8, TargetToHashContext, true); - -const HashToContents = std.StringHashMap(Contents); -const PathTable = std.StringHashMap(*TargetToHash); - -/// The don't-dedup-list contains file paths with known problematic headers -/// which while contain the same contents between architectures, should not be -/// deduped since they contain includes, etc. which are relative and thus cannot be separated -/// into a shared include dir such as `any-macos-any`. -const dont_dedup_list = &[_][]const u8{ - "libkern/OSAtomic.h", - "libkern/OSAtomicDeprecated.h", - "libkern/OSSpinLockDeprecated.h", - "libkern/OSAtomicQueue.h", -}; - -fn generateDontDedupMap(arena: Allocator) !std.StringHashMap(void) { - var map = std.StringHashMap(void).init(arena); - try map.ensureTotalCapacity(dont_dedup_list.len); - for (dont_dedup_list) |path| { - map.putAssumeCapacityNoClobber(path, {}); - } - return map; -} - const usage = - \\fetch_them_macos_headers fetch - \\fetch_them_macos_headers dedup - \\ - \\Commands: - \\ fetch Fetch libc headers into headers/-macos. dir - \\ dedup Generate deduplicated dirs into a given path - \\ - \\General Options: - \\-h, --help Print this help and exit -; - -pub fn main() anyerror!void { - var arena = std.heap.ArenaAllocator.init(gpa); - defer arena.deinit(); - - const all_args = try std.process.argsAlloc(arena.allocator()); - const args = all_args[1..]; - if (args.len == 0) fatal("no command or option specified", .{}); - - const cmd = args[0]; - if (mem.eql(u8, cmd, "--help") or mem.eql(u8, cmd, "-h")) { - return info(usage, .{}); - } else if (mem.eql(u8, cmd, "dedup")) { - return dedup(arena.allocator(), args[1..]); - } else if (mem.eql(u8, cmd, "fetch")) { - return fetch(arena.allocator(), args[1..]); - } else fatal("unknown command or option: {s}", .{cmd}); -} - -const ArgsIterator = struct { - args: []const []const u8, - i: usize = 0, - - fn next(it: *@This()) ?[]const u8 { - if (it.i >= it.args.len) { - return null; - } - defer it.i += 1; - return it.args[it.i]; - } - - fn nextOrFatal(it: *@This()) []const u8 { - const arg = it.next() orelse fatal("expected parameter after '{s}'", .{it.args[it.i - 1]}); - return arg; - } -}; - -fn info(comptime format: []const u8, args: anytype) void { - const msg = std.fmt.allocPrint(gpa, "info: " ++ format ++ "\n", args) catch return; - std.io.getStdOut().writeAll(msg) catch {}; -} - -fn fatal(comptime format: []const u8, args: anytype) noreturn { - ret: { - const msg = std.fmt.allocPrint(gpa, "fatal: " ++ format ++ "\n", args) catch break :ret; - std.io.getStdErr().writeAll(msg) catch {}; - } - std.process.exit(1); -} - -const fetch_usage = - \\fetch_them_macos_headers fetch + \\fetch_them_macos_headers [options] [cc args] \\ \\Options: \\ --sysroot Path to macOS SDK @@ -253,14 +64,20 @@ const fetch_usage = \\-h, --help Print this help and exit ; -fn fetch(arena: Allocator, args: []const []const u8) !void { - var argv = std.ArrayList([]const u8).init(arena); +pub fn main() anyerror!void { + var arena = std.heap.ArenaAllocator.init(gpa); + defer arena.deinit(); + const allocator = arena.allocator(); + + const args = try std.process.argsAlloc(allocator); + + var argv = std.ArrayList([]const u8).init(allocator); var sysroot: ?[]const u8 = null; - var args_iter = ArgsIterator{ .args = args }; + var args_iter = ArgsIterator{ .args = args[1..] }; while (args_iter.next()) |arg| { if (mem.eql(u8, arg, "--help") or mem.eql(u8, arg, "-h")) { - return info(fetch_usage, .{}); + return info(usage, .{}); } else if (mem.eql(u8, arg, "--sysroot")) { sysroot = args_iter.nextOrFatal(); } else try argv.append(arg); @@ -268,17 +85,17 @@ fn fetch(arena: Allocator, args: []const []const u8) !void { const sysroot_path = sysroot orelse blk: { const target = try std.zig.system.resolveTargetQuery(.{}); - break :blk std.zig.system.darwin.getSdk(arena, target) orelse + break :blk std.zig.system.darwin.getSdk(allocator, target) orelse fatal("no SDK found; you can provide one explicitly with '--sysroot' flag", .{}); }; var sdk_dir = try std.fs.cwd().openDir(sysroot_path, .{}); defer sdk_dir.close(); - const sdk_info = try sdk_dir.readFileAlloc(arena, "SDKSettings.json", std.math.maxInt(u32)); + const sdk_info = try sdk_dir.readFileAlloc(allocator, "SDKSettings.json", std.math.maxInt(u32)); const parsed_json = try std.json.parseFromSlice(struct { DefaultProperties: struct { MACOSX_DEPLOYMENT_TARGET: []const u8 }, - }, arena, sdk_info, .{ .ignore_unknown_fields = true }); + }, allocator, sdk_info, .{ .ignore_unknown_fields = true }); const version = Version.parse(parsed_json.value.DefaultProperties.MACOSX_DEPLOYMENT_TARGET) orelse fatal("don't know how to parse SDK version: {s}", .{ @@ -303,7 +120,7 @@ fn fetch(arena: Allocator, args: []const []const u8) !void { .arch = arch, .os_ver = os_ver, }; - try fetchTarget(arena, argv.items, sysroot_path, target, version, tmp); + try fetchTarget(allocator, argv.items, sysroot_path, target, version, tmp); } } @@ -333,7 +150,6 @@ fn fetchTarget( switch (target.arch) { .x86_64 => "x86_64", .aarch64 => "arm64", - else => unreachable, }, macos_version, "-isysroot", @@ -359,7 +175,7 @@ fn fetchTarget( std.log.err("{s}", .{res.stderr}); } - // Read in the contents of `upgrade.o.d` + // Read in the contents of `macos-headers.o.d` const headers_list_file = try tmp.dir.openFile(headers_list_filename, .{}); defer headers_list_file.close(); @@ -411,295 +227,35 @@ fn fetchTarget( } } -const dedup_usage = - \\fetch_them_macos_headers dedup [path] - \\ - \\General Options: - \\-h, --help Print this help and exit -; +const ArgsIterator = struct { + args: []const []const u8, + i: usize = 0, -/// Dedups libs headers assuming the following layered structure: -/// layer 1: x86_64-macos.10 x86_64-macos.11 x86_64-macos.12 aarch64-macos.11 aarch64-macos.12 -/// layer 2: any-macos.10 any-macos.11 any-macos.12 -/// layer 3: any-macos -/// -/// The first layer consists of headers specific to a CPU architecture AND macOS version. The second -/// layer consists of headers common to a macOS version across CPU architectures, and the final -/// layer consists of headers common to all libc headers. -fn dedup(arena: Allocator, args: []const []const u8) !void { - var path: ?[]const u8 = null; - var args_iter = ArgsIterator{ .args = args }; - while (args_iter.next()) |arg| { - if (mem.eql(u8, arg, "--help") or mem.eql(u8, arg, "-h")) { - return info(dedup_usage, .{}); - } else { - if (path != null) fatal("too many arguments", .{}); - path = arg; + fn next(it: *@This()) ?[]const u8 { + if (it.i >= it.args.len) { + return null; } + defer it.i += 1; + return it.args[it.i]; } - const dest_path = path orelse fatal("no destination path specified", .{}); - var dest_dir = fs.cwd().makeOpenPath(dest_path, .{}) catch |err| switch (err) { - error.NotDir => fatal("path '{s}' not a directory", .{dest_path}), - else => return err, - }; - defer dest_dir.close(); - - var dont_dedup_map = try generateDontDedupMap(arena); - var layer_2_targets = std.ArrayList(TargetWithPrefix).init(arena); - - for (&[_]OsVer{ .catalina, .big_sur, .monterey, .ventura, .sonoma, .sequoia }) |os_ver| { - var layer_1_targets = std.ArrayList(TargetWithPrefix).init(arena); - - for (targets) |target| { - if (target.os_ver != os_ver) continue; - try layer_1_targets.append(.{ - .prefix = headers_source_prefix, - .target = target, - }); - } - - if (layer_1_targets.items.len < 2) { - try layer_2_targets.appendSlice(layer_1_targets.items); - continue; - } - - const layer_2_target = try dedupDirs(arena, .{ - .os_ver = os_ver, - .dest_path = dest_path, - .dest_dir = dest_dir, - .targets = layer_1_targets.items, - .dont_dedup_map = &dont_dedup_map, - }); - try layer_2_targets.append(layer_2_target); + fn nextOrFatal(it: *@This()) []const u8 { + const arg = it.next() orelse fatal("expected parameter after '{s}'", .{it.args[it.i - 1]}); + return arg; } - - const layer_3_target = try dedupDirs(arena, .{ - .os_ver = .any, - .dest_path = dest_path, - .dest_dir = dest_dir, - .targets = layer_2_targets.items, - .dont_dedup_map = &dont_dedup_map, - }); - assert(layer_3_target.target.eql(targets[0])); -} - -const TargetWithPrefix = struct { - prefix: []const u8, - target: Target, }; -const DedupDirsArgs = struct { - os_ver: OsVer, - dest_path: []const u8, - dest_dir: fs.Dir, - targets: []const TargetWithPrefix, - dont_dedup_map: *const std.StringHashMap(void), -}; - -fn dedupDirs(arena: Allocator, args: DedupDirsArgs) !TargetWithPrefix { - var tmp = tmpDir(.{ .iterate = true }); - defer tmp.cleanup(); - - var path_table = PathTable.init(arena); - var hash_to_contents = HashToContents.init(arena); - - var savings = FindResult{}; - for (args.targets) |target| { - const res = try findDuplicates(target.target, arena, target.prefix, &path_table, &hash_to_contents); - savings.max_bytes_saved += res.max_bytes_saved; - savings.total_bytes += res.total_bytes; - } - - info("summary: {} could be reduced to {}", .{ - std.fmt.fmtIntSizeBin(savings.total_bytes), - std.fmt.fmtIntSizeBin(savings.total_bytes - savings.max_bytes_saved), - }); - - const output_target = Target{ - .arch = .any, - .abi = .any, - .os_ver = args.os_ver, - }; - const common_name = try output_target.fullName(arena); - - var missed_opportunity_bytes: usize = 0; - // Iterate path_table. For each path, put all the hashes into a list. Sort by hit_count. - // The hash with the highest hit_count gets to be the "generic" one. Everybody else - // gets their header in a separate arch directory. - var path_it = path_table.iterator(); - while (path_it.next()) |path_kv| { - if (!args.dont_dedup_map.contains(path_kv.key_ptr.*)) { - var contents_list = std.ArrayList(*Contents).init(arena); - { - var hash_it = path_kv.value_ptr.*.iterator(); - while (hash_it.next()) |hash_kv| { - const contents = &hash_to_contents.getEntry(hash_kv.value_ptr.*).?.value_ptr.*; - try contents_list.append(contents); - } - } - std.mem.sort(*Contents, contents_list.items, {}, Contents.hitCountLessThan); - const best_contents = contents_list.popOrNull().?; - if (best_contents.hit_count > 1) { - // Put it in `any-macos-none`. - const full_path = try fs.path.join(arena, &[_][]const u8{ common_name, path_kv.key_ptr.* }); - try tmp.dir.makePath(fs.path.dirname(full_path).?); - try tmp.dir.writeFile(.{ .sub_path = full_path, .data = best_contents.bytes }); - best_contents.is_generic = true; - while (contents_list.popOrNull()) |contender| { - if (contender.hit_count > 1) { - const this_missed_bytes = contender.hit_count * contender.bytes.len; - missed_opportunity_bytes += this_missed_bytes; - info("Missed opportunity ({}): {s}", .{ - std.fmt.fmtIntSizeBin(this_missed_bytes), - path_kv.key_ptr.*, - }); - } else break; - } - } - } - var hash_it = path_kv.value_ptr.*.iterator(); - while (hash_it.next()) |hash_kv| { - const contents = &hash_to_contents.getEntry(hash_kv.value_ptr.*).?.value_ptr.*; - if (contents.is_generic) continue; - - const target = hash_kv.key_ptr.*; - const target_name = try target.fullName(arena); - const full_path = try fs.path.join(arena, &[_][]const u8{ target_name, path_kv.key_ptr.* }); - try tmp.dir.makePath(fs.path.dirname(full_path).?); - try tmp.dir.writeFile(.{ .sub_path = full_path, .data = contents.bytes }); - } - } - - for (args.targets) |target| { - const target_name = try target.target.fullName(arena); - try args.dest_dir.deleteTree(target_name); - } - try args.dest_dir.deleteTree(common_name); - - var tmp_it = tmp.dir.iterate(); - while (try tmp_it.next()) |entry| { - switch (entry.kind) { - .directory => { - const sub_dir = try tmp.dir.openDir(entry.name, .{ .iterate = true }); - const dest_sub_dir = try args.dest_dir.makeOpenPath(entry.name, .{}); - try copyDirAll(sub_dir, dest_sub_dir); - }, - else => info("unexpected file format: not a directory: '{s}'", .{entry.name}), - } - } - - return TargetWithPrefix{ - .prefix = args.dest_path, - .target = output_target, - }; +fn info(comptime format: []const u8, args: anytype) void { + const msg = std.fmt.allocPrint(gpa, "info: " ++ format ++ "\n", args) catch return; + std.io.getStdOut().writeAll(msg) catch {}; } -const FindResult = struct { - max_bytes_saved: usize = 0, - total_bytes: usize = 0, -}; - -fn findDuplicates( - target: Target, - arena: Allocator, - dest_path: []const u8, - path_table: *PathTable, - hash_to_contents: *HashToContents, -) !FindResult { - var result = FindResult{}; - - const target_name = try target.fullName(arena); - const target_include_dir = try fs.path.join(arena, &[_][]const u8{ dest_path, target_name }); - var dir_stack = std.ArrayList([]const u8).init(arena); - try dir_stack.append(target_include_dir); - - while (dir_stack.popOrNull()) |full_dir_name| { - var dir = fs.cwd().openDir(full_dir_name, .{ .iterate = true }) catch |err| switch (err) { - error.FileNotFound => break, - error.AccessDenied => break, - else => return err, - }; - defer dir.close(); - - var dir_it = dir.iterate(); - - while (try dir_it.next()) |entry| { - const full_path = try fs.path.join(arena, &[_][]const u8{ full_dir_name, entry.name }); - switch (entry.kind) { - .directory => try dir_stack.append(full_path), - .file => { - const rel_path = try fs.path.relative(arena, target_include_dir, full_path); - const max_size = 2 * 1024 * 1024 * 1024; - const raw_bytes = try fs.cwd().readFileAlloc(arena, full_path, max_size); - const trimmed = mem.trim(u8, raw_bytes, " \r\n\t"); - result.total_bytes += raw_bytes.len; - const hash = try arena.alloc(u8, 32); - var hasher = Blake3.init(.{}); - hasher.update(rel_path); - hasher.update(trimmed); - hasher.final(hash); - const gop = try hash_to_contents.getOrPut(hash); - if (gop.found_existing) { - result.max_bytes_saved += raw_bytes.len; - gop.value_ptr.hit_count += 1; - info("duplicate: {s} {s} ({})", .{ - target_name, - rel_path, - std.fmt.fmtIntSizeBin(raw_bytes.len), - }); - } else { - gop.value_ptr.* = Contents{ - .bytes = trimmed, - .hit_count = 1, - .hash = hash, - .is_generic = false, - }; - } - const path_gop = try path_table.getOrPut(rel_path); - const target_to_hash = if (path_gop.found_existing) path_gop.value_ptr.* else blk: { - const ptr = try arena.create(TargetToHash); - ptr.* = TargetToHash.init(arena); - path_gop.value_ptr.* = ptr; - break :blk ptr; - }; - try target_to_hash.putNoClobber(target, hash); - }, - else => info("unexpected file: {s}", .{full_path}), - } - } - } - - return result; -} - -fn copyDirAll(source: fs.Dir, dest: fs.Dir) anyerror!void { - var it = source.iterate(); - while (try it.next()) |next| { - switch (next.kind) { - .directory => { - var sub_dir = try dest.makeOpenPath(next.name, .{}); - var sub_source = try source.openDir(next.name, .{ .iterate = true }); - defer { - sub_dir.close(); - sub_source.close(); - } - try copyDirAll(sub_source, sub_dir); - }, - .file => { - var source_file = try source.openFile(next.name, .{}); - var dest_file = try dest.createFile(next.name, .{}); - defer { - source_file.close(); - dest_file.close(); - } - const stat = try source_file.stat(); - const ncopied = try source_file.copyRangeAll(0, dest_file, 0, stat.size); - assert(ncopied == stat.size); - }, - else => |kind| info("unexpected file kind '{s}' will be ignored", .{@tagName(kind)}), - } +fn fatal(comptime format: []const u8, args: anytype) noreturn { + ret: { + const msg = std.fmt.allocPrint(gpa, "fatal: " ++ format ++ "\n", args) catch break :ret; + std.io.getStdErr().writeAll(msg) catch {}; } + std.process.exit(1); } const Version = struct {