//! To get started, run this tool with no args and read the help message.
//!
//! The build systems of glibc, musl, FreeBSD, and NetBSD require specifying a single target
//! architecture. Meanwhile, Zig supports out-of-the-box cross compilation for
//! every target. So the process to create libc headers that Zig ships is to use
//! this tool.
//!
//! First, use the glibc, musl, FreeBSD, and NetBSD build systems to create installations of all the
//! targets in the `glibc_targets`, `musl_targets`, `freebsd_targets`, and `netbsd_targets`
//! variables. Next, run this tool to create a new directory which puts .h files into
//! subdirectories, with `generic` being files that apply to all architectures.
//! You'll then have to manually update Zig source repo with these new files.

const std = @import("std");
const Arch = std.Target.Cpu.Arch;
const Abi = std.Target.Abi;
const OsTag = std.Target.Os.Tag;
const assert = std.debug.assert;
const Blake3 = std.crypto.hash.Blake3;

/// One libc installation to scan for headers.
const LibCTarget = struct {
    arch: Arch,
    abi: Abi,
    /// Output subdirectory name. When null, `main` derives it as
    /// `<arch>-<os>-<abi>`. Several targets may share one `dest`, in which
    /// case their headers are unified under that single directory.
    dest: ?[]const u8 = null,
};

const glibc_targets = [_]LibCTarget{
    .{ .arch = .arc, .abi = .gnu },
    .{ .arch = .arm, .abi = .gnueabi, .dest = "arm-linux-gnu" },
    .{ .arch = .arm, .abi = .gnueabihf, .dest = "arm-linux-gnu" },
    .{ .arch = .armeb, .abi = .gnueabi, .dest = "arm-linux-gnu" },
    .{ .arch = .armeb, .abi = .gnueabihf, .dest = "arm-linux-gnu" },
    .{ .arch = .aarch64, .abi = .gnu, .dest = "aarch64-linux-gnu" },
    .{ .arch = .aarch64_be, .abi = .gnu, .dest = "aarch64-linux-gnu" },
    .{ .arch = .csky, .abi = .gnueabi, .dest = "csky-linux-gnu" },
    .{ .arch = .csky, .abi = .gnueabihf, .dest = "csky-linux-gnu" },
    .{ .arch = .loongarch64, .abi = .gnu, .dest = "loongarch-linux-gnu" },
    .{ .arch = .loongarch64, .abi = .gnusf, .dest = "loongarch-linux-gnu" },
    .{ .arch = .m68k, .abi = .gnu },
    .{ .arch = .mips, .abi = .gnueabi, .dest = "mips-linux-gnu" },
    .{ .arch = .mips, .abi = .gnueabihf, .dest = "mips-linux-gnu" },
    .{ .arch = .mipsel, .abi = .gnueabi, .dest = "mips-linux-gnu" },
    .{ .arch = .mipsel, .abi = .gnueabihf, .dest = "mips-linux-gnu" },
    .{ .arch = .mips64, .abi = .gnuabi64, .dest = "mips-linux-gnu" },
    .{ .arch = .mips64, .abi = .gnuabin32, .dest = "mips-linux-gnu" },
    .{ .arch = .mips64el, .abi = .gnuabi64, .dest = "mips-linux-gnu" },
    .{ .arch = .mips64el, .abi = .gnuabin32, .dest = "mips-linux-gnu" },
    .{ .arch = .powerpc, .abi = .gnueabi, .dest = "powerpc-linux-gnu" },
    .{ .arch = .powerpc, .abi = .gnueabihf, .dest = "powerpc-linux-gnu" },
    .{ .arch = .powerpc64, .abi = .gnu, .dest = "powerpc-linux-gnu" },
    .{ .arch = .powerpc64le, .abi = .gnu, .dest = "powerpc-linux-gnu" },
    .{ .arch = .riscv32, .abi = .gnu, .dest = "riscv-linux-gnu" },
    .{ .arch = .riscv64, .abi = .gnu, .dest = "riscv-linux-gnu" },
    .{ .arch = .s390x, .abi = .gnu },
    .{ .arch = .sparc, .abi = .gnu, .dest = "sparc-linux-gnu" },
    .{ .arch = .sparc64, .abi = .gnu, .dest = "sparc-linux-gnu" },
    .{ .arch = .x86, .abi = .gnu, .dest = "x86-linux-gnu" },
    .{ .arch = .x86_64, .abi = .gnu, .dest = "x86-linux-gnu" },
    .{ .arch = .x86_64, .abi = .gnux32, .dest = "x86-linux-gnu" },
};

const musl_targets = [_]LibCTarget{
    .{ .arch = .arm, .abi = .musl },
    .{ .arch = .aarch64, .abi = .musl },
    .{ .arch = .hexagon, .abi = .musl },
    .{ .arch = .loongarch64, .abi = .musl },
    .{ .arch = .m68k, .abi = .musl },
    .{ .arch = .mips, .abi = .musl },
    .{ .arch = .mips64, .abi = .musl },
    .{ .arch = .mips64, .abi = .muslabin32 },
    .{ .arch = .powerpc, .abi = .musl },
    .{ .arch = .powerpc64, .abi = .musl },
    .{ .arch = .riscv32, .abi = .musl },
    .{ .arch = .riscv64, .abi = .musl },
    .{ .arch = .s390x, .abi = .musl },
    .{ .arch = .x86, .abi = .musl },
    .{ .arch = .x86_64, .abi = .musl },
    .{ .arch = .x86_64, .abi = .muslx32 },
};

const freebsd_targets = [_]LibCTarget{
    .{ .arch = .arm, .abi = .eabihf },
    .{ .arch = .aarch64, .abi = .none },
    .{ .arch = .powerpc, .abi = .eabihf },
    .{ .arch = .powerpc64, .abi = .none },
    .{ .arch = .riscv64, .abi = .none },
    .{ .arch = .x86, .abi = .none },
    .{ .arch = .x86_64, .abi = .none },
};

const netbsd_targets = [_]LibCTarget{
    .{ .arch = .arm, .abi = .eabi, .dest = "arm-netbsd-eabi" },
    .{ .arch = .arm, .abi = .eabihf, .dest = "arm-netbsd-eabi" },
    .{ .arch = .aarch64, .abi = .none },
    .{ .arch = .m68k, .abi = .none },
    .{ .arch = .mips, .abi = .eabi, .dest = "mips-netbsd-eabi" },
    .{ .arch = .mips, .abi = .eabihf, .dest = "mips-netbsd-eabi" },
    .{ .arch = .powerpc, .abi = .eabi, .dest = "powerpc-netbsd-eabi" },
    .{ .arch = .powerpc, .abi = .eabihf, .dest = "powerpc-netbsd-eabi" },
    .{ .arch = .sparc, .abi = .none },
    .{ .arch = .sparc64, .abi = .none },
    .{ .arch = .x86, .abi = .none },
    .{ .arch = .x86_64, .abi = .none },
};

/// A distinct (relative path, trimmed file contents) pair seen while scanning.
const Contents = struct {
    /// File contents with leading/trailing whitespace trimmed.
    bytes: []const u8,
    /// Number of scanned files that hashed to this entry.
    hit_count: usize,
    /// Blake3 digest of the relative path followed by `bytes`.
    hash: []const u8,
    /// Set once these contents have been written to the generic directory.
    is_generic: bool,

    /// Ascending-by-`hit_count` comparator for `std.mem.sort`.
    fn hitCountLessThan(context: void, lhs: *const Contents, rhs: *const Contents) bool {
        _ = context;
        return lhs.hit_count < rhs.hit_count;
    }
};

/// Digest bytes -> the contents that produced that digest.
const HashToContents = std.StringHashMap(Contents);
/// Destination target name -> digest of that target's version of a header.
const TargetToHash = std.StringArrayHashMap([]const u8);
/// Header path relative to the include dir -> per-target digests.
const PathTable = std.StringHashMap(*TargetToHash);

/// Scans the libc installations for the selected ABI under the given search
/// paths, then writes a deduplicated header tree into the output directory:
/// for each header path, the most frequently seen contents go to
/// `generic-<abi>/` (when seen more than once) and every other variant goes
/// to its own `<dest target>/` directory.
///
/// All allocations come from an arena that is never freed; the process is
/// expected to exit when `main` returns.
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    const allocator = arena.allocator();

    const args = try std.cli.parse(struct {
        named: struct {
            @"search-path": []const []const u8 = &.{},
            out: []const u8,
            abi: enum { musl, glibc, freebsd, netbsd },

            pub const @"search-path_help" = "subdirectories of search paths look like, e.g. x86_64-linux-gnu";
            pub const out_help = "a dir that will be created, and populated with the results";
        },
    }, allocator, .{});

    const search_paths = args.named.@"search-path";
    const out_dir = args.named.out;
    const vendor = args.named.abi;
    const abi_name = @tagName(vendor);

    const generic_name = try std.fmt.allocPrint(allocator, "generic-{s}", .{abi_name});
    const libc_targets = switch (vendor) {
        .glibc => &glibc_targets,
        .musl => &musl_targets,
        .freebsd => &freebsd_targets,
        .netbsd => &netbsd_targets,
    };

    var path_table = PathTable.init(allocator);
    var hash_to_contents = HashToContents.init(allocator);
    var max_bytes_saved: usize = 0;
    var total_bytes: usize = 0;

    var hasher = Blake3.init(.{});

    for (libc_targets) |libc_target| {
        // Name of the per-target installation directory inside a search path.
        const libc_dir = switch (vendor) {
            .glibc => try std.zig.target.glibcRuntimeTriple(allocator, libc_target.arch, .linux, libc_target.abi),
            .musl => std.zig.target.muslArchName(libc_target.arch, libc_target.abi),
            .freebsd => switch (libc_target.arch) {
                .arm => "armv7",
                .x86 => "i386",
                .x86_64 => "amd64",

                .aarch64,
                .powerpc,
                .powerpc64,
                .riscv64,
                => |a| @tagName(a),

                else => unreachable,
            },
            .netbsd => switch (libc_target.arch) {
                .arm => if (libc_target.abi == .eabihf) "evbarmv7hf" else "evbarmv7",
                .aarch64 => "evbarm64",
                .m68k => "mac68k",
                .mips => if (libc_target.abi == .eabihf) "evbmips" else "evbmipssf",
                .powerpc => if (libc_target.abi == .eabihf) "evbppc" else "evbppcsf",
                .x86 => "i386",
                .x86_64 => "amd64",

                .sparc,
                .sparc64,
                => |a| @tagName(a),

                else => unreachable,
            },
        };

        // Name of the output subdirectory for this target's non-generic headers.
        const dest_target = if (libc_target.dest) |dest| dest else try std.fmt.allocPrint(allocator, "{s}-{s}-{s}", .{
            @tagName(libc_target.arch),
            switch (vendor) {
                .musl, .glibc => "linux",
                .freebsd => "freebsd",
                .netbsd => "netbsd",
            },
            @tagName(libc_target.abi),
        });

        // Try each search path in order; the first one whose include directory
        // can be walked is used, and the `else` branch fires if none could.
        search: for (search_paths) |search_path| {
            const sub_path = switch (vendor) {
                .glibc,
                .freebsd,
                .netbsd,
                => &[_][]const u8{ search_path, libc_dir, "usr", "include" },
                .musl => &[_][]const u8{ search_path, libc_dir, "usr", "local", "musl", "include" },
            };
            const target_include_dir = try std.fs.path.join(allocator, sub_path);

            // Iterative directory walk using an explicit stack.
            var dir_stack = std.array_list.Managed([]const u8).init(allocator);
            try dir_stack.append(target_include_dir);

            while (dir_stack.pop()) |full_dir_name| {
                var dir = std.fs.cwd().openDir(full_dir_name, .{ .iterate = true }) catch |err| switch (err) {
                    error.FileNotFound => continue :search,
                    error.AccessDenied => continue :search,
                    else => return err,
                };
                defer dir.close();

                var dir_it = dir.iterate();

                while (try dir_it.next()) |entry| {
                    const full_path = try std.fs.path.join(allocator, &[_][]const u8{ full_dir_name, entry.name });
                    switch (entry.kind) {
                        .directory => try dir_stack.append(full_path),
                        .file, .sym_link => {
                            const rel_path = try std.fs.path.relative(allocator, target_include_dir, full_path);
                            const max_size = 2 * 1024 * 1024 * 1024;
                            const raw_bytes = try std.fs.cwd().readFileAlloc(full_path, allocator, .limited(max_size));
                            const trimmed = std.mem.trim(u8, raw_bytes, " \r\n\t");
                            total_bytes += raw_bytes.len;

                            // The digest covers the relative path as well as the
                            // contents, so identical bytes at different paths do
                            // not collide.
                            const hash = try allocator.alloc(u8, 32);
                            hasher = Blake3.init(.{});
                            hasher.update(rel_path);
                            hasher.update(trimmed);
                            hasher.final(hash);

                            const gop = try hash_to_contents.getOrPut(hash);
                            if (gop.found_existing) {
                                max_bytes_saved += raw_bytes.len;
                                gop.value_ptr.hit_count += 1;
                                std.debug.print("duplicate: {s} {s} ({B})\n", .{
                                    libc_dir,
                                    rel_path,
                                    raw_bytes.len,
                                });
                            } else {
                                gop.value_ptr.* = Contents{
                                    .bytes = trimmed,
                                    .hit_count = 1,
                                    .hash = hash,
                                    .is_generic = false,
                                };
                            }

                            const path_gop = try path_table.getOrPut(rel_path);
                            const target_to_hash = if (path_gop.found_existing) path_gop.value_ptr.* else blk: {
                                const ptr = try allocator.create(TargetToHash);
                                ptr.* = TargetToHash.init(allocator);
                                path_gop.value_ptr.* = ptr;
                                break :blk ptr;
                            };

                            // When `dest` is set, there are a few rare cases where we expect to overwrite a header. For
                            // example, `bits/long-double.h` differs very slightly between `powerpc64le-linux-gnu` and
                            // other `powerpc*-linux-gnu` targets, and we unify those targets as `powerpc-linux-gnu`. In
                            // such cases, we manually patch the affected header after processing, so it's fine that
                            // only one header wins here.
                            //
                            // Note that the hash recorded first for a dest target is the one that is kept; a later
                            // file for the same dest target and path only triggers the log message below.
                            if (libc_target.dest != null) {
                                const hash_gop = try target_to_hash.getOrPut(dest_target);
                                if (hash_gop.found_existing) std.debug.print("overwrote: {s} {s} {s}\n", .{
                                    libc_dir,
                                    rel_path,
                                    dest_target,
                                }) else hash_gop.value_ptr.* = hash;
                            } else {
                                try target_to_hash.putNoClobber(dest_target, hash);
                            }
                        },
                        else => std.debug.print("warning: weird file: {s}\n", .{full_path}),
                    }
                }
            }
            break;
        } else {
            std.debug.print("warning: libc target not found: {s}\n", .{libc_dir});
        }
    }

    std.debug.print("summary: {B} could be reduced to {B}\n", .{
        total_bytes,
        total_bytes - max_bytes_saved,
    });
    try std.fs.cwd().makePath(out_dir);

    var missed_opportunity_bytes: usize = 0;
    // iterate path_table. for each path, put all the hashes into a list. sort by hit_count.
    // the hash with the highest hit_count gets to be the "generic" one. everybody else
    // gets their header in a separate arch directory.
    var path_it = path_table.iterator();
    while (path_it.next()) |path_kv| {
        // One entry per destination target; the same `*Contents` appears once
        // for every target that shares it.
        var contents_list = std.array_list.Managed(*Contents).init(allocator);
        {
            var hash_it = path_kv.value_ptr.*.iterator();
            while (hash_it.next()) |hash_kv| {
                const contents = hash_to_contents.getPtr(hash_kv.value_ptr.*).?;
                try contents_list.append(contents);
            }
        }
        std.mem.sort(*Contents, contents_list.items, {}, Contents.hitCountLessThan);
        const best_contents = contents_list.pop().?;
        if (best_contents.hit_count > 1) {
            // worth it to make it generic
            const full_path = try std.fs.path.join(allocator, &[_][]const u8{ out_dir, generic_name, path_kv.key_ptr.* });
            try std.fs.cwd().makePath(std.fs.path.dirname(full_path).?);
            try std.fs.cwd().writeFile(.{ .sub_path = full_path, .data = best_contents.bytes });
            best_contents.is_generic = true;

            // Report every *other* distinct variant that was also seen more
            // than once. Because `contents_list` repeats a pointer for each
            // target sharing it, skip the generic winner itself and anything
            // already reported, so each variant is counted exactly once.
            var reported = std.array_list.Managed(*Contents).init(allocator);
            while (contents_list.pop()) |contender| {
                // Sorted ascending and popped from the back: once a single-hit
                // entry shows up, all remaining entries are single-hit too.
                if (contender.hit_count <= 1) break;
                if (contender == best_contents) continue;

                const already_reported = for (reported.items) |seen| {
                    if (seen == contender) break true;
                } else false;
                if (already_reported) continue;
                try reported.append(contender);

                const this_missed_bytes = contender.hit_count * contender.bytes.len;
                missed_opportunity_bytes += this_missed_bytes;
                std.debug.print("Missed opportunity ({B}): {s}\n", .{
                    this_missed_bytes,
                    path_kv.key_ptr.*,
                });
            }
        }

        // Write each target's copy unless it is already covered by the generic one.
        var hash_it = path_kv.value_ptr.*.iterator();
        while (hash_it.next()) |hash_kv| {
            const contents = hash_to_contents.get(hash_kv.value_ptr.*).?;
            if (contents.is_generic) continue;

            const dest_target = hash_kv.key_ptr.*;
            const full_path = try std.fs.path.join(allocator, &[_][]const u8{ out_dir, dest_target, path_kv.key_ptr.* });
            try std.fs.cwd().makePath(std.fs.path.dirname(full_path).?);
            try std.fs.cwd().writeFile(.{ .sub_path = full_path, .data = contents.bytes });
        }
    }
}