From b171a6f25d5257f615b6369c11f8307dcf076c0d Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 16 Oct 2023 16:47:47 -0700 Subject: [PATCH 1/3] Package.Fetch: normalize path separators in symlinks closes #17549 --- lib/std/mem.zig | 11 +++++------ src/Package/Fetch.zig | 30 +++++++++++++++++------------- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/lib/std/mem.zig b/lib/std/mem.zig index 8d05be4117..e9d957d387 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -3810,12 +3810,11 @@ test "replace" { try testing.expectEqualStrings(expected, output[0..expected.len]); } -/// Replace all occurrences of `needle` with `replacement`. -pub fn replaceScalar(comptime T: type, slice: []T, needle: T, replacement: T) void { - for (slice, 0..) |e, i| { - if (e == needle) { - slice[i] = replacement; - } +/// Replace all occurrences of `match` with `replacement`. +pub fn replaceScalar(comptime T: type, slice: []T, match: T, replacement: T) void { + for (slice) |*e| { + if (e.* == match) + e.* = replacement; } } diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index 5d315ffed4..ee835031a8 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -1329,7 +1329,7 @@ fn computeHash( const hashed_file = try arena.create(HashedFile); hashed_file.* = .{ .fs_path = fs_path, - .normalized_path = try normalizePath(arena, fs_path), + .normalized_path = try normalizePathAlloc(arena, fs_path), .kind = kind, .hash = undefined, // to be populated by the worker .failure = undefined, // to be populated by the worker @@ -1429,6 +1429,12 @@ fn hashFileFallible(dir: fs.Dir, hashed_file: *HashedFile) HashedFile.Error!void }, .sym_link => { const link_name = try dir.readLink(hashed_file.fs_path, &buf); + if (fs.path.sep != canonical_sep) { + // Package hashes are intended to be consistent across + // platforms which means we must normalize path separators + // inside symlinks. + normalizePath(link_name); + } hasher.update(link_name); }, } @@ -1484,22 +1490,20 @@ const HashedFile = struct { /// Make a file system path identical independently of operating system path inconsistencies. /// This converts backslashes into forward slashes. -fn normalizePath(arena: Allocator, fs_path: []const u8) ![]const u8 { - const canonical_sep = '/'; - - if (fs.path.sep == canonical_sep) - return fs_path; - +fn normalizePathAlloc(arena: Allocator, fs_path: []const u8) ![]const u8 { + if (fs.path.sep == canonical_sep) return fs_path; const normalized = try arena.dupe(u8, fs_path); - for (normalized) |*byte| { - switch (byte.*) { - fs.path.sep => byte.* = canonical_sep, - else => continue, - } - } + normalizePath(normalized); return normalized; } +const canonical_sep = fs.path.sep_posix; + +fn normalizePath(bytes: []u8) void { + assert(fs.path.sep != canonical_sep); + std.mem.replaceScalar(u8, bytes, fs.path.sep, canonical_sep); +} + const Filter = struct { include_paths: std.StringArrayHashMapUnmanaged(void) = .{}, From f1a9344ffeaa9b449eb111a112df33c845a1210c Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 16 Oct 2023 17:22:24 -0700 Subject: [PATCH 2/3] std.fs.openDir: handle OBJECT_NAME_INVALID --- lib/std/fs.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/fs.zig b/lib/std/fs.zig index 8c1d2cc88d..d95321cfd3 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -1803,7 +1803,7 @@ pub const Dir = struct { ); switch (rc) { .SUCCESS => return result, - .OBJECT_NAME_INVALID => unreachable, + .OBJECT_NAME_INVALID => return error.BadPathName, .OBJECT_NAME_NOT_FOUND => return error.FileNotFound, .OBJECT_PATH_NOT_FOUND => return error.FileNotFound, .NOT_A_DIRECTORY => return error.NotDir, From ba656e5c9f58e0440278918e5c6cdf0ac2fe673c Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 16 Oct 2023 18:15:47 -0700 Subject: [PATCH 3/3] zig fetch: add `--debug-hash` argument This argument causes zig to print verbose hashing information to stdout, which can be used to diff two different fetches and find out why the hashes do not equal each other. --- src/Package/Fetch.zig | 37 ++++++++++++++++++++++++++++++++++--- src/main.zig | 7 +++++++ 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index ee835031a8..7256c5e15c 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -81,6 +81,10 @@ pub const JobQueue = struct { wait_group: WaitGroup = .{}, global_cache: Cache.Directory, recursive: bool, + /// Dumps hash information to stdout which can be used to troubleshoot why + /// two hashes of the same package do not match. + /// If this is true, `recursive` must be false. + debug_hash: bool, work_around_btrfs_bug: bool, pub const Table = std.AutoArrayHashMapUnmanaged(Manifest.MultiHashHexDigest, *Fetch); @@ -1315,7 +1319,7 @@ fn computeHash( const kind: HashedFile.Kind = switch (entry.kind) { .directory => unreachable, .file => .file, - .sym_link => .sym_link, + .sym_link => .link, else => return f.fail(f.location_tok, try eb.printString( "package contains '{s}' which has illegal file type '{s}'", .{ entry.path, @tagName(entry.kind) }, @@ -1399,9 +1403,36 @@ fn computeHash( } if (any_failures) return error.FetchFailed; + + if (f.job_queue.debug_hash) { + assert(!f.job_queue.recursive); + // Print something to stdout that can be text diffed to figure out why + // the package hash is different. + dumpHashInfo(all_files.items) catch |err| { + std.debug.print("unable to write to stdout: {s}\n", .{@errorName(err)}); + std.process.exit(1); + }; + } + return hasher.finalResult(); } +fn dumpHashInfo(all_files: []const *const HashedFile) !void { + const stdout = std.io.getStdOut(); + var bw = std.io.bufferedWriter(stdout.writer()); + const w = bw.writer(); + + for (all_files) |hashed_file| { + try w.print("{s}: {s}: {s}\n", .{ + @tagName(hashed_file.kind), + std.fmt.fmtSliceHexLower(&hashed_file.hash), + hashed_file.normalized_path, + }); + } + + try bw.flush(); +} + fn workerHashFile(dir: fs.Dir, hashed_file: *HashedFile, wg: *WaitGroup) void { defer wg.finish(); hashed_file.failure = hashFileFallible(dir, hashed_file); @@ -1427,7 +1458,7 @@ fn hashFileFallible(dir: fs.Dir, hashed_file: *HashedFile) HashedFile.Error!void hasher.update(buf[0..bytes_read]); } }, - .sym_link => { + .link => { const link_name = try dir.readLink(hashed_file.fs_path, &buf); if (fs.path.sep != canonical_sep) { // Package hashes are intended to be consistent across @@ -1480,7 +1511,7 @@ const HashedFile = struct { fs.File.StatError || fs.Dir.ReadLinkError; - const Kind = enum { file, sym_link }; + const Kind = enum { file, link }; fn lessThan(context: void, lhs: *const HashedFile, rhs: *const HashedFile) bool { _ = context; diff --git a/src/main.zig b/src/main.zig index fe8c871ce8..6dd4fb0725 100644 --- a/src/main.zig +++ b/src/main.zig @@ -5143,6 +5143,7 @@ pub fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !voi .thread_pool = &thread_pool, .global_cache = global_cache_directory, .recursive = true, + .debug_hash = false, .work_around_btrfs_bug = work_around_btrfs_bug, }; defer job_queue.deinit(); @@ -6991,6 +6992,7 @@ pub const usage_fetch = \\Options: \\ -h, --help Print this help and exit \\ --global-cache-dir [path] Override path to global Zig cache directory + \\ --debug-hash Print verbose hash information to stdout \\ ; @@ -7004,6 +7006,7 @@ fn cmdFetch( std.process.hasEnvVarConstant("ZIG_BTRFS_WORKAROUND"); var opt_path_or_url: ?[]const u8 = null; var override_global_cache_dir: ?[]const u8 = try optionalStringEnvVar(arena, "ZIG_GLOBAL_CACHE_DIR"); + var debug_hash: bool = false; { var i: usize = 0; @@ -7019,6 +7022,9 @@ fn cmdFetch( i += 1; override_global_cache_dir = args[i]; continue; + } else if (mem.eql(u8, arg, "--debug-hash")) { + debug_hash = true; + continue; } else { fatal("unrecognized parameter: '{s}'", .{arg}); } @@ -7057,6 +7063,7 @@ fn cmdFetch( .thread_pool = &thread_pool, .global_cache = global_cache_directory, .recursive = false, + .debug_hash = debug_hash, .work_around_btrfs_bug = work_around_btrfs_bug, }; defer job_queue.deinit();