From bc5076715be066540b0b61a5599404f45bfa6d5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 26 Feb 2024 00:26:38 +0100 Subject: [PATCH 01/10] fetch: fix failing test Prior to [this](https://github.com/ziglang/zig/commit/ef9966c9855dd855afda767f212abec6e5a36307#diff-08c935ef8c633bb630641d44230597f1cff5afb0e736d451e2ba5569fa53d915R805) commit tar was not a valid extension. After that this one is valid case. --- src/Package/Fetch.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index d0cfd5ab94..96c6d0127d 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -867,9 +867,9 @@ const FileType = enum { try std.testing.expectEqual(@as(?FileType, .@"tar.xz"), fromContentDisposition("ATTACHMENT; filename=\"stuff.tar.xz\"")); try std.testing.expectEqual(@as(?FileType, .@"tar.xz"), fromContentDisposition("attachment; FileName=\"stuff.tar.xz\"")); try std.testing.expectEqual(@as(?FileType, .@"tar.gz"), fromContentDisposition("attachment; FileName*=UTF-8\'\'xyz%2Fstuff.tar.gz")); + try std.testing.expectEqual(@as(?FileType, .tar), fromContentDisposition("attachment; FileName=\"stuff.tar\"")); try std.testing.expect(fromContentDisposition("attachment FileName=\"stuff.tar.gz\"") == null); - try std.testing.expect(fromContentDisposition("attachment; FileName=\"stuff.tar\"") == null); try std.testing.expect(fromContentDisposition("attachment; FileName\"stuff.gz\"") == null); try std.testing.expect(fromContentDisposition("attachment; size=42") == null); try std.testing.expect(fromContentDisposition("inline; size=42") == null); From a5a928b9668f244a7c5d53f5c75de0152c79f5f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Tue, 27 Feb 2024 19:43:30 +0100 Subject: [PATCH 02/10] package manager: don't strip components in tar Unpack tar without removing leading root folder. Then find package root in unpacked tmp folder. --- src/Package/Fetch.zig | 97 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 86 insertions(+), 11 deletions(-) diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index 96c6d0127d..e9b7871f03 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -439,7 +439,8 @@ fn runResource( const s = fs.path.sep_str; const cache_root = f.job_queue.global_cache; const rand_int = std.crypto.random.int(u64); - const tmp_dir_sub_path = "tmp" ++ s ++ Manifest.hex64(rand_int); + const tmp_dir_sub_path = "tmp" ++ s ++ Manifest.hex64(rand_int); // root of the temporary directory + var tmp_package_root_sub_path: ?[]const u8 = null; // package root inside temporary directory { const tmp_directory_path = try cache_root.join(arena, &.{tmp_dir_sub_path}); @@ -463,6 +464,34 @@ fn runResource( try unpackResource(f, resource, uri_path, tmp_directory); + // Strip leading root directory if needed. + if (findPackageRootSubPath(arena, tmp_directory) catch null) |sub_path| { + // Position tmp_directory to sub_path. 
+ const handle = tmp_directory.handle.openDir(sub_path, .{ .iterate = true }) catch |err| { + try eb.addRootErrorMessage(.{ + .msg = try eb.printString("fail to open temporary directory '{s}' sub path '{s}': {s}", .{ + tmp_directory_path, sub_path, @errorName(err), + }), + }); + return error.FetchFailed; + }; + tmp_package_root_sub_path = try fs.path.join(arena, &[_][]const u8{ tmp_dir_sub_path, sub_path }); + tmp_directory.handle.close(); + tmp_directory = .{ + .path = try cache_root.join(arena, &.{tmp_package_root_sub_path.?}), + .handle = handle, + }; + } else { + // btrfs workaround; reopen tmp_directory + if (native_os == .linux and f.job_queue.work_around_btrfs_bug) { + // https://github.com/ziglang/zig/issues/17095 + tmp_directory.handle.close(); + tmp_directory.handle = cache_root.handle.makeOpenPath(tmp_dir_sub_path, .{ + .iterate = true, + }) catch @panic("btrfs workaround failed"); + } + } + // Load, parse, and validate the unpacked build.zig.zon file. It is allowed // for the file to be missing, in which case this fetched package is // considered to be a "naked" package. @@ -481,15 +510,6 @@ fn runResource( // Compute the package hash based on the remaining files in the temporary // directory. - - if (native_os == .linux and f.job_queue.work_around_btrfs_bug) { - // https://github.com/ziglang/zig/issues/17095 - tmp_directory.handle.close(); - tmp_directory.handle = cache_root.handle.makeOpenPath(tmp_dir_sub_path, .{ - .iterate = true, - }) catch @panic("btrfs workaround failed"); - } - f.actual_hash = try computeHash(f, tmp_directory, filter); } @@ -503,7 +523,11 @@ fn runResource( .root_dir = cache_root, .sub_path = try arena.dupe(u8, "p" ++ s ++ Manifest.hexDigest(f.actual_hash)), }; - renameTmpIntoCache(cache_root.handle, tmp_dir_sub_path, f.package_root.sub_path) catch |err| { + renameTmpIntoCache( + cache_root.handle, + if (tmp_package_root_sub_path) |p| p else tmp_dir_sub_path, + f.package_root.sub_path, + ) catch |err| { const src = try cache_root.join(arena, &.{tmp_dir_sub_path}); const dest = try cache_root.join(arena, &.{f.package_root.sub_path}); try eb.addRootErrorMessage(.{ .msg = try eb.printString( @@ -512,6 +536,10 @@ fn runResource( ) }); return error.FetchFailed; }; + // Remove temporary directory root if that not already done in rename. + if (tmp_package_root_sub_path) |_| { + cache_root.handle.deleteTree(tmp_dir_sub_path) catch {}; + } // Validate the computed hash against the expected hash. If invalid, this // job is done. @@ -608,6 +636,19 @@ fn loadManifest(f: *Fetch, pkg_root: Cache.Path) RunError!void { } } +// Finds package root subpath. +// Skips single root directory, returns null in all other cases. 
+fn findPackageRootSubPath(allocator: Allocator, parent: Cache.Directory) !?[]const u8 { + var iter = parent.handle.iterate(); + if (try iter.next()) |entry| { + if (try iter.next() != null) return null; + if (entry.kind == .directory) { + return try allocator.dupe(u8, entry.name); + } + } + return null; +} + fn queueJobsForDeps(f: *Fetch) RunError!void { assert(f.job_queue.recursive); @@ -1700,3 +1741,37 @@ test { _ = Filter; _ = FileType; } + +test "findPackageRootSubPath" { + const testing = std.testing; + + var root = std.testing.tmpDir(.{ .iterate = true }); + defer root.cleanup(); + + // folder1 + // ├── folder2 + // ├── file1 + // + try root.dir.makePath("folder1/folder2"); + (try root.dir.createFile("folder1/file1", .{})).close(); + + // start at root returns folder1 as package root + const sub_path = (try findPackageRootSubPath( + testing.allocator, + Cache.Directory{ .path = null, .handle = root.dir }, + )).?; + try testing.expectEqualStrings("folder1", sub_path); + testing.allocator.free(sub_path); + + // start at folder1 returns null + try testing.expect(null == (try findPackageRootSubPath( + testing.allocator, + Cache.Directory{ .path = null, .handle = try root.dir.openDir("folder1", .{ .iterate = true }) }, + ))); + + // start at folder1/folder2 returns null + try testing.expect(null == (try findPackageRootSubPath( + testing.allocator, + Cache.Directory{ .path = null, .handle = try root.dir.openDir("folder1/folder2", .{ .iterate = true }) }, + ))); +} From 15ca0c947176779cb57e2c0946b6a11ba0367257 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Wed, 20 Mar 2024 22:22:17 +0100 Subject: [PATCH 03/10] fix typo --- src/Package/Fetch.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index e9b7871f03..21bb5f2158 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -536,7 +536,7 @@ fn runResource( ) }); return error.FetchFailed; }; - // Remove temporary directory root if that not already done in rename. + // Remove temporary directory root if that's not already done in rename. 
if (tmp_package_root_sub_path) |_| { cache_root.handle.deleteTree(tmp_dir_sub_path) catch {}; } From c4261bf5624d80f1e613bed475ebe03543ec83b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Tue, 26 Mar 2024 22:27:06 +0100 Subject: [PATCH 04/10] fetch: find package root only for archives --- src/Package/Fetch.zig | 125 +++++++++++++++++++++--------------------- 1 file changed, 61 insertions(+), 64 deletions(-) diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index 21bb5f2158..02afc70b7f 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -439,10 +439,9 @@ fn runResource( const s = fs.path.sep_str; const cache_root = f.job_queue.global_cache; const rand_int = std.crypto.random.int(u64); - const tmp_dir_sub_path = "tmp" ++ s ++ Manifest.hex64(rand_int); // root of the temporary directory - var tmp_package_root_sub_path: ?[]const u8 = null; // package root inside temporary directory + const tmp_dir_sub_path = "tmp" ++ s ++ Manifest.hex64(rand_int); - { + const package_sub_path = blk: { const tmp_directory_path = try cache_root.join(arena, &.{tmp_dir_sub_path}); var tmp_directory: Cache.Directory = .{ .path = tmp_directory_path, @@ -462,25 +461,21 @@ fn runResource( }; defer tmp_directory.handle.close(); - try unpackResource(f, resource, uri_path, tmp_directory); + const package_dir = try unpackResource(f, resource, uri_path, tmp_directory); - // Strip leading root directory if needed. - if (findPackageRootSubPath(arena, tmp_directory) catch null) |sub_path| { - // Position tmp_directory to sub_path. - const handle = tmp_directory.handle.openDir(sub_path, .{ .iterate = true }) catch |err| { + if (package_dir) |dir_name| { + // Position tmp_directory to dir_name inside tmp_dir_sub_path. + const path = try cache_root.join(arena, &.{ tmp_dir_sub_path, dir_name }); + const handle = tmp_directory.handle.openDir(dir_name, .{ .iterate = true }) catch |err| { try eb.addRootErrorMessage(.{ - .msg = try eb.printString("fail to open temporary directory '{s}' sub path '{s}': {s}", .{ - tmp_directory_path, sub_path, @errorName(err), + .msg = try eb.printString("unable to open temporary directory '{s}': {s}", .{ + path, @errorName(err), }), }); return error.FetchFailed; }; - tmp_package_root_sub_path = try fs.path.join(arena, &[_][]const u8{ tmp_dir_sub_path, sub_path }); tmp_directory.handle.close(); - tmp_directory = .{ - .path = try cache_root.join(arena, &.{tmp_package_root_sub_path.?}), - .handle = handle, - }; + tmp_directory = .{ .path = path, .handle = handle }; } else { // btrfs workaround; reopen tmp_directory if (native_os == .linux and f.job_queue.work_around_btrfs_bug) { @@ -511,7 +506,12 @@ fn runResource( // Compute the package hash based on the remaining files in the temporary // directory. f.actual_hash = try computeHash(f, tmp_directory, filter); - } + + break :blk if (package_dir) |dir_name| + try fs.path.join(arena, &.{ tmp_dir_sub_path, dir_name }) + else + tmp_dir_sub_path; + }; // Rename the temporary directory into the global zig package cache // directory. 
If the hash already exists, delete the temporary directory @@ -523,11 +523,7 @@ fn runResource( .root_dir = cache_root, .sub_path = try arena.dupe(u8, "p" ++ s ++ Manifest.hexDigest(f.actual_hash)), }; - renameTmpIntoCache( - cache_root.handle, - if (tmp_package_root_sub_path) |p| p else tmp_dir_sub_path, - f.package_root.sub_path, - ) catch |err| { + renameTmpIntoCache(cache_root.handle, package_sub_path, f.package_root.sub_path) catch |err| { const src = try cache_root.join(arena, &.{tmp_dir_sub_path}); const dest = try cache_root.join(arena, &.{f.package_root.sub_path}); try eb.addRootErrorMessage(.{ .msg = try eb.printString( @@ -536,10 +532,8 @@ fn runResource( ) }); return error.FetchFailed; }; - // Remove temporary directory root if that's not already done in rename. - if (tmp_package_root_sub_path) |_| { - cache_root.handle.deleteTree(tmp_dir_sub_path) catch {}; - } + // Remove temporary directory root. + cache_root.handle.deleteTree(tmp_dir_sub_path) catch {}; // Validate the computed hash against the expected hash. If invalid, this // job is done. @@ -636,14 +630,11 @@ fn loadManifest(f: *Fetch, pkg_root: Cache.Path) RunError!void { } } -// Finds package root subpath. -// Skips single root directory, returns null in all other cases. -fn findPackageRootSubPath(allocator: Allocator, parent: Cache.Directory) !?[]const u8 { - var iter = parent.handle.iterate(); +fn archivePackageDir(arena: Allocator, out_dir: fs.Dir) !?[]const u8 { + var iter = out_dir.iterate(); if (try iter.next()) |entry| { - if (try iter.next() != null) return null; - if (entry.kind == .directory) { - return try allocator.dupe(u8, entry.name); + if (try iter.next() == null and entry.kind == .directory) { + return try arena.dupe(u8, entry.name); } } return null; @@ -1073,7 +1064,7 @@ fn unpackResource( resource: *Resource, uri_path: []const u8, tmp_directory: Cache.Directory, -) RunError!void { +) RunError!?[]const u8 { const eb = &f.error_bundle; const file_type = switch (resource.*) { .file => FileType.fromPath(uri_path) orelse @@ -1134,21 +1125,24 @@ fn unpackResource( .git => .git_pack, - .dir => |dir| return f.recursiveDirectoryCopy(dir, tmp_directory.handle) catch |err| { - return f.fail(f.location_tok, try eb.printString( - "unable to copy directory '{s}': {s}", - .{ uri_path, @errorName(err) }, - )); + .dir => |dir| { + f.recursiveDirectoryCopy(dir, tmp_directory.handle) catch |err| { + return f.fail(f.location_tok, try eb.printString( + "unable to copy directory '{s}': {s}", + .{ uri_path, @errorName(err) }, + )); + }; + return null; }, }; switch (file_type) { - .tar => try unpackTarball(f, tmp_directory.handle, resource.reader()), + .tar => return try unpackTarball(f, tmp_directory.handle, resource.reader()), .@"tar.gz" => { const reader = resource.reader(); var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, reader); var dcp = std.compress.gzip.decompressor(br.reader()); - try unpackTarball(f, tmp_directory.handle, dcp.reader()); + return try unpackTarball(f, tmp_directory.handle, dcp.reader()); }, .@"tar.xz" => { const gpa = f.arena.child_allocator; @@ -1161,7 +1155,7 @@ fn unpackResource( )); }; defer dcp.deinit(); - try unpackTarball(f, tmp_directory.handle, dcp.reader()); + return try unpackTarball(f, tmp_directory.handle, dcp.reader()); }, .@"tar.zst" => { const window_size = std.compress.zstd.DecompressorOptions.default_window_buffer_len; @@ -1171,21 +1165,25 @@ fn unpackResource( var dcp = std.compress.zstd.decompressor(br.reader(), .{ .window_buffer = window_buffer, }); 
- return unpackTarball(f, tmp_directory.handle, dcp.reader()); + return try unpackTarball(f, tmp_directory.handle, dcp.reader()); }, - .git_pack => unpackGitPack(f, tmp_directory.handle, resource) catch |err| switch (err) { - error.FetchFailed => return error.FetchFailed, - error.OutOfMemory => return error.OutOfMemory, - else => |e| return f.fail(f.location_tok, try eb.printString( - "unable to unpack git files: {s}", - .{@errorName(e)}, - )), + .git_pack => { + unpackGitPack(f, tmp_directory.handle, resource) catch |err| switch (err) { + error.FetchFailed => return error.FetchFailed, + error.OutOfMemory => return error.OutOfMemory, + else => |e| return f.fail(f.location_tok, try eb.printString( + "unable to unpack git files: {s}", + .{@errorName(e)}, + )), + }; + return null; }, } } -fn unpackTarball(f: *Fetch, out_dir: fs.Dir, reader: anytype) RunError!void { +fn unpackTarball(f: *Fetch, out_dir: fs.Dir, reader: anytype) RunError!?[]const u8 { const eb = &f.error_bundle; + const arena = f.arena.allocator(); const gpa = f.arena.child_allocator; var diagnostics: std.tar.Diagnostics = .{ .allocator = gpa }; @@ -1193,7 +1191,7 @@ fn unpackTarball(f: *Fetch, out_dir: fs.Dir, reader: anytype) RunError!void { std.tar.pipeToFileSystem(out_dir, reader, .{ .diagnostics = &diagnostics, - .strip_components = 1, + .strip_components = 0, // https://github.com/ziglang/zig/issues/17463 .mode_mode = .ignore, .exclude_empty_directories = true, @@ -1237,6 +1235,8 @@ fn unpackTarball(f: *Fetch, out_dir: fs.Dir, reader: anytype) RunError!void { } return error.FetchFailed; } + + return archivePackageDir(arena, out_dir) catch null; } fn unpackGitPack(f: *Fetch, out_dir: fs.Dir, resource: *Resource) anyerror!void { @@ -1742,36 +1742,33 @@ test { _ = FileType; } -test "findPackageRootSubPath" { +test archivePackageDir { const testing = std.testing; - var root = std.testing.tmpDir(.{ .iterate = true }); - defer root.cleanup(); + var tmp = std.testing.tmpDir(.{ .iterate = true }); + defer tmp.cleanup(); // folder1 // ├── folder2 // ├── file1 // - try root.dir.makePath("folder1/folder2"); - (try root.dir.createFile("folder1/file1", .{})).close(); + try tmp.dir.makePath("folder1/folder2"); + (try tmp.dir.createFile("folder1/file1", .{})).close(); // start at root returns folder1 as package root - const sub_path = (try findPackageRootSubPath( - testing.allocator, - Cache.Directory{ .path = null, .handle = root.dir }, - )).?; + const sub_path = (try archivePackageDir(testing.allocator, tmp.dir)).?; try testing.expectEqualStrings("folder1", sub_path); testing.allocator.free(sub_path); // start at folder1 returns null - try testing.expect(null == (try findPackageRootSubPath( + try testing.expect(null == (try archivePackageDir( testing.allocator, - Cache.Directory{ .path = null, .handle = try root.dir.openDir("folder1", .{ .iterate = true }) }, + try tmp.dir.openDir("folder1", .{ .iterate = true }), ))); // start at folder1/folder2 returns null - try testing.expect(null == (try findPackageRootSubPath( + try testing.expect(null == (try archivePackageDir( testing.allocator, - Cache.Directory{ .path = null, .handle = try root.dir.openDir("folder1/folder2", .{ .iterate = true }) }, + try tmp.dir.openDir("folder1/folder2", .{ .iterate = true }), ))); } From 24304a43854d62572daab4b5e922bb7436c73c50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Wed, 3 Apr 2024 17:08:41 +0200 Subject: [PATCH 05/10] fetch: save syscall, and add comment From review comments: https://github.com/ziglang/zig/pull/19111 --- 
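To illustrate the "save syscall" part (directory layout sketch only; <rand> and
<hash> stand for the Manifest.hex64 and Manifest.hexDigest values used in
runResource, and pkg-root for the single top-level directory of the archive):

    tmp/<rand>/pkg-root/...   --renameTmpIntoCache-->   p/<hash>/...
    tmp/<rand>/               left behind empty, so a plain deleteDir suffices

When the archive has no such root directory, package_sub_path equals
tmp_dir_sub_path, the whole temporary directory is renamed into the cache and
nothing is left to delete.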
src/Package/Fetch.zig | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index 02afc70b7f..9abf0dceb7 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -532,8 +532,10 @@ fn runResource( ) }); return error.FetchFailed; }; - // Remove temporary directory root. - cache_root.handle.deleteTree(tmp_dir_sub_path) catch {}; + // Remove temporary directory root if not already renamed to cache. + if (!std.mem.eql(u8, package_sub_path, tmp_dir_sub_path)) { + cache_root.handle.deleteDir(tmp_dir_sub_path) catch {}; + } // Validate the computed hash against the expected hash. If invalid, this // job is done. @@ -1059,6 +1061,10 @@ fn initResource(f: *Fetch, uri: std.Uri, server_header_buffer: []u8) RunError!Re )); } +/// A `null` return value indicates the `tmp_directory` is populated directly +/// with the package contents. +/// A non-null return value means that the package contents are inside a +/// sub-directory indicated by the named path. fn unpackResource( f: *Fetch, resource: *Resource, From b1e70edd907da91bc0863b541d04a15f2093f9a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Wed, 3 Apr 2024 19:44:51 +0200 Subject: [PATCH 06/10] tar: find package root dir in pipeToFileSystem While iterating over all files in tarball set root_dir in diagnostic if there is single root in tarball. Will be used in package manager with strip_components = 0 to find the root of the fetched package. --- lib/std/tar.zig | 110 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 102 insertions(+), 8 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 0f273bad9a..2977bc16cc 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -29,6 +29,9 @@ pub const Diagnostics = struct { allocator: std.mem.Allocator, errors: std.ArrayListUnmanaged(Error) = .{}, + root_entries: usize = 0, + root_dir: ?[]const u8 = null, + pub const Error = union(enum) { unable_to_create_sym_link: struct { code: anyerror, @@ -45,6 +48,45 @@ pub const Diagnostics = struct { }, }; + fn findRoot(d: *Diagnostics, path: []const u8, kind: FileKind) !void { + if (rootDir(path)) |root_dir| { + d.root_entries += 1; + if (kind == .directory and d.root_entries == 1) { + d.root_dir = try d.allocator.dupe(u8, root_dir); + return; + } + if (d.root_dir) |r| { + d.allocator.free(r); + d.root_dir = null; + } + } + } + + // If path is package root returns root_dir name, otherwise null. 
+ fn rootDir(path: []const u8) ?[]const u8 { + if (path.len == 0) return null; + + const start_index: usize = if (path[0] == '/') 1 else 0; + const end_index: usize = if (path[path.len - 1] == '/') path.len - 1 else path.len; + const buf = path[start_index..end_index]; + return if (std.mem.indexOfScalarPos(u8, buf, 0, '/') == null) + buf + else + null; + } + + test rootDir { + const expectEqualStrings = testing.expectEqualStrings; + const expect = testing.expect; + + try expectEqualStrings("a", rootDir("a").?); + try expectEqualStrings("b", rootDir("b").?); + try expectEqualStrings("c", rootDir("/c").?); + try expectEqualStrings("d", rootDir("/d/").?); + try expect(rootDir("a/b") == null); + try expect(rootDir("") == null); + } + pub fn deinit(d: *Diagnostics) void { for (d.errors.items) |item| { switch (item) { @@ -61,6 +103,10 @@ pub const Diagnostics = struct { } } d.errors.deinit(d.allocator); + if (d.root_dir) |r| { + d.allocator.free(r); + d.root_dir = null; + } d.* = undefined; } }; @@ -580,19 +626,21 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: PipeOptions) .link_name_buffer = &link_name_buffer, .diagnostics = options.diagnostics, }); + while (try iter.next()) |file| { + const file_name = stripComponents(file.name, options.strip_components); + if (options.diagnostics) |d| { + try d.findRoot(file_name, file.kind); + } + switch (file.kind) { .directory => { - const file_name = stripComponents(file.name, options.strip_components); if (file_name.len != 0 and !options.exclude_empty_directories) { try dir.makePath(file_name); } }, .file => { - if (file.size == 0 and file.name.len == 0) return; - const file_name = stripComponents(file.name, options.strip_components); if (file_name.len == 0) return error.BadFileName; - if (createDirAndFile(dir, file_name, fileMode(file.mode, options))) |fs_file| { defer fs_file.close(); try file.writeAll(fs_file); @@ -605,12 +653,8 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: PipeOptions) } }, .sym_link => { - // The file system path of the symbolic link. - const file_name = stripComponents(file.name, options.strip_components); if (file_name.len == 0) return error.BadFileName; - // The data inside the symbolic link. 
const link_name = file.link_name; - createDirAndSymlink(dir, link_name, file_name) catch |err| { const d = options.diagnostics orelse return error.UnableToCreateSymLink; try d.errors.append(d.allocator, .{ .unable_to_create_sym_link = .{ @@ -799,6 +843,7 @@ test PaxIterator { test { _ = @import("tar/test.zig"); + _ = Diagnostics; } test "header parse size" { @@ -993,6 +1038,55 @@ test pipeToFileSystem { ); } +test "pipeToFileSystem root_dir" { + const data = @embedFile("tar/testdata/example.tar"); + var fbs = std.io.fixedBufferStream(data); + const reader = fbs.reader(); + + // with strip_components = 1 + { + var tmp = testing.tmpDir(.{ .no_follow = true }); + defer tmp.cleanup(); + var diagnostics: Diagnostics = .{ .allocator = testing.allocator }; + defer diagnostics.deinit(); + + pipeToFileSystem(tmp.dir, reader, .{ + .strip_components = 1, + .diagnostics = &diagnostics, + }) catch |err| { + // Skip on platform which don't support symlinks + if (err == error.UnableToCreateSymLink) return error.SkipZigTest; + return err; + }; + + // there is no root_dir + try testing.expect(diagnostics.root_dir == null); + try testing.expectEqual(3, diagnostics.root_entries); + } + + // with strip_components = 0 + { + fbs.reset(); + var tmp = testing.tmpDir(.{ .no_follow = true }); + defer tmp.cleanup(); + var diagnostics: Diagnostics = .{ .allocator = testing.allocator }; + defer diagnostics.deinit(); + + pipeToFileSystem(tmp.dir, reader, .{ + .strip_components = 0, + .diagnostics = &diagnostics, + }) catch |err| { + // Skip on platform which don't support symlinks + if (err == error.UnableToCreateSymLink) return error.SkipZigTest; + return err; + }; + + // root_dir found + try testing.expectEqualStrings("example", diagnostics.root_dir.?); + try testing.expectEqual(1, diagnostics.root_entries); + } +} + fn normalizePath(bytes: []u8) []u8 { const canonical_sep = std.fs.path.sep_posix; if (std.fs.path.sep == canonical_sep) return bytes; From 1431e34cb9e3af8d4437ca2e5f91b6da53be5a47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Wed, 3 Apr 2024 21:01:29 +0200 Subject: [PATCH 07/10] fetch: remove one openDir in runResource Based on comment: https://github.com/ziglang/zig/pull/19111#discussion_r1548640939 computeHash finds all files in temporary directory. There is no difference on what path are they. When calculating hash normalized_path must be set relative to package root. That's the place where we strip root if needed. --- src/Package/Fetch.zig | 70 ++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index 9abf0dceb7..a1165afdc4 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -461,36 +461,26 @@ fn runResource( }; defer tmp_directory.handle.close(); - const package_dir = try unpackResource(f, resource, uri_path, tmp_directory); + const pkg_dir = try unpackResource(f, resource, uri_path, tmp_directory); - if (package_dir) |dir_name| { - // Position tmp_directory to dir_name inside tmp_dir_sub_path. 
- const path = try cache_root.join(arena, &.{ tmp_dir_sub_path, dir_name }); - const handle = tmp_directory.handle.openDir(dir_name, .{ .iterate = true }) catch |err| { - try eb.addRootErrorMessage(.{ - .msg = try eb.printString("unable to open temporary directory '{s}': {s}", .{ - path, @errorName(err), - }), - }); - return error.FetchFailed; - }; - tmp_directory.handle.close(); - tmp_directory = .{ .path = path, .handle = handle }; - } else { - // btrfs workaround; reopen tmp_directory - if (native_os == .linux and f.job_queue.work_around_btrfs_bug) { - // https://github.com/ziglang/zig/issues/17095 - tmp_directory.handle.close(); - tmp_directory.handle = cache_root.handle.makeOpenPath(tmp_dir_sub_path, .{ - .iterate = true, - }) catch @panic("btrfs workaround failed"); - } + var pkg_path: Cache.Path = if (pkg_dir) |pkg_dir_name| + .{ .root_dir = tmp_directory, .sub_path = pkg_dir_name } + else + .{ .root_dir = tmp_directory }; + + // btrfs workaround; reopen tmp_directory + if (native_os == .linux and f.job_queue.work_around_btrfs_bug) { + // https://github.com/ziglang/zig/issues/17095 + pkg_path.root_dir.handle.close(); + pkg_path.root_dir.handle = cache_root.handle.makeOpenPath(tmp_dir_sub_path, .{ + .iterate = true, + }) catch @panic("btrfs workaround failed"); } // Load, parse, and validate the unpacked build.zig.zon file. It is allowed // for the file to be missing, in which case this fetched package is // considered to be a "naked" package. - try loadManifest(f, .{ .root_dir = tmp_directory }); + try loadManifest(f, pkg_path); // Apply the manifest's inclusion rules to the temporary directory by // deleting excluded files. If any error occurred for files that were @@ -505,10 +495,10 @@ fn runResource( // Compute the package hash based on the remaining files in the temporary // directory. - f.actual_hash = try computeHash(f, tmp_directory, filter); + f.actual_hash = try computeHash(f, pkg_path, filter); - break :blk if (package_dir) |dir_name| - try fs.path.join(arena, &.{ tmp_dir_sub_path, dir_name }) + break :blk if (pkg_dir) |pkg_dir_name| + try fs.path.join(arena, &.{ tmp_dir_sub_path, pkg_dir_name }) else tmp_dir_sub_path; }; @@ -1388,7 +1378,7 @@ pub fn renameTmpIntoCache( /// function. fn computeHash( f: *Fetch, - tmp_directory: Cache.Directory, + pkg_path: Cache.Path, filter: Filter, ) RunError!Manifest.Digest { // All the path name strings need to be in memory for sorting. @@ -1396,6 +1386,7 @@ fn computeHash( const gpa = f.arena.child_allocator; const eb = &f.error_bundle; const thread_pool = f.job_queue.thread_pool; + const root_dir = pkg_path.root_dir.handle; // Collect all files, recursively, then sort. 
var all_files = std.ArrayList(*HashedFile).init(gpa); @@ -1409,7 +1400,7 @@ fn computeHash( var sus_dirs: std.StringArrayHashMapUnmanaged(void) = .{}; defer sus_dirs.deinit(gpa); - var walker = try tmp_directory.handle.walk(gpa); + var walker = try root_dir.walk(gpa); defer walker.deinit(); { @@ -1423,7 +1414,7 @@ fn computeHash( while (walker.next() catch |err| { try eb.addRootErrorMessage(.{ .msg = try eb.printString( "unable to walk temporary directory '{}': {s}", - .{ tmp_directory, @errorName(err) }, + .{ pkg_path, @errorName(err) }, ) }); return error.FetchFailed; }) |entry| { @@ -1444,7 +1435,7 @@ fn computeHash( }; wait_group.start(); try thread_pool.spawn(workerDeleteFile, .{ - tmp_directory.handle, deleted_file, &wait_group, + root_dir, deleted_file, &wait_group, }); try deleted_files.append(deleted_file); continue; @@ -1467,14 +1458,14 @@ fn computeHash( const hashed_file = try arena.create(HashedFile); hashed_file.* = .{ .fs_path = fs_path, - .normalized_path = try normalizePathAlloc(arena, fs_path), + .normalized_path = try normalizePathAlloc(arena, stripRoot(fs_path, pkg_path.sub_path)), .kind = kind, .hash = undefined, // to be populated by the worker .failure = undefined, // to be populated by the worker }; wait_group.start(); try thread_pool.spawn(workerHashFile, .{ - tmp_directory.handle, hashed_file, &wait_group, + root_dir, hashed_file, &wait_group, }); try all_files.append(hashed_file); } @@ -1493,7 +1484,7 @@ fn computeHash( var i: usize = 0; while (i < sus_dirs.count()) : (i += 1) { const sus_dir = sus_dirs.keys()[i]; - tmp_directory.handle.deleteDir(sus_dir) catch |err| switch (err) { + root_dir.deleteDir(sus_dir) catch |err| switch (err) { error.DirNotEmpty => continue, error.FileNotFound => continue, else => |e| { @@ -1657,6 +1648,17 @@ const HashedFile = struct { } }; +/// Strips root directory name from file system path. +fn stripRoot(fs_path: []const u8, root_dir: []const u8) []const u8 { + if (root_dir.len == 0 or fs_path.len <= root_dir.len) return fs_path; + + if (std.mem.eql(u8, fs_path[0..root_dir.len], root_dir) and fs_path[root_dir.len] == fs.path.sep) { + return fs_path[root_dir.len + 1 ..]; + } + + return fs_path; +} + /// Make a file system path identical independently of operating system path inconsistencies. /// This converts backslashes into forward slashes. fn normalizePathAlloc(arena: Allocator, fs_path: []const u8) ![]const u8 { From 363acf49910bbc0ff2b8acecea027f3f17bbce25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Wed, 3 Apr 2024 21:17:57 +0200 Subject: [PATCH 08/10] fetch: update comments --- src/Package/Fetch.zig | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index a1165afdc4..2d02d9cd68 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -461,14 +461,16 @@ fn runResource( }; defer tmp_directory.handle.close(); + // Unpack resource into tmp_directory. A non-null return value means + // that the package contents are inside a `pkg_dir` sub-directory. const pkg_dir = try unpackResource(f, resource, uri_path, tmp_directory); - var pkg_path: Cache.Path = if (pkg_dir) |pkg_dir_name| - .{ .root_dir = tmp_directory, .sub_path = pkg_dir_name } - else - .{ .root_dir = tmp_directory }; + var pkg_path: Cache.Path = .{ + .root_dir = tmp_directory, + .sub_path = if (pkg_dir) |pkg_dir_name| pkg_dir_name else "", + }; - // btrfs workaround; reopen tmp_directory + // Apply btrfs workaround if needed. Reopen tmp_directory. 
if (native_os == .linux and f.job_queue.work_around_btrfs_bug) { // https://github.com/ziglang/zig/issues/17095 pkg_path.root_dir.handle.close(); @@ -482,17 +484,18 @@ fn runResource( // considered to be a "naked" package. try loadManifest(f, pkg_path); - // Apply the manifest's inclusion rules to the temporary directory by - // deleting excluded files. If any error occurred for files that were - // ultimately excluded, those errors should be ignored, such as failure to - // create symlinks that weren't supposed to be included anyway. - - // Empty directories have already been omitted by `unpackResource`. - const filter: Filter = .{ .include_paths = if (f.manifest) |m| m.paths else .{}, }; + // TODO: + // If any error occurred for files that were ultimately excluded, those + // errors should be ignored, such as failure to create symlinks that + // weren't supposed to be included anyway. + + // Apply the manifest's inclusion rules to the temporary directory by + // deleting excluded files. + // Empty directories have already been omitted by `unpackResource`. // Compute the package hash based on the remaining files in the temporary // directory. f.actual_hash = try computeHash(f, pkg_path, filter); @@ -522,7 +525,7 @@ fn runResource( ) }); return error.FetchFailed; }; - // Remove temporary directory root if not already renamed to cache. + // Remove temporary directory root if not already renamed to global cache. if (!std.mem.eql(u8, package_sub_path, tmp_dir_sub_path)) { cache_root.handle.deleteDir(tmp_dir_sub_path) catch {}; } From 8545cb014795b4137d1ad92c62d08014723abf9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Wed, 3 Apr 2024 21:20:39 +0200 Subject: [PATCH 09/10] fetch: use package root detection pipeToFileSystem Based on: https://github.com/ziglang/zig/pull/19111#discussion_r1548620985 In pipeToFileSystem we are already iterating over all files in tarball. Inspecting them there for existence of single root folder saves two syscalls later. 
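The call pattern this builds on is the same one exercised by the std.tar test
added earlier in this series (sketch only, using the gpa/out_dir/reader names
from unpackTarball; mode_mode and exclude_empty_directories omitted):

    var diagnostics: std.tar.Diagnostics = .{ .allocator = gpa };
    defer diagnostics.deinit();
    try std.tar.pipeToFileSystem(out_dir, reader, .{
        .strip_components = 0,
        .diagnostics = &diagnostics,
    });
    // null means there is no single top-level directory and the package
    // contents sit directly in out_dir.
    const pkg_dir = diagnostics.root_dir;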
--- src/Package/Fetch.zig | 46 ++++--------------------------------------- 1 file changed, 4 insertions(+), 42 deletions(-) diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index 2d02d9cd68..aa2845fc97 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -625,16 +625,6 @@ fn loadManifest(f: *Fetch, pkg_root: Cache.Path) RunError!void { } } -fn archivePackageDir(arena: Allocator, out_dir: fs.Dir) !?[]const u8 { - var iter = out_dir.iterate(); - if (try iter.next()) |entry| { - if (try iter.next() == null and entry.kind == .directory) { - return try arena.dupe(u8, entry.name); - } - } - return null; -} - fn queueJobsForDeps(f: *Fetch) RunError!void { assert(f.job_queue.recursive); @@ -1235,7 +1225,10 @@ fn unpackTarball(f: *Fetch, out_dir: fs.Dir, reader: anytype) RunError!?[]const return error.FetchFailed; } - return archivePackageDir(arena, out_dir) catch null; + return if (diagnostics.root_dir) |root_dir| + return try arena.dupe(u8, root_dir) + else + null; } fn unpackGitPack(f: *Fetch, out_dir: fs.Dir, resource: *Resource) anyerror!void { @@ -1752,34 +1745,3 @@ test { _ = Filter; _ = FileType; } - -test archivePackageDir { - const testing = std.testing; - - var tmp = std.testing.tmpDir(.{ .iterate = true }); - defer tmp.cleanup(); - - // folder1 - // ├── folder2 - // ├── file1 - // - try tmp.dir.makePath("folder1/folder2"); - (try tmp.dir.createFile("folder1/file1", .{})).close(); - - // start at root returns folder1 as package root - const sub_path = (try archivePackageDir(testing.allocator, tmp.dir)).?; - try testing.expectEqualStrings("folder1", sub_path); - testing.allocator.free(sub_path); - - // start at folder1 returns null - try testing.expect(null == (try archivePackageDir( - testing.allocator, - try tmp.dir.openDir("folder1", .{ .iterate = true }), - ))); - - // start at folder1/folder2 returns null - try testing.expect(null == (try archivePackageDir( - testing.allocator, - try tmp.dir.openDir("folder1/folder2", .{ .iterate = true }), - ))); -} From a60b7af2c19ca14609bd052916299ffb64063856 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Thu, 4 Apr 2024 01:59:15 +0200 Subject: [PATCH 10/10] fetch: fix manifest included paths filtering Filter should be applied on path where package root folder (if there is any) is stripped. Manifest is inside package root and has paths relative to package root not temporary directory root. --- src/Package/Fetch.zig | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index aa2845fc97..d00beaa478 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -1416,7 +1416,8 @@ fn computeHash( }) |entry| { if (entry.kind == .directory) continue; - if (!filter.includePath(entry.path)) { + const entry_pkg_path = stripRoot(entry.path, pkg_path.sub_path); + if (!filter.includePath(entry_pkg_path)) { // Delete instead of including in hash calculation. 
const fs_path = try arena.dupe(u8, entry.path); @@ -1454,7 +1455,7 @@ fn computeHash( const hashed_file = try arena.create(HashedFile); hashed_file.* = .{ .fs_path = fs_path, - .normalized_path = try normalizePathAlloc(arena, stripRoot(fs_path, pkg_path.sub_path)), + .normalized_path = try normalizePathAlloc(arena, entry_pkg_path), .kind = kind, .hash = undefined, // to be populated by the worker .failure = undefined, // to be populated by the worker @@ -1657,9 +1658,9 @@ fn stripRoot(fs_path: []const u8, root_dir: []const u8) []const u8 { /// Make a file system path identical independently of operating system path inconsistencies. /// This converts backslashes into forward slashes. -fn normalizePathAlloc(arena: Allocator, fs_path: []const u8) ![]const u8 { - if (fs.path.sep == canonical_sep) return fs_path; - const normalized = try arena.dupe(u8, fs_path); +fn normalizePathAlloc(arena: Allocator, pkg_path: []const u8) ![]const u8 { + const normalized = try arena.dupe(u8, pkg_path); + if (fs.path.sep == canonical_sep) return normalized; normalizePath(normalized); return normalized; }
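The intended behaviour of stripRoot can be pinned down with a small test
(hypothetical, not part of this patch series; names as in Fetch.zig):

    test stripRoot {
        const sep = fs.path.sep_str;
        // The package root directory is removed together with its separator.
        try std.testing.expectEqualStrings(
            "src/main.zig",
            stripRoot("pkg" ++ sep ++ "src/main.zig", "pkg"),
        );
        // An empty root (package contents directly in the tmp root) is a no-op.
        try std.testing.expectEqualStrings("src/main.zig", stripRoot("src/main.zig", ""));
        // Paths outside the package root are returned unchanged.
        try std.testing.expectEqualStrings(
            "other" ++ sep ++ "file",
            stripRoot("other" ++ sep ++ "file", "pkg"),
        );
    }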