From af0502f6c4202df0223bcefa5121361c07680108 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Fri, 1 Mar 2024 21:37:47 +0100 Subject: [PATCH 01/13] std.tar: add tests for file and symlink create Should do that before I broke package manager! --- lib/std/tar.zig | 23 +++++++++++++++++++++++ lib/std/tar/test.zig | 7 +++---- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 121e7db248..db76e176b4 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -1,3 +1,4 @@ +<<<<<<< HEAD //! Tar archive is single ordinary file which can contain many files (or //! directories, symlinks, ...). It's build by series of blocks each size of 512 //! bytes. First block of each entry is header which defines type, name, size @@ -127,6 +128,8 @@ pub const Header = struct { return buffer[0 .. p.len + 1 + n.len]; } + /// When kind is symbolic_link linked-to name (target_path) is specified in + /// the linkname field. pub fn linkName(header: Header, buffer: *[LINK_NAME_SIZE]u8) []const u8 { const link_name = header.str(157, 100); if (link_name.len == 0) { @@ -619,6 +622,7 @@ fn createDirAndFile(dir: std.fs.Dir, file_name: []const u8) !std.fs.File { return fs_file; } +// Creates a symbolic link at path `file_name` which points to `link_name`. fn createDirAndSymlink(dir: std.fs.Dir, link_name: []const u8, file_name: []const u8) !void { dir.symLink(link_name, file_name, .{}) catch |err| { if (err == error.FileNotFound) { @@ -841,3 +845,22 @@ test "tar header parse mode" { } } } + +test "create file and symlink" { + var root = std.testing.tmpDir(.{}); + defer root.cleanup(); + + _ = try createDirAndFile(root.dir, "file1"); + _ = try createDirAndFile(root.dir, "a/b/c/file2"); + + _ = createDirAndSymlink(root.dir, "a/b/c/file2", "symlink1") catch |err| { + // On Windows when developer mode is not enabled + if (err == error.AccessDenied) return error.SkipZigTest; + return err; + }; + _ = try createDirAndSymlink(root.dir, "../../../file1", "d/e/f/symlink2"); + + // Danglink symlnik, file created later + _ = try createDirAndSymlink(root.dir, "../../../g/h/i/file4", "j/k/l/symlink3"); + _ = try createDirAndFile(root.dir, "g/h/i/file4"); +} diff --git a/lib/std/tar/test.zig b/lib/std/tar/test.zig index 67c4fe0198..bd45bf792a 100644 --- a/lib/std/tar/test.zig +++ b/lib/std/tar/test.zig @@ -1,5 +1,5 @@ -const std = @import("../std.zig"); -const tar = std.tar; +const std = @import("std"); +const tar = @import("../tar.zig"); const testing = std.testing; test "tar run Go test cases" { @@ -489,8 +489,7 @@ test "tar pipeToFileSystem" { try testing.expectError(error.FileNotFound, root.dir.statFile("empty")); try testing.expect((try root.dir.statFile("a/file")).kind == .file); - // TODO is there better way to test symlink try testing.expect((try root.dir.statFile("b/symlink")).kind == .file); // statFile follows symlink var buf: [32]u8 = undefined; - _ = try root.dir.readLink("b/symlink", &buf); + try testing.expectEqualSlices(u8, "../a/file", try root.dir.readLink("b/symlink", &buf)); } From 04e8bbd932c9ce9ac99230a656c8951c467d5b24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Sat, 2 Mar 2024 00:39:48 +0100 Subject: [PATCH 02/13] std.tar: test buffers provided to the iterator Tar header stores name in max 256 bytes and link name in max 100 bytes. Those are minimums for provided buffers. Error is raised during iterator init if buffers are not long enough. Pax and gnu extensions can store longer names. 
If such extension is reached during unpack and don't fit into provided buffer error is returned. --- lib/std/tar.zig | 19 +++++++++++++------ lib/std/tar/test.zig | 40 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 51 insertions(+), 8 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index db76e176b4..a4f6f2e322 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -87,8 +87,8 @@ pub const Options = struct { pub const Header = struct { const SIZE = 512; - const MAX_NAME_SIZE = 100 + 1 + 155; // name(100) + separator(1) + prefix(155) - const LINK_NAME_SIZE = 100; + pub const MAX_NAME_SIZE = 100 + 1 + 155; // name(100) + separator(1) + prefix(155) + pub const LINK_NAME_SIZE = 100; bytes: *const [SIZE]u8, @@ -248,7 +248,13 @@ pub const IteratorOptions = struct { /// Iterates over files in tar archive. /// `next` returns each file in `reader` tar archive. -pub fn iterator(reader: anytype, options: IteratorOptions) Iterator(@TypeOf(reader)) { +/// Provided buffers should be at least 256 bytes for file_name and 100 bytes +/// for link_name. +pub fn iterator(reader: anytype, options: IteratorOptions) !Iterator(@TypeOf(reader)) { + if (options.file_name_buffer.len < Header.MAX_NAME_SIZE or + options.link_name_buffer.len < Header.LINK_NAME_SIZE) + return error.TarInsufficientBuffer; + return .{ .reader = reader, .diagnostics = options.diagnostics, @@ -318,7 +324,7 @@ fn Iterator(comptime ReaderType: type) type { } fn readString(self: *Self, size: usize, buffer: []u8) ![]const u8 { - if (size > buffer.len) return error.TarCorruptInput; + if (size > buffer.len) return error.TarInsufficientBuffer; const buf = buffer[0..size]; try self.reader.readNoEof(buf); return nullStr(buf); @@ -470,7 +476,8 @@ fn PaxIterator(comptime ReaderType: type) type { // Copies pax attribute value into destination buffer. // Must be called with destination buffer of size at least Attribute.len. 
pub fn value(self: Attribute, dst: []u8) ![]const u8 { - assert(self.len <= dst.len); + if (self.len > dst.len) return error.TarInsufficientBuffer; + // assert(self.len <= dst.len); const buf = dst[0..self.len]; const n = try self.reader.readAll(buf); if (n < self.len) return error.UnexpectedEndOfStream; @@ -558,7 +565,7 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi var file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; var link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; - var iter = iterator(reader, .{ + var iter = try iterator(reader, .{ .file_name_buffer = &file_name_buffer, .link_name_buffer = &link_name_buffer, .diagnostics = options.diagnostics, diff --git a/lib/std/tar/test.zig b/lib/std/tar/test.zig index bd45bf792a..fe6efc1b5e 100644 --- a/lib/std/tar/test.zig +++ b/lib/std/tar/test.zig @@ -315,7 +315,7 @@ test "tar run Go test cases" { }, .{ .data = @embedFile("testdata/fuzz1.tar"), - .err = error.TarCorruptInput, + .err = error.TarInsufficientBuffer, }, .{ .data = @embedFile("testdata/fuzz2.tar"), @@ -328,7 +328,7 @@ test "tar run Go test cases" { for (cases) |case| { var fsb = std.io.fixedBufferStream(case.data); - var iter = tar.iterator(fsb.reader(), .{ + var iter = try tar.iterator(fsb.reader(), .{ .file_name_buffer = &file_name_buffer, .link_name_buffer = &link_name_buffer, }); @@ -359,6 +359,27 @@ test "tar run Go test cases" { } try testing.expectEqual(case.files.len, i); } + + var min_file_name_buffer: [tar.Header.MAX_NAME_SIZE]u8 = undefined; + var min_link_name_buffer: [tar.Header.LINK_NAME_SIZE]u8 = undefined; + const long_name_cases = [_]Case{ cases[11], cases[25], cases[28] }; + + for (long_name_cases) |case| { + var fsb = std.io.fixedBufferStream(case.data); + var iter = try tar.iterator(fsb.reader(), .{ + .file_name_buffer = &min_file_name_buffer, + .link_name_buffer = &min_link_name_buffer, + }); + + var iter_err: ?anyerror = null; + while (iter.next() catch |err| brk: { + iter_err = err; + break :brk null; + }) |_| {} + + try testing.expect(iter_err != null); + try testing.expectEqual(error.TarInsufficientBuffer, iter_err.?); + } } // used in test to calculate file chksum @@ -490,6 +511,21 @@ test "tar pipeToFileSystem" { try testing.expectError(error.FileNotFound, root.dir.statFile("empty")); try testing.expect((try root.dir.statFile("a/file")).kind == .file); try testing.expect((try root.dir.statFile("b/symlink")).kind == .file); // statFile follows symlink + var buf: [32]u8 = undefined; try testing.expectEqualSlices(u8, "../a/file", try root.dir.readLink("b/symlink", &buf)); } + +test "insufficient buffer for iterator" { + var file_name_buffer: [10]u8 = undefined; + var link_name_buffer: [10]u8 = undefined; + + var fsb = std.io.fixedBufferStream(""); + try testing.expectError( + error.TarInsufficientBuffer, + tar.iterator(fsb.reader(), .{ + .file_name_buffer = &file_name_buffer, + .link_name_buffer = &link_name_buffer, + }), + ); +} From 10add7c677d368501691178ea0c073a9766cd498 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Sat, 2 Mar 2024 00:50:39 +0100 Subject: [PATCH 03/13] std.tar: remove redundant test name prefixes --- lib/std/tar.zig | 22 ++++++++++++++++++---- lib/std/tar/test.zig | 22 ++++------------------ 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index a4f6f2e322..0eab2456d7 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -656,7 +656,7 @@ fn stripComponents(path: []const u8, count: u32) []const u8 { return 
path[i..]; } -test "tar stripComponents" { +test "stripComponents" { const expectEqualStrings = std.testing.expectEqualStrings; try expectEqualStrings("a/b/c", stripComponents("a/b/c", 0)); try expectEqualStrings("b/c", stripComponents("a/b/c", 1)); @@ -665,7 +665,7 @@ test "tar stripComponents" { try expectEqualStrings("", stripComponents("a/b/c", 4)); } -test "tar PaxIterator" { +test "PaxIterator" { const Attr = struct { kind: PaxAttributeKind, value: []const u8 = undefined, @@ -793,7 +793,7 @@ test { _ = @import("tar/test.zig"); } -test "tar header parse size" { +test "header parse size" { const cases = [_]struct { in: []const u8, want: u64 = 0, @@ -828,7 +828,7 @@ test "tar header parse size" { } } -test "tar header parse mode" { +test "header parse mode" { const cases = [_]struct { in: []const u8, want: u64 = 0, @@ -871,3 +871,17 @@ test "create file and symlink" { _ = try createDirAndSymlink(root.dir, "../../../g/h/i/file4", "j/k/l/symlink3"); _ = try createDirAndFile(root.dir, "g/h/i/file4"); } + +test "insufficient buffer for iterator" { + var file_name_buffer: [10]u8 = undefined; + var link_name_buffer: [10]u8 = undefined; + + var fsb = std.io.fixedBufferStream(""); + try std.testing.expectError( + error.TarInsufficientBuffer, + iterator(fsb.reader(), .{ + .file_name_buffer = &file_name_buffer, + .link_name_buffer = &link_name_buffer, + }), + ); +} diff --git a/lib/std/tar/test.zig b/lib/std/tar/test.zig index fe6efc1b5e..9532306728 100644 --- a/lib/std/tar/test.zig +++ b/lib/std/tar/test.zig @@ -2,7 +2,7 @@ const std = @import("std"); const tar = @import("../tar.zig"); const testing = std.testing; -test "tar run Go test cases" { +test "run test cases" { const Case = struct { const File = struct { name: []const u8, @@ -401,7 +401,7 @@ const Md5Writer = struct { } }; -test "tar should not overwrite existing file" { +test "should not overwrite existing file" { // Starting from this folder structure: // $ tree root // root @@ -457,7 +457,7 @@ test "tar should not overwrite existing file" { try tar.pipeToFileSystem(root2.dir, fsb.reader(), .{ .mode_mode = .ignore, .strip_components = 0 }); } -test "tar case sensitivity" { +test "case sensitivity" { // Mimicking issue #18089, this tar contains, same file name in two case // sensitive name version. Should fail on case insensitive file systems. // @@ -484,7 +484,7 @@ test "tar case sensitivity" { try testing.expect((try root.dir.statFile("alacritty/Darkermatrix.yml")).kind == .file); } -test "tar pipeToFileSystem" { +test "pipeToFileSystem" { // $ tar tvf // pipe_to_file_system_test/ // pipe_to_file_system_test/b/ @@ -515,17 +515,3 @@ test "tar pipeToFileSystem" { var buf: [32]u8 = undefined; try testing.expectEqualSlices(u8, "../a/file", try root.dir.readLink("b/symlink", &buf)); } - -test "insufficient buffer for iterator" { - var file_name_buffer: [10]u8 = undefined; - var link_name_buffer: [10]u8 = undefined; - - var fsb = std.io.fixedBufferStream(""); - try testing.expectError( - error.TarInsufficientBuffer, - tar.iterator(fsb.reader(), .{ - .file_name_buffer = &file_name_buffer, - .link_name_buffer = &link_name_buffer, - }), - ); -} From 5ccbb196ad08fd5e58fc8874917a20f9a220d729 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Sat, 2 Mar 2024 10:59:25 +0100 Subject: [PATCH 04/13] std.tar: don't return in iterator init Don't assert min buffer size on iterator init. That was changing public interface. This way we don't break that interface. 
--- lib/std/tar.zig | 34 +-- lib/std/tar/test.zig | 698 ++++++++++++++++++++++--------------------- 2 files changed, 369 insertions(+), 363 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 0eab2456d7..6bf46ad507 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -115,9 +115,10 @@ pub const Header = struct { /// Includes prefix concatenated, if any. /// TODO: check against "../" and other nefarious things - pub fn fullName(header: Header, buffer: *[MAX_NAME_SIZE]u8) ![]const u8 { + pub fn fullName(header: Header, buffer: []u8) ![]const u8 { const n = name(header); const p = prefix(header); + if (buffer.len < n.len + p.len + 1) return error.TarInsufficientBuffer; if (!is_ustar(header) or p.len == 0) { @memcpy(buffer[0..n.len], n); return buffer[0..n.len]; @@ -130,11 +131,12 @@ pub const Header = struct { /// When kind is symbolic_link linked-to name (target_path) is specified in /// the linkname field. - pub fn linkName(header: Header, buffer: *[LINK_NAME_SIZE]u8) []const u8 { + pub fn linkName(header: Header, buffer: []u8) ![]const u8 { const link_name = header.str(157, 100); if (link_name.len == 0) { return buffer[0..0]; } + if (buffer.len < link_name.len) return error.TarInsufficientBuffer; const buf = buffer[0..link_name.len]; @memcpy(buf, link_name); return buf; @@ -248,13 +250,7 @@ pub const IteratorOptions = struct { /// Iterates over files in tar archive. /// `next` returns each file in `reader` tar archive. -/// Provided buffers should be at least 256 bytes for file_name and 100 bytes -/// for link_name. -pub fn iterator(reader: anytype, options: IteratorOptions) !Iterator(@TypeOf(reader)) { - if (options.file_name_buffer.len < Header.MAX_NAME_SIZE or - options.link_name_buffer.len < Header.LINK_NAME_SIZE) - return error.TarInsufficientBuffer; - +pub fn iterator(reader: anytype, options: IteratorOptions) Iterator(@TypeOf(reader)) { return .{ .reader = reader, .diagnostics = options.diagnostics, @@ -372,10 +368,10 @@ fn Iterator(comptime ReaderType: type) type { self.file.size = size; } if (self.file.link_name.len == 0) { - self.file.link_name = header.linkName(self.link_name_buffer[0..Header.LINK_NAME_SIZE]); + self.file.link_name = try header.linkName(self.link_name_buffer); } if (self.file.name.len == 0) { - self.file.name = try header.fullName(self.file_name_buffer[0..Header.MAX_NAME_SIZE]); + self.file.name = try header.fullName(self.file_name_buffer); } self.padding = blockPadding(self.file.size); @@ -565,7 +561,7 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi var file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; var link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; - var iter = try iterator(reader, .{ + var iter = iterator(reader, .{ .file_name_buffer = &file_name_buffer, .link_name_buffer = &link_name_buffer, .diagnostics = options.diagnostics, @@ -871,17 +867,3 @@ test "create file and symlink" { _ = try createDirAndSymlink(root.dir, "../../../g/h/i/file4", "j/k/l/symlink3"); _ = try createDirAndFile(root.dir, "g/h/i/file4"); } - -test "insufficient buffer for iterator" { - var file_name_buffer: [10]u8 = undefined; - var link_name_buffer: [10]u8 = undefined; - - var fsb = std.io.fixedBufferStream(""); - try std.testing.expectError( - error.TarInsufficientBuffer, - iterator(fsb.reader(), .{ - .file_name_buffer = &file_name_buffer, - .link_name_buffer = &link_name_buffer, - }), - ); -} diff --git a/lib/std/tar/test.zig b/lib/std/tar/test.zig index 9532306728..7cd0cb7544 100644 --- a/lib/std/tar/test.zig +++ 
b/lib/std/tar/test.zig @@ -2,333 +2,352 @@ const std = @import("std"); const tar = @import("../tar.zig"); const testing = std.testing; +const Case = struct { + const File = struct { + name: []const u8, + size: u64 = 0, + mode: u32 = 0, + link_name: []const u8 = &[0]u8{}, + kind: tar.Header.Kind = .normal, + truncated: bool = false, // when there is no file body, just header, usefull for huge files + }; + + data: []const u8, // testdata file content + files: []const File = &[_]@This().File{}, // expected files to found in archive + chksums: []const []const u8 = &[_][]const u8{}, // chksums of each file content + err: ?anyerror = null, // parsing should fail with this error +}; + +const cases = [_]Case{ + .{ + .data = @embedFile("testdata/gnu.tar"), + .files = &[_]Case.File{ + .{ + .name = "small.txt", + .size = 5, + .mode = 0o640, + }, + .{ + .name = "small2.txt", + .size = 11, + .mode = 0o640, + }, + }, + .chksums = &[_][]const u8{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, + }, + .{ + .data = @embedFile("testdata/sparse-formats.tar"), + .err = error.TarUnsupportedHeader, + }, + .{ + .data = @embedFile("testdata/star.tar"), + .files = &[_]Case.File{ + .{ + .name = "small.txt", + .size = 5, + .mode = 0o640, + }, + .{ + .name = "small2.txt", + .size = 11, + .mode = 0o640, + }, + }, + .chksums = &[_][]const u8{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, + }, + .{ + .data = @embedFile("testdata/v7.tar"), + .files = &[_]Case.File{ + .{ + .name = "small.txt", + .size = 5, + .mode = 0o444, + }, + .{ + .name = "small2.txt", + .size = 11, + .mode = 0o444, + }, + }, + .chksums = &[_][]const u8{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, + }, + .{ + .data = @embedFile("testdata/pax.tar"), + .files = &[_]Case.File{ + .{ + .name = "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + .size = 7, + .mode = 0o664, + }, + .{ + .name = "a/b", + .size = 0, + .kind = .symbolic_link, + .mode = 0o777, + .link_name = "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + }, + }, + .chksums = &[_][]const u8{ + "3c382e8f5b6631aa2db52643912ffd4a", + }, + }, + .{ + // pax attribute don't end with \n + .data = @embedFile("testdata/pax-bad-hdr-file.tar"), + .err = error.PaxInvalidAttributeEnd, + }, + .{ + // size is in pax attribute + .data = @embedFile("testdata/pax-pos-size-file.tar"), + .files = &[_]Case.File{ + .{ + .name = "foo", + .size = 999, + .kind = .normal, + .mode = 0o640, + }, + }, + .chksums = &[_][]const u8{ + "0afb597b283fe61b5d4879669a350556", + }, + }, + .{ + // has pax records which we are not interested in + .data = @embedFile("testdata/pax-records.tar"), + .files = &[_]Case.File{ + .{ + .name = "file", + }, + }, + }, + .{ + // has global records which we are ignoring + .data = @embedFile("testdata/pax-global-records.tar"), + .files = &[_]Case.File{ + .{ + .name = "file1", + }, + .{ + .name = "file2", + }, + .{ + .name = "file3", + }, + .{ + .name = "file4", + }, + }, + }, + .{ + .data = @embedFile("testdata/nil-uid.tar"), + .files = &[_]Case.File{ + .{ + .name = "P1050238.JPG.log", + .size = 14, + .kind = .normal, + .mode = 0o664, + }, + }, + .chksums = &[_][]const u8{ + 
"08d504674115e77a67244beac19668f5", + }, + }, + .{ + // has xattrs and pax records which we are ignoring + .data = @embedFile("testdata/xattrs.tar"), + .files = &[_]Case.File{ + .{ + .name = "small.txt", + .size = 5, + .kind = .normal, + .mode = 0o644, + }, + .{ + .name = "small2.txt", + .size = 11, + .kind = .normal, + .mode = 0o644, + }, + }, + .chksums = &[_][]const u8{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, + }, + .{ + .data = @embedFile("testdata/gnu-multi-hdrs.tar"), + .files = &[_]Case.File{ + .{ + .name = "GNU2/GNU2/long-path-name", + .link_name = "GNU4/GNU4/long-linkpath-name", + .kind = .symbolic_link, + }, + }, + }, + .{ + // has gnu type D (directory) and S (sparse) blocks + .data = @embedFile("testdata/gnu-incremental.tar"), + .err = error.TarUnsupportedHeader, + }, + .{ + // should use values only from last pax header + .data = @embedFile("testdata/pax-multi-hdrs.tar"), + .files = &[_]Case.File{ + .{ + .name = "bar", + .link_name = "PAX4/PAX4/long-linkpath-name", + .kind = .symbolic_link, + }, + }, + }, + .{ + .data = @embedFile("testdata/gnu-long-nul.tar"), + .files = &[_]Case.File{ + .{ + .name = "0123456789", + .mode = 0o644, + }, + }, + }, + .{ + .data = @embedFile("testdata/gnu-utf8.tar"), + .files = &[_]Case.File{ + .{ + .name = "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", + .mode = 0o644, + }, + }, + }, + .{ + .data = @embedFile("testdata/gnu-not-utf8.tar"), + .files = &[_]Case.File{ + .{ + .name = "hi\x80\x81\x82\x83bye", + .mode = 0o644, + }, + }, + }, + .{ + // null in pax key + .data = @embedFile("testdata/pax-nul-xattrs.tar"), + .err = error.PaxNullInKeyword, + }, + .{ + .data = @embedFile("testdata/pax-nul-path.tar"), + .err = error.PaxNullInValue, + }, + .{ + .data = @embedFile("testdata/neg-size.tar"), + .err = error.TarHeader, + }, + .{ + .data = @embedFile("testdata/issue10968.tar"), + .err = error.TarHeader, + }, + .{ + .data = @embedFile("testdata/issue11169.tar"), + .err = error.TarHeader, + }, + .{ + .data = @embedFile("testdata/issue12435.tar"), + .err = error.TarHeaderChksum, + }, + .{ + // has magic with space at end instead of null + .data = @embedFile("testdata/invalid-go17.tar"), + .files = &[_]Case.File{ + .{ + .name = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo", + }, + }, + }, + .{ + .data = @embedFile("testdata/ustar-file-devs.tar"), + .files = &[_]Case.File{ + .{ + .name = "file", + .mode = 0o644, + }, + }, + }, + .{ + .data = @embedFile("testdata/trailing-slash.tar"), + .files = &[_]Case.File{ + .{ + .name = "123456789/" ** 30, + .kind = .directory, + }, + }, + }, + .{ + // Has size in gnu extended format. To represent size bigger than 8 GB. + .data = @embedFile("testdata/writer-big.tar"), + .files = &[_]Case.File{ + .{ + .name = "tmp/16gig.txt", + .size = 16 * 1024 * 1024 * 1024, + .truncated = true, + .mode = 0o640, + }, + }, + }, + .{ + // Size in gnu extended format, and name in pax attribute. 
+ .data = @embedFile("testdata/writer-big-long.tar"), + .files = &[_]Case.File{ + .{ + .name = "longname/" ** 15 ++ "16gig.txt", + .size = 16 * 1024 * 1024 * 1024, + .mode = 0o644, + .truncated = true, + }, + }, + }, + .{ + .data = @embedFile("testdata/fuzz1.tar"), + .err = error.TarInsufficientBuffer, + }, + .{ + .data = @embedFile("testdata/fuzz2.tar"), + .err = error.PaxSizeAttrOverflow, + }, +}; + +// used in test to calculate file chksum +const Md5Writer = struct { + h: std.crypto.hash.Md5 = std.crypto.hash.Md5.init(.{}), + + pub fn writeAll(self: *Md5Writer, buf: []const u8) !void { + self.h.update(buf); + } + + pub fn writeByte(self: *Md5Writer, byte: u8) !void { + self.h.update(&[_]u8{byte}); + } + + pub fn chksum(self: *Md5Writer) [32]u8 { + var s = [_]u8{0} ** 16; + self.h.final(&s); + return std.fmt.bytesToHex(s, .lower); + } +}; + test "run test cases" { - const Case = struct { - const File = struct { - name: []const u8, - size: u64 = 0, - mode: u32 = 0, - link_name: []const u8 = &[0]u8{}, - kind: tar.Header.Kind = .normal, - truncated: bool = false, // when there is no file body, just header, usefull for huge files - }; - - data: []const u8, // testdata file content - files: []const File = &[_]@This().File{}, // expected files to found in archive - chksums: []const []const u8 = &[_][]const u8{}, // chksums of each file content - err: ?anyerror = null, // parsing should fail with this error - }; - - const cases = [_]Case{ - .{ - .data = @embedFile("testdata/gnu.tar"), - .files = &[_]Case.File{ - .{ - .name = "small.txt", - .size = 5, - .mode = 0o640, - }, - .{ - .name = "small2.txt", - .size = 11, - .mode = 0o640, - }, - }, - .chksums = &[_][]const u8{ - "e38b27eaccb4391bdec553a7f3ae6b2f", - "c65bd2e50a56a2138bf1716f2fd56fe9", - }, - }, - .{ - .data = @embedFile("testdata/sparse-formats.tar"), - .err = error.TarUnsupportedHeader, - }, - .{ - .data = @embedFile("testdata/star.tar"), - .files = &[_]Case.File{ - .{ - .name = "small.txt", - .size = 5, - .mode = 0o640, - }, - .{ - .name = "small2.txt", - .size = 11, - .mode = 0o640, - }, - }, - .chksums = &[_][]const u8{ - "e38b27eaccb4391bdec553a7f3ae6b2f", - "c65bd2e50a56a2138bf1716f2fd56fe9", - }, - }, - .{ - .data = @embedFile("testdata/v7.tar"), - .files = &[_]Case.File{ - .{ - .name = "small.txt", - .size = 5, - .mode = 0o444, - }, - .{ - .name = "small2.txt", - .size = 11, - .mode = 0o444, - }, - }, - .chksums = &[_][]const u8{ - "e38b27eaccb4391bdec553a7f3ae6b2f", - "c65bd2e50a56a2138bf1716f2fd56fe9", - }, - }, - .{ - .data = @embedFile("testdata/pax.tar"), - .files = &[_]Case.File{ - .{ - .name = "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", - .size = 7, - .mode = 0o664, - }, - .{ - .name = "a/b", - .size = 0, - .kind = .symbolic_link, - .mode = 0o777, - .link_name = "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", - }, - }, - .chksums = &[_][]const u8{ - "3c382e8f5b6631aa2db52643912ffd4a", - }, - }, - .{ - // pax attribute don't end with \n - .data = @embedFile("testdata/pax-bad-hdr-file.tar"), - .err = error.PaxInvalidAttributeEnd, - }, - .{ - // size is in pax attribute - .data = @embedFile("testdata/pax-pos-size-file.tar"), - .files = &[_]Case.File{ - .{ - .name = "foo", - .size = 999, - .kind = .normal, - .mode = 0o640, - }, 
- }, - .chksums = &[_][]const u8{ - "0afb597b283fe61b5d4879669a350556", - }, - }, - .{ - // has pax records which we are not interested in - .data = @embedFile("testdata/pax-records.tar"), - .files = &[_]Case.File{ - .{ - .name = "file", - }, - }, - }, - .{ - // has global records which we are ignoring - .data = @embedFile("testdata/pax-global-records.tar"), - .files = &[_]Case.File{ - .{ - .name = "file1", - }, - .{ - .name = "file2", - }, - .{ - .name = "file3", - }, - .{ - .name = "file4", - }, - }, - }, - .{ - .data = @embedFile("testdata/nil-uid.tar"), - .files = &[_]Case.File{ - .{ - .name = "P1050238.JPG.log", - .size = 14, - .kind = .normal, - .mode = 0o664, - }, - }, - .chksums = &[_][]const u8{ - "08d504674115e77a67244beac19668f5", - }, - }, - .{ - // has xattrs and pax records which we are ignoring - .data = @embedFile("testdata/xattrs.tar"), - .files = &[_]Case.File{ - .{ - .name = "small.txt", - .size = 5, - .kind = .normal, - .mode = 0o644, - }, - .{ - .name = "small2.txt", - .size = 11, - .kind = .normal, - .mode = 0o644, - }, - }, - .chksums = &[_][]const u8{ - "e38b27eaccb4391bdec553a7f3ae6b2f", - "c65bd2e50a56a2138bf1716f2fd56fe9", - }, - }, - .{ - .data = @embedFile("testdata/gnu-multi-hdrs.tar"), - .files = &[_]Case.File{ - .{ - .name = "GNU2/GNU2/long-path-name", - .link_name = "GNU4/GNU4/long-linkpath-name", - .kind = .symbolic_link, - }, - }, - }, - .{ - // has gnu type D (directory) and S (sparse) blocks - .data = @embedFile("testdata/gnu-incremental.tar"), - .err = error.TarUnsupportedHeader, - }, - .{ - // should use values only from last pax header - .data = @embedFile("testdata/pax-multi-hdrs.tar"), - .files = &[_]Case.File{ - .{ - .name = "bar", - .link_name = "PAX4/PAX4/long-linkpath-name", - .kind = .symbolic_link, - }, - }, - }, - .{ - .data = @embedFile("testdata/gnu-long-nul.tar"), - .files = &[_]Case.File{ - .{ - .name = "0123456789", - .mode = 0o644, - }, - }, - }, - .{ - .data = @embedFile("testdata/gnu-utf8.tar"), - .files = &[_]Case.File{ - .{ - .name = "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹", - .mode = 0o644, - }, - }, - }, - .{ - .data = @embedFile("testdata/gnu-not-utf8.tar"), - .files = &[_]Case.File{ - .{ - .name = "hi\x80\x81\x82\x83bye", - .mode = 0o644, - }, - }, - }, - .{ - // null in pax key - .data = @embedFile("testdata/pax-nul-xattrs.tar"), - .err = error.PaxNullInKeyword, - }, - .{ - .data = @embedFile("testdata/pax-nul-path.tar"), - .err = error.PaxNullInValue, - }, - .{ - .data = @embedFile("testdata/neg-size.tar"), - .err = error.TarHeader, - }, - .{ - .data = @embedFile("testdata/issue10968.tar"), - .err = error.TarHeader, - }, - .{ - .data = @embedFile("testdata/issue11169.tar"), - .err = error.TarHeader, - }, - .{ - .data = @embedFile("testdata/issue12435.tar"), - .err = error.TarHeaderChksum, - }, - .{ - // has magic with space at end instead of null - .data = @embedFile("testdata/invalid-go17.tar"), - .files = &[_]Case.File{ - .{ - .name = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo", - }, - }, - }, - .{ - .data = @embedFile("testdata/ustar-file-devs.tar"), - .files = &[_]Case.File{ - .{ - .name = "file", - .mode = 0o644, - }, - }, - }, - .{ - .data = @embedFile("testdata/trailing-slash.tar"), - .files = &[_]Case.File{ - .{ - .name = "123456789/" ** 30, - .kind = .directory, - }, - }, - }, - .{ - // Has size in gnu extended format. To represent size bigger than 8 GB. 
- .data = @embedFile("testdata/writer-big.tar"), - .files = &[_]Case.File{ - .{ - .name = "tmp/16gig.txt", - .size = 16 * 1024 * 1024 * 1024, - .truncated = true, - .mode = 0o640, - }, - }, - }, - .{ - // Size in gnu extended format, and name in pax attribute. - .data = @embedFile("testdata/writer-big-long.tar"), - .files = &[_]Case.File{ - .{ - .name = "longname/" ** 15 ++ "16gig.txt", - .size = 16 * 1024 * 1024 * 1024, - .mode = 0o644, - .truncated = true, - }, - }, - }, - .{ - .data = @embedFile("testdata/fuzz1.tar"), - .err = error.TarInsufficientBuffer, - }, - .{ - .data = @embedFile("testdata/fuzz2.tar"), - .err = error.PaxSizeAttrOverflow, - }, - }; - var file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; var link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; for (cases) |case| { var fsb = std.io.fixedBufferStream(case.data); - var iter = try tar.iterator(fsb.reader(), .{ + var iter = tar.iterator(fsb.reader(), .{ .file_name_buffer = &file_name_buffer, .link_name_buffer = &link_name_buffer, }); @@ -359,6 +378,10 @@ test "run test cases" { } try testing.expectEqual(case.files.len, i); } +} + +test "pax/gnu long names with small buffer" { + // should fail with insufficient buffer error var min_file_name_buffer: [tar.Header.MAX_NAME_SIZE]u8 = undefined; var min_link_name_buffer: [tar.Header.LINK_NAME_SIZE]u8 = undefined; @@ -366,7 +389,7 @@ test "run test cases" { for (long_name_cases) |case| { var fsb = std.io.fixedBufferStream(case.data); - var iter = try tar.iterator(fsb.reader(), .{ + var iter = tar.iterator(fsb.reader(), .{ .file_name_buffer = &min_file_name_buffer, .link_name_buffer = &min_link_name_buffer, }); @@ -382,24 +405,25 @@ test "run test cases" { } } -// used in test to calculate file chksum -const Md5Writer = struct { - h: std.crypto.hash.Md5 = std.crypto.hash.Md5.init(.{}), +test "insufficient buffer in Header name filed" { + var min_file_name_buffer: [9]u8 = undefined; + var min_link_name_buffer: [tar.Header.LINK_NAME_SIZE]u8 = undefined; - pub fn writeAll(self: *Md5Writer, buf: []const u8) !void { - self.h.update(buf); - } + var fsb = std.io.fixedBufferStream(cases[0].data); + var iter = tar.iterator(fsb.reader(), .{ + .file_name_buffer = &min_file_name_buffer, + .link_name_buffer = &min_link_name_buffer, + }); - pub fn writeByte(self: *Md5Writer, byte: u8) !void { - self.h.update(&[_]u8{byte}); - } + var iter_err: ?anyerror = null; + while (iter.next() catch |err| brk: { + iter_err = err; + break :brk null; + }) |_| {} - pub fn chksum(self: *Md5Writer) [32]u8 { - var s = [_]u8{0} ** 16; - self.h.final(&s); - return std.fmt.bytesToHex(s, .lower); - } -}; + try testing.expect(iter_err != null); + try testing.expectEqual(error.TarInsufficientBuffer, iter_err.?); +} test "should not overwrite existing file" { // Starting from this folder structure: From 614161a7cf65f46cb3e2461ffe2e09d972508a97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Sat, 2 Mar 2024 17:52:31 +0100 Subject: [PATCH 05/13] std.tar make iterator interface more ergonomic for the then end users: 1. Don't require user to call file.skip() on file returned from iterator.next if file is not read. Iterator will now handle this. Previously that returned header parsing error, without knowing some tar internals it is hard to understand what is required from user. 2. Use iterator.File.kind enum which is similar to fs.File.Kind, something familiar. Internal Header.Kind has many types which are not exposed but the user needs to have else in kind switch to cover those cases. 3. 
Add reader interface to the iterator.File. --- lib/std/tar.zig | 112 +++++++++++++++++++++++++------------------ lib/std/tar/test.zig | 22 +++++---- 2 files changed, 78 insertions(+), 56 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 6bf46ad507..98bd13fd88 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -258,10 +258,15 @@ pub fn iterator(reader: anytype, options: IteratorOptions) Iterator(@TypeOf(read .file_name_buffer = options.file_name_buffer, .link_name_buffer = options.link_name_buffer, .padding = 0, - .file = undefined, }; } +pub const FileKind = enum { + directory, + sym_link, + file, +}; + fn Iterator(comptime ReaderType: type) type { return struct { reader: ReaderType, @@ -274,34 +279,42 @@ fn Iterator(comptime ReaderType: type) type { // bytes of padding to the end of the block padding: usize, - // current tar file - file: File, + // not consumed bytes of file from last next iteration + unread_file_bytes: usize = 0, pub const File = struct { name: []const u8, // name of file, symlink or directory link_name: []const u8, // target name of symlink - size: u64, // size of the file in bytes - mode: u32, - kind: Header.Kind, + size: u64 = 0, // size of the file in bytes + mode: u32 = 0, + kind: FileKind = .file, + unread_bytes: *usize, reader: ReaderType, - // Writes file content to writer. - pub fn write(self: File, writer: anytype) !void { - var buffer: [4096]u8 = undefined; + pub const Reader = std.io.Reader(*Self, ReaderType.Error, read); - var n: u64 = 0; - while (n < self.size) { - const buf = buffer[0..@min(buffer.len, self.size - n)]; - try self.reader.readNoEof(buf); - try writer.writeAll(buf); - n += buf.len; - } + pub fn reader(self: *Self) Reader { + return .{ .context = self }; } - // Skips file content. Advances reader. - pub fn skip(self: File) !void { - try self.reader.skipBytes(self.size, .{}); + pub fn read(self: *Self, dest: []u8) ReaderType.Error!usize { + const buf = dest[0..@min(dest.len, self.unread_size.*)]; + const n = try self.reader.read(buf); + self.unread_size.* -= n; + return n; + } + + // Writes file content to writer. + pub fn writeAll(self: File, writer: anytype) !void { + var buffer: [4096]u8 = undefined; + + while (self.unread_bytes.* > 0) { + const buf = buffer[0..@min(buffer.len, self.unread_bytes.*)]; + try self.reader.readNoEof(buf); + try writer.writeAll(buf); + self.unread_bytes.* -= buf.len; + } } }; @@ -326,14 +339,12 @@ fn Iterator(comptime ReaderType: type) type { return nullStr(buf); } - fn initFile(self: *Self) void { - self.file = .{ + fn newFile(self: *Self) File { + return .{ .name = self.file_name_buffer[0..0], .link_name = self.link_name_buffer[0..0], - .size = 0, - .kind = .normal, - .mode = 0, .reader = self.reader, + .unread_bytes = &self.unread_file_bytes, }; } @@ -350,7 +361,12 @@ fn Iterator(comptime ReaderType: type) type { /// loop iterates through one or more entries until it collects a all /// file attributes. 
pub fn next(self: *Self) !?File { - self.initFile(); + if (self.unread_file_bytes > 0) { + // If file content was not consumed by caller + try self.reader.skipBytes(self.unread_file_bytes, .{}); + self.unread_file_bytes = 0; + } + var file: File = self.newFile(); while (try self.readHeader()) |header| { const kind = header.kind(); @@ -360,46 +376,52 @@ fn Iterator(comptime ReaderType: type) type { switch (kind) { // File types to retrun upstream .directory, .normal, .symbolic_link => { - self.file.kind = kind; - self.file.mode = try header.mode(); + file.kind = switch (kind) { + .directory => .directory, + .normal => .file, + .symbolic_link => .sym_link, + else => unreachable, + }; + file.mode = try header.mode(); // set file attributes if not already set by prefix/extended headers - if (self.file.size == 0) { - self.file.size = size; + if (file.size == 0) { + file.size = size; } - if (self.file.link_name.len == 0) { - self.file.link_name = try header.linkName(self.link_name_buffer); + if (file.link_name.len == 0) { + file.link_name = try header.linkName(self.link_name_buffer); } - if (self.file.name.len == 0) { - self.file.name = try header.fullName(self.file_name_buffer); + if (file.name.len == 0) { + file.name = try header.fullName(self.file_name_buffer); } - self.padding = blockPadding(self.file.size); - return self.file; + self.padding = blockPadding(file.size); + self.unread_file_bytes = file.size; + return file; }, // Prefix header types .gnu_long_name => { - self.file.name = try self.readString(@intCast(size), self.file_name_buffer); + file.name = try self.readString(@intCast(size), self.file_name_buffer); }, .gnu_long_link => { - self.file.link_name = try self.readString(@intCast(size), self.link_name_buffer); + file.link_name = try self.readString(@intCast(size), self.link_name_buffer); }, .extended_header => { // Use just attributes from last extended header. - self.initFile(); + file = self.newFile(); var rdr = paxIterator(self.reader, @intCast(size)); while (try rdr.next()) |attr| { switch (attr.kind) { .path => { - self.file.name = try attr.value(self.file_name_buffer); + file.name = try attr.value(self.file_name_buffer); }, .linkpath => { - self.file.link_name = try attr.value(self.link_name_buffer); + file.link_name = try attr.value(self.link_name_buffer); }, .size => { var buf: [pax_max_size_attr_len]u8 = undefined; - self.file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10); + file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10); }, } } @@ -574,24 +596,23 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi try dir.makePath(file_name); } }, - .normal => { + .file => { if (file.size == 0 and file.name.len == 0) return; const file_name = stripComponents(file.name, options.strip_components); if (file_name.len == 0) return error.BadFileName; if (createDirAndFile(dir, file_name)) |fs_file| { defer fs_file.close(); - try file.write(fs_file); + try file.writeAll(fs_file); } else |err| { const d = options.diagnostics orelse return err; try d.errors.append(d.allocator, .{ .unable_to_create_file = .{ .code = err, .file_name = try d.allocator.dupe(u8, file_name), } }); - try file.skip(); } }, - .symbolic_link => { + .sym_link => { // The file system path of the symbolic link. 
const file_name = stripComponents(file.name, options.strip_components); if (file_name.len == 0) return error.BadFileName; @@ -607,7 +628,6 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi } }); }; }, - else => unreachable, } } } diff --git a/lib/std/tar/test.zig b/lib/std/tar/test.zig index 7cd0cb7544..bea7bb1aa4 100644 --- a/lib/std/tar/test.zig +++ b/lib/std/tar/test.zig @@ -8,7 +8,7 @@ const Case = struct { size: u64 = 0, mode: u32 = 0, link_name: []const u8 = &[0]u8{}, - kind: tar.Header.Kind = .normal, + kind: tar.FileKind = .file, truncated: bool = false, // when there is no file body, just header, usefull for huge files }; @@ -91,7 +91,7 @@ const cases = [_]Case{ .{ .name = "a/b", .size = 0, - .kind = .symbolic_link, + .kind = .sym_link, .mode = 0o777, .link_name = "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", }, @@ -112,7 +112,7 @@ const cases = [_]Case{ .{ .name = "foo", .size = 999, - .kind = .normal, + .kind = .file, .mode = 0o640, }, }, @@ -153,7 +153,7 @@ const cases = [_]Case{ .{ .name = "P1050238.JPG.log", .size = 14, - .kind = .normal, + .kind = .file, .mode = 0o664, }, }, @@ -168,13 +168,13 @@ const cases = [_]Case{ .{ .name = "small.txt", .size = 5, - .kind = .normal, + .kind = .file, .mode = 0o644, }, .{ .name = "small2.txt", .size = 11, - .kind = .normal, + .kind = .file, .mode = 0o644, }, }, @@ -189,7 +189,7 @@ const cases = [_]Case{ .{ .name = "GNU2/GNU2/long-path-name", .link_name = "GNU4/GNU4/long-linkpath-name", - .kind = .symbolic_link, + .kind = .sym_link, }, }, }, @@ -205,7 +205,7 @@ const cases = [_]Case{ .{ .name = "bar", .link_name = "PAX4/PAX4/long-linkpath-name", - .kind = .symbolic_link, + .kind = .sym_link, }, }, }, @@ -369,11 +369,13 @@ test "run test cases" { if (case.chksums.len > i) { var md5writer = Md5Writer{}; - try actual.write(&md5writer); + try actual.writeAll(&md5writer); const chksum = md5writer.chksum(); try testing.expectEqualStrings(case.chksums[i], &chksum); } else { - if (!expected.truncated) try actual.skip(); // skip file content + if (expected.truncated) { + iter.unread_file_bytes = 0; + } } } try testing.expectEqual(case.files.len, i); From f5fd4691e5595e895e935ed76342cb729dc6904a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Sat, 2 Mar 2024 18:08:32 +0100 Subject: [PATCH 06/13] std.tar: document iterator interface with example --- lib/std/tar.zig | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 98bd13fd88..8a4ec444af 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -1,4 +1,3 @@ -<<<<<<< HEAD //! Tar archive is single ordinary file which can contain many files (or //! directories, symlinks, ...). It's build by series of blocks each size of 512 //! bytes. First block of each entry is header which defines type, name, size @@ -16,7 +15,7 @@ //! GNU tar reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html //! pax reference: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13 -const std = @import("std.zig"); +const std = @import("std"); const assert = std.debug.assert; pub const output = @import("tar/output.zig"); @@ -250,6 +249,33 @@ pub const IteratorOptions = struct { /// Iterates over files in tar archive. /// `next` returns each file in `reader` tar archive. 
+/// +/// Init iterator with tar archive reader and provided buffers: +/// +/// var file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; +/// var link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; +/// +/// var iter = std.tar.iterator(archive.reader(), .{ +/// .file_name_buffer = &file_name_buffer, +/// .link_name_buffer = &link_name_buffer, +/// }); +/// +/// Iterate on each tar archive file: +/// +/// while (try iter.next()) |file| { +/// switch (file.kind) { +/// .directory => { +/// // try dir.makePath(file.name); +/// }, +/// .file => { +/// // try file.writeAll(writer); +/// }, +/// .sym_link => { +/// // try dir.symLink(file.link_name, file.name, .{}); +/// }, +/// } +/// } +/// pub fn iterator(reader: anytype, options: IteratorOptions) Iterator(@TypeOf(reader)) { return .{ .reader = reader, From 67336ca8c64a4c6d9f7304b6319776c64044660a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Sat, 2 Mar 2024 23:29:02 +0100 Subject: [PATCH 07/13] std.tar: fix build on 32 bit platform Fixing error from ci: std/tar.zig:423:54: error: expected type 'usize', found 'u64' std/tar.zig:423:54: note: unsigned 32-bit int cannot represent all possible unsigned 64-bit values --- lib/std/tar.zig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 8a4ec444af..a6ec42d415 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -306,7 +306,7 @@ fn Iterator(comptime ReaderType: type) type { // bytes of padding to the end of the block padding: usize, // not consumed bytes of file from last next iteration - unread_file_bytes: usize = 0, + unread_file_bytes: u64 = 0, pub const File = struct { name: []const u8, // name of file, symlink or directory @@ -315,7 +315,7 @@ fn Iterator(comptime ReaderType: type) type { mode: u32 = 0, kind: FileKind = .file, - unread_bytes: *usize, + unread_bytes: *u64, reader: ReaderType, pub const Reader = std.io.Reader(*Self, ReaderType.Error, read); From 8cc35a0255cae7728cfadfbe625986dc7c509def Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Sun, 3 Mar 2024 10:58:16 +0100 Subject: [PATCH 08/13] std.tar: fix path testing on windows Fixing ci error: error: 'tar.test.test.pipeToFileSystem' failed: slices differ. first difference occurs at index 2 (0x2) ============ expected this output: ============= len: 9 (0x9) 2E 2E 2F 61 2F 66 69 6C 65 ../a/file ============= instead found this: ============== len: 9 (0x9) 2E 2E 5C 61 5C 66 69 6C 65 ..\a\file After #19136 dir.symlink changes path separtors to \ on windows. 
--- lib/std/tar/test.zig | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/lib/std/tar/test.zig b/lib/std/tar/test.zig index bea7bb1aa4..6d9788de07 100644 --- a/lib/std/tar/test.zig +++ b/lib/std/tar/test.zig @@ -539,5 +539,16 @@ test "pipeToFileSystem" { try testing.expect((try root.dir.statFile("b/symlink")).kind == .file); // statFile follows symlink var buf: [32]u8 = undefined; - try testing.expectEqualSlices(u8, "../a/file", try root.dir.readLink("b/symlink", &buf)); + try testing.expectEqualSlices( + u8, + "../a/file", + normalizePath(try root.dir.readLink("b/symlink", &buf)), + ); +} + +fn normalizePath(bytes: []u8) []u8 { + const canonical_sep = std.fs.path.sep_posix; + if (std.fs.path.sep == canonical_sep) return bytes; + std.mem.replaceScalar(u8, bytes, std.fs.path.sep, canonical_sep); + return bytes; } From 8ec990d6d71174fbc3961fa11d4a74d6ec81f58a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 4 Mar 2024 22:47:53 +0100 Subject: [PATCH 09/13] disable test which is hanging on windows in ci When this test is enabled something like: `zig build test docs --zig-lib-dir .\lib\ -Dstatic-llvm -Dskip-non-native -Denable-symlinks-windows` never finishes. Those are failed runs from ci: https://github.com/ziglang/zig/actions/runs/8137710393 https://github.com/ziglang/zig/actions/runs/8129619923 https://github.com/ziglang/zig/actions/runs/8125845128 Isolating that test and running it is not a problem. Running something like `zig test .\lib\std\std.zig --zig-lib-dir .\lib\` is fine. --- lib/std/tar.zig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index a6ec42d415..7730eb9ae7 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -896,6 +896,11 @@ test "header parse mode" { } test "create file and symlink" { + // With test enabled this is hanging under windows: + // zig build test docs --zig-lib-dir .\lib\ -Dstatic-llvm -Dskip-non-native -Denable-symlinks-windows + const builtin = @import("builtin"); + if (builtin.os.tag == .windows) return error.SkipZigTest; + var root = std.testing.tmpDir(.{}); defer root.cleanup(); From a9e7abda204300bd13ce08721bf13801817dd6ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Sat, 9 Mar 2024 13:23:07 +0100 Subject: [PATCH 10/13] std.tar: fix test hanging on windows Problem was manifested only on windows with target `-target aarch64-windows-gnu`. I was creating new files but not closing any of them. Tmp dir cleanup hangs looping in deleteTree forever. 
--- lib/std/tar.zig | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 7730eb9ae7..8c83e63b14 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -896,25 +896,23 @@ test "header parse mode" { } test "create file and symlink" { - // With test enabled this is hanging under windows: - // zig build test docs --zig-lib-dir .\lib\ -Dstatic-llvm -Dskip-non-native -Denable-symlinks-windows - const builtin = @import("builtin"); - if (builtin.os.tag == .windows) return error.SkipZigTest; - var root = std.testing.tmpDir(.{}); defer root.cleanup(); - _ = try createDirAndFile(root.dir, "file1"); - _ = try createDirAndFile(root.dir, "a/b/c/file2"); + var file = try createDirAndFile(root.dir, "file1"); + file.close(); + file = try createDirAndFile(root.dir, "a/b/c/file2"); + file.close(); - _ = createDirAndSymlink(root.dir, "a/b/c/file2", "symlink1") catch |err| { + createDirAndSymlink(root.dir, "a/b/c/file2", "symlink1") catch |err| { // On Windows when developer mode is not enabled if (err == error.AccessDenied) return error.SkipZigTest; return err; }; - _ = try createDirAndSymlink(root.dir, "../../../file1", "d/e/f/symlink2"); + try createDirAndSymlink(root.dir, "../../../file1", "d/e/f/symlink2"); // Danglink symlnik, file created later - _ = try createDirAndSymlink(root.dir, "../../../g/h/i/file4", "j/k/l/symlink3"); - _ = try createDirAndFile(root.dir, "g/h/i/file4"); + try createDirAndSymlink(root.dir, "../../../g/h/i/file4", "j/k/l/symlink3"); + file = try createDirAndFile(root.dir, "g/h/i/file4"); + file.close(); } From c4868b2bbc1df7ea6a3bd12206d10206db1d8965 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Sun, 10 Mar 2024 16:31:10 +0100 Subject: [PATCH 11/13] std.tar: use doctest Make std.tar look better in docs. Remove from public interface what is not necessary. Add comment to the public methods. Add doctest as usage examples for iterator and pipeToFileSystem. --- lib/std/tar.zig | 223 +++++++++++++----- lib/std/tar/test.zig | 49 +--- ...pe_to_file_system_test.tar => example.tar} | Bin 10240 -> 10240 bytes 3 files changed, 164 insertions(+), 108 deletions(-) rename lib/std/tar/testdata/{pipe_to_file_system_test.tar => example.tar} (89%) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 8c83e63b14..819dae98b6 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -17,10 +17,12 @@ const std = @import("std"); const assert = std.debug.assert; +const testing = std.testing; pub const output = @import("tar/output.zig"); -pub const Options = struct { +/// pipeToFileSystem options +pub const PipeOptions = struct { /// Number of directory levels to skip when extracting files. strip_components: u32 = 0, /// How to handle the "mode" property of files from within the tar file. @@ -84,14 +86,14 @@ pub const Options = struct { }; }; -pub const Header = struct { +const Header = struct { const SIZE = 512; - pub const MAX_NAME_SIZE = 100 + 1 + 155; // name(100) + separator(1) + prefix(155) - pub const LINK_NAME_SIZE = 100; + const MAX_NAME_SIZE = 100 + 1 + 155; // name(100) + separator(1) + prefix(155) + const LINK_NAME_SIZE = 100; bytes: *const [SIZE]u8, - pub const Kind = enum(u8) { + const Kind = enum(u8) { normal_alias = 0, normal = '0', hard_link = '1', @@ -237,74 +239,53 @@ fn nullStr(str: []const u8) []const u8 { return str; } +/// Options for iterator. +/// Buffers should be provided by the caller. 
pub const IteratorOptions = struct { /// Use a buffer with length `std.fs.MAX_PATH_BYTES` to match file system capabilities. file_name_buffer: []u8, /// Use a buffer with length `std.fs.MAX_PATH_BYTES` to match file system capabilities. link_name_buffer: []u8, + /// Provide this to receive detailed error messages. + /// When this is provided, some errors which would otherwise be returned immediately + /// will instead be added to this structure. The API user must check the errors + /// in diagnostics to know whether the operation succeeded or failed. diagnostics: ?*Diagnostics = null, - pub const Diagnostics = Options.Diagnostics; + pub const Diagnostics = PipeOptions.Diagnostics; }; /// Iterates over files in tar archive. -/// `next` returns each file in `reader` tar archive. -/// -/// Init iterator with tar archive reader and provided buffers: -/// -/// var file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; -/// var link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; -/// -/// var iter = std.tar.iterator(archive.reader(), .{ -/// .file_name_buffer = &file_name_buffer, -/// .link_name_buffer = &link_name_buffer, -/// }); -/// -/// Iterate on each tar archive file: -/// -/// while (try iter.next()) |file| { -/// switch (file.kind) { -/// .directory => { -/// // try dir.makePath(file.name); -/// }, -/// .file => { -/// // try file.writeAll(writer); -/// }, -/// .sym_link => { -/// // try dir.symLink(file.link_name, file.name, .{}); -/// }, -/// } -/// } -/// +/// `next` returns each file in tar archive. pub fn iterator(reader: anytype, options: IteratorOptions) Iterator(@TypeOf(reader)) { return .{ .reader = reader, .diagnostics = options.diagnostics, - .header_buffer = undefined, .file_name_buffer = options.file_name_buffer, .link_name_buffer = options.link_name_buffer, - .padding = 0, }; } +/// Type of the file returned by iterator `next` method. pub const FileKind = enum { directory, sym_link, file, }; -fn Iterator(comptime ReaderType: type) type { +/// Iteartor over entries in the tar file represented by reader. 
+pub fn Iterator(comptime ReaderType: type) type { return struct { reader: ReaderType, - diagnostics: ?*Options.Diagnostics, + diagnostics: ?*PipeOptions.Diagnostics = null, // buffers for heeader and file attributes - header_buffer: [Header.SIZE]u8, + header_buffer: [Header.SIZE]u8 = undefined, file_name_buffer: []u8, link_name_buffer: []u8, // bytes of padding to the end of the block - padding: usize, + padding: usize = 0, // not consumed bytes of file from last next iteration unread_file_bytes: u64 = 0, @@ -316,18 +297,18 @@ fn Iterator(comptime ReaderType: type) type { kind: FileKind = .file, unread_bytes: *u64, - reader: ReaderType, + parent_reader: ReaderType, - pub const Reader = std.io.Reader(*Self, ReaderType.Error, read); + pub const Reader = std.io.Reader(File, ReaderType.Error, File.read); - pub fn reader(self: *Self) Reader { + pub fn reader(self: File) Reader { return .{ .context = self }; } - pub fn read(self: *Self, dest: []u8) ReaderType.Error!usize { - const buf = dest[0..@min(dest.len, self.unread_size.*)]; - const n = try self.reader.read(buf); - self.unread_size.* -= n; + pub fn read(self: File, dest: []u8) ReaderType.Error!usize { + const buf = dest[0..@min(dest.len, self.unread_bytes.*)]; + const n = try self.parent_reader.read(buf); + self.unread_bytes.* -= n; return n; } @@ -337,7 +318,7 @@ fn Iterator(comptime ReaderType: type) type { while (self.unread_bytes.* > 0) { const buf = buffer[0..@min(buffer.len, self.unread_bytes.*)]; - try self.reader.readNoEof(buf); + try self.parent_reader.readNoEof(buf); try writer.writeAll(buf); self.unread_bytes.* -= buf.len; } @@ -369,7 +350,7 @@ fn Iterator(comptime ReaderType: type) type { return .{ .name = self.file_name_buffer[0..0], .link_name = self.link_name_buffer[0..0], - .reader = self.reader, + .parent_reader = self.reader, .unread_bytes = &self.unread_file_bytes, }; } @@ -594,7 +575,8 @@ fn PaxIterator(comptime ReaderType: type) type { }; } -pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void { +/// Saves tar file content to the file systems. +pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: PipeOptions) !void { switch (options.mode_mode) { .ignore => {}, .executable_bit_only => { @@ -699,7 +681,7 @@ fn stripComponents(path: []const u8, count: u32) []const u8 { } test "stripComponents" { - const expectEqualStrings = std.testing.expectEqualStrings; + const expectEqualStrings = testing.expectEqualStrings; try expectEqualStrings("a/b/c", stripComponents("a/b/c", 0)); try expectEqualStrings("b/c", stripComponents("a/b/c", 1)); try expectEqualStrings("c", stripComponents("a/b/c", 2)); @@ -810,24 +792,24 @@ test "PaxIterator" { var i: usize = 0; while (iter.next() catch |err| { if (case.err) |e| { - try std.testing.expectEqual(e, err); + try testing.expectEqual(e, err); continue; } return err; }) |attr| : (i += 1) { const exp = case.attrs[i]; - try std.testing.expectEqual(exp.kind, attr.kind); + try testing.expectEqual(exp.kind, attr.kind); const value = attr.value(&buffer) catch |err| { if (exp.err) |e| { - try std.testing.expectEqual(e, err); + try testing.expectEqual(e, err); break :outer; } return err; }; - try std.testing.expectEqualStrings(exp.value, value); + try testing.expectEqualStrings(exp.value, value); } - try std.testing.expectEqual(case.attrs.len, i); - try std.testing.expect(case.err == null); + try testing.expectEqual(case.attrs.len, i); + try testing.expect(case.err == null); } } @@ -863,9 +845,9 @@ test "header parse size" { @memcpy(bytes[124 .. 
124 + case.in.len], case.in); var header = Header{ .bytes = &bytes }; if (case.err) |err| { - try std.testing.expectError(err, header.size()); + try testing.expectError(err, header.size()); } else { - try std.testing.expectEqual(case.want, try header.size()); + try testing.expectEqual(case.want, try header.size()); } } } @@ -888,15 +870,15 @@ test "header parse mode" { @memcpy(bytes[100 .. 100 + case.in.len], case.in); var header = Header{ .bytes = &bytes }; if (case.err) |err| { - try std.testing.expectError(err, header.mode()); + try testing.expectError(err, header.mode()); } else { - try std.testing.expectEqual(case.want, try header.mode()); + try testing.expectEqual(case.want, try header.mode()); } } } test "create file and symlink" { - var root = std.testing.tmpDir(.{}); + var root = testing.tmpDir(.{}); defer root.cleanup(); var file = try createDirAndFile(root.dir, "file1"); @@ -916,3 +898,120 @@ test "create file and symlink" { file = try createDirAndFile(root.dir, "g/h/i/file4"); file.close(); } + +test iterator { + // Example tar file is created from this tree structure: + // $ tree example + // example + // ├── a + // │   └── file + // ├── b + // │   └── symlink -> ../a/file + // └── empty + // $ cat example/a/file + // content + // $ tar -cf example.tar example + // $ tar -tvf example.tar + // example/ + // example/b/ + // example/b/symlink -> ../a/file + // example/a/ + // example/a/file + // example/empty/ + + const data = @embedFile("tar/testdata/example.tar"); + var fbs = std.io.fixedBufferStream(data); + + // User provided buffers to the iterator + var file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; + var link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; + // Create iterator + var iter = iterator(fbs.reader(), .{ + .file_name_buffer = &file_name_buffer, + .link_name_buffer = &link_name_buffer, + }); + // Iterate over files in example.tar + var file_no: usize = 0; + while (try iter.next()) |file| : (file_no += 1) { + switch (file.kind) { + .directory => { + switch (file_no) { + 0 => try testing.expectEqualStrings("example/", file.name), + 1 => try testing.expectEqualStrings("example/b/", file.name), + 3 => try testing.expectEqualStrings("example/a/", file.name), + 5 => try testing.expectEqualStrings("example/empty/", file.name), + else => unreachable, + } + }, + .file => { + try testing.expectEqualStrings("example/a/file", file.name); + // Read file content + var buf: [16]u8 = undefined; + const n = try file.reader().readAll(&buf); + try testing.expectEqualStrings("content\n", buf[0..n]); + }, + .sym_link => { + try testing.expectEqualStrings("example/b/symlink", file.name); + try testing.expectEqualStrings("../a/file", file.link_name); + }, + } + } +} + +test pipeToFileSystem { + // Example tar file is created from this tree structure: + // $ tree example + // example + // ├── a + // │   └── file + // ├── b + // │   └── symlink -> ../a/file + // └── empty + // $ cat example/a/file + // content + // $ tar -cf example.tar example + // $ tar -tvf example.tar + // example/ + // example/b/ + // example/b/symlink -> ../a/file + // example/a/ + // example/a/file + // example/empty/ + + const data = @embedFile("tar/testdata/example.tar"); + var fbs = std.io.fixedBufferStream(data); + const reader = fbs.reader(); + + var tmp = testing.tmpDir(.{ .no_follow = true }); + defer tmp.cleanup(); + const dir = tmp.dir; + + // Save tar from `reader` to the file system `dir` + pipeToFileSystem(dir, reader, .{ + .mode_mode = .ignore, + .strip_components = 1, + 
.exclude_empty_directories = true, + }) catch |err| { + // Skip on platform which don't support symlinks + if (err == error.UnableToCreateSymLink) return error.SkipZigTest; + return err; + }; + + try testing.expectError(error.FileNotFound, dir.statFile("empty")); + try testing.expect((try dir.statFile("a/file")).kind == .file); + try testing.expect((try dir.statFile("b/symlink")).kind == .file); // statFile follows symlink + + var buf: [32]u8 = undefined; + try testing.expectEqualSlices( + u8, + "../a/file", + normalizePath(try dir.readLink("b/symlink", &buf)), + ); +} + +fn normalizePath(bytes: []u8) []u8 { + const canonical_sep = std.fs.path.sep_posix; + if (std.fs.path.sep == canonical_sep) return bytes; + std.mem.replaceScalar(u8, bytes, std.fs.path.sep, canonical_sep); + return bytes; +} diff --git a/lib/std/tar/test.zig b/lib/std/tar/test.zig index 6d9788de07..abb7d3cbe0 100644 --- a/lib/std/tar/test.zig +++ b/lib/std/tar/test.zig @@ -385,8 +385,8 @@ test "run test cases" { test "pax/gnu long names with small buffer" { // should fail with insufficient buffer error - var min_file_name_buffer: [tar.Header.MAX_NAME_SIZE]u8 = undefined; - var min_link_name_buffer: [tar.Header.LINK_NAME_SIZE]u8 = undefined; + var min_file_name_buffer: [256]u8 = undefined; + var min_link_name_buffer: [100]u8 = undefined; const long_name_cases = [_]Case{ cases[11], cases[25], cases[28] }; for (long_name_cases) |case| { @@ -409,7 +409,7 @@ test "pax/gnu long names with small buffer" { test "insufficient buffer in Header name filed" { var min_file_name_buffer: [9]u8 = undefined; - var min_link_name_buffer: [tar.Header.LINK_NAME_SIZE]u8 = undefined; + var min_link_name_buffer: [100]u8 = undefined; var fsb = std.io.fixedBufferStream(cases[0].data); var iter = tar.iterator(fsb.reader(), .{ @@ -509,46 +509,3 @@ test "case sensitivity" { try testing.expect((try root.dir.statFile("alacritty/darkermatrix.yml")).kind == .file); try testing.expect((try root.dir.statFile("alacritty/Darkermatrix.yml")).kind == .file); } - -test "pipeToFileSystem" { - // $ tar tvf - // pipe_to_file_system_test/ - // pipe_to_file_system_test/b/ - // pipe_to_file_system_test/b/symlink -> ../a/file - // pipe_to_file_system_test/a/ - // pipe_to_file_system_test/a/file - // pipe_to_file_system_test/empty/ - const data = @embedFile("testdata/pipe_to_file_system_test.tar"); - var fsb = std.io.fixedBufferStream(data); - - var root = std.testing.tmpDir(.{ .no_follow = true }); - defer root.cleanup(); - - tar.pipeToFileSystem(root.dir, fsb.reader(), .{ - .mode_mode = .ignore, - .strip_components = 1, - .exclude_empty_directories = true, - }) catch |err| { - // Skip on platform which don't support symlinks - if (err == error.UnableToCreateSymLink) return error.SkipZigTest; - return err; - }; - - try testing.expectError(error.FileNotFound, root.dir.statFile("empty")); - try testing.expect((try root.dir.statFile("a/file")).kind == .file); - try testing.expect((try root.dir.statFile("b/symlink")).kind == .file); // statFile follows symlink - - var buf: [32]u8 = undefined; - try testing.expectEqualSlices( - u8, - "../a/file", - normalizePath(try root.dir.readLink("b/symlink", &buf)), - ); -} - -fn normalizePath(bytes: []u8) []u8 { - const canonical_sep = std.fs.path.sep_posix; - if (std.fs.path.sep == canonical_sep) return bytes; - std.mem.replaceScalar(u8, bytes, std.fs.path.sep, canonical_sep); - return bytes; -} diff --git a/lib/std/tar/testdata/pipe_to_file_system_test.tar b/lib/std/tar/testdata/example.tar similarity index 89% rename from 
lib/std/tar/testdata/pipe_to_file_system_test.tar rename to lib/std/tar/testdata/example.tar index 0c424166ae43d2e56dcbb5410771c58c463aab80..d66c407eaaf012d3b4ca11bece236b5a56b9eb67 100644 GIT binary patch delta 234 zcmZn&Xb9j)tw_u*$Vt_oxG-j7iV?e^iJ1X|g6U*K#+1p7jQ_b{(n&xCQIk`cfGW%l zkX5ilRZNy(@tJ5KI5~yIkQHQz;pSN^X^e3Fi4gs4xb@FxiJhFnp(tT!z+h-%YHn<7 zU~Xt)&R}3@WNKv2pkM%V5=U}=UP)?R3D;&r?jMYNFbAgQ7L-&1EeQkp!pO|f4C;%` IjNJdF0K!Q|j{pDw literal 10240 zcmeH}K@P$o5JfpkPhbneaGa(ZOo&z+!9{N`Q)6^dw24Gr{MnFn+6jGs{=nb|*Tz%3 z^9R?4F+|t5(S=xxa?zx))`+x7YntbkQyfBSLqt+pN_lCdB2k%gTgX?sgxH;{O8D;R zxB1uJ=3jjJ9&|E63Au)vG;*OK*LYv})#wt1m{82f`i?myqgO@I6B`}-W=6vxB= ztiNiMwxUw2Ro&G*nLgh`wgUV=KmWTQ`Cq1U0{+joYw1MR{{@Z51}DXVddhq|e|6M=CvCIr${THAA)BTTBY8Tx9=K2-SKmY_l00ck)1V8`;KmY_l00ck) b1V8`;KmY_l00ck)1V8`;KmY_lAVc5^NJXpY From 0cca7e732eb7f9ced1ecef6cf463c987df32e8e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Sun, 10 Mar 2024 18:13:47 +0100 Subject: [PATCH 12/13] std.tar: fix broken public interface --- lib/std/tar.zig | 98 +++++++++++++++++++++---------------------- src/Package/Fetch.zig | 2 +- 2 files changed, 48 insertions(+), 52 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 819dae98b6..13da27ca84 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -21,6 +21,50 @@ const testing = std.testing; pub const output = @import("tar/output.zig"); +/// Provide this to receive detailed error messages. +/// When this is provided, some errors which would otherwise be returned +/// immediately will instead be added to this structure. The API user must check +/// the errors in diagnostics to know whether the operation succeeded or failed. +pub const Diagnostics = struct { + allocator: std.mem.Allocator, + errors: std.ArrayListUnmanaged(Error) = .{}, + + pub const Error = union(enum) { + unable_to_create_sym_link: struct { + code: anyerror, + file_name: []const u8, + link_name: []const u8, + }, + unable_to_create_file: struct { + code: anyerror, + file_name: []const u8, + }, + unsupported_file_type: struct { + file_name: []const u8, + file_type: Header.Kind, + }, + }; + + pub fn deinit(d: *Diagnostics) void { + for (d.errors.items) |item| { + switch (item) { + .unable_to_create_sym_link => |info| { + d.allocator.free(info.file_name); + d.allocator.free(info.link_name); + }, + .unable_to_create_file => |info| { + d.allocator.free(info.file_name); + }, + .unsupported_file_type => |info| { + d.allocator.free(info.file_name); + }, + } + } + d.errors.deinit(d.allocator); + d.* = undefined; + } +}; + /// pipeToFileSystem options pub const PipeOptions = struct { /// Number of directory levels to skip when extracting files. @@ -29,10 +73,7 @@ pub const PipeOptions = struct { mode_mode: ModeMode = .executable_bit_only, /// Prevents creation of empty directories. exclude_empty_directories: bool = false, - /// Provide this to receive detailed error messages. - /// When this is provided, some errors which would otherwise be returned immediately - /// will instead be added to this structure. The API user must check the errors - /// in diagnostics to know whether the operation succeeded or failed. + /// Collects error messages during unpacking diagnostics: ?*Diagnostics = null, pub const ModeMode = enum { @@ -44,46 +85,6 @@ pub const PipeOptions = struct { /// Other bits of the mode are left as the default when creating files. 
executable_bit_only, }; - - pub const Diagnostics = struct { - allocator: std.mem.Allocator, - errors: std.ArrayListUnmanaged(Error) = .{}, - - pub const Error = union(enum) { - unable_to_create_sym_link: struct { - code: anyerror, - file_name: []const u8, - link_name: []const u8, - }, - unable_to_create_file: struct { - code: anyerror, - file_name: []const u8, - }, - unsupported_file_type: struct { - file_name: []const u8, - file_type: Header.Kind, - }, - }; - - pub fn deinit(d: *Diagnostics) void { - for (d.errors.items) |item| { - switch (item) { - .unable_to_create_sym_link => |info| { - d.allocator.free(info.file_name); - d.allocator.free(info.link_name); - }, - .unable_to_create_file => |info| { - d.allocator.free(info.file_name); - }, - .unsupported_file_type => |info| { - d.allocator.free(info.file_name); - }, - } - } - d.errors.deinit(d.allocator); - d.* = undefined; - } - }; }; const Header = struct { @@ -246,13 +247,8 @@ pub const IteratorOptions = struct { file_name_buffer: []u8, /// Use a buffer with length `std.fs.MAX_PATH_BYTES` to match file system capabilities. link_name_buffer: []u8, - /// Provide this to receive detailed error messages. - /// When this is provided, some errors which would otherwise be returned immediately - /// will instead be added to this structure. The API user must check the errors - /// in diagnostics to know whether the operation succeeded or failed. + /// Collects error messages during unpacking diagnostics: ?*Diagnostics = null, - - pub const Diagnostics = PipeOptions.Diagnostics; }; /// Iterates over files in tar archive. @@ -277,7 +273,7 @@ pub const FileKind = enum { pub fn Iterator(comptime ReaderType: type) type { return struct { reader: ReaderType, - diagnostics: ?*PipeOptions.Diagnostics = null, + diagnostics: ?*Diagnostics = null, // buffers for heeader and file attributes header_buffer: [Header.SIZE]u8 = undefined, diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index ac21632a60..93a6868603 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -1147,7 +1147,7 @@ fn unpackTarball(f: *Fetch, out_dir: fs.Dir, reader: anytype) RunError!void { const eb = &f.error_bundle; const gpa = f.arena.child_allocator; - var diagnostics: std.tar.Options.Diagnostics = .{ .allocator = gpa }; + var diagnostics: std.tar.Diagnostics = .{ .allocator = gpa }; defer diagnostics.deinit(); std.tar.pipeToFileSystem(out_dir, reader, .{ From c974e198164e13fea491e3897a04ec82d132fbf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Mon, 11 Mar 2024 13:30:26 +0100 Subject: [PATCH 13/13] docs: make docs work with recent tar changes It is no longer need to call skip if file content is not consumed. It is handled internally now. File types are now same as in os.File. --- lib/docs/wasm/main.zig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/docs/wasm/main.zig b/lib/docs/wasm/main.zig index 824026c96e..496fd618d5 100644 --- a/lib/docs/wasm/main.zig +++ b/lib/docs/wasm/main.zig @@ -767,7 +767,7 @@ fn unpack_inner(tar_bytes: []u8) !void { }); while (try it.next()) |tar_file| { switch (tar_file.kind) { - .normal => { + .file => { if (tar_file.size == 0 and tar_file.name.len == 0) break; if (std.mem.endsWith(u8, tar_file.name, ".zig")) { log.debug("found file: '{s}'", .{tar_file.name}); @@ -790,7 +790,6 @@ fn unpack_inner(tar_bytes: []u8) !void { tar_file.name, }); } - try tar_file.skip(); }, else => continue, }
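
For reference, a minimal sketch of driving the reworked public interface after this series, written against only what the patches above show (std.tar.iterator, FileKind, the caller-provided name buffers, and the last commit's note that unread file content no longer needs an explicit skip). The helper name listArchive and its archive_bytes parameter are illustrative assumptions, not part of the patches.

const std = @import("std");

/// Hypothetical helper (not part of the series): prints every entry of the
/// archive held in `archive_bytes`, mirroring the `test iterator` case above.
fn listArchive(archive_bytes: []const u8) !void {
    var fbs = std.io.fixedBufferStream(archive_bytes);

    // Caller-supplied buffers that back `file.name` and `file.link_name`
    // between calls to `next`.
    var file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
    var link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;

    var iter = std.tar.iterator(fbs.reader(), .{
        .file_name_buffer = &file_name_buffer,
        .link_name_buffer = &link_name_buffer,
    });

    while (try iter.next()) |file| {
        switch (file.kind) {
            .directory => std.debug.print("dir  {s}\n", .{file.name}),
            .sym_link => std.debug.print("link {s} -> {s}\n", .{ file.name, file.link_name }),
            // Unread file content no longer needs an explicit skip; the
            // iterator consumes it before returning the next entry.
            .file => std.debug.print("file {s} ({d} bytes)\n", .{ file.name, file.size }),
        }
    }
}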