Merge pull request #19155 from ianic/tar_max_file_size

std.tar: error on insufficient buffers provided to iterator
This commit is contained in:
Andrew Kelley 2024-03-11 17:03:44 -07:00 committed by GitHub
commit f60c24c73c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 682 additions and 498 deletions

View File

@ -767,7 +767,7 @@ fn unpack_inner(tar_bytes: []u8) !void {
});
while (try it.next()) |tar_file| {
switch (tar_file.kind) {
.normal => {
.file => {
if (tar_file.size == 0 and tar_file.name.len == 0) break;
if (std.mem.endsWith(u8, tar_file.name, ".zig")) {
log.debug("found file: '{s}'", .{tar_file.name});
@ -790,7 +790,6 @@ fn unpack_inner(tar_bytes: []u8) !void {
tar_file.name,
});
}
try tar_file.skip();
},
else => continue,
}

View File

@ -15,22 +15,65 @@
//! GNU tar reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html
//! pax reference: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13
const std = @import("std.zig");
const std = @import("std");
const assert = std.debug.assert;
const testing = std.testing;
pub const output = @import("tar/output.zig");
pub const Options = struct {
/// Provide this to receive detailed error messages.
/// When this is provided, some errors which would otherwise be returned
/// immediately will instead be added to this structure. The API user must check
/// the errors in diagnostics to know whether the operation succeeded or failed.
pub const Diagnostics = struct {
allocator: std.mem.Allocator,
errors: std.ArrayListUnmanaged(Error) = .{},
/// One recoverable unpacking failure. The `file_name`/`link_name`
/// payloads are owned copies allocated with `allocator` and are freed
/// in `deinit`.
pub const Error = union(enum) {
unable_to_create_sym_link: struct {
code: anyerror,
file_name: []const u8,
link_name: []const u8,
},
unable_to_create_file: struct {
code: anyerror,
file_name: []const u8,
},
unsupported_file_type: struct {
file_name: []const u8,
file_type: Header.Kind,
},
};
/// Frees the duplicated name strings of every collected error and the
/// error list itself. `d` must not be used afterwards.
pub fn deinit(d: *Diagnostics) void {
for (d.errors.items) |item| {
switch (item) {
.unable_to_create_sym_link => |info| {
d.allocator.free(info.file_name);
d.allocator.free(info.link_name);
},
.unable_to_create_file => |info| {
d.allocator.free(info.file_name);
},
.unsupported_file_type => |info| {
d.allocator.free(info.file_name);
},
}
}
d.errors.deinit(d.allocator);
d.* = undefined;
}
};
/// pipeToFileSystem options
pub const PipeOptions = struct {
/// Number of directory levels to skip when extracting files.
strip_components: u32 = 0,
/// How to handle the "mode" property of files from within the tar file.
mode_mode: ModeMode = .executable_bit_only,
/// Prevents creation of empty directories.
exclude_empty_directories: bool = false,
/// Provide this to receive detailed error messages.
/// When this is provided, some errors which would otherwise be returned immediately
/// will instead be added to this structure. The API user must check the errors
/// in diagnostics to know whether the operation succeeded or failed.
/// Collects error messages during unpacking
diagnostics: ?*Diagnostics = null,
pub const ModeMode = enum {
@ -42,56 +85,16 @@ pub const Options = struct {
/// Other bits of the mode are left as the default when creating files.
executable_bit_only,
};
pub const Diagnostics = struct {
allocator: std.mem.Allocator,
errors: std.ArrayListUnmanaged(Error) = .{},
pub const Error = union(enum) {
unable_to_create_sym_link: struct {
code: anyerror,
file_name: []const u8,
link_name: []const u8,
},
unable_to_create_file: struct {
code: anyerror,
file_name: []const u8,
},
unsupported_file_type: struct {
file_name: []const u8,
file_type: Header.Kind,
},
};
pub fn deinit(d: *Diagnostics) void {
for (d.errors.items) |item| {
switch (item) {
.unable_to_create_sym_link => |info| {
d.allocator.free(info.file_name);
d.allocator.free(info.link_name);
},
.unable_to_create_file => |info| {
d.allocator.free(info.file_name);
},
.unsupported_file_type => |info| {
d.allocator.free(info.file_name);
},
}
}
d.errors.deinit(d.allocator);
d.* = undefined;
}
};
};
pub const Header = struct {
const Header = struct {
const SIZE = 512;
const MAX_NAME_SIZE = 100 + 1 + 155; // name(100) + separator(1) + prefix(155)
const LINK_NAME_SIZE = 100;
bytes: *const [SIZE]u8,
pub const Kind = enum(u8) {
const Kind = enum(u8) {
normal_alias = 0,
normal = '0',
hard_link = '1',
@ -114,9 +117,10 @@ pub const Header = struct {
/// Includes prefix concatenated, if any.
/// TODO: check against "../" and other nefarious things
pub fn fullName(header: Header, buffer: *[MAX_NAME_SIZE]u8) ![]const u8 {
pub fn fullName(header: Header, buffer: []u8) ![]const u8 {
const n = name(header);
const p = prefix(header);
if (buffer.len < n.len + p.len + 1) return error.TarInsufficientBuffer;
if (!is_ustar(header) or p.len == 0) {
@memcpy(buffer[0..n.len], n);
return buffer[0..n.len];
@ -127,11 +131,14 @@ pub const Header = struct {
return buffer[0 .. p.len + 1 + n.len];
}
pub fn linkName(header: Header, buffer: *[LINK_NAME_SIZE]u8) []const u8 {
/// When kind is symbolic_link linked-to name (target_path) is specified in
/// the linkname field.
pub fn linkName(header: Header, buffer: []u8) ![]const u8 {
const link_name = header.str(157, 100);
if (link_name.len == 0) {
return buffer[0..0];
}
if (buffer.len < link_name.len) return error.TarInsufficientBuffer;
const buf = buffer[0..link_name.len];
@memcpy(buf, link_name);
return buf;
@ -233,70 +240,84 @@ fn nullStr(str: []const u8) []const u8 {
return str;
}
/// Options for iterator.
/// Buffers should be provided by the caller.
pub const IteratorOptions = struct {
/// Use a buffer with length `std.fs.MAX_PATH_BYTES` to match file system capabilities.
file_name_buffer: []u8,
/// Use a buffer with length `std.fs.MAX_PATH_BYTES` to match file system capabilities.
link_name_buffer: []u8,
/// Collects error messages during unpacking
diagnostics: ?*Diagnostics = null,
pub const Diagnostics = Options.Diagnostics;
};
/// Iterates over files in tar archive.
/// `next` returns each file in `reader` tar archive.
/// `next` returns each file in tar archive.
pub fn iterator(reader: anytype, options: IteratorOptions) Iterator(@TypeOf(reader)) {
return .{
.reader = reader,
.diagnostics = options.diagnostics,
.header_buffer = undefined,
.file_name_buffer = options.file_name_buffer,
.link_name_buffer = options.link_name_buffer,
.padding = 0,
.file = undefined,
};
}
fn Iterator(comptime ReaderType: type) type {
/// Kind of entry returned by the iterator's `next` method.
pub const FileKind = enum {
directory,
sym_link,
file,
};
/// Iterator over entries in the tar file represented by reader.
pub fn Iterator(comptime ReaderType: type) type {
return struct {
reader: ReaderType,
diagnostics: ?*Options.Diagnostics,
diagnostics: ?*Diagnostics = null,
// buffers for header and file attributes
header_buffer: [Header.SIZE]u8,
header_buffer: [Header.SIZE]u8 = undefined,
file_name_buffer: []u8,
link_name_buffer: []u8,
// bytes of padding to the end of the block
padding: usize,
// current tar file
file: File,
padding: usize = 0,
// bytes of the current file not yet consumed in the last next iteration
unread_file_bytes: u64 = 0,
pub const File = struct {
name: []const u8, // name of file, symlink or directory
link_name: []const u8, // target name of symlink
size: u64, // size of the file in bytes
mode: u32,
kind: Header.Kind,
size: u64 = 0, // size of the file in bytes
mode: u32 = 0,
kind: FileKind = .file,
reader: ReaderType,
unread_bytes: *u64,
parent_reader: ReaderType,
// Writes file content to writer.
pub fn write(self: File, writer: anytype) !void {
var buffer: [4096]u8 = undefined;
pub const Reader = std.io.Reader(File, ReaderType.Error, File.read);
var n: u64 = 0;
while (n < self.size) {
const buf = buffer[0..@min(buffer.len, self.size - n)];
try self.reader.readNoEof(buf);
try writer.writeAll(buf);
n += buf.len;
}
pub fn reader(self: File) Reader {
return .{ .context = self };
}
// Skips file content. Advances reader.
pub fn skip(self: File) !void {
try self.reader.skipBytes(self.size, .{});
pub fn read(self: File, dest: []u8) ReaderType.Error!usize {
const buf = dest[0..@min(dest.len, self.unread_bytes.*)];
const n = try self.parent_reader.read(buf);
self.unread_bytes.* -= n;
return n;
}
// Writes file content to writer.
pub fn writeAll(self: File, writer: anytype) !void {
var buffer: [4096]u8 = undefined;
while (self.unread_bytes.* > 0) {
const buf = buffer[0..@min(buffer.len, self.unread_bytes.*)];
try self.parent_reader.readNoEof(buf);
try writer.writeAll(buf);
self.unread_bytes.* -= buf.len;
}
}
};
@ -315,20 +336,18 @@ fn Iterator(comptime ReaderType: type) type {
}
fn readString(self: *Self, size: usize, buffer: []u8) ![]const u8 {
if (size > buffer.len) return error.TarCorruptInput;
if (size > buffer.len) return error.TarInsufficientBuffer;
const buf = buffer[0..size];
try self.reader.readNoEof(buf);
return nullStr(buf);
}
fn initFile(self: *Self) void {
self.file = .{
fn newFile(self: *Self) File {
return .{
.name = self.file_name_buffer[0..0],
.link_name = self.link_name_buffer[0..0],
.size = 0,
.kind = .normal,
.mode = 0,
.reader = self.reader,
.parent_reader = self.reader,
.unread_bytes = &self.unread_file_bytes,
};
}
@ -345,7 +364,12 @@ fn Iterator(comptime ReaderType: type) type {
/// loop iterates through one or more entries until it collects all
/// file attributes.
pub fn next(self: *Self) !?File {
self.initFile();
if (self.unread_file_bytes > 0) {
// If file content was not consumed by caller
try self.reader.skipBytes(self.unread_file_bytes, .{});
self.unread_file_bytes = 0;
}
var file: File = self.newFile();
while (try self.readHeader()) |header| {
const kind = header.kind();
@ -355,46 +379,52 @@ fn Iterator(comptime ReaderType: type) type {
switch (kind) {
// File types to return upstream
.directory, .normal, .symbolic_link => {
self.file.kind = kind;
self.file.mode = try header.mode();
file.kind = switch (kind) {
.directory => .directory,
.normal => .file,
.symbolic_link => .sym_link,
else => unreachable,
};
file.mode = try header.mode();
// set file attributes if not already set by prefix/extended headers
if (self.file.size == 0) {
self.file.size = size;
if (file.size == 0) {
file.size = size;
}
if (self.file.link_name.len == 0) {
self.file.link_name = header.linkName(self.link_name_buffer[0..Header.LINK_NAME_SIZE]);
if (file.link_name.len == 0) {
file.link_name = try header.linkName(self.link_name_buffer);
}
if (self.file.name.len == 0) {
self.file.name = try header.fullName(self.file_name_buffer[0..Header.MAX_NAME_SIZE]);
if (file.name.len == 0) {
file.name = try header.fullName(self.file_name_buffer);
}
self.padding = blockPadding(self.file.size);
return self.file;
self.padding = blockPadding(file.size);
self.unread_file_bytes = file.size;
return file;
},
// Prefix header types
.gnu_long_name => {
self.file.name = try self.readString(@intCast(size), self.file_name_buffer);
file.name = try self.readString(@intCast(size), self.file_name_buffer);
},
.gnu_long_link => {
self.file.link_name = try self.readString(@intCast(size), self.link_name_buffer);
file.link_name = try self.readString(@intCast(size), self.link_name_buffer);
},
.extended_header => {
// Use just attributes from last extended header.
self.initFile();
file = self.newFile();
var rdr = paxIterator(self.reader, @intCast(size));
while (try rdr.next()) |attr| {
switch (attr.kind) {
.path => {
self.file.name = try attr.value(self.file_name_buffer);
file.name = try attr.value(self.file_name_buffer);
},
.linkpath => {
self.file.link_name = try attr.value(self.link_name_buffer);
file.link_name = try attr.value(self.link_name_buffer);
},
.size => {
var buf: [pax_max_size_attr_len]u8 = undefined;
self.file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10);
file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10);
},
}
}
@ -467,7 +497,8 @@ fn PaxIterator(comptime ReaderType: type) type {
// Copies pax attribute value into destination buffer.
// Must be called with destination buffer of size at least Attribute.len.
pub fn value(self: Attribute, dst: []u8) ![]const u8 {
assert(self.len <= dst.len);
if (self.len > dst.len) return error.TarInsufficientBuffer;
// assert(self.len <= dst.len);
const buf = dst[0..self.len];
const n = try self.reader.readAll(buf);
if (n < self.len) return error.UnexpectedEndOfStream;
@ -540,7 +571,8 @@ fn PaxIterator(comptime ReaderType: type) type {
};
}
pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void {
/// Saves tar file content to the file system.
pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: PipeOptions) !void {
switch (options.mode_mode) {
.ignore => {},
.executable_bit_only => {
@ -568,24 +600,23 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi
try dir.makePath(file_name);
}
},
.normal => {
.file => {
if (file.size == 0 and file.name.len == 0) return;
const file_name = stripComponents(file.name, options.strip_components);
if (file_name.len == 0) return error.BadFileName;
if (createDirAndFile(dir, file_name)) |fs_file| {
defer fs_file.close();
try file.write(fs_file);
try file.writeAll(fs_file);
} else |err| {
const d = options.diagnostics orelse return err;
try d.errors.append(d.allocator, .{ .unable_to_create_file = .{
.code = err,
.file_name = try d.allocator.dupe(u8, file_name),
} });
try file.skip();
}
},
.symbolic_link => {
.sym_link => {
// The file system path of the symbolic link.
const file_name = stripComponents(file.name, options.strip_components);
if (file_name.len == 0) return error.BadFileName;
@ -601,7 +632,6 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi
} });
};
},
else => unreachable,
}
}
}
@ -619,6 +649,7 @@ fn createDirAndFile(dir: std.fs.Dir, file_name: []const u8) !std.fs.File {
return fs_file;
}
// Creates a symbolic link at path `file_name` which points to `link_name`.
fn createDirAndSymlink(dir: std.fs.Dir, link_name: []const u8, file_name: []const u8) !void {
dir.symLink(link_name, file_name, .{}) catch |err| {
if (err == error.FileNotFound) {
@ -645,8 +676,8 @@ fn stripComponents(path: []const u8, count: u32) []const u8 {
return path[i..];
}
test "tar stripComponents" {
const expectEqualStrings = std.testing.expectEqualStrings;
test "stripComponents" {
const expectEqualStrings = testing.expectEqualStrings;
try expectEqualStrings("a/b/c", stripComponents("a/b/c", 0));
try expectEqualStrings("b/c", stripComponents("a/b/c", 1));
try expectEqualStrings("c", stripComponents("a/b/c", 2));
@ -654,7 +685,7 @@ test "tar stripComponents" {
try expectEqualStrings("", stripComponents("a/b/c", 4));
}
test "tar PaxIterator" {
test "PaxIterator" {
const Attr = struct {
kind: PaxAttributeKind,
value: []const u8 = undefined,
@ -757,24 +788,24 @@ test "tar PaxIterator" {
var i: usize = 0;
while (iter.next() catch |err| {
if (case.err) |e| {
try std.testing.expectEqual(e, err);
try testing.expectEqual(e, err);
continue;
}
return err;
}) |attr| : (i += 1) {
const exp = case.attrs[i];
try std.testing.expectEqual(exp.kind, attr.kind);
try testing.expectEqual(exp.kind, attr.kind);
const value = attr.value(&buffer) catch |err| {
if (exp.err) |e| {
try std.testing.expectEqual(e, err);
try testing.expectEqual(e, err);
break :outer;
}
return err;
};
try std.testing.expectEqualStrings(exp.value, value);
try testing.expectEqualStrings(exp.value, value);
}
try std.testing.expectEqual(case.attrs.len, i);
try std.testing.expect(case.err == null);
try testing.expectEqual(case.attrs.len, i);
try testing.expect(case.err == null);
}
}
@ -782,7 +813,7 @@ test {
_ = @import("tar/test.zig");
}
test "tar header parse size" {
test "header parse size" {
const cases = [_]struct {
in: []const u8,
want: u64 = 0,
@ -810,14 +841,14 @@ test "tar header parse size" {
@memcpy(bytes[124 .. 124 + case.in.len], case.in);
var header = Header{ .bytes = &bytes };
if (case.err) |err| {
try std.testing.expectError(err, header.size());
try testing.expectError(err, header.size());
} else {
try std.testing.expectEqual(case.want, try header.size());
try testing.expectEqual(case.want, try header.size());
}
}
}
test "tar header parse mode" {
test "header parse mode" {
const cases = [_]struct {
in: []const u8,
want: u64 = 0,
@ -835,9 +866,148 @@ test "tar header parse mode" {
@memcpy(bytes[100 .. 100 + case.in.len], case.in);
var header = Header{ .bytes = &bytes };
if (case.err) |err| {
try std.testing.expectError(err, header.mode());
try testing.expectError(err, header.mode());
} else {
try std.testing.expectEqual(case.want, try header.mode());
try testing.expectEqual(case.want, try header.mode());
}
}
}
test "create file and symlink" {
var root = testing.tmpDir(.{});
defer root.cleanup();
var file = try createDirAndFile(root.dir, "file1");
file.close();
// Missing parent directories are created as needed.
file = try createDirAndFile(root.dir, "a/b/c/file2");
file.close();
createDirAndSymlink(root.dir, "a/b/c/file2", "symlink1") catch |err| {
// On Windows when developer mode is not enabled
if (err == error.AccessDenied) return error.SkipZigTest;
return err;
};
try createDirAndSymlink(root.dir, "../../../file1", "d/e/f/symlink2");
// Dangling symlink, file created later
try createDirAndSymlink(root.dir, "../../../g/h/i/file4", "j/k/l/symlink3");
file = try createDirAndFile(root.dir, "g/h/i/file4");
file.close();
}
test iterator {
// Example tar file is created from this tree structure:
// $ tree example
// example
// a
//    file
// b
//    symlink -> ../a/file
// empty
// $ cat example/a/file
// content
// $ tar -cf example.tar example
// $ tar -tvf example.tar
// example/
// example/b/
// example/b/symlink -> ../a/file
// example/a/
// example/a/file
// example/empty/
const data = @embedFile("tar/testdata/example.tar");
var fbs = std.io.fixedBufferStream(data);
// User provided buffers to the iterator
var file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
var link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
// Create iterator
var iter = iterator(fbs.reader(), .{
.file_name_buffer = &file_name_buffer,
.link_name_buffer = &link_name_buffer,
});
// Iterate over files in example.tar
var file_no: usize = 0;
while (try iter.next()) |file| : (file_no += 1) {
switch (file.kind) {
.directory => {
// Entries arrive in archive order (see the tar listing above),
// so the directory entries are at positions 0, 1, 3 and 5.
switch (file_no) {
0 => try testing.expectEqualStrings("example/", file.name),
1 => try testing.expectEqualStrings("example/b/", file.name),
3 => try testing.expectEqualStrings("example/a/", file.name),
5 => try testing.expectEqualStrings("example/empty/", file.name),
else => unreachable,
}
},
.file => {
try testing.expectEqualStrings("example/a/file", file.name);
// Read file content
var buf: [16]u8 = undefined;
const n = try file.reader().readAll(&buf);
try testing.expectEqualStrings("content\n", buf[0..n]);
},
.sym_link => {
try testing.expectEqualStrings("example/b/symlink", file.name);
try testing.expectEqualStrings("../a/file", file.link_name);
},
}
}
}
test pipeToFileSystem {
// Example tar file is created from this tree structure:
// $ tree example
// example
// a
//    file
// b
//    symlink -> ../a/file
// empty
// $ cat example/a/file
// content
// $ tar -cf example.tar example
// $ tar -tvf example.tar
// example/
// example/b/
// example/b/symlink -> ../a/file
// example/a/
// example/a/file
// example/empty/
const data = @embedFile("tar/testdata/example.tar");
var fbs = std.io.fixedBufferStream(data);
const reader = fbs.reader();
var tmp = testing.tmpDir(.{ .no_follow = true });
defer tmp.cleanup();
const dir = tmp.dir;
// Save tar from `reader` to the file system `dir`.
// strip_components = 1 drops the leading "example/" path component and
// exclude_empty_directories skips "example/empty/".
pipeToFileSystem(dir, reader, .{
.mode_mode = .ignore,
.strip_components = 1,
.exclude_empty_directories = true,
}) catch |err| {
// Skip on platforms which don't support symlinks
if (err == error.UnableToCreateSymLink) return error.SkipZigTest;
return err;
};
// "empty" was excluded; everything else lost its "example/" prefix.
try testing.expectError(error.FileNotFound, dir.statFile("empty"));
try testing.expect((try dir.statFile("a/file")).kind == .file);
try testing.expect((try dir.statFile("b/symlink")).kind == .file); // statFile follows symlink
var buf: [32]u8 = undefined;
// readLink may report the native separator; normalize before comparing.
try testing.expectEqualSlices(
u8,
"../a/file",
normalizePath(try dir.readLink("b/symlink", &buf)),
);
}
/// Rewrites `bytes` in place so that any native path separators become
/// POSIX '/' separators, and returns the same slice. On targets whose
/// native separator already is '/', the input is returned untouched.
fn normalizePath(bytes: []u8) []u8 {
    const native_sep = std.fs.path.sep;
    const canonical_sep = std.fs.path.sep_posix;
    if (native_sep != canonical_sep) {
        std.mem.replaceScalar(u8, bytes, native_sep, canonical_sep);
    }
    return bytes;
}

View File

@ -1,328 +1,347 @@
const std = @import("../std.zig");
const tar = std.tar;
const std = @import("std");
const tar = @import("../tar.zig");
const testing = std.testing;
test "tar run Go test cases" {
const Case = struct {
const File = struct {
name: []const u8,
size: u64 = 0,
mode: u32 = 0,
link_name: []const u8 = &[0]u8{},
kind: tar.Header.Kind = .normal,
truncated: bool = false, // when there is no file body, just header, usefull for huge files
};
data: []const u8, // testdata file content
files: []const File = &[_]@This().File{}, // expected files to found in archive
chksums: []const []const u8 = &[_][]const u8{}, // chksums of each file content
err: ?anyerror = null, // parsing should fail with this error
const Case = struct {
const File = struct {
name: []const u8,
size: u64 = 0,
mode: u32 = 0,
link_name: []const u8 = &[0]u8{},
kind: tar.FileKind = .file,
truncated: bool = false, // when there is no file body, just header, usefull for huge files
};
const cases = [_]Case{
.{
.data = @embedFile("testdata/gnu.tar"),
.files = &[_]Case.File{
.{
.name = "small.txt",
.size = 5,
.mode = 0o640,
},
.{
.name = "small2.txt",
.size = 11,
.mode = 0o640,
},
},
.chksums = &[_][]const u8{
"e38b27eaccb4391bdec553a7f3ae6b2f",
"c65bd2e50a56a2138bf1716f2fd56fe9",
},
},
.{
.data = @embedFile("testdata/sparse-formats.tar"),
.err = error.TarUnsupportedHeader,
},
.{
.data = @embedFile("testdata/star.tar"),
.files = &[_]Case.File{
.{
.name = "small.txt",
.size = 5,
.mode = 0o640,
},
.{
.name = "small2.txt",
.size = 11,
.mode = 0o640,
},
},
.chksums = &[_][]const u8{
"e38b27eaccb4391bdec553a7f3ae6b2f",
"c65bd2e50a56a2138bf1716f2fd56fe9",
},
},
.{
.data = @embedFile("testdata/v7.tar"),
.files = &[_]Case.File{
.{
.name = "small.txt",
.size = 5,
.mode = 0o444,
},
.{
.name = "small2.txt",
.size = 11,
.mode = 0o444,
},
},
.chksums = &[_][]const u8{
"e38b27eaccb4391bdec553a7f3ae6b2f",
"c65bd2e50a56a2138bf1716f2fd56fe9",
},
},
.{
.data = @embedFile("testdata/pax.tar"),
.files = &[_]Case.File{
.{
.name = "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100",
.size = 7,
.mode = 0o664,
},
.{
.name = "a/b",
.size = 0,
.kind = .symbolic_link,
.mode = 0o777,
.link_name = "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100",
},
},
.chksums = &[_][]const u8{
"3c382e8f5b6631aa2db52643912ffd4a",
},
},
.{
// pax attribute don't end with \n
.data = @embedFile("testdata/pax-bad-hdr-file.tar"),
.err = error.PaxInvalidAttributeEnd,
},
.{
// size is in pax attribute
.data = @embedFile("testdata/pax-pos-size-file.tar"),
.files = &[_]Case.File{
.{
.name = "foo",
.size = 999,
.kind = .normal,
.mode = 0o640,
},
},
.chksums = &[_][]const u8{
"0afb597b283fe61b5d4879669a350556",
},
},
.{
// has pax records which we are not interested in
.data = @embedFile("testdata/pax-records.tar"),
.files = &[_]Case.File{
.{
.name = "file",
},
},
},
.{
// has global records which we are ignoring
.data = @embedFile("testdata/pax-global-records.tar"),
.files = &[_]Case.File{
.{
.name = "file1",
},
.{
.name = "file2",
},
.{
.name = "file3",
},
.{
.name = "file4",
},
},
},
.{
.data = @embedFile("testdata/nil-uid.tar"),
.files = &[_]Case.File{
.{
.name = "P1050238.JPG.log",
.size = 14,
.kind = .normal,
.mode = 0o664,
},
},
.chksums = &[_][]const u8{
"08d504674115e77a67244beac19668f5",
},
},
.{
// has xattrs and pax records which we are ignoring
.data = @embedFile("testdata/xattrs.tar"),
.files = &[_]Case.File{
.{
.name = "small.txt",
.size = 5,
.kind = .normal,
.mode = 0o644,
},
.{
.name = "small2.txt",
.size = 11,
.kind = .normal,
.mode = 0o644,
},
},
.chksums = &[_][]const u8{
"e38b27eaccb4391bdec553a7f3ae6b2f",
"c65bd2e50a56a2138bf1716f2fd56fe9",
},
},
.{
.data = @embedFile("testdata/gnu-multi-hdrs.tar"),
.files = &[_]Case.File{
.{
.name = "GNU2/GNU2/long-path-name",
.link_name = "GNU4/GNU4/long-linkpath-name",
.kind = .symbolic_link,
},
},
},
.{
// has gnu type D (directory) and S (sparse) blocks
.data = @embedFile("testdata/gnu-incremental.tar"),
.err = error.TarUnsupportedHeader,
},
.{
// should use values only from last pax header
.data = @embedFile("testdata/pax-multi-hdrs.tar"),
.files = &[_]Case.File{
.{
.name = "bar",
.link_name = "PAX4/PAX4/long-linkpath-name",
.kind = .symbolic_link,
},
},
},
.{
.data = @embedFile("testdata/gnu-long-nul.tar"),
.files = &[_]Case.File{
.{
.name = "0123456789",
.mode = 0o644,
},
},
},
.{
.data = @embedFile("testdata/gnu-utf8.tar"),
.files = &[_]Case.File{
.{
.name = "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹",
.mode = 0o644,
},
},
},
.{
.data = @embedFile("testdata/gnu-not-utf8.tar"),
.files = &[_]Case.File{
.{
.name = "hi\x80\x81\x82\x83bye",
.mode = 0o644,
},
},
},
.{
// null in pax key
.data = @embedFile("testdata/pax-nul-xattrs.tar"),
.err = error.PaxNullInKeyword,
},
.{
.data = @embedFile("testdata/pax-nul-path.tar"),
.err = error.PaxNullInValue,
},
.{
.data = @embedFile("testdata/neg-size.tar"),
.err = error.TarHeader,
},
.{
.data = @embedFile("testdata/issue10968.tar"),
.err = error.TarHeader,
},
.{
.data = @embedFile("testdata/issue11169.tar"),
.err = error.TarHeader,
},
.{
.data = @embedFile("testdata/issue12435.tar"),
.err = error.TarHeaderChksum,
},
.{
// has magic with space at end instead of null
.data = @embedFile("testdata/invalid-go17.tar"),
.files = &[_]Case.File{
.{
.name = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo",
},
},
},
.{
.data = @embedFile("testdata/ustar-file-devs.tar"),
.files = &[_]Case.File{
.{
.name = "file",
.mode = 0o644,
},
},
},
.{
.data = @embedFile("testdata/trailing-slash.tar"),
.files = &[_]Case.File{
.{
.name = "123456789/" ** 30,
.kind = .directory,
},
},
},
.{
// Has size in gnu extended format. To represent size bigger than 8 GB.
.data = @embedFile("testdata/writer-big.tar"),
.files = &[_]Case.File{
.{
.name = "tmp/16gig.txt",
.size = 16 * 1024 * 1024 * 1024,
.truncated = true,
.mode = 0o640,
},
},
},
.{
// Size in gnu extended format, and name in pax attribute.
.data = @embedFile("testdata/writer-big-long.tar"),
.files = &[_]Case.File{
.{
.name = "longname/" ** 15 ++ "16gig.txt",
.size = 16 * 1024 * 1024 * 1024,
.mode = 0o644,
.truncated = true,
},
},
},
.{
.data = @embedFile("testdata/fuzz1.tar"),
.err = error.TarCorruptInput,
},
.{
.data = @embedFile("testdata/fuzz2.tar"),
.err = error.PaxSizeAttrOverflow,
},
};
data: []const u8, // testdata file content
files: []const File = &[_]@This().File{}, // expected files to found in archive
chksums: []const []const u8 = &[_][]const u8{}, // chksums of each file content
err: ?anyerror = null, // parsing should fail with this error
};
const cases = [_]Case{
.{
.data = @embedFile("testdata/gnu.tar"),
.files = &[_]Case.File{
.{
.name = "small.txt",
.size = 5,
.mode = 0o640,
},
.{
.name = "small2.txt",
.size = 11,
.mode = 0o640,
},
},
.chksums = &[_][]const u8{
"e38b27eaccb4391bdec553a7f3ae6b2f",
"c65bd2e50a56a2138bf1716f2fd56fe9",
},
},
.{
.data = @embedFile("testdata/sparse-formats.tar"),
.err = error.TarUnsupportedHeader,
},
.{
.data = @embedFile("testdata/star.tar"),
.files = &[_]Case.File{
.{
.name = "small.txt",
.size = 5,
.mode = 0o640,
},
.{
.name = "small2.txt",
.size = 11,
.mode = 0o640,
},
},
.chksums = &[_][]const u8{
"e38b27eaccb4391bdec553a7f3ae6b2f",
"c65bd2e50a56a2138bf1716f2fd56fe9",
},
},
.{
.data = @embedFile("testdata/v7.tar"),
.files = &[_]Case.File{
.{
.name = "small.txt",
.size = 5,
.mode = 0o444,
},
.{
.name = "small2.txt",
.size = 11,
.mode = 0o444,
},
},
.chksums = &[_][]const u8{
"e38b27eaccb4391bdec553a7f3ae6b2f",
"c65bd2e50a56a2138bf1716f2fd56fe9",
},
},
.{
.data = @embedFile("testdata/pax.tar"),
.files = &[_]Case.File{
.{
.name = "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100",
.size = 7,
.mode = 0o664,
},
.{
.name = "a/b",
.size = 0,
.kind = .sym_link,
.mode = 0o777,
.link_name = "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100",
},
},
.chksums = &[_][]const u8{
"3c382e8f5b6631aa2db52643912ffd4a",
},
},
.{
// pax attribute don't end with \n
.data = @embedFile("testdata/pax-bad-hdr-file.tar"),
.err = error.PaxInvalidAttributeEnd,
},
.{
// size is in pax attribute
.data = @embedFile("testdata/pax-pos-size-file.tar"),
.files = &[_]Case.File{
.{
.name = "foo",
.size = 999,
.kind = .file,
.mode = 0o640,
},
},
.chksums = &[_][]const u8{
"0afb597b283fe61b5d4879669a350556",
},
},
.{
// has pax records which we are not interested in
.data = @embedFile("testdata/pax-records.tar"),
.files = &[_]Case.File{
.{
.name = "file",
},
},
},
.{
// has global records which we are ignoring
.data = @embedFile("testdata/pax-global-records.tar"),
.files = &[_]Case.File{
.{
.name = "file1",
},
.{
.name = "file2",
},
.{
.name = "file3",
},
.{
.name = "file4",
},
},
},
.{
.data = @embedFile("testdata/nil-uid.tar"),
.files = &[_]Case.File{
.{
.name = "P1050238.JPG.log",
.size = 14,
.kind = .file,
.mode = 0o664,
},
},
.chksums = &[_][]const u8{
"08d504674115e77a67244beac19668f5",
},
},
.{
// has xattrs and pax records which we are ignoring
.data = @embedFile("testdata/xattrs.tar"),
.files = &[_]Case.File{
.{
.name = "small.txt",
.size = 5,
.kind = .file,
.mode = 0o644,
},
.{
.name = "small2.txt",
.size = 11,
.kind = .file,
.mode = 0o644,
},
},
.chksums = &[_][]const u8{
"e38b27eaccb4391bdec553a7f3ae6b2f",
"c65bd2e50a56a2138bf1716f2fd56fe9",
},
},
.{
.data = @embedFile("testdata/gnu-multi-hdrs.tar"),
.files = &[_]Case.File{
.{
.name = "GNU2/GNU2/long-path-name",
.link_name = "GNU4/GNU4/long-linkpath-name",
.kind = .sym_link,
},
},
},
.{
// has gnu type D (directory) and S (sparse) blocks
.data = @embedFile("testdata/gnu-incremental.tar"),
.err = error.TarUnsupportedHeader,
},
.{
// should use values only from last pax header
.data = @embedFile("testdata/pax-multi-hdrs.tar"),
.files = &[_]Case.File{
.{
.name = "bar",
.link_name = "PAX4/PAX4/long-linkpath-name",
.kind = .sym_link,
},
},
},
.{
.data = @embedFile("testdata/gnu-long-nul.tar"),
.files = &[_]Case.File{
.{
.name = "0123456789",
.mode = 0o644,
},
},
},
.{
.data = @embedFile("testdata/gnu-utf8.tar"),
.files = &[_]Case.File{
.{
.name = "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹",
.mode = 0o644,
},
},
},
.{
.data = @embedFile("testdata/gnu-not-utf8.tar"),
.files = &[_]Case.File{
.{
.name = "hi\x80\x81\x82\x83bye",
.mode = 0o644,
},
},
},
.{
// null in pax key
.data = @embedFile("testdata/pax-nul-xattrs.tar"),
.err = error.PaxNullInKeyword,
},
.{
.data = @embedFile("testdata/pax-nul-path.tar"),
.err = error.PaxNullInValue,
},
.{
.data = @embedFile("testdata/neg-size.tar"),
.err = error.TarHeader,
},
.{
.data = @embedFile("testdata/issue10968.tar"),
.err = error.TarHeader,
},
.{
.data = @embedFile("testdata/issue11169.tar"),
.err = error.TarHeader,
},
.{
.data = @embedFile("testdata/issue12435.tar"),
.err = error.TarHeaderChksum,
},
.{
// has magic with space at end instead of null
.data = @embedFile("testdata/invalid-go17.tar"),
.files = &[_]Case.File{
.{
.name = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo",
},
},
},
.{
.data = @embedFile("testdata/ustar-file-devs.tar"),
.files = &[_]Case.File{
.{
.name = "file",
.mode = 0o644,
},
},
},
.{
.data = @embedFile("testdata/trailing-slash.tar"),
.files = &[_]Case.File{
.{
.name = "123456789/" ** 30,
.kind = .directory,
},
},
},
.{
// Has size in gnu extended format. To represent size bigger than 8 GB.
.data = @embedFile("testdata/writer-big.tar"),
.files = &[_]Case.File{
.{
.name = "tmp/16gig.txt",
.size = 16 * 1024 * 1024 * 1024,
.truncated = true,
.mode = 0o640,
},
},
},
.{
// Size in gnu extended format, and name in pax attribute.
.data = @embedFile("testdata/writer-big-long.tar"),
.files = &[_]Case.File{
.{
.name = "longname/" ** 15 ++ "16gig.txt",
.size = 16 * 1024 * 1024 * 1024,
.mode = 0o644,
.truncated = true,
},
},
},
.{
.data = @embedFile("testdata/fuzz1.tar"),
.err = error.TarInsufficientBuffer,
},
.{
.data = @embedFile("testdata/fuzz2.tar"),
.err = error.PaxSizeAttrOverflow,
},
};
// Writer-shaped test helper that computes the MD5 checksum of whatever
// file content is streamed through it, so tests can compare file bodies
// against known digest strings without buffering the data.
const Md5Writer = struct {
    h: std.crypto.hash.Md5 = std.crypto.hash.Md5.init(.{}),

    // Feed a whole slice into the running digest. Cannot actually fail;
    // the error union exists only to satisfy the generic writer shape.
    pub fn writeAll(self: *Md5Writer, bytes: []const u8) !void {
        self.h.update(bytes);
    }

    // Feed a single byte into the running digest.
    pub fn writeByte(self: *Md5Writer, b: u8) !void {
        const one = [1]u8{b};
        self.h.update(&one);
    }

    // Finalize the hash and return it as a 32-character lowercase hex string.
    pub fn chksum(self: *Md5Writer) [32]u8 {
        var digest: [16]u8 = undefined;
        self.h.final(&digest);
        return std.fmt.bytesToHex(digest, .lower);
    }
};
// NOTE(review): this region is interleaved diff residue, not valid Zig — a
// hunk header and both old and new forms of one statement appear back to
// back. Code left byte-identical; flagged for manual reconstruction.
test "run test cases" {
// Name buffers handed to the tar iterator for expanding long paths.
var file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
var link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
// NOTE(review): stray diff hunk header below — the loop opening that
// defines `case`, `actual`, `expected`, `iter` and `i` is missing here.
@ -350,37 +369,65 @@ test "tar run Go test cases" {
if (case.chksums.len > i) {
var md5writer = Md5Writer{};
// NOTE(review): the next two lines are the OLD call (`write`) and its NEW
// replacement (`writeAll`) from the diff — only one should survive.
try actual.write(&md5writer);
try actual.writeAll(&md5writer);
const chksum = md5writer.chksum();
try testing.expectEqualStrings(case.chksums[i], &chksum);
} else {
if (!expected.truncated) try actual.skip(); // skip file content
// For truncated fixtures, pretend the remaining bytes were consumed.
if (expected.truncated) {
iter.unread_file_bytes = 0;
}
}
}
// Every expected file must have been visited exactly once.
try testing.expectEqual(case.files.len, i);
}
}
// NOTE(review): diff residue — fragments of the OLD Md5Writer declaration
// are interleaved with the NEW test body below. Code left byte-identical;
// the Md5Writer fragments should be removed during manual cleanup.
// used in test to calculate file chksum
const Md5Writer = struct {
h: std.crypto.hash.Md5 = std.crypto.hash.Md5.init(.{}),
// Verifies that pax/gnu archives with long file or link names fail
// cleanly when the caller supplies undersized name buffers.
test "pax/gnu long names with small buffer" {
// should fail with insufficient buffer error
pub fn writeAll(self: *Md5Writer, buf: []const u8) !void {
self.h.update(buf);
// 256/100 bytes is too small for the long-name fixtures chosen below.
var min_file_name_buffer: [256]u8 = undefined;
var min_link_name_buffer: [100]u8 = undefined;
// NOTE(review): indices into `cases` — presumably the long-name fixtures;
// verify they still point at the intended entries if cases are reordered.
const long_name_cases = [_]Case{ cases[11], cases[25], cases[28] };
for (long_name_cases) |case| {
var fsb = std.io.fixedBufferStream(case.data);
var iter = tar.iterator(fsb.reader(), .{
.file_name_buffer = &min_file_name_buffer,
.link_name_buffer = &min_link_name_buffer,
});
// Drain the iterator, capturing the first error it reports.
var iter_err: ?anyerror = null;
while (iter.next() catch |err| brk: {
iter_err = err;
break :brk null;
}) |_| {}
try testing.expect(iter_err != null);
try testing.expectEqual(error.TarInsufficientBuffer, iter_err.?);
}
}
// NOTE(review): diff residue — the OLD Md5Writer.writeByte/chksum methods
// and the OLD test header are interleaved with the NEW test body below.
// Code left byte-identical; needs manual reconstruction.
pub fn writeByte(self: *Md5Writer, byte: u8) !void {
self.h.update(&[_]u8{byte});
}
// NOTE(review): "filed" in the test name is a typo for "field" — it is part
// of the test-name string, so it is not corrected here.
test "insufficient buffer in Header name filed" {
// A 9-byte file-name buffer cannot hold even the short name in cases[0].
var min_file_name_buffer: [9]u8 = undefined;
var min_link_name_buffer: [100]u8 = undefined;
pub fn chksum(self: *Md5Writer) [32]u8 {
var s = [_]u8{0} ** 16;
self.h.final(&s);
return std.fmt.bytesToHex(s, .lower);
}
};
var fsb = std.io.fixedBufferStream(cases[0].data);
var iter = tar.iterator(fsb.reader(), .{
.file_name_buffer = &min_file_name_buffer,
.link_name_buffer = &min_link_name_buffer,
});
// NOTE(review): OLD test header below is stray diff residue.
test "tar should not overwrite existing file" {
// Drain the iterator, capturing the first error it reports.
var iter_err: ?anyerror = null;
while (iter.next() catch |err| brk: {
iter_err = err;
break :brk null;
}) |_| {}
try testing.expect(iter_err != null);
try testing.expectEqual(error.TarInsufficientBuffer, iter_err.?);
}
// NOTE(review): this test body is truncated by a diff hunk header — the
// setup that defines `root2` and `fsb` is missing from this view.
test "should not overwrite existing file" {
// Starting from this folder structure:
// $ tree root
// root
@ -436,7 +483,7 @@ test "tar should not overwrite existing file" {
try tar.pipeToFileSystem(root2.dir, fsb.reader(), .{ .mode_mode = .ignore, .strip_components = 0 });
}
test "tar case sensitivity" {
test "case sensitivity" {
// Mimicking issue #18089: the archive contains the same file name in two
// case-sensitive variants; extraction is expected to fail on
// case-insensitive file systems.
// NOTE(review): the body is truncated by a diff hunk header below — the
// setup that defines `root` is missing from this view.
@ -462,35 +509,3 @@ test "tar case sensitivity" {
try testing.expect((try root.dir.statFile("alacritty/darkermatrix.yml")).kind == .file);
try testing.expect((try root.dir.statFile("alacritty/Darkermatrix.yml")).kind == .file);
}
// End-to-end check of pipeToFileSystem: unpacks a small fixture archive into
// a temp dir with one component stripped and empty directories excluded,
// then verifies the resulting tree (regular file present, empty dir absent,
// symlink created and pointing somewhere).
test "tar pipeToFileSystem" {
    // Archive layout ($ tar tvf):
    //   pipe_to_file_system_test/
    //   pipe_to_file_system_test/b/
    //   pipe_to_file_system_test/b/symlink -> ../a/file
    //   pipe_to_file_system_test/a/
    //   pipe_to_file_system_test/a/file
    //   pipe_to_file_system_test/empty/
    const tar_bytes = @embedFile("testdata/pipe_to_file_system_test.tar");
    var stream = std.io.fixedBufferStream(tar_bytes);

    var tmp = std.testing.tmpDir(.{ .no_follow = true });
    defer tmp.cleanup();

    tar.pipeToFileSystem(tmp.dir, stream.reader(), .{
        .mode_mode = .ignore,
        .strip_components = 1,
        .exclude_empty_directories = true,
    }) catch |err| switch (err) {
        // Platforms without symlink support cannot run this test.
        error.UnableToCreateSymLink => return error.SkipZigTest,
        else => return err,
    };

    // The empty directory was excluded; the regular file was extracted.
    try testing.expectError(error.FileNotFound, tmp.dir.statFile("empty"));
    try testing.expect((try tmp.dir.statFile("a/file")).kind == .file);

    // statFile follows the symlink, so it reports the target's kind;
    // readLink confirms the entry really is a link.
    // TODO is there better way to test symlink
    try testing.expect((try tmp.dir.statFile("b/symlink")).kind == .file);
    var link_buf: [32]u8 = undefined;
    _ = try tmp.dir.readLink("b/symlink", &link_buf);
}

View File

@ -1147,7 +1147,7 @@ fn unpackTarball(f: *Fetch, out_dir: fs.Dir, reader: anytype) RunError!void {
const eb = &f.error_bundle;
const gpa = f.arena.child_allocator;
var diagnostics: std.tar.Options.Diagnostics = .{ .allocator = gpa };
var diagnostics: std.tar.Diagnostics = .{ .allocator = gpa };
defer diagnostics.deinit();
std.tar.pipeToFileSystem(out_dir, reader, .{