diff --git a/CMakeLists.txt b/CMakeLists.txt index b78b0012f1..4a3dc27a48 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -390,15 +390,6 @@ set(ZIG_STAGE2_SOURCES lib/std/Io.zig lib/std/Io/Reader.zig lib/std/Io/Writer.zig - lib/std/Io/buffered_atomic_file.zig - lib/std/Io/buffered_writer.zig - lib/std/Io/change_detection_stream.zig - lib/std/Io/counting_reader.zig - lib/std/Io/counting_writer.zig - lib/std/Io/find_byte_writer.zig - lib/std/Io/fixed_buffer_stream.zig - lib/std/Io/limited_reader.zig - lib/std/Io/seekable_stream.zig lib/std/Progress.zig lib/std/Random.zig lib/std/Target.zig diff --git a/lib/compiler/std-docs.zig b/lib/compiler/std-docs.zig index b5bc742717..33304b8c5d 100644 --- a/lib/compiler/std-docs.zig +++ b/lib/compiler/std-docs.zig @@ -60,7 +60,9 @@ pub fn main() !void { const should_open_browser = force_open_browser orelse (listen_port == 0); const address = std.net.Address.parseIp("127.0.0.1", listen_port) catch unreachable; - var http_server = try address.listen(.{}); + var http_server = try address.listen(.{ + .reuse_address = true, + }); const port = http_server.listen_address.in.getPort(); const url_with_newline = try std.fmt.allocPrint(arena, "http://127.0.0.1:{d}/\n", .{port}); std.fs.File.stdout().writeAll(url_with_newline) catch {}; @@ -189,7 +191,11 @@ fn serveSourcesTar(request: *std.http.Server.Request, context: *Context) !void { var walker = try std_dir.walk(gpa); defer walker.deinit(); - var archiver = std.tar.writer(response.writer()); + var adapter_buffer: [500]u8 = undefined; + var response_writer = response.writer().adaptToNewApi(); + response_writer.new_interface.buffer = &adapter_buffer; + + var archiver: std.tar.Writer = .{ .underlying_writer = &response_writer.new_interface }; archiver.prefix = "std"; while (try walker.next()) |entry| { @@ -204,7 +210,13 @@ fn serveSourcesTar(request: *std.http.Server.Request, context: *Context) !void { } var file = try entry.dir.openFile(entry.basename, .{}); defer file.close(); - try archiver.writeFile(entry.path, file); + const stat = try file.stat(); + var file_reader: std.fs.File.Reader = .{ + .file = file, + .interface = std.fs.File.Reader.initInterface(&.{}), + .size = stat.size, + }; + try archiver.writeFile(entry.path, &file_reader, stat.mtime); } { diff --git a/lib/docs/wasm/main.zig b/lib/docs/wasm/main.zig index 7e9ffa5e4c..d3043cd917 100644 --- a/lib/docs/wasm/main.zig +++ b/lib/docs/wasm/main.zig @@ -772,10 +772,10 @@ export fn decl_type_html(decl_index: Decl.Index) String { const Oom = error{OutOfMemory}; fn unpackInner(tar_bytes: []u8) !void { - var fbs = std.io.fixedBufferStream(tar_bytes); + var reader: std.Io.Reader = .fixed(tar_bytes); var file_name_buffer: [1024]u8 = undefined; var link_name_buffer: [1024]u8 = undefined; - var it = std.tar.iterator(fbs.reader(), .{ + var it: std.tar.Iterator = .init(&reader, .{ .file_name_buffer = &file_name_buffer, .link_name_buffer = &link_name_buffer, }); @@ -796,7 +796,7 @@ fn unpackInner(tar_bytes: []u8) !void { { gop.value_ptr.* = file; } - const file_bytes = tar_bytes[fbs.pos..][0..@intCast(tar_file.size)]; + const file_bytes = tar_bytes[reader.seek..][0..@intCast(tar_file.size)]; assert(file == try Walk.add_file(file_name, file_bytes)); } } else { diff --git a/lib/std/Build/Fuzz/WebServer.zig b/lib/std/Build/Fuzz/WebServer.zig index b28a6e185c..0df43408f1 100644 --- a/lib/std/Build/Fuzz/WebServer.zig +++ b/lib/std/Build/Fuzz/WebServer.zig @@ -522,7 +522,9 @@ fn serveSourcesTar(ws: *WebServer, request: *std.http.Server.Request) !void { var 
cwd_cache: ?[]const u8 = null; - var archiver = std.tar.writer(response.writer()); + var adapter = response.writer().adaptToNewApi(); + var archiver: std.tar.Writer = .{ .underlying_writer = &adapter.new_interface }; + var read_buffer: [1024]u8 = undefined; for (deduped_paths) |joined_path| { var file = joined_path.root_dir.handle.openFile(joined_path.sub_path, .{}) catch |err| { @@ -530,13 +532,14 @@ fn serveSourcesTar(ws: *WebServer, request: *std.http.Server.Request) !void { continue; }; defer file.close(); - + const stat = try file.stat(); + var file_reader: std.fs.File.Reader = .initSize(file, &read_buffer, stat.size); archiver.prefix = joined_path.root_dir.path orelse try memoizedCwd(arena, &cwd_cache); - try archiver.writeFile(joined_path.sub_path, file); + try archiver.writeFile(joined_path.sub_path, &file_reader, stat.mtime); } - // intentionally omitting the pointless trailer - //try archiver.finish(); + // intentionally not calling `archiver.finishPedantically` + try adapter.new_interface.flush(); try response.end(); } diff --git a/lib/std/Io.zig b/lib/std/Io.zig index ff6966d7f7..a93c31954b 100644 --- a/lib/std/Io.zig +++ b/lib/std/Io.zig @@ -314,11 +314,11 @@ pub fn GenericReader( } /// Helper for bridging to the new `Reader` API while upgrading. - pub fn adaptToNewApi(self: *const Self) Adapter { + pub fn adaptToNewApi(self: *const Self, buffer: []u8) Adapter { return .{ .derp_reader = self.*, .new_interface = .{ - .buffer = &.{}, + .buffer = buffer, .vtable = &.{ .stream = Adapter.stream }, .seek = 0, .end = 0, @@ -334,10 +334,12 @@ pub fn GenericReader( fn stream(r: *Reader, w: *Writer, limit: Limit) Reader.StreamError!usize { const a: *@This() = @alignCast(@fieldParentPtr("new_interface", r)); const buf = limit.slice(try w.writableSliceGreedy(1)); - return a.derp_reader.read(buf) catch |err| { + const n = a.derp_reader.read(buf) catch |err| { a.err = err; return error.ReadFailed; }; + w.advance(n); + return n; } }; }; @@ -419,9 +421,14 @@ pub fn GenericWriter( new_interface: Writer, err: ?Error = null, - fn drain(w: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize { + fn drain(w: *std.io.Writer, data: []const []const u8, splat: usize) std.io.Writer.Error!usize { _ = splat; const a: *@This() = @alignCast(@fieldParentPtr("new_interface", w)); + const buffered = w.buffered(); + if (buffered.len != 0) return w.consume(a.derp_writer.write(buffered) catch |err| { + a.err = err; + return error.WriteFailed; + }); return a.derp_writer.write(data[0]) catch |err| { a.err = err; return error.WriteFailed; @@ -435,54 +442,46 @@ pub fn GenericWriter( pub const AnyReader = @import("Io/DeprecatedReader.zig"); /// Deprecated in favor of `Writer`. pub const AnyWriter = @import("Io/DeprecatedWriter.zig"); - +/// Deprecated in favor of `File.Reader` and `File.Writer`. pub const SeekableStream = @import("Io/seekable_stream.zig").SeekableStream; - +/// Deprecated in favor of `Writer`. pub const BufferedWriter = @import("Io/buffered_writer.zig").BufferedWriter; +/// Deprecated in favor of `Writer`. pub const bufferedWriter = @import("Io/buffered_writer.zig").bufferedWriter; - +/// Deprecated in favor of `Reader`. pub const BufferedReader = @import("Io/buffered_reader.zig").BufferedReader; +/// Deprecated in favor of `Reader`. pub const bufferedReader = @import("Io/buffered_reader.zig").bufferedReader; +/// Deprecated in favor of `Reader`. pub const bufferedReaderSize = @import("Io/buffered_reader.zig").bufferedReaderSize; - +/// Deprecated in favor of `Reader`. 
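+/// A typical migration, shown in the `lib/docs/wasm/main.zig` hunk above (a sketch; buffer management varies): instead of `var fbs = std.io.fixedBufferStream(bytes);` and `fbs.reader()`, use `var reader: std.Io.Reader = .fixed(bytes);` and pass `&reader`.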
pub const FixedBufferStream = @import("Io/fixed_buffer_stream.zig").FixedBufferStream; +/// Deprecated in favor of `Reader`. pub const fixedBufferStream = @import("Io/fixed_buffer_stream.zig").fixedBufferStream; - -pub const CWriter = @import("Io/c_writer.zig").CWriter; -pub const cWriter = @import("Io/c_writer.zig").cWriter; - +/// Deprecated in favor of `Reader.Limited`. pub const LimitedReader = @import("Io/limited_reader.zig").LimitedReader; +/// Deprecated in favor of `Reader.Limited`. pub const limitedReader = @import("Io/limited_reader.zig").limitedReader; - +/// Deprecated with no replacement; inefficient pattern pub const CountingWriter = @import("Io/counting_writer.zig").CountingWriter; +/// Deprecated with no replacement; inefficient pattern pub const countingWriter = @import("Io/counting_writer.zig").countingWriter; +/// Deprecated with no replacement; inefficient pattern pub const CountingReader = @import("Io/counting_reader.zig").CountingReader; +/// Deprecated with no replacement; inefficient pattern pub const countingReader = @import("Io/counting_reader.zig").countingReader; -pub const MultiWriter = @import("Io/multi_writer.zig").MultiWriter; -pub const multiWriter = @import("Io/multi_writer.zig").multiWriter; - pub const BitReader = @import("Io/bit_reader.zig").BitReader; pub const bitReader = @import("Io/bit_reader.zig").bitReader; pub const BitWriter = @import("Io/bit_writer.zig").BitWriter; pub const bitWriter = @import("Io/bit_writer.zig").bitWriter; -pub const ChangeDetectionStream = @import("Io/change_detection_stream.zig").ChangeDetectionStream; -pub const changeDetectionStream = @import("Io/change_detection_stream.zig").changeDetectionStream; - -pub const FindByteWriter = @import("Io/find_byte_writer.zig").FindByteWriter; -pub const findByteWriter = @import("Io/find_byte_writer.zig").findByteWriter; - -pub const BufferedAtomicFile = @import("Io/buffered_atomic_file.zig").BufferedAtomicFile; - -pub const StreamSource = @import("Io/stream_source.zig").StreamSource; - pub const tty = @import("Io/tty.zig"); -/// A Writer that doesn't write to anything. +/// Deprecated in favor of `Writer.Discarding`. pub const null_writer: NullWriter = .{ .context = {} }; - +/// Deprecated in favor of `Writer.Discarding`. 
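+/// A minimal replacement sketch, assuming the `Writer.Discarding.init` API that takes a buffer: `var discarding: std.Io.Writer.Discarding = .init(&.{});` then write through `&discarding.writer`.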
pub const NullWriter = GenericWriter(void, error{}, dummyWrite); fn dummyWrite(context: void, data: []const u8) error{}!usize { _ = context; @@ -898,16 +897,14 @@ test { _ = Reader; _ = Reader.Limited; _ = Writer; - _ = @import("Io/bit_reader.zig"); - _ = @import("Io/bit_writer.zig"); - _ = @import("Io/buffered_atomic_file.zig"); - _ = @import("Io/buffered_reader.zig"); - _ = @import("Io/buffered_writer.zig"); - _ = @import("Io/c_writer.zig"); - _ = @import("Io/counting_writer.zig"); - _ = @import("Io/counting_reader.zig"); - _ = @import("Io/fixed_buffer_stream.zig"); - _ = @import("Io/seekable_stream.zig"); - _ = @import("Io/stream_source.zig"); + _ = BitReader; + _ = BitWriter; + _ = BufferedReader; + _ = BufferedWriter; + _ = CountingWriter; + _ = CountingReader; + _ = FixedBufferStream; + _ = SeekableStream; + _ = tty; _ = @import("Io/test.zig"); } diff --git a/lib/std/Io/DeprecatedReader.zig b/lib/std/Io/DeprecatedReader.zig index f6cb9f61d5..4d51b05148 100644 --- a/lib/std/Io/DeprecatedReader.zig +++ b/lib/std/Io/DeprecatedReader.zig @@ -393,10 +393,12 @@ pub const Adapter = struct { fn stream(r: *std.io.Reader, w: *std.io.Writer, limit: std.io.Limit) std.io.Reader.StreamError!usize { const a: *@This() = @alignCast(@fieldParentPtr("new_interface", r)); const buf = limit.slice(try w.writableSliceGreedy(1)); - return a.derp_reader.read(buf) catch |err| { + const n = a.derp_reader.read(buf) catch |err| { a.err = err; return error.ReadFailed; }; + w.advance(n); + return n; } }; diff --git a/lib/std/Io/DeprecatedWriter.zig b/lib/std/Io/DeprecatedWriter.zig index 391b985357..81774b357c 100644 --- a/lib/std/Io/DeprecatedWriter.zig +++ b/lib/std/Io/DeprecatedWriter.zig @@ -100,7 +100,12 @@ pub const Adapter = struct { fn drain(w: *std.io.Writer, data: []const []const u8, splat: usize) std.io.Writer.Error!usize { _ = splat; - const a: *@This() = @fieldParentPtr("new_interface", w); + const a: *@This() = @alignCast(@fieldParentPtr("new_interface", w)); + const buffered = w.buffered(); + if (buffered.len != 0) return w.consume(a.derp_writer.write(buffered) catch |err| { + a.err = err; + return error.WriteFailed; + }); return a.derp_writer.write(data[0]) catch |err| { a.err = err; return error.WriteFailed; diff --git a/lib/std/Io/Reader.zig b/lib/std/Io/Reader.zig index f25e113522..5c7fdafe76 100644 --- a/lib/std/Io/Reader.zig +++ b/lib/std/Io/Reader.zig @@ -179,6 +179,12 @@ pub fn streamExact(r: *Reader, w: *Writer, n: usize) StreamError!void { while (remaining != 0) remaining -= try r.stream(w, .limited(remaining)); } +/// "Pump" exactly `n` bytes from the reader to the writer. +pub fn streamExact64(r: *Reader, w: *Writer, n: u64) StreamError!void { + var remaining = n; + while (remaining != 0) remaining -= try r.stream(w, .limited64(remaining)); +} + /// "Pump" data from the reader to the writer, handling `error.EndOfStream` as /// a success case. 
/// diff --git a/lib/std/Io/buffered_atomic_file.zig b/lib/std/Io/buffered_atomic_file.zig deleted file mode 100644 index 48510bde52..0000000000 --- a/lib/std/Io/buffered_atomic_file.zig +++ /dev/null @@ -1,55 +0,0 @@ -const std = @import("../std.zig"); -const mem = std.mem; -const fs = std.fs; -const File = std.fs.File; - -pub const BufferedAtomicFile = struct { - atomic_file: fs.AtomicFile, - file_writer: File.Writer, - buffered_writer: BufferedWriter, - allocator: mem.Allocator, - - pub const buffer_size = 4096; - pub const BufferedWriter = std.io.BufferedWriter(buffer_size, File.Writer); - pub const Writer = std.io.GenericWriter(*BufferedWriter, BufferedWriter.Error, BufferedWriter.write); - - /// TODO when https://github.com/ziglang/zig/issues/2761 is solved - /// this API will not need an allocator - pub fn create( - allocator: mem.Allocator, - dir: fs.Dir, - dest_path: []const u8, - atomic_file_options: fs.Dir.AtomicFileOptions, - ) !*BufferedAtomicFile { - var self = try allocator.create(BufferedAtomicFile); - self.* = BufferedAtomicFile{ - .atomic_file = undefined, - .file_writer = undefined, - .buffered_writer = undefined, - .allocator = allocator, - }; - errdefer allocator.destroy(self); - - self.atomic_file = try dir.atomicFile(dest_path, atomic_file_options); - errdefer self.atomic_file.deinit(); - - self.file_writer = self.atomic_file.file.deprecatedWriter(); - self.buffered_writer = .{ .unbuffered_writer = self.file_writer }; - return self; - } - - /// always call destroy, even after successful finish() - pub fn destroy(self: *BufferedAtomicFile) void { - self.atomic_file.deinit(); - self.allocator.destroy(self); - } - - pub fn finish(self: *BufferedAtomicFile) !void { - try self.buffered_writer.flush(); - try self.atomic_file.finish(); - } - - pub fn writer(self: *BufferedAtomicFile) Writer { - return .{ .context = &self.buffered_writer }; - } -}; diff --git a/lib/std/Io/c_writer.zig b/lib/std/Io/c_writer.zig deleted file mode 100644 index 30d0cabcf5..0000000000 --- a/lib/std/Io/c_writer.zig +++ /dev/null @@ -1,44 +0,0 @@ -const std = @import("../std.zig"); -const builtin = @import("builtin"); -const io = std.io; -const testing = std.testing; - -pub const CWriter = io.GenericWriter(*std.c.FILE, std.fs.File.WriteError, cWriterWrite); - -pub fn cWriter(c_file: *std.c.FILE) CWriter { - return .{ .context = c_file }; -} - -fn cWriterWrite(c_file: *std.c.FILE, bytes: []const u8) std.fs.File.WriteError!usize { - const amt_written = std.c.fwrite(bytes.ptr, 1, bytes.len, c_file); - if (amt_written >= 0) return amt_written; - switch (@as(std.c.E, @enumFromInt(std.c._errno().*))) { - .SUCCESS => unreachable, - .INVAL => unreachable, - .FAULT => unreachable, - .AGAIN => unreachable, // this is a blocking API - .BADF => unreachable, // always a race condition - .DESTADDRREQ => unreachable, // connect was never called - .DQUOT => return error.DiskQuota, - .FBIG => return error.FileTooBig, - .IO => return error.InputOutput, - .NOSPC => return error.NoSpaceLeft, - .PERM => return error.PermissionDenied, - .PIPE => return error.BrokenPipe, - else => |err| return std.posix.unexpectedErrno(err), - } -} - -test cWriter { - if (!builtin.link_libc or builtin.os.tag == .wasi) return error.SkipZigTest; - - const filename = "tmp_io_test_file.txt"; - const out_file = std.c.fopen(filename, "w") orelse return error.UnableToOpenTestFile; - defer { - _ = std.c.fclose(out_file); - std.fs.cwd().deleteFileZ(filename) catch {}; - } - - const writer = cWriter(out_file); - try writer.print("hi: {}\n", 
.{@as(i32, 123)}); -} diff --git a/lib/std/Io/change_detection_stream.zig b/lib/std/Io/change_detection_stream.zig deleted file mode 100644 index d9da1c4a0e..0000000000 --- a/lib/std/Io/change_detection_stream.zig +++ /dev/null @@ -1,55 +0,0 @@ -const std = @import("../std.zig"); -const io = std.io; -const mem = std.mem; -const assert = std.debug.assert; - -/// Used to detect if the data written to a stream differs from a source buffer -pub fn ChangeDetectionStream(comptime WriterType: type) type { - return struct { - const Self = @This(); - pub const Error = WriterType.Error; - pub const Writer = io.GenericWriter(*Self, Error, write); - - anything_changed: bool, - underlying_writer: WriterType, - source_index: usize, - source: []const u8, - - pub fn writer(self: *Self) Writer { - return .{ .context = self }; - } - - fn write(self: *Self, bytes: []const u8) Error!usize { - if (!self.anything_changed) { - const end = self.source_index + bytes.len; - if (end > self.source.len) { - self.anything_changed = true; - } else { - const src_slice = self.source[self.source_index..end]; - self.source_index += bytes.len; - if (!mem.eql(u8, bytes, src_slice)) { - self.anything_changed = true; - } - } - } - - return self.underlying_writer.write(bytes); - } - - pub fn changeDetected(self: *Self) bool { - return self.anything_changed or (self.source_index != self.source.len); - } - }; -} - -pub fn changeDetectionStream( - source: []const u8, - underlying_writer: anytype, -) ChangeDetectionStream(@TypeOf(underlying_writer)) { - return ChangeDetectionStream(@TypeOf(underlying_writer)){ - .anything_changed = false, - .underlying_writer = underlying_writer, - .source_index = 0, - .source = source, - }; -} diff --git a/lib/std/Io/find_byte_writer.zig b/lib/std/Io/find_byte_writer.zig deleted file mode 100644 index fe6836f603..0000000000 --- a/lib/std/Io/find_byte_writer.zig +++ /dev/null @@ -1,40 +0,0 @@ -const std = @import("../std.zig"); -const io = std.io; -const assert = std.debug.assert; - -/// A Writer that returns whether the given character has been written to it. -/// The contents are not written to anything. 
-pub fn FindByteWriter(comptime UnderlyingWriter: type) type { - return struct { - const Self = @This(); - pub const Error = UnderlyingWriter.Error; - pub const Writer = io.GenericWriter(*Self, Error, write); - - underlying_writer: UnderlyingWriter, - byte_found: bool, - byte: u8, - - pub fn writer(self: *Self) Writer { - return .{ .context = self }; - } - - fn write(self: *Self, bytes: []const u8) Error!usize { - if (!self.byte_found) { - self.byte_found = blk: { - for (bytes) |b| - if (b == self.byte) break :blk true; - break :blk false; - }; - } - return self.underlying_writer.write(bytes); - } - }; -} - -pub fn findByteWriter(byte: u8, underlying_writer: anytype) FindByteWriter(@TypeOf(underlying_writer)) { - return FindByteWriter(@TypeOf(underlying_writer)){ - .underlying_writer = underlying_writer, - .byte = byte, - .byte_found = false, - }; -} diff --git a/lib/std/Io/multi_writer.zig b/lib/std/Io/multi_writer.zig deleted file mode 100644 index 20e9e782de..0000000000 --- a/lib/std/Io/multi_writer.zig +++ /dev/null @@ -1,53 +0,0 @@ -const std = @import("../std.zig"); -const io = std.io; - -/// Takes a tuple of streams, and constructs a new stream that writes to all of them -pub fn MultiWriter(comptime Writers: type) type { - comptime var ErrSet = error{}; - inline for (@typeInfo(Writers).@"struct".fields) |field| { - const StreamType = field.type; - ErrSet = ErrSet || StreamType.Error; - } - - return struct { - const Self = @This(); - - streams: Writers, - - pub const Error = ErrSet; - pub const Writer = io.GenericWriter(*Self, Error, write); - - pub fn writer(self: *Self) Writer { - return .{ .context = self }; - } - - pub fn write(self: *Self, bytes: []const u8) Error!usize { - inline for (self.streams) |stream| - try stream.writeAll(bytes); - return bytes.len; - } - }; -} - -pub fn multiWriter(streams: anytype) MultiWriter(@TypeOf(streams)) { - return .{ .streams = streams }; -} - -const testing = std.testing; - -test "MultiWriter" { - var tmp = testing.tmpDir(.{}); - defer tmp.cleanup(); - var f = try tmp.dir.createFile("t.txt", .{}); - - var buf1: [255]u8 = undefined; - var fbs1 = io.fixedBufferStream(&buf1); - var buf2: [255]u8 = undefined; - var stream = multiWriter(.{ fbs1.writer(), f.writer() }); - - try stream.writer().print("HI", .{}); - f.close(); - - try testing.expectEqualSlices(u8, "HI", fbs1.getWritten()); - try testing.expectEqualSlices(u8, "HI", try tmp.dir.readFile("t.txt", &buf2)); -} diff --git a/lib/std/Io/stream_source.zig b/lib/std/Io/stream_source.zig deleted file mode 100644 index 2a3527e479..0000000000 --- a/lib/std/Io/stream_source.zig +++ /dev/null @@ -1,127 +0,0 @@ -const std = @import("../std.zig"); -const builtin = @import("builtin"); -const io = std.io; - -/// Provides `io.GenericReader`, `io.GenericWriter`, and `io.SeekableStream` for in-memory buffers as -/// well as files. -/// For memory sources, if the supplied byte buffer is const, then `io.GenericWriter` is not available. -/// The error set of the stream functions is the error set of the corresponding file functions. -pub const StreamSource = union(enum) { - // TODO: expose UEFI files to std.os in a way that allows this to be true - const has_file = (builtin.os.tag != .freestanding and builtin.os.tag != .uefi); - - /// The stream access is redirected to this buffer. - buffer: io.FixedBufferStream([]u8), - - /// The stream access is redirected to this buffer. - /// Writing to the source will always yield `error.AccessDenied`. 
- const_buffer: io.FixedBufferStream([]const u8), - - /// The stream access is redirected to this file. - /// On freestanding, this must never be initialized! - file: if (has_file) std.fs.File else void, - - pub const ReadError = io.FixedBufferStream([]u8).ReadError || (if (has_file) std.fs.File.ReadError else error{}); - pub const WriteError = error{AccessDenied} || io.FixedBufferStream([]u8).WriteError || (if (has_file) std.fs.File.WriteError else error{}); - pub const SeekError = io.FixedBufferStream([]u8).SeekError || (if (has_file) std.fs.File.SeekError else error{}); - pub const GetSeekPosError = io.FixedBufferStream([]u8).GetSeekPosError || (if (has_file) std.fs.File.GetSeekPosError else error{}); - - pub const Reader = io.GenericReader(*StreamSource, ReadError, read); - pub const Writer = io.GenericWriter(*StreamSource, WriteError, write); - pub const SeekableStream = io.SeekableStream( - *StreamSource, - SeekError, - GetSeekPosError, - seekTo, - seekBy, - getPos, - getEndPos, - ); - - pub fn read(self: *StreamSource, dest: []u8) ReadError!usize { - switch (self.*) { - .buffer => |*x| return x.read(dest), - .const_buffer => |*x| return x.read(dest), - .file => |x| if (!has_file) unreachable else return x.read(dest), - } - } - - pub fn write(self: *StreamSource, bytes: []const u8) WriteError!usize { - switch (self.*) { - .buffer => |*x| return x.write(bytes), - .const_buffer => return error.AccessDenied, - .file => |x| if (!has_file) unreachable else return x.write(bytes), - } - } - - pub fn seekTo(self: *StreamSource, pos: u64) SeekError!void { - switch (self.*) { - .buffer => |*x| return x.seekTo(pos), - .const_buffer => |*x| return x.seekTo(pos), - .file => |x| if (!has_file) unreachable else return x.seekTo(pos), - } - } - - pub fn seekBy(self: *StreamSource, amt: i64) SeekError!void { - switch (self.*) { - .buffer => |*x| return x.seekBy(amt), - .const_buffer => |*x| return x.seekBy(amt), - .file => |x| if (!has_file) unreachable else return x.seekBy(amt), - } - } - - pub fn getEndPos(self: *StreamSource) GetSeekPosError!u64 { - switch (self.*) { - .buffer => |*x| return x.getEndPos(), - .const_buffer => |*x| return x.getEndPos(), - .file => |x| if (!has_file) unreachable else return x.getEndPos(), - } - } - - pub fn getPos(self: *StreamSource) GetSeekPosError!u64 { - switch (self.*) { - .buffer => |*x| return x.getPos(), - .const_buffer => |*x| return x.getPos(), - .file => |x| if (!has_file) unreachable else return x.getPos(), - } - } - - pub fn reader(self: *StreamSource) Reader { - return .{ .context = self }; - } - - pub fn writer(self: *StreamSource) Writer { - return .{ .context = self }; - } - - pub fn seekableStream(self: *StreamSource) SeekableStream { - return .{ .context = self }; - } -}; - -test "refs" { - std.testing.refAllDecls(StreamSource); -} - -test "mutable buffer" { - var buffer: [64]u8 = undefined; - var source = StreamSource{ .buffer = std.io.fixedBufferStream(&buffer) }; - - var writer = source.writer(); - - try writer.writeAll("Hello, World!"); - - try std.testing.expectEqualStrings("Hello, World!", source.buffer.getWritten()); -} - -test "const buffer" { - const buffer: [64]u8 = "Hello, World!".* ++ ([1]u8{0xAA} ** 51); - var source = StreamSource{ .const_buffer = std.io.fixedBufferStream(&buffer) }; - - var reader = source.reader(); - - var dst_buffer: [13]u8 = undefined; - try reader.readNoEof(&dst_buffer); - - try std.testing.expectEqualStrings("Hello, World!", &dst_buffer); -} diff --git a/lib/std/crypto/md5.zig b/lib/std/crypto/md5.zig index 
92c8dac796..a580f826f3 100644 --- a/lib/std/crypto/md5.zig +++ b/lib/std/crypto/md5.zig @@ -54,12 +54,20 @@ pub const Md5 = struct { }; } - pub fn hash(b: []const u8, out: *[digest_length]u8, options: Options) void { + pub fn hash(data: []const u8, out: *[digest_length]u8, options: Options) void { var d = Md5.init(options); - d.update(b); + d.update(data); d.final(out); } + pub fn hashResult(data: []const u8) [digest_length]u8 { + var out: [digest_length]u8 = undefined; + var d = Md5.init(.{}); + d.update(data); + d.final(&out); + return out; + } + pub fn update(d: *Self, b: []const u8) void { var off: usize = 0; diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 729a07db0a..e397677cf3 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -19,7 +19,7 @@ const std = @import("std"); const assert = std.debug.assert; const testing = std.testing; -pub const writer = @import("tar/writer.zig").writer; +pub const Writer = @import("tar/Writer.zig"); /// Provide this to receive detailed error messages. /// When this is provided, some errors which would otherwise be returned @@ -293,28 +293,6 @@ fn nullStr(str: []const u8) []const u8 { return str; } -/// Options for iterator. -/// Buffers should be provided by the caller. -pub const IteratorOptions = struct { - /// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities. - file_name_buffer: []u8, - /// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities. - link_name_buffer: []u8, - /// Collects error messages during unpacking - diagnostics: ?*Diagnostics = null, -}; - -/// Iterates over files in tar archive. -/// `next` returns each file in tar archive. -pub fn iterator(reader: anytype, options: IteratorOptions) Iterator(@TypeOf(reader)) { - return .{ - .reader = reader, - .diagnostics = options.diagnostics, - .file_name_buffer = options.file_name_buffer, - .link_name_buffer = options.link_name_buffer, - }; -} - /// Type of the file returned by iterator `next` method. pub const FileKind = enum { directory, @@ -323,206 +301,192 @@ }; /// Iterator over entries in the tar file represented by reader. -pub fn Iterator(comptime ReaderType: type) type { - return struct { - reader: ReaderType, - diagnostics: ?*Diagnostics = null, +pub const Iterator = struct { + reader: *std.Io.Reader, + diagnostics: ?*Diagnostics = null, - // buffers for heeader and file attributes - header_buffer: [Header.SIZE]u8 = undefined, + // buffers for header and file attributes + header_buffer: [Header.SIZE]u8 = undefined, + file_name_buffer: []u8, + link_name_buffer: []u8, + + // bytes of padding to the end of the block + padding: usize = 0, + // not consumed bytes of file from last next iteration + unread_file_bytes: u64 = 0, + + /// Options for iterator. + /// Buffers should be provided by the caller. + pub const Options = struct { + /// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities. file_name_buffer: []u8, + /// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities.
link_name_buffer: []u8, + /// Collects error messages during unpacking + diagnostics: ?*Diagnostics = null, + }; - // bytes of padding to the end of the block - padding: usize = 0, - // not consumed bytes of file from last next iteration - unread_file_bytes: u64 = 0, - - pub const File = struct { - name: []const u8, // name of file, symlink or directory - link_name: []const u8, // target name of symlink - size: u64 = 0, // size of the file in bytes - mode: u32 = 0, - kind: FileKind = .file, - - unread_bytes: *u64, - parent_reader: ReaderType, - - pub const Reader = std.io.GenericReader(File, ReaderType.Error, File.read); - - pub fn reader(self: File) Reader { - return .{ .context = self }; - } - - pub fn read(self: File, dest: []u8) ReaderType.Error!usize { - const buf = dest[0..@min(dest.len, self.unread_bytes.*)]; - const n = try self.parent_reader.read(buf); - self.unread_bytes.* -= n; - return n; - } - - // Writes file content to writer. - pub fn writeAll(self: File, out_writer: anytype) !void { - var buffer: [4096]u8 = undefined; - - while (self.unread_bytes.* > 0) { - const buf = buffer[0..@min(buffer.len, self.unread_bytes.*)]; - try self.parent_reader.readNoEof(buf); - try out_writer.writeAll(buf); - self.unread_bytes.* -= buf.len; - } - } + /// Iterates over files in tar archive. + /// `next` returns each file in tar archive. + pub fn init(reader: *std.Io.Reader, options: Options) Iterator { + return .{ + .reader = reader, + .diagnostics = options.diagnostics, + .file_name_buffer = options.file_name_buffer, + .link_name_buffer = options.link_name_buffer, }; + } - const Self = @This(); - - fn readHeader(self: *Self) !?Header { - if (self.padding > 0) { - try self.reader.skipBytes(self.padding, .{}); - } - const n = try self.reader.readAll(&self.header_buffer); - if (n == 0) return null; - if (n < Header.SIZE) return error.UnexpectedEndOfStream; - const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] }; - if (try header.checkChksum() == 0) return null; - return header; - } - - fn readString(self: *Self, size: usize, buffer: []u8) ![]const u8 { - if (size > buffer.len) return error.TarInsufficientBuffer; - const buf = buffer[0..size]; - try self.reader.readNoEof(buf); - return nullStr(buf); - } - - fn newFile(self: *Self) File { - return .{ - .name = self.file_name_buffer[0..0], - .link_name = self.link_name_buffer[0..0], - .parent_reader = self.reader, - .unread_bytes = &self.unread_file_bytes, - }; - } - - // Number of padding bytes in the last file block. - fn blockPadding(size: u64) usize { - const block_rounded = std.mem.alignForward(u64, size, Header.SIZE); // size rounded to te block boundary - return @intCast(block_rounded - size); - } - - /// Iterates through the tar archive as if it is a series of files. - /// Internally, the tar format often uses entries (header with optional - /// content) to add meta data that describes the next file. These - /// entries should not normally be visible to the outside. As such, this - /// loop iterates through one or more entries until it collects a all - /// file attributes. 
- pub fn next(self: *Self) !?File { - if (self.unread_file_bytes > 0) { - // If file content was not consumed by caller - try self.reader.skipBytes(self.unread_file_bytes, .{}); - self.unread_file_bytes = 0; - } - var file: File = self.newFile(); - - while (try self.readHeader()) |header| { - const kind = header.kind(); - const size: u64 = try header.size(); - self.padding = blockPadding(size); - - switch (kind) { - // File types to return upstream - .directory, .normal, .symbolic_link => { - file.kind = switch (kind) { - .directory => .directory, - .normal => .file, - .symbolic_link => .sym_link, - else => unreachable, - }; - file.mode = try header.mode(); - - // set file attributes if not already set by prefix/extended headers - if (file.size == 0) { - file.size = size; - } - if (file.link_name.len == 0) { - file.link_name = try header.linkName(self.link_name_buffer); - } - if (file.name.len == 0) { - file.name = try header.fullName(self.file_name_buffer); - } - - self.padding = blockPadding(file.size); - self.unread_file_bytes = file.size; - return file; - }, - // Prefix header types - .gnu_long_name => { - file.name = try self.readString(@intCast(size), self.file_name_buffer); - }, - .gnu_long_link => { - file.link_name = try self.readString(@intCast(size), self.link_name_buffer); - }, - .extended_header => { - // Use just attributes from last extended header. - file = self.newFile(); - - var rdr = paxIterator(self.reader, @intCast(size)); - while (try rdr.next()) |attr| { - switch (attr.kind) { - .path => { - file.name = try attr.value(self.file_name_buffer); - }, - .linkpath => { - file.link_name = try attr.value(self.link_name_buffer); - }, - .size => { - var buf: [pax_max_size_attr_len]u8 = undefined; - file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10); - }, - } - } - }, - // Ignored header type - .global_extended_header => { - self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig; - }, - // All other are unsupported header types - else => { - const d = self.diagnostics orelse return error.TarUnsupportedHeader; - try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ - .file_name = try d.allocator.dupe(u8, header.name()), - .file_type = kind, - } }); - if (kind == .gnu_sparse) { - try self.skipGnuSparseExtendedHeaders(header); - } - self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig; - }, - } - } - return null; - } - - fn skipGnuSparseExtendedHeaders(self: *Self, header: Header) !void { - var is_extended = header.bytes[482] > 0; - while (is_extended) { - var buf: [Header.SIZE]u8 = undefined; - const n = try self.reader.readAll(&buf); - if (n < Header.SIZE) return error.UnexpectedEndOfStream; - is_extended = buf[504] > 0; - } - } + pub const File = struct { + name: []const u8, // name of file, symlink or directory + link_name: []const u8, // target name of symlink + size: u64 = 0, // size of the file in bytes + mode: u32 = 0, + kind: FileKind = .file, }; -} -/// Pax attributes iterator. -/// Size is length of pax extended header in reader. 
-fn paxIterator(reader: anytype, size: usize) PaxIterator(@TypeOf(reader)) { - return PaxIterator(@TypeOf(reader)){ - .reader = reader, - .size = size, - }; -} + fn readHeader(self: *Iterator) !?Header { + if (self.padding > 0) { + try self.reader.discardAll(self.padding); + } + const n = try self.reader.readSliceShort(&self.header_buffer); + if (n == 0) return null; + if (n < Header.SIZE) return error.UnexpectedEndOfStream; + const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] }; + if (try header.checkChksum() == 0) return null; + return header; + } + + fn readString(self: *Iterator, size: usize, buffer: []u8) ![]const u8 { + if (size > buffer.len) return error.TarInsufficientBuffer; + const buf = buffer[0..size]; + try self.reader.readSliceAll(buf); + return nullStr(buf); + } + + fn newFile(self: *Iterator) File { + return .{ + .name = self.file_name_buffer[0..0], + .link_name = self.link_name_buffer[0..0], + }; + } + + // Number of padding bytes in the last file block. + fn blockPadding(size: u64) usize { + const block_rounded = std.mem.alignForward(u64, size, Header.SIZE); // size rounded to the block boundary + return @intCast(block_rounded - size); + } + + /// Iterates through the tar archive as if it is a series of files. + /// Internally, the tar format often uses entries (header with optional + /// content) to add meta data that describes the next file. These + /// entries should not normally be visible to the outside. As such, this + /// loop iterates through one or more entries until it collects all + /// file attributes. + pub fn next(self: *Iterator) !?File { + if (self.unread_file_bytes > 0) { + // If file content was not consumed by caller + try self.reader.discardAll64(self.unread_file_bytes); + self.unread_file_bytes = 0; + } + var file: File = self.newFile(); + + while (try self.readHeader()) |header| { + const kind = header.kind(); + const size: u64 = try header.size(); + self.padding = blockPadding(size); + + switch (kind) { + // File types to return upstream + .directory, .normal, .symbolic_link => { + file.kind = switch (kind) { + .directory => .directory, + .normal => .file, + .symbolic_link => .sym_link, + else => unreachable, + }; + file.mode = try header.mode(); + + // set file attributes if not already set by prefix/extended headers + if (file.size == 0) { + file.size = size; + } + if (file.link_name.len == 0) { + file.link_name = try header.linkName(self.link_name_buffer); + } + if (file.name.len == 0) { + file.name = try header.fullName(self.file_name_buffer); + } + + self.padding = blockPadding(file.size); + self.unread_file_bytes = file.size; + return file; + }, + // Prefix header types + .gnu_long_name => { + file.name = try self.readString(@intCast(size), self.file_name_buffer); + }, + .gnu_long_link => { + file.link_name = try self.readString(@intCast(size), self.link_name_buffer); + }, + .extended_header => { + // Use just attributes from last extended header.
+ file = self.newFile(); + + var rdr: PaxIterator = .{ + .reader = self.reader, + .size = @intCast(size), + }; + while (try rdr.next()) |attr| { + switch (attr.kind) { + .path => { + file.name = try attr.value(self.file_name_buffer); + }, + .linkpath => { + file.link_name = try attr.value(self.link_name_buffer); + }, + .size => { + var buf: [pax_max_size_attr_len]u8 = undefined; + file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10); + }, + } + } + }, + // Ignored header type + .global_extended_header => { + self.reader.discardAll64(size) catch return error.TarHeadersTooBig; + }, + // All other are unsupported header types + else => { + const d = self.diagnostics orelse return error.TarUnsupportedHeader; + try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ + .file_name = try d.allocator.dupe(u8, header.name()), + .file_type = kind, + } }); + if (kind == .gnu_sparse) { + try self.skipGnuSparseExtendedHeaders(header); + } + self.reader.discardAll64(size) catch return error.TarHeadersTooBig; + }, + } + } + return null; + } + + pub fn streamRemaining(it: *Iterator, file: File, w: *std.Io.Writer) std.Io.Reader.StreamError!void { + try it.reader.streamExact64(w, file.size); + it.unread_file_bytes = 0; + } + + fn skipGnuSparseExtendedHeaders(self: *Iterator, header: Header) !void { + var is_extended = header.bytes[482] > 0; + while (is_extended) { + var buf: [Header.SIZE]u8 = undefined; + try self.reader.readSliceAll(&buf); + is_extended = buf[504] > 0; + } + } +}; const PaxAttributeKind = enum { path, @@ -533,108 +497,99 @@ const PaxAttributeKind = enum { // maxInt(u64) has 20 chars, base 10 in practice we got 24 chars const pax_max_size_attr_len = 64; -fn PaxIterator(comptime ReaderType: type) type { - return struct { - size: usize, // cumulative size of all pax attributes - reader: ReaderType, - // scratch buffer used for reading attribute length and keyword - scratch: [128]u8 = undefined, +pub const PaxIterator = struct { + size: usize, // cumulative size of all pax attributes + reader: *std.Io.Reader, - const Self = @This(); + const Self = @This(); - const Attribute = struct { - kind: PaxAttributeKind, - len: usize, // length of the attribute value - reader: ReaderType, // reader positioned at value start + const Attribute = struct { + kind: PaxAttributeKind, + len: usize, // length of the attribute value + reader: *std.Io.Reader, // reader positioned at value start - // Copies pax attribute value into destination buffer. - // Must be called with destination buffer of size at least Attribute.len. - pub fn value(self: Attribute, dst: []u8) ![]const u8 { - if (self.len > dst.len) return error.TarInsufficientBuffer; - // assert(self.len <= dst.len); - const buf = dst[0..self.len]; - const n = try self.reader.readAll(buf); - if (n < self.len) return error.UnexpectedEndOfStream; - try validateAttributeEnding(self.reader); - if (hasNull(buf)) return error.PaxNullInValue; - return buf; - } - }; - - // Iterates over pax attributes. Returns known only known attributes. - // Caller has to call value in Attribute, to advance reader across value. 
- pub fn next(self: *Self) !?Attribute { - // Pax extended header consists of one or more attributes, each constructed as follows: - // "%d %s=%s\n", <length>, <keyword>, <value> - while (self.size > 0) { - const length_buf = try self.readUntil(' '); - const length = try std.fmt.parseInt(usize, length_buf, 10); // record length in bytes - - const keyword = try self.readUntil('='); - if (hasNull(keyword)) return error.PaxNullInKeyword; - - // calculate value_len - const value_start = length_buf.len + keyword.len + 2; // 2 separators - if (length < value_start + 1 or self.size < length) return error.UnexpectedEndOfStream; - const value_len = length - value_start - 1; // \n separator at end - self.size -= length; - - const kind: PaxAttributeKind = if (eql(keyword, "path")) - .path - else if (eql(keyword, "linkpath")) - .linkpath - else if (eql(keyword, "size")) - .size - else { - try self.reader.skipBytes(value_len, .{}); - try validateAttributeEnding(self.reader); - continue; - }; - if (kind == .size and value_len > pax_max_size_attr_len) { - return error.PaxSizeAttrOverflow; - } - return Attribute{ - .kind = kind, - .len = value_len, - .reader = self.reader, - }; - } - - return null; - } - - fn readUntil(self: *Self, delimiter: u8) ![]const u8 { - var fbs = std.io.fixedBufferStream(&self.scratch); - try self.reader.streamUntilDelimiter(fbs.writer(), delimiter, null); - return fbs.getWritten(); - } - - fn eql(a: []const u8, b: []const u8) bool { - return std.mem.eql(u8, a, b); - } - - fn hasNull(str: []const u8) bool { - return (std.mem.indexOfScalar(u8, str, 0)) != null; - } - - // Checks that each record ends with new line. - fn validateAttributeEnding(reader: ReaderType) !void { - if (try reader.readByte() != '\n') return error.PaxInvalidAttributeEnd; } }; -} + + // Copies pax attribute value into destination buffer. + // Must be called with destination buffer of size at least Attribute.len. + pub fn value(self: Attribute, dst: []u8) ![]const u8 { + if (self.len > dst.len) return error.TarInsufficientBuffer; + // assert(self.len <= dst.len); + const buf = dst[0..self.len]; + const n = try self.reader.readSliceShort(buf); + if (n < self.len) return error.UnexpectedEndOfStream; + try validateAttributeEnding(self.reader); + if (hasNull(buf)) return error.PaxNullInValue; + return buf; } }; + + // Iterates over pax attributes. Returns only known attributes. + // Caller has to call `value` on the returned Attribute to advance the reader across the value.
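+ // Usage sketch, mirroring `test PaxIterator` below (`pax_payload_len` stands for the extended header's size in bytes): + // var it: PaxIterator = .{ .size = pax_payload_len, .reader = &reader }; + // while (try it.next()) |attr| { + // var buf: [std.fs.max_path_bytes]u8 = undefined; + // _ = try attr.value(&buf); + // }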
+ pub fn next(self: *Self) !?Attribute { + // Pax extended header consists of one or more attributes, each constructed as follows: + // "%d %s=%s\n", <length>, <keyword>, <value> + while (self.size > 0) { + const length_buf = try self.reader.takeSentinel(' '); + const length = try std.fmt.parseInt(usize, length_buf, 10); // record length in bytes + + const keyword = try self.reader.takeSentinel('='); + if (hasNull(keyword)) return error.PaxNullInKeyword; + + // calculate value_len + const value_start = length_buf.len + keyword.len + 2; // 2 separators + if (length < value_start + 1 or self.size < length) return error.UnexpectedEndOfStream; + const value_len = length - value_start - 1; // \n separator at end + self.size -= length; + + const kind: PaxAttributeKind = if (eql(keyword, "path")) + .path + else if (eql(keyword, "linkpath")) + .linkpath + else if (eql(keyword, "size")) + .size + else { + try self.reader.discardAll(value_len); + try validateAttributeEnding(self.reader); + continue; + }; + if (kind == .size and value_len > pax_max_size_attr_len) { + return error.PaxSizeAttrOverflow; + } + return .{ + .kind = kind, + .len = value_len, + .reader = self.reader, + }; + } + + return null; + } + + fn eql(a: []const u8, b: []const u8) bool { + return std.mem.eql(u8, a, b); + } + + fn hasNull(str: []const u8) bool { + return (std.mem.indexOfScalar(u8, str, 0)) != null; + } + + // Checks that each record ends with new line. + fn validateAttributeEnding(reader: *std.Io.Reader) !void { + if (try reader.takeByte() != '\n') return error.PaxInvalidAttributeEnd; + } +}; /// Saves tar file content to the file systems. -pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: PipeOptions) !void { +pub fn pipeToFileSystem(dir: std.fs.Dir, reader: *std.Io.Reader, options: PipeOptions) !void { var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined; var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined; - var iter = iterator(reader, .{ + var file_contents_buffer: [1024]u8 = undefined; + var it: Iterator = .init(reader, .{ .file_name_buffer = &file_name_buffer, .link_name_buffer = &link_name_buffer, .diagnostics = options.diagnostics, }); - while (try iter.next()) |file| { + while (try it.next()) |file| { const file_name = stripComponents(file.name, options.strip_components); if (file_name.len == 0 and file.kind != .directory) { const d = options.diagnostics orelse return error.TarComponentsOutsideStrippedPrefix; @@ -656,7 +611,9 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: PipeOptions) .file => { if (createDirAndFile(dir, file_name, fileMode(file.mode, options))) |fs_file| { defer fs_file.close(); - try file.writeAll(fs_file); + var file_writer = fs_file.writer(&file_contents_buffer); + try it.streamRemaining(file, &file_writer.interface); + try file_writer.interface.flush(); } else |err| { const d = options.diagnostics orelse return err; try d.errors.append(d.allocator, .{ .unable_to_create_file = .{ @@ -826,11 +783,14 @@ test PaxIterator { var buffer: [1024]u8 = undefined; outer: for (cases) |case| { - var stream = std.io.fixedBufferStream(case.data); - var iter = paxIterator(stream.reader(), case.data.len); + var reader: std.Io.Reader = .fixed(case.data); + var it: PaxIterator = .{ + .size = case.data.len, + .reader = &reader, + }; var i: usize = 0; - while (iter.next() catch |err| { + while (it.next() catch |err| { if (case.err) |e| { try testing.expectEqual(e, err); continue; @@ -853,12 +813,6 @@ } } -test { - _ = @import("tar/test.zig"); - _ =
@import("tar/writer.zig"); - _ = Diagnostics; -} - test "header parse size" { const cases = [_]struct { in: []const u8, @@ -941,7 +895,7 @@ test "create file and symlink" { file.close(); } -test iterator { +test Iterator { // Example tar file is created from this tree structure: // $ tree example // example @@ -962,19 +916,19 @@ test iterator { // example/empty/ const data = @embedFile("tar/testdata/example.tar"); - var fbs = std.io.fixedBufferStream(data); + var reader: std.Io.Reader = .fixed(data); // User provided buffers to the iterator var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined; var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined; // Create iterator - var iter = iterator(fbs.reader(), .{ + var it: Iterator = .init(&reader, .{ .file_name_buffer = &file_name_buffer, .link_name_buffer = &link_name_buffer, }); // Iterate over files in example.tar var file_no: usize = 0; - while (try iter.next()) |file| : (file_no += 1) { + while (try it.next()) |file| : (file_no += 1) { switch (file.kind) { .directory => { switch (file_no) { @@ -987,10 +941,10 @@ test iterator { }, .file => { try testing.expectEqualStrings("example/a/file", file.name); - // Read file content var buf: [16]u8 = undefined; - const n = try file.reader().readAll(&buf); - try testing.expectEqualStrings("content\n", buf[0..n]); + var w: std.Io.Writer = .fixed(&buf); + try it.streamRemaining(file, &w); + try testing.expectEqualStrings("content\n", w.buffered()); }, .sym_link => { try testing.expectEqualStrings("example/b/symlink", file.name); @@ -1021,15 +975,14 @@ test pipeToFileSystem { // example/empty/ const data = @embedFile("tar/testdata/example.tar"); - var fbs = std.io.fixedBufferStream(data); - const reader = fbs.reader(); + var reader: std.Io.Reader = .fixed(data); var tmp = testing.tmpDir(.{ .no_follow = true }); defer tmp.cleanup(); const dir = tmp.dir; - // Save tar from `reader` to the file system `dir` - pipeToFileSystem(dir, reader, .{ + // Save tar from reader to the file system `dir` + pipeToFileSystem(dir, &reader, .{ .mode_mode = .ignore, .strip_components = 1, .exclude_empty_directories = true, @@ -1053,8 +1006,7 @@ test pipeToFileSystem { test "pipeToFileSystem root_dir" { const data = @embedFile("tar/testdata/example.tar"); - var fbs = std.io.fixedBufferStream(data); - const reader = fbs.reader(); + var reader: std.Io.Reader = .fixed(data); // with strip_components = 1 { @@ -1063,7 +1015,7 @@ test "pipeToFileSystem root_dir" { var diagnostics: Diagnostics = .{ .allocator = testing.allocator }; defer diagnostics.deinit(); - pipeToFileSystem(tmp.dir, reader, .{ + pipeToFileSystem(tmp.dir, &reader, .{ .strip_components = 1, .diagnostics = &diagnostics, }) catch |err| { @@ -1079,13 +1031,13 @@ test "pipeToFileSystem root_dir" { // with strip_components = 0 { - fbs.reset(); + reader = .fixed(data); var tmp = testing.tmpDir(.{ .no_follow = true }); defer tmp.cleanup(); var diagnostics: Diagnostics = .{ .allocator = testing.allocator }; defer diagnostics.deinit(); - pipeToFileSystem(tmp.dir, reader, .{ + pipeToFileSystem(tmp.dir, &reader, .{ .strip_components = 0, .diagnostics = &diagnostics, }) catch |err| { @@ -1102,45 +1054,42 @@ test "pipeToFileSystem root_dir" { test "findRoot with single file archive" { const data = @embedFile("tar/testdata/22752.tar"); - var fbs = std.io.fixedBufferStream(data); - const reader = fbs.reader(); + var reader: std.Io.Reader = .fixed(data); var tmp = testing.tmpDir(.{}); defer tmp.cleanup(); var diagnostics: Diagnostics = .{ .allocator = 
testing.allocator }; defer diagnostics.deinit(); - try pipeToFileSystem(tmp.dir, reader, .{ .diagnostics = &diagnostics }); + try pipeToFileSystem(tmp.dir, &reader, .{ .diagnostics = &diagnostics }); try testing.expectEqualStrings("", diagnostics.root_dir); } test "findRoot without explicit root dir" { const data = @embedFile("tar/testdata/19820.tar"); - var fbs = std.io.fixedBufferStream(data); - const reader = fbs.reader(); + var reader: std.Io.Reader = .fixed(data); var tmp = testing.tmpDir(.{}); defer tmp.cleanup(); var diagnostics: Diagnostics = .{ .allocator = testing.allocator }; defer diagnostics.deinit(); - try pipeToFileSystem(tmp.dir, reader, .{ .diagnostics = &diagnostics }); + try pipeToFileSystem(tmp.dir, &reader, .{ .diagnostics = &diagnostics }); try testing.expectEqualStrings("root", diagnostics.root_dir); } test "pipeToFileSystem strip_components" { const data = @embedFile("tar/testdata/example.tar"); - var fbs = std.io.fixedBufferStream(data); - const reader = fbs.reader(); + var reader: std.Io.Reader = .fixed(data); var tmp = testing.tmpDir(.{ .no_follow = true }); defer tmp.cleanup(); var diagnostics: Diagnostics = .{ .allocator = testing.allocator }; defer diagnostics.deinit(); - pipeToFileSystem(tmp.dir, reader, .{ + pipeToFileSystem(tmp.dir, &reader, .{ .strip_components = 3, .diagnostics = &diagnostics, }) catch |err| { @@ -1194,13 +1143,12 @@ test "executable bit" { const data = @embedFile("tar/testdata/example.tar"); for ([_]PipeOptions.ModeMode{ .ignore, .executable_bit_only }) |opt| { - var fbs = std.io.fixedBufferStream(data); - const reader = fbs.reader(); + var reader: std.Io.Reader = .fixed(data); var tmp = testing.tmpDir(.{ .no_follow = true }); //defer tmp.cleanup(); - pipeToFileSystem(tmp.dir, reader, .{ + pipeToFileSystem(tmp.dir, &reader, .{ .strip_components = 1, .exclude_empty_directories = true, .mode_mode = opt, @@ -1226,3 +1174,9 @@ test "executable bit" { } } } + +test { + _ = @import("tar/test.zig"); + _ = Writer; + _ = Diagnostics; +} diff --git a/lib/std/tar/Writer.zig b/lib/std/tar/Writer.zig new file mode 100644 index 0000000000..61ae00b24e --- /dev/null +++ b/lib/std/tar/Writer.zig @@ -0,0 +1,462 @@ +const std = @import("std"); +const assert = std.debug.assert; +const testing = std.testing; +const Writer = @This(); + +const block_size = @sizeOf(Header); + +/// Options for writing file/dir/link. If left empty 0o664 is used for +/// file mode and current time for mtime. +pub const Options = struct { + /// File system permission mode. + mode: u32 = 0, + /// File system modification time. + mtime: u64 = 0, +}; + +underlying_writer: *std.Io.Writer, +prefix: []const u8 = "", +mtime_now: u64 = 0, + +const Error = error{ + WriteFailed, + OctalOverflow, + NameTooLong, +}; + +/// Sets prefix for all other write* method paths. 
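+/// Usage sketch, mirroring the `write files` test below (`out` stands for any `*std.Io.Writer`): +/// var w: Writer = .{ .underlying_writer = out }; +/// try w.setRoot("root"); // entries are now archived under "root/" +/// try w.writeFileBytes("foo", "bar", .{});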
+pub fn setRoot(w: *Writer, root: []const u8) Error!void { + if (root.len > 0) + try w.writeDir(root, .{}); + + w.prefix = root; +} + +pub fn writeDir(w: *Writer, sub_path: []const u8, options: Options) Error!void { + try w.writeHeader(.directory, sub_path, "", 0, options); +} + +pub const WriteFileError = std.Io.Writer.FileError || Error || std.fs.File.GetEndPosError; + +pub fn writeFile( + w: *Writer, + sub_path: []const u8, + file_reader: *std.fs.File.Reader, + stat_mtime: i128, +) WriteFileError!void { + const size = try file_reader.getSize(); + const mtime: u64 = @intCast(@divFloor(stat_mtime, std.time.ns_per_s)); + + var header: Header = .{}; + try w.setPath(&header, sub_path); + try header.setSize(size); + try header.setMtime(mtime); + try header.updateChecksum(); + + try w.underlying_writer.writeAll(@ptrCast((&header)[0..1])); + _ = try w.underlying_writer.sendFileAll(file_reader, .unlimited); + try w.writePadding64(size); +} + +pub const WriteFileStreamError = Error || std.Io.Reader.StreamError; + +/// Writes a file, reading its content from `reader`. Reads exactly `size` bytes +/// from `reader`, or returns `error.EndOfStream`. +pub fn writeFileStream( + w: *Writer, + sub_path: []const u8, + size: u64, + reader: *std.Io.Reader, + options: Options, +) WriteFileStreamError!void { + try w.writeHeader(.regular, sub_path, "", size, options); + try reader.streamExact64(w.underlying_writer, size); + try w.writePadding64(size); +} + +/// Writes a file, using the bytes buffer `content` for both its size and its content. +pub fn writeFileBytes(w: *Writer, sub_path: []const u8, content: []const u8, options: Options) Error!void { + try w.writeHeader(.regular, sub_path, "", content.len, options); + try w.underlying_writer.writeAll(content); + try w.writePadding(content.len); +} + +pub fn writeLink(w: *Writer, sub_path: []const u8, link_name: []const u8, options: Options) Error!void { + try w.writeHeader(.symbolic_link, sub_path, link_name, 0, options); +} + +fn writeHeader( + w: *Writer, + typeflag: Header.FileType, + sub_path: []const u8, + link_name: []const u8, + size: u64, + options: Options, +) Error!void { + var header = Header.init(typeflag); + try w.setPath(&header, sub_path); + try header.setSize(size); + try header.setMtime(options.mtime); + if (options.mode != 0) + try header.setMode(options.mode); + if (typeflag == .symbolic_link) + header.setLinkname(link_name) catch |err| switch (err) { + error.NameTooLong => try w.writeExtendedHeader(.gnu_long_link, &.{link_name}), + else => return err, + }; + try header.write(w.underlying_writer); +} + +/// Writes path in posix header; if it doesn't fit (in name+prefix; 100+155 +/// bytes), writes it in gnu extended header. +fn setPath(w: *Writer, header: *Header, sub_path: []const u8) Error!void { + header.setPath(w.prefix, sub_path) catch |err| switch (err) { + error.NameTooLong => { + // write extended header + const buffers: []const []const u8 = if (w.prefix.len == 0) + &.{sub_path} + else + &.{ w.prefix, "/", sub_path }; + try w.writeExtendedHeader(.gnu_long_name, buffers); + }, + else => return err, + }; +} + +/// Writes gnu extended header: gnu_long_name or gnu_long_link.
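+/// Layout of the emitted entry: one 512-byte header block whose size field holds the combined length of `buffers`, followed by the name bytes, zero-padded to the next block boundary via `writePadding`.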
+fn writeExtendedHeader(w: *Writer, typeflag: Header.FileType, buffers: []const []const u8) Error!void {
+    var len: usize = 0;
+    for (buffers) |buf| len += buf.len;
+
+    var header: Header = .init(typeflag);
+    try header.setSize(len);
+    try header.write(w.underlying_writer);
+    for (buffers) |buf|
+        try w.underlying_writer.writeAll(buf);
+    try w.writePadding(len);
+}
+
+fn writePadding(w: *Writer, bytes: usize) std.Io.Writer.Error!void {
+    return writePaddingPos(w, bytes % block_size);
+}
+
+fn writePadding64(w: *Writer, bytes: u64) std.Io.Writer.Error!void {
+    return writePaddingPos(w, @intCast(bytes % block_size));
+}
+
+/// Writes zero bytes up to the next block boundary; `pos` is the current
+/// offset within a block (`bytes % block_size`).
+fn writePaddingPos(w: *Writer, pos: usize) std.Io.Writer.Error!void {
+    if (pos == 0) return;
+    try w.underlying_writer.splatByteAll(0, block_size - pos);
+}
+
+/// According to the specification, tar should finish with two zero blocks, but
+/// a "reasonable system must not assume that such a block exists when reading
+/// an archive". Therefore, the Zig standard library recommends not calling
+/// this function.
+pub fn finishPedantically(w: *Writer) std.Io.Writer.Error!void {
+    try w.underlying_writer.splatByteAll(0, block_size * 2);
+}
+
+/// A struct that is exactly 512 bytes and matches the tar file format. This is
+/// intended to be used for outputting tar files; for parsing there is
+/// `std.tar.Header`.
+pub const Header = extern struct {
+    // This struct was originally copied from
+    // https://github.com/mattnite/tar/blob/main/src/main.zig which is MIT
+    // licensed.
+    //
+    // The name, linkname, magic, uname, and gname are null-terminated character
+    // strings. All other fields are zero-filled octal numbers in ASCII. Each
+    // numeric field of width w contains w minus 1 digits, and a null.
+    // Reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html
+    // POSIX header: byte offset
+    name: [100]u8 = [_]u8{0} ** 100, // 0
+    mode: [7:0]u8 = default_mode.file, // 100
+    uid: [7:0]u8 = [_:0]u8{0} ** 7, // unused 108
+    gid: [7:0]u8 = [_:0]u8{0} ** 7, // unused 116
+    size: [11:0]u8 = [_:0]u8{'0'} ** 11, // 124
+    mtime: [11:0]u8 = [_:0]u8{'0'} ** 11, // 136
+    checksum: [7:0]u8 = [_:0]u8{' '} ** 7, // 148
+    typeflag: FileType = .regular, // 156
+    linkname: [100]u8 = [_]u8{0} ** 100, // 157
+    magic: [6]u8 = [_]u8{ 'u', 's', 't', 'a', 'r', 0 }, // 257
+    version: [2]u8 = [_]u8{ '0', '0' }, // 263
+    uname: [32]u8 = [_]u8{0} ** 32, // unused 265
+    gname: [32]u8 = [_]u8{0} ** 32, // unused 297
+    devmajor: [7:0]u8 = [_:0]u8{0} ** 7, // unused 329
+    devminor: [7:0]u8 = [_:0]u8{0} ** 7, // unused 337
+    prefix: [155]u8 = [_]u8{0} ** 155, // 345
+    pad: [12]u8 = [_]u8{0} ** 12, // unused 500
+
+    pub const FileType = enum(u8) {
+        regular = '0',
+        symbolic_link = '2',
+        directory = '5',
+        gnu_long_name = 'L',
+        gnu_long_link = 'K',
+    };
+
+    const default_mode = struct {
+        const file = [_:0]u8{ '0', '0', '0', '0', '6', '6', '4' }; // 0o664
+        const dir = [_:0]u8{ '0', '0', '0', '0', '7', '7', '5' }; // 0o775
+        const sym_link = [_:0]u8{ '0', '0', '0', '0', '7', '7', '7' }; // 0o777
+        const other = [_:0]u8{ '0', '0', '0', '0', '0', '0', '0' }; // 0o000
+    };
+
+    pub fn init(typeflag: FileType) Header {
+        return .{
+            .typeflag = typeflag,
+            .mode = switch (typeflag) {
+                .directory => default_mode.dir,
+                .symbolic_link => default_mode.sym_link,
+                .regular => default_mode.file,
+                else => default_mode.other,
+            },
+        };
+    }
+
+    pub fn setSize(w: *Header, size: u64) error{OctalOverflow}!void {
+        try octal(&w.size, size);
+    }
+
+    // Writes `value` as right-justified octal ASCII into `buf`; fails if the
+    // value needs more digits than `buf` can hold.
+    fn octal(buf: []u8, value: u64) error{OctalOverflow}!void {
+        var remainder: u64 = value;
+        var pos: usize = buf.len;
+        while (remainder > 0 and pos > 0) {
+            pos -= 1;
+            const c: u8 = @as(u8, @intCast(remainder % 8)) + '0';
+            buf[pos] = c;
+            remainder /= 8;
+            if (pos == 0 and remainder > 0) return error.OctalOverflow;
+        }
+    }
+
+    pub fn setMode(w: *Header, mode: u32) error{OctalOverflow}!void {
+        try octal(&w.mode, mode);
+    }
+
+    // Integer number of seconds since January 1, 1970, 00:00 Coordinated Universal Time.
+    pub fn setMtime(w: *Header, mtime: u64) error{OctalOverflow}!void {
+        try octal(&w.mtime, mtime);
+    }
+
+    pub fn updateChecksum(w: *Header) !void {
+        var checksum: usize = ' '; // other 7 w.checksum bytes are initialized to ' '
+        for (std.mem.asBytes(w)) |val|
+            checksum += val;
+        try octal(&w.checksum, checksum);
+    }
+
+    pub fn write(h: *Header, bw: *std.Io.Writer) error{ OctalOverflow, WriteFailed }!void {
+        try h.updateChecksum();
+        try bw.writeAll(std.mem.asBytes(h));
+    }
+
+    pub fn setLinkname(w: *Header, link: []const u8) !void {
+        if (link.len > w.linkname.len) return error.NameTooLong;
+        @memcpy(w.linkname[0..link.len], link);
+    }
+
+    pub fn setPath(w: *Header, prefix: []const u8, sub_path: []const u8) !void {
+        const max_prefix = w.prefix.len;
+        const max_name = w.name.len;
+        const sep = std.fs.path.sep_posix;
+
+        if (prefix.len + sub_path.len > max_name + max_prefix or prefix.len > max_prefix)
+            return error.NameTooLong;
+
+        // both fit into name
+        if (prefix.len > 0 and prefix.len + sub_path.len < max_name) {
+            @memcpy(w.name[0..prefix.len], prefix);
+            w.name[prefix.len] = sep;
+            @memcpy(w.name[prefix.len + 1 ..][0..sub_path.len], sub_path);
+            return;
+        }
+
+        // sub_path fits into name
+        // there is no prefix or prefix fits into prefix
+        if (sub_path.len <= max_name) {
+            @memcpy(w.name[0..sub_path.len], sub_path);
+            @memcpy(w.prefix[0..prefix.len], prefix);
+            return;
+        }
+
+        if (prefix.len > 0) {
+            @memcpy(w.prefix[0..prefix.len], prefix);
+            w.prefix[prefix.len] = sep;
+        }
+        const prefix_pos = if (prefix.len > 0) prefix.len + 1 else 0;
+
+        // add as much as possible to prefix; must split at '/'
+        const prefix_remaining = max_prefix - prefix_pos;
+        if (std.mem.lastIndexOf(u8, sub_path[0..@min(prefix_remaining, sub_path.len)], &.{'/'})) |sep_pos| {
+            @memcpy(w.prefix[prefix_pos..][0..sep_pos], sub_path[0..sep_pos]);
+            if ((sub_path.len - sep_pos - 1) > max_name) return error.NameTooLong;
+            @memcpy(w.name[0..][0 .. sub_path.len - sep_pos - 1], sub_path[sep_pos + 1 ..]);
+            return;
+        }
+
+        return error.NameTooLong;
+    }
+
+    comptime {
+        assert(@sizeOf(Header) == 512);
+    }
+
+    test "setPath" {
+        const cases = [_]struct {
+            in: []const []const u8,
+            out: []const []const u8,
+        }{
+            .{
+                .in = &.{ "", "123456789" },
+                .out = &.{ "", "123456789" },
+            },
+            // can fit into name
+            .{
+                .in = &.{ "prefix", "sub_path" },
+                .out = &.{ "", "prefix/sub_path" },
+            },
+            // both no longer fit into name
+            .{
+                .in = &.{ "prefix", "0123456789/" ** 8 ++ "basename" },
+                .out = &.{ "prefix", "0123456789/" ** 8 ++ "basename" },
+            },
+            // put as much as possible into prefix; the rest goes into name
+            .{
+                .in = &.{ "prefix", "0123456789/" ** 10 ++ "basename" },
+                .out = &.{ "prefix/" ++ "0123456789/" ** 9 ++ "0123456789", "basename" },
+            },
+
+            .{
+                .in = &.{ "prefix", "0123456789/" ** 15 ++ "basename" },
+                .out = &.{ "prefix/" ++ "0123456789/" ** 12 ++ "0123456789", "0123456789/0123456789/basename" },
+            },
+            .{
+                .in = &.{ "prefix", "0123456789/" ** 21 ++ "basename" },
+                .out = &.{ "prefix/" ++ "0123456789/" ** 12 ++ "0123456789", "0123456789/" ** 8 ++ "basename" },
+            },
+            .{
+                .in = &.{ "", "012345678/" ** 10 ++ "foo" },
+                .out = &.{ "012345678/" ** 9 ++ "012345678", "foo" },
+            },
+        };
+
+        for (cases) |case| {
+            var header = Header.init(.regular);
+            try header.setPath(case.in[0], case.in[1]);
+            try testing.expectEqualStrings(case.out[0], std.mem.sliceTo(&header.prefix, 0));
+            try testing.expectEqualStrings(case.out[1], std.mem.sliceTo(&header.name, 0));
+        }
+
+        const error_cases = [_]struct {
+            in: []const []const u8,
+        }{
+            // basename can't fit into name (106 characters)
+            .{ .in = &.{ "zig", "test/cases/compile_errors/regression_test_2980_base_type_u32_is_not_type_checked_properly_when_assigning_a_value_within_a_struct.zig" } },
+            // can't fit into 255 + sep
+            .{ .in = &.{ "prefix", "0123456789/" ** 22 ++ "basename" } },
+            // can fit but sub_path can't be split (there is no separator)
+            .{ .in = &.{ "prefix", "0123456789" ** 10 ++ "a" } },
+            .{ .in = &.{ "prefix", "0123456789" ** 14 ++ "basename" } },
+        };
+
+        for (error_cases) |case| {
+            var header = Header.init(.regular);
+            try testing.expectError(
+                error.NameTooLong,
+                header.setPath(case.in[0], case.in[1]),
+            );
+        }
+    }
+};
+
+test {
+    _ = Header;
+}
+
+test "write files" {
+    const files = [_]struct {
+        path: []const u8,
+        content: []const u8,
+    }{
+        .{ .path = "foo", .content = "bar" },
+        .{ .path = "a12345678/" ** 10 ++ "foo", .content = "a" ** 511 },
+        .{ .path = "b12345678/" ** 24 ++ "foo", .content = "b" ** 512 },
+        .{ .path = "c12345678/" ** 25 ++ "foo", .content = "c" ** 513 },
+        .{ .path = "d12345678/" ** 51 ++ "foo", .content = "d" ** 1025 },
+        .{ .path = "e123456789" ** 11, .content = "e" },
+    };
+
+    var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
+    var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
+
+    // with root
+    {
+        const root = "root";
+
+        var output: std.Io.Writer.Allocating = .init(testing.allocator);
+        var w: Writer = .{ .underlying_writer = &output.writer };
+        defer output.deinit();
+        try w.setRoot(root);
+        for (files) |file|
+            try w.writeFileBytes(file.path, file.content, .{});
+
+        var input: std.Io.Reader = .fixed(output.getWritten());
+        var it: std.tar.Iterator = .init(&input, .{
+            .file_name_buffer = &file_name_buffer,
+            .link_name_buffer = &link_name_buffer,
+        });
+
+        // first entry is directory with prefix
+        {
+            const actual = (try it.next()).?;
+            try testing.expectEqualStrings(root, actual.name);
+            try
testing.expectEqual(std.tar.FileKind.directory, actual.kind); + } + + var i: usize = 0; + while (try it.next()) |actual| { + defer i += 1; + const expected = files[i]; + try testing.expectEqualStrings(root, actual.name[0..root.len]); + try testing.expectEqual('/', actual.name[root.len..][0]); + try testing.expectEqualStrings(expected.path, actual.name[root.len + 1 ..]); + + var content: std.Io.Writer.Allocating = .init(testing.allocator); + defer content.deinit(); + try it.streamRemaining(actual, &content.writer); + try testing.expectEqualSlices(u8, expected.content, content.getWritten()); + } + } + // without root + { + var output: std.Io.Writer.Allocating = .init(testing.allocator); + var w: Writer = .{ .underlying_writer = &output.writer }; + defer output.deinit(); + for (files) |file| { + var content: std.Io.Reader = .fixed(file.content); + try w.writeFileStream(file.path, file.content.len, &content, .{}); + } + + var input: std.Io.Reader = .fixed(output.getWritten()); + var it: std.tar.Iterator = .init(&input, .{ + .file_name_buffer = &file_name_buffer, + .link_name_buffer = &link_name_buffer, + }); + + var i: usize = 0; + while (try it.next()) |actual| { + defer i += 1; + const expected = files[i]; + try testing.expectEqualStrings(expected.path, actual.name); + + var content: std.Io.Writer.Allocating = .init(testing.allocator); + defer content.deinit(); + try it.streamRemaining(actual, &content.writer); + try testing.expectEqualSlices(u8, expected.content, content.getWritten()); + } + try w.finishPedantically(); + } +} diff --git a/lib/std/tar/test.zig b/lib/std/tar/test.zig index 3bcb5af90c..3356baacb5 100644 --- a/lib/std/tar/test.zig +++ b/lib/std/tar/test.zig @@ -18,31 +18,72 @@ const Case = struct { err: ?anyerror = null, // parsing should fail with this error }; -const cases = [_]Case{ - .{ - .data = @embedFile("testdata/gnu.tar"), - .files = &[_]Case.File{ - .{ - .name = "small.txt", - .size = 5, - .mode = 0o640, - }, - .{ - .name = "small2.txt", - .size = 11, - .mode = 0o640, - }, +const gnu_case: Case = .{ + .data = @embedFile("testdata/gnu.tar"), + .files = &[_]Case.File{ + .{ + .name = "small.txt", + .size = 5, + .mode = 0o640, }, - .chksums = &[_][]const u8{ - "e38b27eaccb4391bdec553a7f3ae6b2f", - "c65bd2e50a56a2138bf1716f2fd56fe9", + .{ + .name = "small2.txt", + .size = 11, + .mode = 0o640, }, }, - .{ + .chksums = &[_][]const u8{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, +}; + +const gnu_multi_headers_case: Case = .{ + .data = @embedFile("testdata/gnu-multi-hdrs.tar"), + .files = &[_]Case.File{ + .{ + .name = "GNU2/GNU2/long-path-name", + .link_name = "GNU4/GNU4/long-linkpath-name", + .kind = .sym_link, + }, + }, +}; + +const trailing_slash_case: Case = .{ + .data = @embedFile("testdata/trailing-slash.tar"), + .files = &[_]Case.File{ + .{ + .name = "123456789/" ** 30, + .kind = .directory, + }, + }, +}; + +const writer_big_long_case: Case = .{ + // Size in gnu extended format, and name in pax attribute. 
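+    // An 11-digit octal size field tops out just under 8 GiB, hence the gnu extension.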
+ .data = @embedFile("testdata/writer-big-long.tar"), + .files = &[_]Case.File{ + .{ + .name = "longname/" ** 15 ++ "16gig.txt", + .size = 16 * 1024 * 1024 * 1024, + .mode = 0o644, + .truncated = true, + }, + }, +}; + +const fuzz1_case: Case = .{ + .data = @embedFile("testdata/fuzz1.tar"), + .err = error.TarInsufficientBuffer, +}; + +test "run test cases" { + try testCase(gnu_case); + try testCase(.{ .data = @embedFile("testdata/sparse-formats.tar"), .err = error.TarUnsupportedHeader, - }, - .{ + }); + try testCase(.{ .data = @embedFile("testdata/star.tar"), .files = &[_]Case.File{ .{ @@ -60,8 +101,8 @@ const cases = [_]Case{ "e38b27eaccb4391bdec553a7f3ae6b2f", "c65bd2e50a56a2138bf1716f2fd56fe9", }, - }, - .{ + }); + try testCase(.{ .data = @embedFile("testdata/v7.tar"), .files = &[_]Case.File{ .{ @@ -79,8 +120,8 @@ const cases = [_]Case{ "e38b27eaccb4391bdec553a7f3ae6b2f", "c65bd2e50a56a2138bf1716f2fd56fe9", }, - }, - .{ + }); + try testCase(.{ .data = @embedFile("testdata/pax.tar"), .files = &[_]Case.File{ .{ @@ -99,13 +140,13 @@ const cases = [_]Case{ .chksums = &[_][]const u8{ "3c382e8f5b6631aa2db52643912ffd4a", }, - }, - .{ + }); + try testCase(.{ // pax attribute don't end with \n .data = @embedFile("testdata/pax-bad-hdr-file.tar"), .err = error.PaxInvalidAttributeEnd, - }, - .{ + }); + try testCase(.{ // size is in pax attribute .data = @embedFile("testdata/pax-pos-size-file.tar"), .files = &[_]Case.File{ @@ -119,8 +160,8 @@ const cases = [_]Case{ .chksums = &[_][]const u8{ "0afb597b283fe61b5d4879669a350556", }, - }, - .{ + }); + try testCase(.{ // has pax records which we are not interested in .data = @embedFile("testdata/pax-records.tar"), .files = &[_]Case.File{ @@ -128,8 +169,8 @@ const cases = [_]Case{ .name = "file", }, }, - }, - .{ + }); + try testCase(.{ // has global records which we are ignoring .data = @embedFile("testdata/pax-global-records.tar"), .files = &[_]Case.File{ @@ -146,8 +187,8 @@ const cases = [_]Case{ .name = "file4", }, }, - }, - .{ + }); + try testCase(.{ .data = @embedFile("testdata/nil-uid.tar"), .files = &[_]Case.File{ .{ @@ -160,8 +201,8 @@ const cases = [_]Case{ .chksums = &[_][]const u8{ "08d504674115e77a67244beac19668f5", }, - }, - .{ + }); + try testCase(.{ // has xattrs and pax records which we are ignoring .data = @embedFile("testdata/xattrs.tar"), .files = &[_]Case.File{ @@ -182,23 +223,14 @@ const cases = [_]Case{ "e38b27eaccb4391bdec553a7f3ae6b2f", "c65bd2e50a56a2138bf1716f2fd56fe9", }, - }, - .{ - .data = @embedFile("testdata/gnu-multi-hdrs.tar"), - .files = &[_]Case.File{ - .{ - .name = "GNU2/GNU2/long-path-name", - .link_name = "GNU4/GNU4/long-linkpath-name", - .kind = .sym_link, - }, - }, - }, - .{ + }); + try testCase(gnu_multi_headers_case); + try testCase(.{ // has gnu type D (directory) and S (sparse) blocks .data = @embedFile("testdata/gnu-incremental.tar"), .err = error.TarUnsupportedHeader, - }, - .{ + }); + try testCase(.{ // should use values only from last pax header .data = @embedFile("testdata/pax-multi-hdrs.tar"), .files = &[_]Case.File{ @@ -208,8 +240,8 @@ const cases = [_]Case{ .kind = .sym_link, }, }, - }, - .{ + }); + try testCase(.{ .data = @embedFile("testdata/gnu-long-nul.tar"), .files = &[_]Case.File{ .{ @@ -217,8 +249,8 @@ const cases = [_]Case{ .mode = 0o644, }, }, - }, - .{ + }); + try testCase(.{ .data = @embedFile("testdata/gnu-utf8.tar"), .files = &[_]Case.File{ .{ @@ -226,8 +258,8 @@ const cases = [_]Case{ .mode = 0o644, }, }, - }, - .{ + }); + try testCase(.{ .data = @embedFile("testdata/gnu-not-utf8.tar"), 
.files = &[_]Case.File{ .{ @@ -235,33 +267,33 @@ const cases = [_]Case{ .mode = 0o644, }, }, - }, - .{ + }); + try testCase(.{ // null in pax key .data = @embedFile("testdata/pax-nul-xattrs.tar"), .err = error.PaxNullInKeyword, - }, - .{ + }); + try testCase(.{ .data = @embedFile("testdata/pax-nul-path.tar"), .err = error.PaxNullInValue, - }, - .{ + }); + try testCase(.{ .data = @embedFile("testdata/neg-size.tar"), .err = error.TarHeader, - }, - .{ + }); + try testCase(.{ .data = @embedFile("testdata/issue10968.tar"), .err = error.TarHeader, - }, - .{ + }); + try testCase(.{ .data = @embedFile("testdata/issue11169.tar"), .err = error.TarHeader, - }, - .{ + }); + try testCase(.{ .data = @embedFile("testdata/issue12435.tar"), .err = error.TarHeaderChksum, - }, - .{ + }); + try testCase(.{ // has magic with space at end instead of null .data = @embedFile("testdata/invalid-go17.tar"), .files = &[_]Case.File{ @@ -269,8 +301,8 @@ const cases = [_]Case{ .name = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo", }, }, - }, - .{ + }); + try testCase(.{ .data = @embedFile("testdata/ustar-file-devs.tar"), .files = &[_]Case.File{ .{ @@ -278,17 +310,9 @@ const cases = [_]Case{ .mode = 0o644, }, }, - }, - .{ - .data = @embedFile("testdata/trailing-slash.tar"), - .files = &[_]Case.File{ - .{ - .name = "123456789/" ** 30, - .kind = .directory, - }, - }, - }, - .{ + }); + try testCase(trailing_slash_case); + try testCase(.{ // Has size in gnu extended format. To represent size bigger than 8 GB. .data = @embedFile("testdata/writer-big.tar"), .files = &[_]Case.File{ @@ -299,120 +323,92 @@ const cases = [_]Case{ .mode = 0o640, }, }, - }, - .{ - // Size in gnu extended format, and name in pax attribute. - .data = @embedFile("testdata/writer-big-long.tar"), - .files = &[_]Case.File{ - .{ - .name = "longname/" ** 15 ++ "16gig.txt", - .size = 16 * 1024 * 1024 * 1024, - .mode = 0o644, - .truncated = true, - }, - }, - }, - .{ - .data = @embedFile("testdata/fuzz1.tar"), - .err = error.TarInsufficientBuffer, - }, - .{ + }); + try testCase(writer_big_long_case); + try testCase(fuzz1_case); + try testCase(.{ .data = @embedFile("testdata/fuzz2.tar"), .err = error.PaxSizeAttrOverflow, - }, -}; + }); +} -// used in test to calculate file chksum -const Md5Writer = struct { - h: std.crypto.hash.Md5 = std.crypto.hash.Md5.init(.{}), - - pub fn writeAll(self: *Md5Writer, buf: []const u8) !void { - self.h.update(buf); - } - - pub fn writeByte(self: *Md5Writer, byte: u8) !void { - self.h.update(&[_]u8{byte}); - } - - pub fn chksum(self: *Md5Writer) [32]u8 { - var s = [_]u8{0} ** 16; - self.h.final(&s); - return std.fmt.bytesToHex(s, .lower); - } -}; - -test "run test cases" { +fn testCase(case: Case) !void { var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined; var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined; - for (cases) |case| { - var fsb = std.io.fixedBufferStream(case.data); - var iter = tar.iterator(fsb.reader(), .{ - .file_name_buffer = &file_name_buffer, - .link_name_buffer = &link_name_buffer, - }); - var i: usize = 0; - while (iter.next() catch |err| { - if (case.err) |e| { - try testing.expectEqual(e, err); - continue; - } else { - return err; - } - }) |actual| : (i += 1) { - const expected = case.files[i]; - try testing.expectEqualStrings(expected.name, actual.name); - try testing.expectEqual(expected.size, actual.size); - try testing.expectEqual(expected.kind, actual.kind); - try testing.expectEqual(expected.mode, actual.mode); - try 
testing.expectEqualStrings(expected.link_name, actual.link_name); + var br: std.io.Reader = .fixed(case.data); + var it: tar.Iterator = .init(&br, .{ + .file_name_buffer = &file_name_buffer, + .link_name_buffer = &link_name_buffer, + }); + var i: usize = 0; + while (it.next() catch |err| { + if (case.err) |e| { + try testing.expectEqual(e, err); + return; + } else { + return err; + } + }) |actual| : (i += 1) { + const expected = case.files[i]; + try testing.expectEqualStrings(expected.name, actual.name); + try testing.expectEqual(expected.size, actual.size); + try testing.expectEqual(expected.kind, actual.kind); + try testing.expectEqual(expected.mode, actual.mode); + try testing.expectEqualStrings(expected.link_name, actual.link_name); - if (case.chksums.len > i) { - var md5writer = Md5Writer{}; - try actual.writeAll(&md5writer); - const chksum = md5writer.chksum(); - try testing.expectEqualStrings(case.chksums[i], &chksum); - } else { - if (expected.truncated) { - iter.unread_file_bytes = 0; - } + if (case.chksums.len > i) { + var aw: std.Io.Writer.Allocating = .init(std.testing.allocator); + defer aw.deinit(); + try it.streamRemaining(actual, &aw.writer); + const chksum = std.fmt.bytesToHex(std.crypto.hash.Md5.hashResult(aw.getWritten()), .lower); + try testing.expectEqualStrings(case.chksums[i], &chksum); + } else { + if (expected.truncated) { + it.unread_file_bytes = 0; } } - try testing.expectEqual(case.files.len, i); } + try testing.expectEqual(case.files.len, i); } test "pax/gnu long names with small buffer" { + try testLongNameCase(gnu_multi_headers_case); + try testLongNameCase(trailing_slash_case); + try testLongNameCase(.{ + .data = @embedFile("testdata/fuzz1.tar"), + .err = error.TarInsufficientBuffer, + }); +} + +fn testLongNameCase(case: Case) !void { // should fail with insufficient buffer error var min_file_name_buffer: [256]u8 = undefined; var min_link_name_buffer: [100]u8 = undefined; - const long_name_cases = [_]Case{ cases[11], cases[25], cases[28] }; - for (long_name_cases) |case| { - var fsb = std.io.fixedBufferStream(case.data); - var iter = tar.iterator(fsb.reader(), .{ - .file_name_buffer = &min_file_name_buffer, - .link_name_buffer = &min_link_name_buffer, - }); + var br: std.io.Reader = .fixed(case.data); + var iter: tar.Iterator = .init(&br, .{ + .file_name_buffer = &min_file_name_buffer, + .link_name_buffer = &min_link_name_buffer, + }); - var iter_err: ?anyerror = null; - while (iter.next() catch |err| brk: { - iter_err = err; - break :brk null; - }) |_| {} + var iter_err: ?anyerror = null; + while (iter.next() catch |err| brk: { + iter_err = err; + break :brk null; + }) |_| {} - try testing.expect(iter_err != null); - try testing.expectEqual(error.TarInsufficientBuffer, iter_err.?); - } + try testing.expect(iter_err != null); + try testing.expectEqual(error.TarInsufficientBuffer, iter_err.?); } test "insufficient buffer in Header name filed" { var min_file_name_buffer: [9]u8 = undefined; var min_link_name_buffer: [100]u8 = undefined; - var fsb = std.io.fixedBufferStream(cases[0].data); - var iter = tar.iterator(fsb.reader(), .{ + var br: std.io.Reader = .fixed(gnu_case.data); + var iter: tar.Iterator = .init(&br, .{ .file_name_buffer = &min_file_name_buffer, .link_name_buffer = &min_link_name_buffer, }); @@ -466,21 +462,21 @@ test "should not overwrite existing file" { // This ensures that file is not overwritten. 
// const data = @embedFile("testdata/overwrite_file.tar"); - var fsb = std.io.fixedBufferStream(data); + var r: std.io.Reader = .fixed(data); // Unpack with strip_components = 1 should fail var root = std.testing.tmpDir(.{}); defer root.cleanup(); try testing.expectError( error.PathAlreadyExists, - tar.pipeToFileSystem(root.dir, fsb.reader(), .{ .mode_mode = .ignore, .strip_components = 1 }), + tar.pipeToFileSystem(root.dir, &r, .{ .mode_mode = .ignore, .strip_components = 1 }), ); // Unpack with strip_components = 0 should pass - fsb.reset(); + r = .fixed(data); var root2 = std.testing.tmpDir(.{}); defer root2.cleanup(); - try tar.pipeToFileSystem(root2.dir, fsb.reader(), .{ .mode_mode = .ignore, .strip_components = 0 }); + try tar.pipeToFileSystem(root2.dir, &r, .{ .mode_mode = .ignore, .strip_components = 0 }); } test "case sensitivity" { @@ -494,12 +490,12 @@ test "case sensitivity" { // 18089/alacritty/Darkermatrix.yml // const data = @embedFile("testdata/18089.tar"); - var fsb = std.io.fixedBufferStream(data); + var r: std.io.Reader = .fixed(data); var root = std.testing.tmpDir(.{}); defer root.cleanup(); - tar.pipeToFileSystem(root.dir, fsb.reader(), .{ .mode_mode = .ignore, .strip_components = 1 }) catch |err| { + tar.pipeToFileSystem(root.dir, &r, .{ .mode_mode = .ignore, .strip_components = 1 }) catch |err| { // on case insensitive fs we fail on overwrite existing file try testing.expectEqual(error.PathAlreadyExists, err); return; diff --git a/lib/std/tar/writer.zig b/lib/std/tar/writer.zig deleted file mode 100644 index 4ced287eec..0000000000 --- a/lib/std/tar/writer.zig +++ /dev/null @@ -1,497 +0,0 @@ -const std = @import("std"); -const assert = std.debug.assert; -const testing = std.testing; - -/// Creates tar Writer which will write tar content to the `underlying_writer`. -/// Use setRoot to nest all following entries under single root. If file don't -/// fit into posix header (name+prefix: 100+155 bytes) gnu extented header will -/// be used for long names. Options enables setting file premission mode and -/// mtime. Default is to use current time for mtime and 0o664 for file mode. -pub fn writer(underlying_writer: anytype) Writer(@TypeOf(underlying_writer)) { - return .{ .underlying_writer = underlying_writer }; -} - -pub fn Writer(comptime WriterType: type) type { - return struct { - const block_size = @sizeOf(Header); - const empty_block: [block_size]u8 = [_]u8{0} ** block_size; - - /// Options for writing file/dir/link. If left empty 0o664 is used for - /// file mode and current time for mtime. - pub const Options = struct { - /// File system permission mode. - mode: u32 = 0, - /// File system modification time. - mtime: u64 = 0, - }; - const Self = @This(); - - underlying_writer: WriterType, - prefix: []const u8 = "", - mtime_now: u64 = 0, - - /// Sets prefix for all other write* method paths. - pub fn setRoot(self: *Self, root: []const u8) !void { - if (root.len > 0) - try self.writeDir(root, .{}); - - self.prefix = root; - } - - /// Writes directory. - pub fn writeDir(self: *Self, sub_path: []const u8, opt: Options) !void { - try self.writeHeader(.directory, sub_path, "", 0, opt); - } - - /// Writes file system file. 
- pub fn writeFile(self: *Self, sub_path: []const u8, file: std.fs.File) !void { - const stat = try file.stat(); - const mtime: u64 = @intCast(@divFloor(stat.mtime, std.time.ns_per_s)); - - var header = Header{}; - try self.setPath(&header, sub_path); - try header.setSize(stat.size); - try header.setMtime(mtime); - try header.write(self.underlying_writer); - - try self.underlying_writer.writeFile(file); - try self.writePadding(stat.size); - } - - /// Writes file reading file content from `reader`. Number of bytes in - /// reader must be equal to `size`. - pub fn writeFileStream(self: *Self, sub_path: []const u8, size: usize, reader: anytype, opt: Options) !void { - try self.writeHeader(.regular, sub_path, "", @intCast(size), opt); - - var counting_reader = std.io.countingReader(reader); - var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init(); - try fifo.pump(counting_reader.reader(), self.underlying_writer); - if (counting_reader.bytes_read != size) return error.WrongReaderSize; - try self.writePadding(size); - } - - /// Writes file using bytes buffer `content` for size and file content. - pub fn writeFileBytes(self: *Self, sub_path: []const u8, content: []const u8, opt: Options) !void { - try self.writeHeader(.regular, sub_path, "", @intCast(content.len), opt); - try self.underlying_writer.writeAll(content); - try self.writePadding(content.len); - } - - /// Writes symlink. - pub fn writeLink(self: *Self, sub_path: []const u8, link_name: []const u8, opt: Options) !void { - try self.writeHeader(.symbolic_link, sub_path, link_name, 0, opt); - } - - /// Writes fs.Dir.WalkerEntry. Uses `mtime` from file system entry and - /// default for entry mode . - pub fn writeEntry(self: *Self, entry: std.fs.Dir.Walker.Entry) !void { - switch (entry.kind) { - .directory => { - try self.writeDir(entry.path, .{ .mtime = try entryMtime(entry) }); - }, - .file => { - var file = try entry.dir.openFile(entry.basename, .{}); - defer file.close(); - try self.writeFile(entry.path, file); - }, - .sym_link => { - var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined; - const link_name = try entry.dir.readLink(entry.basename, &link_name_buffer); - try self.writeLink(entry.path, link_name, .{ .mtime = try entryMtime(entry) }); - }, - else => { - return error.UnsupportedWalkerEntryKind; - }, - } - } - - fn writeHeader( - self: *Self, - typeflag: Header.FileType, - sub_path: []const u8, - link_name: []const u8, - size: u64, - opt: Options, - ) !void { - var header = Header.init(typeflag); - try self.setPath(&header, sub_path); - try header.setSize(size); - try header.setMtime(if (opt.mtime != 0) opt.mtime else self.mtimeNow()); - if (opt.mode != 0) - try header.setMode(opt.mode); - if (typeflag == .symbolic_link) - header.setLinkname(link_name) catch |err| switch (err) { - error.NameTooLong => try self.writeExtendedHeader(.gnu_long_link, &.{link_name}), - else => return err, - }; - try header.write(self.underlying_writer); - } - - fn mtimeNow(self: *Self) u64 { - if (self.mtime_now == 0) - self.mtime_now = @intCast(std.time.timestamp()); - return self.mtime_now; - } - - fn entryMtime(entry: std.fs.Dir.Walker.Entry) !u64 { - const stat = try entry.dir.statFile(entry.basename); - return @intCast(@divFloor(stat.mtime, std.time.ns_per_s)); - } - - /// Writes path in posix header, if don't fit (in name+prefix; 100+155 - /// bytes) writes it in gnu extended header. 
- fn setPath(self: *Self, header: *Header, sub_path: []const u8) !void { - header.setPath(self.prefix, sub_path) catch |err| switch (err) { - error.NameTooLong => { - // write extended header - const buffers: []const []const u8 = if (self.prefix.len == 0) - &.{sub_path} - else - &.{ self.prefix, "/", sub_path }; - try self.writeExtendedHeader(.gnu_long_name, buffers); - }, - else => return err, - }; - } - - /// Writes gnu extended header: gnu_long_name or gnu_long_link. - fn writeExtendedHeader(self: *Self, typeflag: Header.FileType, buffers: []const []const u8) !void { - var len: usize = 0; - for (buffers) |buf| - len += buf.len; - - var header = Header.init(typeflag); - try header.setSize(len); - try header.write(self.underlying_writer); - for (buffers) |buf| - try self.underlying_writer.writeAll(buf); - try self.writePadding(len); - } - - fn writePadding(self: *Self, bytes: u64) !void { - const pos: usize = @intCast(bytes % block_size); - if (pos == 0) return; - try self.underlying_writer.writeAll(empty_block[pos..]); - } - - /// Tar should finish with two zero blocks, but 'reasonable system must - /// not assume that such a block exists when reading an archive' (from - /// reference). In practice it is safe to skip this finish. - pub fn finish(self: *Self) !void { - try self.underlying_writer.writeAll(&empty_block); - try self.underlying_writer.writeAll(&empty_block); - } - }; -} - -/// A struct that is exactly 512 bytes and matches tar file format. This is -/// intended to be used for outputting tar files; for parsing there is -/// `std.tar.Header`. -const Header = extern struct { - // This struct was originally copied from - // https://github.com/mattnite/tar/blob/main/src/main.zig which is MIT - // licensed. - // - // The name, linkname, magic, uname, and gname are null-terminated character - // strings. All other fields are zero-filled octal numbers in ASCII. Each - // numeric field of width w contains w minus 1 digits, and a null. 
- // Reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html - // POSIX header: byte offset - name: [100]u8 = [_]u8{0} ** 100, // 0 - mode: [7:0]u8 = default_mode.file, // 100 - uid: [7:0]u8 = [_:0]u8{0} ** 7, // unused 108 - gid: [7:0]u8 = [_:0]u8{0} ** 7, // unused 116 - size: [11:0]u8 = [_:0]u8{'0'} ** 11, // 124 - mtime: [11:0]u8 = [_:0]u8{'0'} ** 11, // 136 - checksum: [7:0]u8 = [_:0]u8{' '} ** 7, // 148 - typeflag: FileType = .regular, // 156 - linkname: [100]u8 = [_]u8{0} ** 100, // 157 - magic: [6]u8 = [_]u8{ 'u', 's', 't', 'a', 'r', 0 }, // 257 - version: [2]u8 = [_]u8{ '0', '0' }, // 263 - uname: [32]u8 = [_]u8{0} ** 32, // unused 265 - gname: [32]u8 = [_]u8{0} ** 32, // unused 297 - devmajor: [7:0]u8 = [_:0]u8{0} ** 7, // unused 329 - devminor: [7:0]u8 = [_:0]u8{0} ** 7, // unused 337 - prefix: [155]u8 = [_]u8{0} ** 155, // 345 - pad: [12]u8 = [_]u8{0} ** 12, // unused 500 - - pub const FileType = enum(u8) { - regular = '0', - symbolic_link = '2', - directory = '5', - gnu_long_name = 'L', - gnu_long_link = 'K', - }; - - const default_mode = struct { - const file = [_:0]u8{ '0', '0', '0', '0', '6', '6', '4' }; // 0o664 - const dir = [_:0]u8{ '0', '0', '0', '0', '7', '7', '5' }; // 0o775 - const sym_link = [_:0]u8{ '0', '0', '0', '0', '7', '7', '7' }; // 0o777 - const other = [_:0]u8{ '0', '0', '0', '0', '0', '0', '0' }; // 0o000 - }; - - pub fn init(typeflag: FileType) Header { - return .{ - .typeflag = typeflag, - .mode = switch (typeflag) { - .directory => default_mode.dir, - .symbolic_link => default_mode.sym_link, - .regular => default_mode.file, - else => default_mode.other, - }, - }; - } - - pub fn setSize(self: *Header, size: u64) !void { - try octal(&self.size, size); - } - - fn octal(buf: []u8, value: u64) !void { - var remainder: u64 = value; - var pos: usize = buf.len; - while (remainder > 0 and pos > 0) { - pos -= 1; - const c: u8 = @as(u8, @intCast(remainder % 8)) + '0'; - buf[pos] = c; - remainder /= 8; - if (pos == 0 and remainder > 0) return error.OctalOverflow; - } - } - - pub fn setMode(self: *Header, mode: u32) !void { - try octal(&self.mode, mode); - } - - // Integer number of seconds since January 1, 1970, 00:00 Coordinated Universal Time. 
- // mtime == 0 will use current time - pub fn setMtime(self: *Header, mtime: u64) !void { - try octal(&self.mtime, mtime); - } - - pub fn updateChecksum(self: *Header) !void { - var checksum: usize = ' '; // other 7 self.checksum bytes are initialized to ' ' - for (std.mem.asBytes(self)) |val| - checksum += val; - try octal(&self.checksum, checksum); - } - - pub fn write(self: *Header, output_writer: anytype) !void { - try self.updateChecksum(); - try output_writer.writeAll(std.mem.asBytes(self)); - } - - pub fn setLinkname(self: *Header, link: []const u8) !void { - if (link.len > self.linkname.len) return error.NameTooLong; - @memcpy(self.linkname[0..link.len], link); - } - - pub fn setPath(self: *Header, prefix: []const u8, sub_path: []const u8) !void { - const max_prefix = self.prefix.len; - const max_name = self.name.len; - const sep = std.fs.path.sep_posix; - - if (prefix.len + sub_path.len > max_name + max_prefix or prefix.len > max_prefix) - return error.NameTooLong; - - // both fit into name - if (prefix.len > 0 and prefix.len + sub_path.len < max_name) { - @memcpy(self.name[0..prefix.len], prefix); - self.name[prefix.len] = sep; - @memcpy(self.name[prefix.len + 1 ..][0..sub_path.len], sub_path); - return; - } - - // sub_path fits into name - // there is no prefix or prefix fits into prefix - if (sub_path.len <= max_name) { - @memcpy(self.name[0..sub_path.len], sub_path); - @memcpy(self.prefix[0..prefix.len], prefix); - return; - } - - if (prefix.len > 0) { - @memcpy(self.prefix[0..prefix.len], prefix); - self.prefix[prefix.len] = sep; - } - const prefix_pos = if (prefix.len > 0) prefix.len + 1 else 0; - - // add as much to prefix as you can, must split at / - const prefix_remaining = max_prefix - prefix_pos; - if (std.mem.lastIndexOf(u8, sub_path[0..@min(prefix_remaining, sub_path.len)], &.{'/'})) |sep_pos| { - @memcpy(self.prefix[prefix_pos..][0..sep_pos], sub_path[0..sep_pos]); - if ((sub_path.len - sep_pos - 1) > max_name) return error.NameTooLong; - @memcpy(self.name[0..][0 .. 
sub_path.len - sep_pos - 1], sub_path[sep_pos + 1 ..]); - return; - } - - return error.NameTooLong; - } - - comptime { - assert(@sizeOf(Header) == 512); - } - - test setPath { - const cases = [_]struct { - in: []const []const u8, - out: []const []const u8, - }{ - .{ - .in = &.{ "", "123456789" }, - .out = &.{ "", "123456789" }, - }, - // can fit into name - .{ - .in = &.{ "prefix", "sub_path" }, - .out = &.{ "", "prefix/sub_path" }, - }, - // no more both fits into name - .{ - .in = &.{ "prefix", "0123456789/" ** 8 ++ "basename" }, - .out = &.{ "prefix", "0123456789/" ** 8 ++ "basename" }, - }, - // put as much as you can into prefix the rest goes into name - .{ - .in = &.{ "prefix", "0123456789/" ** 10 ++ "basename" }, - .out = &.{ "prefix/" ++ "0123456789/" ** 9 ++ "0123456789", "basename" }, - }, - - .{ - .in = &.{ "prefix", "0123456789/" ** 15 ++ "basename" }, - .out = &.{ "prefix/" ++ "0123456789/" ** 12 ++ "0123456789", "0123456789/0123456789/basename" }, - }, - .{ - .in = &.{ "prefix", "0123456789/" ** 21 ++ "basename" }, - .out = &.{ "prefix/" ++ "0123456789/" ** 12 ++ "0123456789", "0123456789/" ** 8 ++ "basename" }, - }, - .{ - .in = &.{ "", "012345678/" ** 10 ++ "foo" }, - .out = &.{ "012345678/" ** 9 ++ "012345678", "foo" }, - }, - }; - - for (cases) |case| { - var header = Header.init(.regular); - try header.setPath(case.in[0], case.in[1]); - try testing.expectEqualStrings(case.out[0], str(&header.prefix)); - try testing.expectEqualStrings(case.out[1], str(&header.name)); - } - - const error_cases = [_]struct { - in: []const []const u8, - }{ - // basename can't fit into name (106 characters) - .{ .in = &.{ "zig", "test/cases/compile_errors/regression_test_2980_base_type_u32_is_not_type_checked_properly_when_assigning_a_value_within_a_struct.zig" } }, - // cant fit into 255 + sep - .{ .in = &.{ "prefix", "0123456789/" ** 22 ++ "basename" } }, - // can fit but sub_path can't be split (there is no separator) - .{ .in = &.{ "prefix", "0123456789" ** 10 ++ "a" } }, - .{ .in = &.{ "prefix", "0123456789" ** 14 ++ "basename" } }, - }; - - for (error_cases) |case| { - var header = Header.init(.regular); - try testing.expectError( - error.NameTooLong, - header.setPath(case.in[0], case.in[1]), - ); - } - } - - // Breaks string on first null character. - fn str(s: []const u8) []const u8 { - for (s, 0..) 
|c, i| { - if (c == 0) return s[0..i]; - } - return s; - } -}; - -test { - _ = Header; -} - -test "write files" { - const files = [_]struct { - path: []const u8, - content: []const u8, - }{ - .{ .path = "foo", .content = "bar" }, - .{ .path = "a12345678/" ** 10 ++ "foo", .content = "a" ** 511 }, - .{ .path = "b12345678/" ** 24 ++ "foo", .content = "b" ** 512 }, - .{ .path = "c12345678/" ** 25 ++ "foo", .content = "c" ** 513 }, - .{ .path = "d12345678/" ** 51 ++ "foo", .content = "d" ** 1025 }, - .{ .path = "e123456789" ** 11, .content = "e" }, - }; - - var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined; - var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined; - - // with root - { - const root = "root"; - - var output = std.ArrayList(u8).init(testing.allocator); - defer output.deinit(); - var wrt = writer(output.writer()); - try wrt.setRoot(root); - for (files) |file| - try wrt.writeFileBytes(file.path, file.content, .{}); - - var input = std.io.fixedBufferStream(output.items); - var iter = std.tar.iterator( - input.reader(), - .{ .file_name_buffer = &file_name_buffer, .link_name_buffer = &link_name_buffer }, - ); - - // first entry is directory with prefix - { - const actual = (try iter.next()).?; - try testing.expectEqualStrings(root, actual.name); - try testing.expectEqual(std.tar.FileKind.directory, actual.kind); - } - - var i: usize = 0; - while (try iter.next()) |actual| { - defer i += 1; - const expected = files[i]; - try testing.expectEqualStrings(root, actual.name[0..root.len]); - try testing.expectEqual('/', actual.name[root.len..][0]); - try testing.expectEqualStrings(expected.path, actual.name[root.len + 1 ..]); - - var content = std.ArrayList(u8).init(testing.allocator); - defer content.deinit(); - try actual.writeAll(content.writer()); - try testing.expectEqualSlices(u8, expected.content, content.items); - } - } - // without root - { - var output = std.ArrayList(u8).init(testing.allocator); - defer output.deinit(); - var wrt = writer(output.writer()); - for (files) |file| { - var content = std.io.fixedBufferStream(file.content); - try wrt.writeFileStream(file.path, file.content.len, content.reader(), .{}); - } - - var input = std.io.fixedBufferStream(output.items); - var iter = std.tar.iterator( - input.reader(), - .{ .file_name_buffer = &file_name_buffer, .link_name_buffer = &link_name_buffer }, - ); - - var i: usize = 0; - while (try iter.next()) |actual| { - defer i += 1; - const expected = files[i]; - try testing.expectEqualStrings(expected.path, actual.name); - - var content = std.ArrayList(u8).init(testing.allocator); - defer content.deinit(); - try actual.writeAll(content.writer()); - try testing.expectEqualSlices(u8, expected.content, content.items); - } - try wrt.finish(); - } -} diff --git a/src/Compilation.zig b/src/Compilation.zig index 412d6a023c..2b661c04d6 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -4862,6 +4862,9 @@ fn docsCopyFallible(comp: *Compilation) anyerror!void { }; defer tar_file.close(); + var buffer: [1024]u8 = undefined; + var tar_file_writer = tar_file.writer(&buffer); + var seen_table: std.AutoArrayHashMapUnmanaged(*Package.Module, []const u8) = .empty; defer seen_table.deinit(comp.gpa); @@ -4871,7 +4874,7 @@ fn docsCopyFallible(comp: *Compilation) anyerror!void { var i: usize = 0; while (i < seen_table.count()) : (i += 1) { const mod = seen_table.keys()[i]; - try comp.docsCopyModule(mod, seen_table.values()[i], tar_file); + try comp.docsCopyModule(mod, seen_table.values()[i], &tar_file_writer); const deps = 
mod.deps.values(); try seen_table.ensureUnusedCapacity(comp.gpa, deps.len); @@ -4879,24 +4882,29 @@ fn docsCopyFallible(comp: *Compilation) anyerror!void { } } -fn docsCopyModule(comp: *Compilation, module: *Package.Module, name: []const u8, tar_file: fs.File) !void { +fn docsCopyModule( + comp: *Compilation, + module: *Package.Module, + name: []const u8, + tar_file_writer: *fs.File.Writer, +) !void { const root = module.root; var mod_dir = d: { const root_dir, const sub_path = root.openInfo(comp.dirs); break :d root_dir.openDir(sub_path, .{ .iterate = true }); } catch |err| { - return comp.lockAndSetMiscFailure(.docs_copy, "unable to open directory '{f}': {s}", .{ - root.fmt(comp), @errorName(err), - }); + return comp.lockAndSetMiscFailure(.docs_copy, "unable to open directory '{f}': {t}", .{ root.fmt(comp), err }); }; defer mod_dir.close(); var walker = try mod_dir.walk(comp.gpa); defer walker.deinit(); - var archiver = std.tar.writer(tar_file.deprecatedWriter().any()); + var archiver: std.tar.Writer = .{ .underlying_writer = &tar_file_writer.interface }; archiver.prefix = name; + var buffer: [1024]u8 = undefined; + while (try walker.next()) |entry| { switch (entry.kind) { .file => { @@ -4907,14 +4915,17 @@ fn docsCopyModule(comp: *Compilation, module: *Package.Module, name: []const u8, else => continue, } var file = mod_dir.openFile(entry.path, .{}) catch |err| { - return comp.lockAndSetMiscFailure(.docs_copy, "unable to open '{f}{s}': {s}", .{ - root.fmt(comp), entry.path, @errorName(err), + return comp.lockAndSetMiscFailure(.docs_copy, "unable to open {f}{s}: {t}", .{ + root.fmt(comp), entry.path, err, }); }; defer file.close(); - archiver.writeFile(entry.path, file) catch |err| { - return comp.lockAndSetMiscFailure(.docs_copy, "unable to archive '{f}{s}': {s}", .{ - root.fmt(comp), entry.path, @errorName(err), + const stat = try file.stat(); + var file_reader: fs.File.Reader = .initSize(file, &buffer, stat.size); + + archiver.writeFile(entry.path, &file_reader, stat.mtime) catch |err| { + return comp.lockAndSetMiscFailure(.docs_copy, "unable to archive {f}{s}: {t}", .{ + root.fmt(comp), entry.path, err, }); }; } @@ -4926,9 +4937,7 @@ fn workerDocsWasm(comp: *Compilation, parent_prog_node: std.Progress.Node) void workerDocsWasmFallible(comp, prog_node) catch |err| switch (err) { error.SubCompilationFailed => return, // error reported already - else => comp.lockAndSetMiscFailure(.docs_wasm, "unable to build autodocs: {s}", .{ - @errorName(err), - }), + else => comp.lockAndSetMiscFailure(.docs_wasm, "unable to build autodocs: {t}", .{err}), }; } diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index a97b60a17c..5d6819149f 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -1197,12 +1197,17 @@ fn unpackResource( }; switch (file_type) { - .tar => return try unpackTarball(f, tmp_directory.handle, resource.reader()), + .tar => { + var adapter = resource.reader().adaptToNewApi(); + return unpackTarball(f, tmp_directory.handle, &adapter.new_interface); + }, .@"tar.gz" => { const reader = resource.reader(); var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, reader); var dcp = std.compress.gzip.decompressor(br.reader()); - return try unpackTarball(f, tmp_directory.handle, dcp.reader()); + var adapter_buffer: [1024]u8 = undefined; + var adapter = dcp.reader().adaptToNewApi(&adapter_buffer); + return try unpackTarball(f, tmp_directory.handle, &adapter.new_interface); }, .@"tar.xz" => { const gpa = f.arena.child_allocator; @@ -1215,7 +1220,9 @@ fn 
unpackResource( )); }; defer dcp.deinit(); - return try unpackTarball(f, tmp_directory.handle, dcp.reader()); + var adapter_buffer: [1024]u8 = undefined; + var adapter = dcp.reader().adaptToNewApi(&adapter_buffer); + return try unpackTarball(f, tmp_directory.handle, &adapter.new_interface); }, .@"tar.zst" => { const window_size = std.compress.zstd.DecompressorOptions.default_window_buffer_len; @@ -1225,7 +1232,9 @@ fn unpackResource( var dcp = std.compress.zstd.decompressor(br.reader(), .{ .window_buffer = window_buffer, }); - return try unpackTarball(f, tmp_directory.handle, dcp.reader()); + var adapter_buffer: [1024]u8 = undefined; + var adapter = dcp.reader().adaptToNewApi(&adapter_buffer); + return try unpackTarball(f, tmp_directory.handle, &adapter.new_interface); }, .git_pack => return unpackGitPack(f, tmp_directory.handle, &resource.git) catch |err| switch (err) { error.FetchFailed => return error.FetchFailed, @@ -1239,7 +1248,7 @@ fn unpackResource( } } -fn unpackTarball(f: *Fetch, out_dir: fs.Dir, reader: anytype) RunError!UnpackResult { +fn unpackTarball(f: *Fetch, out_dir: fs.Dir, reader: *std.Io.Reader) RunError!UnpackResult { const eb = &f.error_bundle; const arena = f.arena.allocator(); @@ -1250,10 +1259,10 @@ fn unpackTarball(f: *Fetch, out_dir: fs.Dir, reader: anytype) RunError!UnpackRes .strip_components = 0, .mode_mode = .ignore, .exclude_empty_directories = true, - }) catch |err| return f.fail(f.location_tok, try eb.printString( - "unable to unpack tarball to temporary directory: {s}", - .{@errorName(err)}, - )); + }) catch |err| return f.fail( + f.location_tok, + try eb.printString("unable to unpack tarball to temporary directory: {t}", .{err}), + ); var res: UnpackResult = .{ .root_dir = diagnostics.root_dir }; if (diagnostics.errors.items.len > 0) {