From da303bdaf1ae8717df2d4ede9e7dfb215636ae33 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 26 May 2025 20:31:35 -0700 Subject: [PATCH] std: fix a bunch of compilation errors --- lib/std/coff.zig | 26 +- lib/std/compress/flate.zig | 78 +--- lib/std/compress/flate/Compress.zig | 329 ++++++++------ lib/std/compress/flate/Decompress.zig | 606 ++++++++++++++------------ lib/std/compress/flate/Lookup.zig | 2 +- lib/std/compress/xz/test.zig | 5 +- lib/std/compress/zstd/Decompress.zig | 4 +- lib/std/crypto/codecs/asn1.zig | 7 +- lib/std/crypto/tls/Client.zig | 4 +- lib/std/debug/Dwarf.zig | 2 +- lib/std/debug/Pdb.zig | 10 +- lib/std/elf.zig | 215 +++++---- lib/std/fmt.zig | 17 +- lib/std/io.zig | 60 +++ lib/std/io/AllocatingWriter.zig | 2 +- lib/std/io/BufferedReader.zig | 11 +- lib/std/io/Reader.zig | 86 +--- lib/std/io/Reader/Limited.zig | 14 +- lib/std/io/Writer.zig | 3 +- lib/std/os/uefi/protocol/file.zig | 24 - lib/std/testing.zig | 5 + lib/std/tz.zig | 21 +- lib/std/zip.zig | 179 +++++--- lib/std/zip/test.zig | 132 ++++-- src/Package/Fetch.zig | 3 +- 25 files changed, 1008 insertions(+), 837 deletions(-) diff --git a/lib/std/coff.zig b/lib/std/coff.zig index da7dc0bcda..0b9c407e70 100644 --- a/lib/std/coff.zig +++ b/lib/std/coff.zig @@ -1087,16 +1087,14 @@ pub const Coff = struct { const pe_pointer_offset = 0x3C; const pe_magic = "PE\x00\x00"; - var stream = std.io.fixedBufferStream(data); - const reader = stream.reader(); - try stream.seekTo(pe_pointer_offset); + var reader: std.io.BufferedReader = undefined; + reader.initFixed(data[pe_pointer_offset..]); const coff_header_offset = try reader.readInt(u32, .little); - try stream.seekTo(coff_header_offset); - var buf: [4]u8 = undefined; - try reader.readNoEof(&buf); - const is_image = mem.eql(u8, pe_magic, &buf); + reader.initFixed(data[coff_header_offset..]); + const magic = try reader.peek(4); + const is_image = mem.eql(u8, pe_magic, magic); - var coff = @This(){ + var coff: Coff = .{ .data = data, .is_image = is_image, .is_loaded = is_loaded, @@ -1123,16 +1121,16 @@ pub const Coff = struct { if (@intFromEnum(DirectoryEntry.DEBUG) >= data_dirs.len) return null; const debug_dir = data_dirs[@intFromEnum(DirectoryEntry.DEBUG)]; - var stream = std.io.fixedBufferStream(self.data); - const reader = stream.reader(); + var reader: std.io.BufferedReader = undefined; + reader.initFixed(self.data); if (self.is_loaded) { - try stream.seekTo(debug_dir.virtual_address); + reader.initFixed(self.data[debug_dir.virtual_address..]); } else { // Find what section the debug_dir is in, in order to convert the RVA to a file offset for (self.getSectionHeaders()) |*sect| { if (debug_dir.virtual_address >= sect.virtual_address and debug_dir.virtual_address < sect.virtual_address + sect.virtual_size) { - try stream.seekTo(sect.pointer_to_raw_data + (debug_dir.virtual_address - sect.virtual_address)); + reader.initFixed(self.data[sect.pointer_to_raw_data + (debug_dir.virtual_address - sect.virtual_address) ..]); break; } } else return error.InvalidDebugDirectory; @@ -1143,10 +1141,10 @@ pub const Coff = struct { const debug_dir_entry_count = debug_dir.size / @sizeOf(DebugDirectoryEntry); var i: u32 = 0; while (i < debug_dir_entry_count) : (i += 1) { - const debug_dir_entry = try reader.readStruct(DebugDirectoryEntry); + const debug_dir_entry = try reader.takeStruct(DebugDirectoryEntry); if (debug_dir_entry.type == .CODEVIEW) { const dir_offset = if (self.is_loaded) debug_dir_entry.address_of_raw_data else debug_dir_entry.pointer_to_raw_data; - try 
stream.seekTo(dir_offset); + reader.initFixed(self.data[dir_offset..]); break; } } else return null; diff --git a/lib/std/compress/flate.zig b/lib/std/compress/flate.zig index 324ef816e3..8e57998aad 100644 --- a/lib/std/compress/flate.zig +++ b/lib/std/compress/flate.zig @@ -68,81 +68,23 @@ pub const Container = enum { // // CINFO = 7, CM = 8, FLEVEL = 0b10, FDICT = 0, FCHECK = 0b11100 .zlib => &[_]u8{ 0x78, 0b10_0_11100 }, - .raw => &{}, + .raw => &.{}, }; } - pub fn parseHeader(comptime wrap: Container, reader: *std.io.BufferedReader) !void { - switch (wrap) { - .gzip => try parseGzipHeader(reader), - .zlib => try parseZlibHeader(reader), - .raw => {}, - } - } - - fn parseGzipHeader(reader: *std.io.BufferedReader) !void { - const magic1 = try reader.read(u8); - const magic2 = try reader.read(u8); - const method = try reader.read(u8); - const flags = try reader.read(u8); - try reader.skipBytes(6); // mtime(4), xflags, os - if (magic1 != 0x1f or magic2 != 0x8b or method != 0x08) - return error.BadGzipHeader; - // Flags description: https://www.rfc-editor.org/rfc/rfc1952.html#page-5 - if (flags != 0) { - if (flags & 0b0000_0100 != 0) { // FEXTRA - const extra_len = try reader.read(u16); - try reader.skipBytes(extra_len); - } - if (flags & 0b0000_1000 != 0) { // FNAME - try reader.skipStringZ(); - } - if (flags & 0b0001_0000 != 0) { // FCOMMENT - try reader.skipStringZ(); - } - if (flags & 0b0000_0010 != 0) { // FHCRC - try reader.skipBytes(2); - } - } - } - - fn parseZlibHeader(reader: *std.io.BufferedReader) !void { - const cm = try reader.read(u4); - const cinfo = try reader.read(u4); - _ = try reader.read(u8); - if (cm != 8 or cinfo > 7) { - return error.BadZlibHeader; - } - } - - pub fn parseFooter(comptime wrap: Container, hasher: *Hasher(wrap), reader: *std.io.BufferedReader) !void { - switch (wrap) { - .gzip => { - try reader.fill(0); - if (try reader.read(u32) != hasher.chksum()) return error.WrongGzipChecksum; - if (try reader.read(u32) != hasher.bytesRead()) return error.WrongGzipSize; - }, - .zlib => { - const chksum: u32 = @byteSwap(hasher.chksum()); - if (try reader.read(u32) != chksum) return error.WrongZlibChecksum; - }, - .raw => {}, - } - } - pub const Hasher = union(Container) { + raw: void, gzip: struct { crc: std.hash.Crc32 = .init(), count: usize = 0, }, zlib: std.hash.Adler32, - raw: void, pub fn init(containter: Container) Hasher { return switch (containter) { .gzip => .{ .gzip = .{} }, .zlib => .{ .zlib = .init() }, - .raw => {}, + .raw => .raw, }; } @@ -288,15 +230,18 @@ test "compress/decompress" { // compress original stream to compressed stream { var original: std.io.BufferedReader = undefined; - original.initFixed(data); + original.initFixed(@constCast(data)); var compressed: std.io.BufferedWriter = undefined; compressed.initFixed(&cmp_buf); - try Compress.pump(container, original.reader(), &compressed, .{ .level = level }); + var compress: Compress = .init(&original, .raw); + var compress_br = compress.readable(&.{}); + const n = try compress_br.readRemaining(&compressed, .{ .level = level }); if (compressed_size == 0) { if (container == .gzip) print("case {d} gzip level {} compressed size: {d}\n", .{ case_no, level, compressed.pos }); compressed_size = compressed.pos; } + try testing.expectEqual(compressed_size, n); try testing.expectEqual(compressed_size, compressed.pos); } // decompress compressed stream to decompressed stream @@ -688,9 +633,7 @@ pub const match = struct { pub const max_distance = 32768; }; -pub const history = struct { - pub const len = 
match.max_distance; -}; +pub const history_len = match.max_distance; pub const lookup = struct { pub const bits = 15; @@ -707,7 +650,8 @@ test "zlib should not overshoot" { 0x03, 0x00, 0x8b, 0x61, 0x0f, 0xa4, 0x52, 0x5a, 0x94, 0x12, }; - var stream = std.io.fixedBufferStream(data[0..]); + var stream: std.io.BufferedReader = undefined; + stream.initFixed(&data); const reader = stream.reader(); var dcp = Decompress.init(reader); diff --git a/lib/std/compress/flate/Compress.zig b/lib/std/compress/flate/Compress.zig index 22a88d2f60..bc54128f05 100644 --- a/lib/std/compress/flate/Compress.zig +++ b/lib/std/compress/flate/Compress.zig @@ -51,9 +51,37 @@ const math = std.math; const Compress = @This(); const Token = @import("Token.zig"); const BlockWriter = @import("BlockWriter.zig"); -const Container = std.compress.flate.Container; +const flate = @import("../flate.zig"); +const Container = flate.Container; const Lookup = @import("Lookup.zig"); -const huffman = std.compress.flate.huffman; +const huffman = flate.huffman; + +lookup: Lookup = .{}, +tokens: Tokens = .{}, +/// Asserted to have a buffer capacity of at least `flate.max_window_len`. +input: *std.io.BufferedReader, +block_writer: BlockWriter, +level: LevelArgs, +hasher: Container.Hasher, + +// Match and literal at the previous position. +// Used for lazy match finding in processWindow. +prev_match: ?Token = null, +prev_literal: ?u8 = null, + +pub fn readable(c: *Compress, buffer: []u8) std.io.BufferedReader { + return .{ + .unbuffered_reader = .{ + .context = c, + .vtable = .{ + .read = read, + .readVec = readVec, + .discard = discard, + }, + }, + .buffer = buffer, + }; +} pub const Options = struct { level: Level = .default, @@ -77,10 +105,10 @@ pub const Level = enum(u4) { best = 0xd, }; -// Number of tokens to accumulate in deflate before starting block encoding. -// -// In zlib this depends on memlevel: 6 + memlevel, where default memlevel is -// 8 and max 9 that gives 14 or 15 bits. +/// Number of tokens to accumulate in deflate before starting block encoding. +/// +/// In zlib this depends on memlevel: 6 + memlevel, where default memlevel is +/// 8 and max 9 that gives 14 or 15 bits. pub const n_tokens = 1 << 15; /// Algorithm knobs for each level. @@ -102,85 +130,60 @@ const LevelArgs = struct { } }; -lookup: Lookup = .{}, -tokens: Tokens = .{}, -output: *std.io.BufferedWriter, -block_writer: BlockWriter, -level: LevelArgs, -hasher: Container.Hasher, - -// Match and literal at the previous position. -// Used for lazy match finding in processWindow. -prev_match: ?Token = null, -prev_literal: ?u8 = null, - -pub fn init(output: *std.io.BufferedWriter, options: Options) std.io.Writer.Error!Compress { - try output.writeAll(options.container.header(output)); +pub fn init(input: *std.io.BufferedReader, options: Options) Compress { return .{ - .output = output, - .block_writer = .init(output), + .input = input, + .block_writer = undefined, .level = .get(options.level), .hasher = .init(options.container), + .state = .header, }; } const FlushOption = enum { none, flush, final }; -// Process data in window and create tokens. If token buffer is full -// flush tokens to the token writer. In the case of `flush` or `final` -// option it will process all data from the window. In the `none` case -// it will preserve some data for the next match. -fn tokenize(self: *Compress, flush_opt: FlushOption) !void { - // flush - process all data from window - const should_flush = (flush_opt != .none); +/// Process data in window and create tokens. 
If token buffer is full +/// flush tokens to the token writer. +/// +/// Returns number of bytes consumed from `lh`. +fn tokenizeSlice(c: *Compress, bw: *std.io.BufferedWriter, limit: std.io.Limit, lh: []const u8) !usize { + _ = bw; + _ = limit; + if (true) @panic("TODO"); + var step: u16 = 1; // 1 in the case of literal, match length otherwise + const pos: u16 = c.win.pos(); + const literal = lh[0]; // literal at current position + const min_len: u16 = if (c.prev_match) |m| m.length() else 0; - // While there is data in active lookahead buffer. - while (self.win.activeLookahead(should_flush)) |lh| { - var step: u16 = 1; // 1 in the case of literal, match length otherwise - const pos: u16 = self.win.pos(); - const literal = lh[0]; // literal at current position - const min_len: u16 = if (self.prev_match) |m| m.length() else 0; + // Try to find match at least min_len long. + if (c.findMatch(pos, lh, min_len)) |match| { + // Found better match than previous. + try c.addPrevLiteral(); - // Try to find match at least min_len long. - if (self.findMatch(pos, lh, min_len)) |match| { - // Found better match than previous. - try self.addPrevLiteral(); - - // Is found match length good enough? - if (match.length() >= self.level.lazy) { - // Don't try to lazy find better match, use this. - step = try self.addMatch(match); - } else { - // Store this match. - self.prev_literal = literal; - self.prev_match = match; - } + // Is found match length good enough? + if (match.length() >= c.level.lazy) { + // Don't try to lazy find better match, use this. + step = try c.addMatch(match); } else { - // There is no better match at current pos then it was previous. - // Write previous match or literal. - if (self.prev_match) |m| { - // Write match from previous position. - step = try self.addMatch(m) - 1; // we already advanced 1 from previous position - } else { - // No match at previous position. - // Write previous literal if any, and remember this literal. - try self.addPrevLiteral(); - self.prev_literal = literal; - } + // Store this match. + c.prev_literal = literal; + c.prev_match = match; + } + } else { + // There is no better match at current pos then it was previous. + // Write previous match or literal. + if (c.prev_match) |m| { + // Write match from previous position. + step = try c.addMatch(m) - 1; // we already advanced 1 from previous position + } else { + // No match at previous position. + // Write previous literal if any, and remember this literal. + try c.addPrevLiteral(); + c.prev_literal = literal; } - // Advance window and add hashes. - self.windowAdvance(step, lh, pos); - } - - if (should_flush) { - // In the case of flushing, last few lookahead buffers were smaller then min match len. - // So only last literal can be unwritten. - assert(self.prev_match == null); - try self.addPrevLiteral(); - self.prev_literal = null; - - try self.flushTokens(flush_opt); } + // Advance window and add hashes. + c.windowAdvance(step, lh, pos); } fn windowAdvance(self: *Compress, step: u16, lh: []const u8, pos: u16) void { @@ -226,7 +229,7 @@ fn findMatch(self: *Compress, pos: u16, lh: []const u8, min_len: u16) ?Token { // Hot path loop! 
while (prev_pos > 0 and chain > 0) : (chain -= 1) { const distance = pos - prev_pos; - if (distance > std.compress.flate.match.max_distance) + if (distance > flate.match.max_distance) break; const new_len = self.win.match(prev_pos, pos, len); @@ -272,33 +275,6 @@ fn slide(self: *Compress) void { self.lookup.slide(n); } -/// Compresses as much data as possible, stops when the reader becomes -/// empty. It will introduce some output latency (reading input without -/// producing all output) because some data are still in internal -/// buffers. -/// -/// It is up to the caller to call flush (if needed) or finish (required) -/// when is need to output any pending data or complete stream. -/// -pub fn compress(self: *Compress, reader: anytype) !void { - while (true) { - // Fill window from reader - const buf = self.win.writable(); - if (buf.len == 0) { - try self.tokenize(.none); - self.slide(); - continue; - } - const n = try reader.readAll(buf); - self.hasher.update(buf[0..n]); - self.win.written(n); - // Process window - try self.tokenize(.none); - // Exit when no more data in reader - if (n < buf.len) break; - } -} - /// Flushes internal buffers to the output writer. Outputs empty stored /// block to sync bit stream to the byte boundary, so that the /// decompressor can get all input data available so far. @@ -311,8 +287,8 @@ pub fn compress(self: *Compress, reader: anytype) !void { /// stored block that is three zero bits plus filler bits to the next /// byte, followed by four bytes (00 00 ff ff). /// -pub fn flush(self: *Compress) !void { - try self.tokenize(.flush); +pub fn flush(c: *Compress) !void { + try c.tokenize(.flush); } /// Completes deflate bit stream by writing any pending data as deflate @@ -320,9 +296,9 @@ pub fn flush(self: *Compress) !void { /// the compressor as a signal that next block has to have final bit /// set. /// -pub fn finish(self: *Compress) !void { - try self.tokenize(.final); - try self.hasher.writeFooter(self.output); +pub fn finish(c: *Compress) !void { + _ = c; + @panic("TODO"); } /// Use another writer while preserving history. Most probably flush @@ -437,24 +413,6 @@ fn SimpleCompressor( } self.wp = 0; } - - // Writes all data from the input reader of uncompressed data. - // It is up to the caller to call flush or finish if there is need to - // output compressed blocks. 
- pub fn compress(self: *Self, reader: anytype) !void { - while (true) { - // read from rdr into buffer - const buf = self.buffer[self.wp..]; - if (buf.len == 0) { - try self.flushBuffer(false); - continue; - } - const n = try reader.readAll(buf); - self.hasher.update(buf[0..n]); - self.wp += n; - if (n < buf.len) break; // no more data in reader - } - } }; } @@ -811,6 +769,119 @@ fn byFreq(context: void, a: LiteralNode, b: LiteralNode) bool { return a.freq < b.freq; } +fn read( + context: ?*anyopaque, + bw: *std.io.BufferedWriter, + limit: std.io.Reader.Limit, +) std.io.Reader.RwError!usize { + const c: *Compress = @ptrCast(@alignCast(context)); + switch (c.state) { + .header => |i| { + const header = c.hasher.container().header(); + const n = try bw.write(header[i..]); + if (header.len - i - n == 0) { + c.state = .middle; + } else { + c.state.header += n; + } + return n; + }, + .middle => { + c.input.fillMore() catch |err| switch (err) { + error.EndOfStream => { + c.state = .final; + return 0; + }, + else => |e| return e, + }; + const buffer_contents = c.input.bufferContents(); + const min_lookahead = flate.match.min_length + flate.match.max_length; + const history_plus_lookahead_len = flate.history_len + min_lookahead; + if (buffer_contents.len < history_plus_lookahead_len) return 0; + const lookahead = buffer_contents[flate.history_len..]; + const start = bw.count; + const n = try c.tokenizeSlice(bw, limit, lookahead) catch |err| switch (err) { + error.WriteFailed => return error.WriteFailed, + }; + c.hasher.update(lookahead[0..n]); + c.input.toss(n); + return bw.count - start; + }, + .final => { + const buffer_contents = c.input.bufferContents(); + const start = bw.count; + const n = c.tokenizeSlice(bw, limit, buffer_contents) catch |err| switch (err) { + error.WriteFailed => return error.WriteFailed, + }; + if (buffer_contents.len - n == 0) { + c.hasher.update(buffer_contents); + c.input.tossAll(); + { + // In the case of flushing, last few lookahead buffers were + // smaller than min match len, so only last literal can be + // unwritten. + assert(c.prev_match == null); + try c.addPrevLiteral(); + c.prev_literal = null; + + try c.flushTokens(.final); + } + switch (c.hasher) { + .gzip => |*gzip| { + // GZIP 8 bytes footer + // - 4 bytes, CRC32 (CRC-32) + // - 4 bytes, ISIZE (Input SIZE) - size of the original (uncompressed) input data modulo 2^32 + comptime assert(c.footer_buffer.len == 8); + std.mem.writeInt(u32, c.footer_buffer[0..4], gzip.final(), .little); + std.mem.writeInt(u32, c.footer_buffer[4..8], gzip.bytes_read, .little); + c.state = .{ .footer = 0 }; + }, + .zlib => |*zlib| { + // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952). + // 4 bytes of ADLER32 (Adler-32 checksum) + // Checksum value of the uncompressed data (excluding any + // dictionary data) computed according to Adler-32 + // algorithm. 
+ comptime assert(c.footer_buffer.len == 8); + std.mem.writeInt(u32, c.footer_buffer[4..8], zlib.final, .big); + c.state = .{ .footer = 4 }; + }, + .raw => { + c.state = .ended; + }, + } + } + return bw.count - start; + }, + .ended => return error.EndOfStream, + .footer => |i| { + const remaining = c.footer_buffer[i..]; + const n = try bw.write(limit.slice(remaining)); + c.state = if (n == remaining) .ended else .{ .footer = i - n }; + return n; + }, + } +} + +fn readVec(context: ?*anyopaque, data: []const []u8) std.io.Reader.Error!usize { + var bw: std.io.BufferedWriter = undefined; + bw.initVec(data); + return read(context, &bw, .countVec(data)) catch |err| switch (err) { + error.WriteFailed => unreachable, // Prevented by the limit. + else => |e| return e, + }; +} + +fn discard(context: ?*anyopaque, limit: std.io.Reader.Limit) std.io.Reader.Error!usize { + var trash_buffer: [64]u8 = undefined; + var null_writer: std.io.Writer.Null = undefined; + var bw = null_writer.writer().buffered(&trash_buffer); + return read(context, &bw, limit) catch |err| switch (err) { + error.WriteFailed => unreachable, + else => |e| return e, + }; +} + test "generate a Huffman code from an array of frequencies" { var freqs: [19]u16 = [_]u16{ 8, // 0 @@ -1099,7 +1170,8 @@ test "file tokenization" { const data = case.data; for (levels, 0..) |level, i| { // for each compression level - var original = io.fixedBufferStream(data); + var original: std.io.BufferedReader = undefined; + original.initFixed(data); // buffer for decompressed data var al = std.ArrayList(u8).init(testing.allocator); @@ -1173,21 +1245,22 @@ test "store simple compressor" { //0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64, 0x21, }; - var fbs = std.io.fixedBufferStream(data); + var fbs: std.io.BufferedReader = undefined; + fbs.initFixed(data); var al = std.ArrayList(u8).init(testing.allocator); defer al.deinit(); var cmp = try store.compressor(.raw, al.writer()); - try cmp.compress(fbs.reader()); + try cmp.compress(&fbs); try cmp.finish(); try testing.expectEqualSlices(u8, &expected, al.items); - fbs.reset(); + fbs.initFixed(data); try al.resize(0); // huffman only compresoor will also emit store block for this small sample var hc = try huffman.compressor(.raw, al.writer()); - try hc.compress(fbs.reader()); + try hc.compress(&fbs); try hc.finish(); try testing.expectEqualSlices(u8, &expected, al.items); } diff --git a/lib/std/compress/flate/Decompress.zig b/lib/std/compress/flate/Decompress.zig index 76a4566d76..7c5e93aa9f 100644 --- a/lib/std/compress/flate/Decompress.zig +++ b/lib/std/compress/flate/Decompress.zig @@ -22,32 +22,38 @@ const flate = std.compress.flate; const Container = flate.Container; const Token = @import("Token.zig"); const testing = std.testing; +const Decompress = @This(); input: *std.io.BufferedReader, // Hashes, produces checksum, of uncompressed data for gzip/zlib footer. 
-hasher: Container.Hasher(), +hasher: Container.Hasher, // dynamic block huffman code decoders lit_dec: LiteralDecoder, dst_dec: DistanceDecoder, // current read state -bfinal: u1, -block_type: u2, -state: ReadState, +final_block: bool, +state: State, -read_err: Error!void, +read_err: ?Error, -const ReadState = enum { +const BlockType = enum(u2) { + stored = 0, + fixed = 1, + dynamic = 2, +}; + +const State = union(enum) { protocol_header, block_header, - block, + stored_block: u16, + fixed_block, + dynamic_block, protocol_footer, end, }; -const Decompress = @This(); - pub const Error = Container.Error || error{ InvalidCode, InvalidMatch, @@ -61,71 +67,25 @@ pub const Error = Container.Error || error{ MissingEndOfBlockCode, }; -pub fn init(input: *std.io.BufferedReader) Decompress { +pub fn init(input: *std.io.BufferedReader, container: Container) Decompress { return .{ .input = input, - .hasher = .{}, + .hasher = .init(container), .lit_dec = .{}, .dst_dec = .{}, - .bfinal = 0, - .block_type = 0b11, + .final_block = false, .state = .protocol_header, - .read_err = {}, + .read_err = null, }; } -fn blockHeader(self: *Decompress) Error!void { - self.bfinal = try self.bits.read(u1); - self.block_type = try self.bits.read(u2); -} - -fn storedBlock(self: *Decompress) !bool { - self.bits.alignToByte(); // skip padding until byte boundary - // everything after this is byte aligned in stored block - var len = try self.bits.read(u16); - const nlen = try self.bits.read(u16); - if (len != ~nlen) return error.WrongStoredBlockNlen; - - while (len > 0) { - const buf = self.hist.getWritable(len); - try self.bits.readAll(buf); - len -= @intCast(buf.len); - } - return true; -} - -fn fixedBlock(self: *Decompress) !bool { - while (!self.hist.full()) { - const code = try self.bits.readFixedCode(); - switch (code) { - 0...255 => self.hist.write(@intCast(code)), - 256 => return true, // end of block - 257...285 => try self.fixedDistanceCode(@intCast(code - 257)), - else => return error.InvalidCode, - } - } - return false; -} - -// Handles fixed block non literal (length) code. -// Length code is followed by 5 bits of distance code. 
-fn fixedDistanceCode(self: *Decompress, code: u8) !void { - try self.bits.fill(5 + 5 + 13); - const length = try self.decodeLength(code); - const distance = try self.decodeDistance(try self.bits.readF(u5, .{ - .buffered = true, - .reverse = true, - })); - try self.hist.writeMatch(length, distance); -} - fn decodeLength(self: *Decompress, code: u8) !u16 { if (code > 28) return error.InvalidCode; const ml = Token.matchLength(code); return if (ml.extra_bits == 0) // 0 - 5 extra bits ml.base else - ml.base + try self.bits.readN(ml.extra_bits, .{ .buffered = true }); + ml.base + try self.takeNBitsBuffered(ml.extra_bits); } fn decodeDistance(self: *Decompress, code: u8) !u16 { @@ -134,42 +94,7 @@ fn decodeDistance(self: *Decompress, code: u8) !u16 { return if (md.extra_bits == 0) // 0 - 13 extra bits md.base else - md.base + try self.bits.readN(md.extra_bits, .{ .buffered = true }); -} - -fn dynamicBlockHeader(self: *Decompress) !void { - const hlit: u16 = @as(u16, try self.bits.read(u5)) + 257; // number of ll code entries present - 257 - const hdist: u16 = @as(u16, try self.bits.read(u5)) + 1; // number of distance code entries - 1 - const hclen: u8 = @as(u8, try self.bits.read(u4)) + 4; // hclen + 4 code lengths are encoded - - if (hlit > 286 or hdist > 30) - return error.InvalidDynamicBlockHeader; - - // lengths for code lengths - var cl_lens = [_]u4{0} ** 19; - for (0..hclen) |i| { - cl_lens[flate.huffman.codegen_order[i]] = try self.bits.read(u3); - } - var cl_dec: CodegenDecoder = .{}; - try cl_dec.generate(&cl_lens); - - // decoded code lengths - var dec_lens = [_]u4{0} ** (286 + 30); - var pos: usize = 0; - while (pos < hlit + hdist) { - const sym = try cl_dec.find(try self.bits.peekF(u7, .{ .reverse = true })); - try self.bits.shift(sym.code_bits); - pos += try self.dynamicCodeLength(sym.symbol, &dec_lens, pos); - } - if (pos > hlit + hdist) { - return error.InvalidDynamicBlockHeader; - } - - // literal code lengths to literal decoder - try self.lit_dec.generate(dec_lens[0..hlit]); - - // distance code lengths to distance decoder - try self.dst_dec.generate(dec_lens[hlit .. hlit + hdist]); + md.base + try self.takeNBitsBuffered(md.extra_bits); } // Decode code length symbol to code length. Writes decoded length into @@ -188,7 +113,7 @@ fn dynamicCodeLength(self: *Decompress, code: u16, lens: []u4, pos: usize) !usiz 16 => { // Copy the previous code length 3 - 6 times. // The next 2 bits indicate repeat length - const n: u8 = @as(u8, try self.bits.read(u2)) + 3; + const n: u8 = @as(u8, try self.takeBits(u2)) + 3; if (pos == 0 or pos + n > lens.len) return error.InvalidDynamicBlockHeader; for (0..n) |i| { @@ -197,188 +122,258 @@ fn dynamicCodeLength(self: *Decompress, code: u16, lens: []u4, pos: usize) !usiz return n; }, // Repeat a code length of 0 for 3 - 10 times. (3 bits of length) - 17 => return @as(u8, try self.bits.read(u3)) + 3, + 17 => return @as(u8, try self.takeBits(u3)) + 3, // Repeat a code length of 0 for 11 - 138 times (7 bits of length) - 18 => return @as(u8, try self.bits.read(u7)) + 11, + 18 => return @as(u8, try self.takeBits(u7)) + 11, else => return error.InvalidDynamicBlockHeader, } } -// In larger archives most blocks are usually dynamic, so decompression -// performance depends on this function. -fn dynamicBlock(self: *Decompress) !bool { - // Hot path loop! 
- while (!self.hist.full()) { - // optimization so other bit reads can be buffered (avoiding one `if` in hot path) - try self.bits.fill(15); - const sym = try self.decodeSymbol(&self.lit_dec); - - switch (sym.kind) { - .literal => self.hist.write(sym.symbol), - .match => { - // Decode match backreference - try self.bits.fill(5 + 15 + 13); - const length = try self.decodeLength(sym.symbol); - const dsm = try self.decodeSymbol(&self.dst_dec); - const distance = try self.decodeDistance(dsm.symbol); - try self.hist.writeMatch(length, distance); - }, - .end_of_block => return true, - } - } - return false; -} - // Peek 15 bits from bits reader (maximum code len is 15 bits). Use // decoder to find symbol for that code. We then know how many bits is // used. Shift bit reader for that much bits, those bits are used. And // return symbol. fn decodeSymbol(self: *Decompress, decoder: anytype) !Symbol { - const sym = try decoder.find(try self.bits.peekF(u15, .{ .buffered = true, .reverse = true })); - try self.bits.shift(sym.code_bits); + const sym = try decoder.find(try self.peekBitsReverseBuffered(u15)); + try self.shiftBits(sym.code_bits); return sym; } -fn step(self: *Decompress) !void { - switch (self.state) { - .protocol_header => { - try self.hasher.container().parseHeader(&self.bits); - self.state = .block_header; - }, - .block_header => { - try self.blockHeader(); - self.state = .block; - if (self.block_type == 2) try self.dynamicBlockHeader(); - }, - .block => { - const done = switch (self.block_type) { - 0 => try self.storedBlock(), - 1 => try self.fixedBlock(), - 2 => try self.dynamicBlock(), - else => return error.InvalidBlockType, - }; - if (done) { - self.state = if (self.bfinal == 1) .protocol_footer else .block_header; - } - }, - .protocol_footer => { - self.bits.alignToByte(); - try self.hasher.container().parseFooter(&self.hasher, &self.bits); - self.state = .end; - }, - .end => {}, - } -} - -/// Replaces the inner reader with new reader. -pub fn setReader(self: *Decompress, new_reader: *std.io.BufferedReader) void { - self.bits.forward_reader = new_reader; - if (self.state == .end or self.state == .protocol_footer) { - self.state = .protocol_header; - } -} - -// Reads all compressed data from the internal reader and outputs plain -// (uncompressed) data to the provided writer. -pub fn decompress(self: *Decompress, writer: *std.io.BufferedWriter) !void { - while (try self.next()) |buf| { - try writer.writeAll(buf); - } -} - -/// Returns the number of bytes that have been read from the internal -/// reader but not yet consumed by the decompressor. -pub fn unreadBytes(self: Decompress) usize { - // There can be no error here: the denominator is not zero, and - // overflow is not possible since the type is unsigned. - return std.math.divCeil(usize, self.bits.nbits, 8) catch unreachable; -} - -// Iterator interface - -/// Can be used in iterator like loop without memcpy to another buffer: -/// while (try inflate.next()) |buf| { ... } -pub fn next(self: *Decompress) Error!?[]const u8 { - const out = try self.get(0); - if (out.len == 0) return null; - return out; -} - -/// Returns decompressed data from internal sliding window buffer. -/// Returned buffer can be any length between 0 and `limit` bytes. 0 -/// returned bytes means end of stream reached. With limit=0 returns as -/// much data it can. It newer will be more than 65536 bytes, which is -/// size of internal buffer. 
-/// TODO merge this logic into readerRead and readerReadVec -pub fn get(self: *Decompress, limit: usize) Error![]const u8 { - while (true) { - const out = self.hist.readAtMost(limit); - if (out.len > 0) { - self.hasher.update(out); - return out; - } - if (self.state == .end) return out; - try self.step(); - } -} - -fn readerRead( +pub fn read( context: ?*anyopaque, bw: *std.io.BufferedWriter, limit: std.io.Reader.Limit, ) std.io.Reader.RwError!usize { - const self: *Decompress = @alignCast(@ptrCast(context)); - const out = try bw.writableSliceGreedy(1); - const in = self.get(limit.minInt(out.len)) catch |err| switch (err) { + const d: *Decompress = @alignCast(@ptrCast(context)); + return readInner(d, bw, limit) catch |err| switch (err) { error.EndOfStream => return error.EndOfStream, - error.ReadFailed => return error.ReadFailed, + error.WriteFailed => return error.WriteFailed, else => |e| { - self.read_err = e; - return error.ReadFailed; - }, - }; - if (in.len == 0) return error.EndOfStream; - @memcpy(out[0..in.len], in); - bw.advance(in.len); - return in.len; -} - -fn readerReadVec(context: ?*anyopaque, data: []const []u8) std.io.Reader.Error!usize { - const self: *Decompress = @alignCast(@ptrCast(context)); - return readVec(self, data) catch |err| switch (err) { - error.EndOfStream => return error.EndOfStream, - error.ReadFailed => return error.ReadFailed, - else => |e| { - self.read_err = e; + // In the event of an error, state is unmodified so that it can be + // better used to diagnose the failure. + d.read_err = e; return error.ReadFailed; }, }; } -fn readerDiscard(context: ?*anyopaque, limit: std.io.Reader.Limit) std.io.Reader.Error!usize { +fn readInner( + d: *Decompress, + bw: *std.io.BufferedWriter, + limit: std.io.Reader.Limit, +) (Error || error{ WriteFailed, EndOfStream })!usize { + const in = d.input; + sw: switch (d.state) { + .protocol_header => switch (d.hasher.container()) { + .gzip => { + const Header = extern struct { + magic: u16 align(1), + method: u8, + flags: packed struct(u8) { + text: bool, + hcrc: bool, + extra: bool, + name: bool, + comment: bool, + reserved: u3, + }, + mtime: u32 align(1), + xfl: u8, + os: u8, + }; + const header = try in.takeStructEndian(Header, .little); + if (header.magic != 0x8b1f or header.method != 0x08) + return error.BadGzipHeader; + if (header.flags.extra) { + const extra_len = try in.takeInt(u16, .little); + try in.discardAll(extra_len); + } + if (header.flags.name) { + try in.discardDelimiterInclusive(0); + } + if (header.flags.comment) { + try in.discardDelimiterInclusive(0); + } + if (header.flags.hcrc) { + try in.discardAll(2); + } + continue :sw .block_header; + }, + .zlib => { + const Header = extern struct { + cmf: packed struct(u8) { + cm: u4, + cinfo: u4, + }, + flg: u8, + }; + const header = try in.takeStruct(Header); + if (header.cmf.cm != 8 or header.cmf.cinfo > 7) return error.BadZlibHeader; + continue :sw .block_header; + }, + .raw => continue :sw .block_header, + }, + .block_header => { + d.final_block = (try d.takeBits(u1)) != 0; + const block_type = try d.takeBits(BlockType); + switch (block_type) { + .stored => { + d.alignBitsToByte(); // skip padding until byte boundary + // everything after this is byte aligned in stored block + const len = try in.takeInt(u16, .little); + const nlen = try in.takeInt(u16, .little); + if (len != ~nlen) return error.WrongStoredBlockNlen; + continue :sw .{ .stored_block = len }; + }, + .fixed => continue :sw .fixed_block, + .dynamic => { + const hlit: u16 = @as(u16, try 
d.takeBits(u5)) + 257; // number of ll code entries present - 257 + const hdist: u16 = @as(u16, try d.takeBits(u5)) + 1; // number of distance code entries - 1 + const hclen: u8 = @as(u8, try d.takeBits(u4)) + 4; // hclen + 4 code lengths are encoded + + if (hlit > 286 or hdist > 30) + return error.InvalidDynamicBlockHeader; + + // lengths for code lengths + var cl_lens = [_]u4{0} ** 19; + for (0..hclen) |i| { + cl_lens[flate.huffman.codegen_order[i]] = try d.takeBits(u3); + } + var cl_dec: CodegenDecoder = .{}; + try cl_dec.generate(&cl_lens); + + // decoded code lengths + var dec_lens = [_]u4{0} ** (286 + 30); + var pos: usize = 0; + while (pos < hlit + hdist) { + const sym = try cl_dec.find(try d.peekBitsReverse(u7)); + try d.shiftBits(sym.code_bits); + pos += try d.dynamicCodeLength(sym.symbol, &dec_lens, pos); + } + if (pos > hlit + hdist) { + return error.InvalidDynamicBlockHeader; + } + + // literal code lengths to literal decoder + try d.lit_dec.generate(dec_lens[0..hlit]); + + // distance code lengths to distance decoder + try d.dst_dec.generate(dec_lens[hlit .. hlit + hdist]); + + continue :sw .dynamic_block; + }, + } + }, + .stored_block => |remaining_len| { + const out = try bw.writableSliceGreedyPreserving(flate.history_len, 1); + const limited_out = limit.min(.limited(remaining_len)).slice(out); + const n = try d.input.readVec(bw, &.{limited_out}); + if (remaining_len - n == 0) { + d.state = if (d.final_block) .protocol_footer else .block_header; + } else { + d.state = .{ .stored_block = remaining_len - n }; + } + bw.advance(n); + return n; + }, + .fixed_block => { + const start = bw.count; + while (@intFromEnum(limit) > bw.count - start) { + const code = try d.readFixedCode(); + switch (code) { + 0...255 => try bw.writeBytePreserving(flate.history_len, @intCast(code)), + 256 => { + d.state = if (d.final_block) .protocol_footer else .block_header; + return bw.count - start; + }, + 257...285 => { + // Handles fixed block non literal (length) code. + // Length code is followed by 5 bits of distance code. + const rebased_code = code - 257; + const length = try d.decodeLength(rebased_code); + const distance = try d.decodeDistance(try d.takeBitsReverseBuffered(u5)); + try writeMatch(bw, length, distance); + }, + else => return error.InvalidCode, + } + } + d.state = .fixed_block; + return bw.count - start; + }, + .dynamic_block => { + // In larger archives most blocks are usually dynamic, so decompression + // performance depends on this logic. 
+ const start = bw.count; + while (@intFromEnum(limit) > bw.count - start) { + const sym = try d.decodeSymbol(&d.lit_dec); + + switch (sym.kind) { + .literal => d.hist.write(sym.symbol), + .match => { + // Decode match backreference + const length = try d.decodeLength(sym.symbol); + const dsm = try d.decodeSymbol(&d.dst_dec); + const distance = try d.decodeDistance(dsm.symbol); + try writeMatch(bw, length, distance); + }, + .end_of_block => { + d.state = if (d.final_block) .protocol_footer else .block_header; + return bw.count - start; + }, + } + } + d.state = .dynamic_block; + return bw.count - start; + }, + .protocol_footer => { + d.alignBitsToByte(); + switch (d.hasher.container()) { + .gzip => |*gzip| { + if (try reader.read(u32) != gzip.final()) return error.WrongGzipChecksum; + if (try reader.read(u32) != gzip.count) return error.WrongGzipSize; + }, + .zlib => |*zlib| { + const chksum: u32 = @byteSwap(zlib.final()); + if (try reader.read(u32) != chksum) return error.WrongZlibChecksum; + }, + .raw => {}, + } + d.state = .end; + return 0; + }, + .end => return error.EndOfStream, + } +} + +fn readVec(context: ?*anyopaque, data: []const []u8) std.io.Reader.Error!usize { + _ = context; + _ = data; + @panic("TODO remove readVec primitive"); +} + +fn discard(context: ?*anyopaque, limit: std.io.Reader.Limit) std.io.Reader.Error!usize { _ = context; _ = limit; - @panic("TODO"); + // Problem here is we still need access to the output ring buffer. + @panic("TODO allow discard to be null"); } -pub fn readVec(self: *Decompress, data: []const []u8) Error!usize { - for (data) |out| { - if (out.len == 0) continue; - const in = try self.get(out.len); - @memcpy(out[0..in.len], in); - if (in.len == 0) return error.EndOfStream; - return in.len; - } - return 0; +/// Write match (back-reference to the same data slice) starting at `distance` +/// back from current write position, and `length` of bytes. 
+fn writeMatch(bw: *std.io.BufferedWriter, length: u16, distance: u16) !void { + _ = bw; + _ = length; + _ = distance; + @panic("TODO"); } pub fn reader(self: *Decompress) std.io.Reader { return .{ .context = self, .vtable = &.{ - .read = readerRead, - .readVec = readerReadVec, - .discard = readerDiscard, + .read = read, + .readVec = readVec, + .discard = discard, }, }; } @@ -387,6 +382,43 @@ pub fn readable(self: *Decompress, buffer: []u8) std.io.BufferedReader { return reader(self).buffered(buffer); } +fn takeBits(d: *Decompress, comptime T: type) !T { + _ = d; + @panic("TODO"); +} + +fn takeNBitsBuffered(d: *Decompress, n: u4) !u16 { + _ = d; + _ = n; + @panic("TODO"); +} + +fn peekBitsReverse(d: *Decompress, comptime T: type) !T { + _ = d; + @panic("TODO"); +} + +fn peekBitsReverseBuffered(d: *Decompress, comptime T: type) !T { + _ = d; + @panic("TODO"); +} + +fn alignBitsToByte(d: *Decompress) void { + _ = d; + @panic("TODO"); +} + +fn shiftBits(d: *Decompress, n: u6) !void { + _ = d; + _ = n; + @panic("TODO"); +} + +fn readFixedCode(d: *Decompress) !u16 { + _ = d; + @panic("TODO"); +} + pub const Symbol = packed struct { pub const Kind = enum(u2) { literal, @@ -712,12 +744,16 @@ test "decompress" { }, }; for (cases) |c| { - var fb = std.io.fixedBufferStream(c.in); - var al = std.ArrayList(u8).init(testing.allocator); - defer al.deinit(); + var fb: std.io.BufferedReader = undefined; + fb.initFixed(@constCast(c.in)); + var aw: std.io.AllocatingWriter = undefined; + aw.init(testing.allocator); + defer aw.deinit(); - try decompress(.raw, fb.reader(), al.writer()); - try testing.expectEqualStrings(c.out, al.items); + var decompress: Decompress = .init(&fb, .raw); + var decompress_br = decompress.readable(&.{}); + _ = try decompress_br.readRemaining(&aw.buffered_writer); + try testing.expectEqualStrings(c.out, aw.getWritten()); } } @@ -769,12 +805,16 @@ test "gzip decompress" { }, }; for (cases) |c| { - var fb = std.io.fixedBufferStream(c.in); - var al = std.ArrayList(u8).init(testing.allocator); - defer al.deinit(); + var fb: std.io.BufferedReader = undefined; + fb.initFixed(@constCast(c.in)); + var aw: std.io.AllocatingWriter = undefined; + aw.init(testing.allocator); + defer aw.deinit(); - try decompress(.gzip, fb.reader(), al.writer()); - try testing.expectEqualStrings(c.out, al.items); + var decompress: Decompress = .init(&fb, .gzip); + var decompress_br = decompress.readable(&.{}); + _ = try decompress_br.readRemaining(&aw.buffered_writer); + try testing.expectEqualStrings(c.out, aw.getWritten()); } } @@ -795,12 +835,16 @@ test "zlib decompress" { }, }; for (cases) |c| { - var fb = std.io.fixedBufferStream(c.in); - var al = std.ArrayList(u8).init(testing.allocator); - defer al.deinit(); + var fb: std.io.BufferedReader = undefined; + fb.initFixed(@constCast(c.in)); + var aw: std.io.AllocatingWriter = undefined; + aw.init(testing.allocator); + defer aw.deinit(); - try decompress(.zlib, fb.reader(), al.writer()); - try testing.expectEqualStrings(c.out, al.items); + var decompress: Decompress = .init(&fb, .zlib); + var decompress_br = decompress.readable(&.{}); + _ = try decompress_br.readRemaining(&aw.buffered_writer); + try testing.expectEqualStrings(c.out, aw.getWritten()); } } @@ -853,16 +897,21 @@ test "fuzzing tests" { }; inline for (cases, 0..) 
|c, case_no| { - var in = std.io.fixedBufferStream(@embedFile("testdata/fuzz/" ++ c.input ++ ".input")); - var out = std.ArrayList(u8).init(testing.allocator); - defer out.deinit(); + var in: std.io.BufferedReader = undefined; + in.initFixed(@constCast(@embedFile("testdata/fuzz/" ++ c.input ++ ".input"))); + var aw: std.io.AllocatingWriter = undefined; + aw.init(testing.allocator); + defer aw.deinit(); errdefer std.debug.print("test case failed {}\n", .{case_no}); + var decompress: Decompress = .init(&in, .raw); + var decompress_br = decompress.readable(&.{}); if (c.err) |expected_err| { - try testing.expectError(expected_err, decompress(.raw, in.reader(), out.writer())); + try testing.expectError(error.ReadFailed, decompress_br.readRemaining(&aw.buffered_writer)); + try testing.expectError(expected_err, decompress.read_err.?); } else { - try decompress(.raw, in.reader(), out.writer()); - try testing.expectEqualStrings(c.out, out.items); + _ = try decompress_br.readRemaining(&aw.buffered_writer); + try testing.expectEqualStrings(c.out, aw.getWritten()); } } } @@ -871,21 +920,28 @@ test "bug 18966" { const input = @embedFile("testdata/fuzz/bug_18966.input"); const expect = @embedFile("testdata/fuzz/bug_18966.expect"); - var in = std.io.fixedBufferStream(input); - var out = std.ArrayList(u8).init(testing.allocator); - defer out.deinit(); + var in: std.io.BufferedReader = undefined; + in.initFixed(@constCast(input)); + var aw: std.io.AllocatingWriter = undefined; + aw.init(testing.allocator); + defer aw.deinit(); - try decompress(.gzip, in.reader(), out.writer()); - try testing.expectEqualStrings(expect, out.items); + var decompress: Decompress = .init(&in, .gzip); + var decompress_br = decompress.readable(&.{}); + _ = try decompress_br.readRemaining(&aw.buffered_writer); + try testing.expectEqualStrings(expect, aw.getWritten()); } -test "bug 19895" { +test "reading into empty buffer" { + // Inspired by https://github.com/ziglang/zig/issues/19895 const input = &[_]u8{ 0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // deflate fixed buffer header len, nlen 'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a, // non compressed data }; - var in = std.io.fixedBufferStream(input); - var decomp = Decompress.init(.raw, in.reader()); + var in: std.io.BufferedReader = undefined; + in.initFixed(@constCast(input)); + var decomp: Decompress = .init(&in, .raw); + var decompress_br = decomp.readable(&.{}); var buf: [0]u8 = undefined; - try testing.expectEqual(0, try decomp.read(&buf)); + try testing.expectEqual(0, try decompress_br.readVec(&.{&buf})); } diff --git a/lib/std/compress/flate/Lookup.zig b/lib/std/compress/flate/Lookup.zig index b3660e8877..722e175c8a 100644 --- a/lib/std/compress/flate/Lookup.zig +++ b/lib/std/compress/flate/Lookup.zig @@ -10,7 +10,7 @@ const flate = @import("../flate.zig"); const Lookup = @This(); const prime4 = 0x9E3779B1; // 4 bytes prime number 2654435761 -const chain_len = 2 * flate.history.len; +const chain_len = 2 * flate.history_len; // Maps hash => first position head: [flate.lookup.len]u16 = [_]u16{0} ** flate.lookup.len, diff --git a/lib/std/compress/xz/test.zig b/lib/std/compress/xz/test.zig index 08180e45c0..02f74e4421 100644 --- a/lib/std/compress/xz/test.zig +++ b/lib/std/compress/xz/test.zig @@ -3,9 +3,10 @@ const testing = std.testing; const xz = std.compress.xz; fn decompress(data: []const u8) ![]u8 { - var in_stream = std.io.fixedBufferStream(data); + var in_stream: std.io.BufferedReader = undefined; + in_stream.initFixed(data); - var xz_stream = 
try xz.decompress(testing.allocator, in_stream.reader()); + var xz_stream = try xz.decompress(testing.allocator, &in_stream); defer xz_stream.deinit(); return xz_stream.reader().readAllAlloc(testing.allocator, std.math.maxInt(usize)); diff --git a/lib/std/compress/zstd/Decompress.zig b/lib/std/compress/zstd/Decompress.zig index fcc1cb8ecf..b477029dbc 100644 --- a/lib/std/compress/zstd/Decompress.zig +++ b/lib/std/compress/zstd/Decompress.zig @@ -267,8 +267,8 @@ fn discard(context: ?*anyopaque, limit: Reader.Limit) Reader.Error!usize { fn readVec(context: ?*anyopaque, data: []const []u8) Reader.Error!usize { var bw: BufferedWriter = undefined; - bw.initFixed(data[0]); - return read(context, &bw, .limited(data[0].len)) catch |err| switch (err) { + bw.initVec(data); + return read(context, &bw, .countVec(data)) catch |err| switch (err) { error.WriteFailed => unreachable, else => |e| return e, }; diff --git a/lib/std/crypto/codecs/asn1.zig b/lib/std/crypto/codecs/asn1.zig index 5ae156ecbd..70580d12da 100644 --- a/lib/std/crypto/codecs/asn1.zig +++ b/lib/std/crypto/codecs/asn1.zig @@ -154,7 +154,8 @@ pub const Tag = struct { test Tag { const buf = [_]u8{0xa3}; - var stream = std.io.fixedBufferStream(&buf); + var stream: std.io.BufferedReader = undefined; + stream.initFixed(&buf); const t = Tag.decode(stream.reader()); try std.testing.expectEqual(Tag.init(@enumFromInt(3), true, .context_specific), t); } @@ -184,8 +185,8 @@ pub const Element = struct { /// - Ensures length is within `bytes` /// - Ensures length is less than `std.math.maxInt(Index)` pub fn decode(bytes: []const u8, index: Index) DecodeError!Element { - var stream = std.io.fixedBufferStream(bytes[index..]); - var reader = stream.reader(); + var reader: std.io.BufferedReader = undefined; + reader.initFixed(bytes[index..]); const tag = try Tag.decode(reader); const size_or_len_size = try reader.readByte(); diff --git a/lib/std/crypto/tls/Client.zig b/lib/std/crypto/tls/Client.zig index bbcf166a78..e5b98abdea 100644 --- a/lib/std/crypto/tls/Client.zig +++ b/lib/std/crypto/tls/Client.zig @@ -1227,8 +1227,8 @@ fn read(context: ?*anyopaque, bw: *std.io.BufferedWriter, limit: Reader.Limit) R fn readVec(context: ?*anyopaque, data: []const []u8) Reader.Error!usize { var bw: std.io.BufferedWriter = undefined; - bw.initFixed(data[0]); - return read(context, &bw, .limited(data[0].len)) catch |err| switch (err) { + bw.initVec(data); + return read(context, &bw, .countVec(data)) catch |err| switch (err) { error.WriteFailed => unreachable, else => |e| return e, }; diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index e81e997948..1b52e1a589 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -2241,7 +2241,7 @@ pub const ElfModule = struct { if (chdr.ch_type != .ZLIB) continue; const ch_size = chdr.ch_size; - var zlib_stream: std.compress.zlib.Decompressor = .init(§ion_reader); + var zlib_stream: std.compress.flate.Decompress = .init(§ion_reader, .zlib); const decompressed_section = zlib_stream.reader().readRemainingAlloc(gpa, .limited(ch_size)) catch continue; if (decompressed_section.len != ch_size) { diff --git a/lib/std/debug/Pdb.zig b/lib/std/debug/Pdb.zig index 3b1adb3c56..599686c14d 100644 --- a/lib/std/debug/Pdb.zig +++ b/lib/std/debug/Pdb.zig @@ -67,7 +67,7 @@ pub fn parseDbiStream(self: *Pdb) !void { return error.InvalidDebugInfo; const reader = stream.reader(); - const header = try reader.readStruct(std.pdb.DbiStreamHeader); + const header = try reader.takeStruct(std.pdb.DbiStreamHeader); if 
(header.version_header != 19990903) // V70, only value observed by LLVM team return error.UnknownPDBVersion; // if (header.Age != age) @@ -82,7 +82,7 @@ pub fn parseDbiStream(self: *Pdb) !void { // Module Info Substream var mod_info_offset: usize = 0; while (mod_info_offset != mod_info_size) { - const mod_info = try reader.readStruct(pdb.ModInfo); + const mod_info = try reader.takeStruct(pdb.ModInfo); var this_record_len: usize = @sizeOf(pdb.ModInfo); const module_name = try reader.readUntilDelimiterAlloc(self.allocator, 0, 1024); @@ -131,7 +131,7 @@ pub fn parseDbiStream(self: *Pdb) !void { } while (sect_cont_offset != section_contrib_size) { const entry = try sect_contribs.addOne(); - entry.* = try reader.readStruct(pdb.SectionContribEntry); + entry.* = try reader.takeStruct(pdb.SectionContribEntry); sect_cont_offset += @sizeOf(pdb.SectionContribEntry); if (sect_cont_offset > section_contrib_size) @@ -175,7 +175,7 @@ pub fn parseInfoStream(self: *Pdb) !void { return cap * 2 / 3 + 1; } }; - const hash_tbl_hdr = try reader.readStruct(HashTableHeader); + const hash_tbl_hdr = try reader.takeStruct(HashTableHeader); if (hash_tbl_hdr.capacity == 0) return error.InvalidDebugInfo; @@ -397,7 +397,7 @@ const Msf = struct { fn init(allocator: Allocator, file: File) !Msf { const in = file.reader(); - const superblock = try in.readStruct(pdb.SuperBlock); + const superblock = try in.takeStruct(pdb.SuperBlock); // Sanity checks if (!std.mem.eql(u8, &superblock.file_magic, pdb.SuperBlock.expect_magic)) diff --git a/lib/std/elf.zig b/lib/std/elf.zig index 023430d110..63f5d156dd 100644 --- a/lib/std/elf.zig +++ b/lib/std/elf.zig @@ -494,30 +494,39 @@ pub const Header = struct { shnum: u16, shstrndx: u16, - pub fn program_header_iterator(self: Header, parse_source: anytype) ProgramHeaderIterator(@TypeOf(parse_source)) { - return ProgramHeaderIterator(@TypeOf(parse_source)){ - .elf_header = self, - .parse_source = parse_source, + pub fn iterateProgramHeaders(h: Header, file_reader: *std.fs.File.Reader) ProgramHeaderIterator { + return .{ + .elf_header = h, + .file_reader = file_reader, }; } - pub fn section_header_iterator(self: Header, parse_source: anytype) SectionHeaderIterator(@TypeOf(parse_source)) { - return SectionHeaderIterator(@TypeOf(parse_source)){ - .elf_header = self, - .parse_source = parse_source, + pub fn iterateSectionHeaders(h: Header, file_reader: *std.fs.File.Reader) SectionHeaderIterator { + return .{ + .elf_header = h, + .file_reader = file_reader, }; } - pub fn read(parse_source: anytype) !Header { - var hdr_buf: [@sizeOf(Elf64_Ehdr)]u8 align(@alignOf(Elf64_Ehdr)) = undefined; - try parse_source.seekableStream().seekTo(0); - try parse_source.reader().readNoEof(&hdr_buf); - return Header.parse(&hdr_buf); + pub const ReadError = std.io.Reader.Error || ParseError; + + pub fn read(br: *std.io.BufferedReader) ReadError!Header { + const buf = try br.peek(@sizeOf(Elf64_Ehdr)); + const result = try parse(@ptrCast(buf)); + br.toss(if (result.is_64) @sizeOf(Elf64_Ehdr) else @sizeOf(Elf32_Ehdr)); + return result; } - pub fn parse(hdr_buf: *align(@alignOf(Elf64_Ehdr)) const [@sizeOf(Elf64_Ehdr)]u8) !Header { - const hdr32 = @as(*const Elf32_Ehdr, @ptrCast(hdr_buf)); - const hdr64 = @as(*const Elf64_Ehdr, @ptrCast(hdr_buf)); + pub const ParseError = error{ + InvalidElfMagic, + InvalidElfVersion, + InvalidElfClass, + InvalidElfEndian, + }; + + pub fn parse(hdr_buf: *align(@alignOf(Elf64_Ehdr)) const [@sizeOf(Elf64_Ehdr)]u8) ParseError!Header { + const hdr32: *const Elf32_Ehdr = @ptrCast(hdr_buf); + 
const hdr64: *const Elf64_Ehdr = @ptrCast(hdr_buf); if (!mem.eql(u8, hdr32.e_ident[0..4], MAGIC)) return error.InvalidElfMagic; if (hdr32.e_ident[EI_VERSION] != 1) return error.InvalidElfVersion; @@ -541,19 +550,19 @@ pub const Header = struct { // The meaning of this value depends on `os_abi` so just make it available as `u8`. const abi_version = hdr32.e_ident[EI_ABIVERSION]; - const @"type" = if (need_bswap) blk: { + const @"type": ET = if (need_bswap) blk: { comptime assert(!@typeInfo(ET).@"enum".is_exhaustive); const value = @intFromEnum(hdr32.e_type); - break :blk @as(ET, @enumFromInt(@byteSwap(value))); + break :blk @enumFromInt(@byteSwap(value)); } else hdr32.e_type; - const machine = if (need_bswap) blk: { + const machine: EM = if (need_bswap) blk: { comptime assert(!@typeInfo(EM).@"enum".is_exhaustive); const value = @intFromEnum(hdr32.e_machine); - break :blk @as(EM, @enumFromInt(@byteSwap(value))); + break :blk @enumFromInt(@byteSwap(value)); } else hdr32.e_machine; - return @as(Header, .{ + return .{ .is_64 = is_64, .endian = endian, .os_abi = os_abi, @@ -568,111 +577,91 @@ pub const Header = struct { .shentsize = int(is_64, need_bswap, hdr32.e_shentsize, hdr64.e_shentsize), .shnum = int(is_64, need_bswap, hdr32.e_shnum, hdr64.e_shnum), .shstrndx = int(is_64, need_bswap, hdr32.e_shstrndx, hdr64.e_shstrndx), - }); + }; } }; -pub fn ProgramHeaderIterator(comptime ParseSource: anytype) type { - return struct { - elf_header: Header, - parse_source: ParseSource, - index: usize = 0, +pub const ProgramHeaderIterator = struct { + elf_header: Header, + file_reader: *std.fs.File.Reader, + index: usize = 0, - pub fn next(self: *@This()) !?Elf64_Phdr { - if (self.index >= self.elf_header.phnum) return null; - defer self.index += 1; + pub fn next(it: *ProgramHeaderIterator) !?Elf64_Phdr { + if (it.index >= it.elf_header.phnum) return null; + defer it.index += 1; - if (self.elf_header.is_64) { - var phdr: Elf64_Phdr = undefined; - const offset = self.elf_header.phoff + @sizeOf(@TypeOf(phdr)) * self.index; - try self.parse_source.seekableStream().seekTo(offset); - try self.parse_source.reader().readNoEof(mem.asBytes(&phdr)); - - // ELF endianness matches native endianness. - if (self.elf_header.endian == native_endian) return phdr; - - // Convert fields to native endianness. + if (it.elf_header.is_64) { + var phdr: Elf64_Phdr = undefined; + const offset = it.elf_header.phoff + @sizeOf(@TypeOf(phdr)) * it.index; + try it.file_reader.seekTo(offset); + var br = it.file_reader.readable(&.{}); + try br.readSlice(@ptrCast(&phdr)); + if (it.elf_header.endian != native_endian) mem.byteSwapAllFields(Elf64_Phdr, &phdr); - return phdr; - } - - var phdr: Elf32_Phdr = undefined; - const offset = self.elf_header.phoff + @sizeOf(@TypeOf(phdr)) * self.index; - try self.parse_source.seekableStream().seekTo(offset); - try self.parse_source.reader().readNoEof(mem.asBytes(&phdr)); - - // ELF endianness does NOT match native endianness. - if (self.elf_header.endian != native_endian) { - // Convert fields to native endianness. - mem.byteSwapAllFields(Elf32_Phdr, &phdr); - } - - // Convert 32-bit header to 64-bit. 
- return Elf64_Phdr{ - .p_type = phdr.p_type, - .p_offset = phdr.p_offset, - .p_vaddr = phdr.p_vaddr, - .p_paddr = phdr.p_paddr, - .p_filesz = phdr.p_filesz, - .p_memsz = phdr.p_memsz, - .p_flags = phdr.p_flags, - .p_align = phdr.p_align, - }; + return phdr; } - }; -} -pub fn SectionHeaderIterator(comptime ParseSource: anytype) type { - return struct { - elf_header: Header, - parse_source: ParseSource, - index: usize = 0, + var phdr: Elf32_Phdr = undefined; + const offset = it.elf_header.phoff + @sizeOf(@TypeOf(phdr)) * it.index; + try it.file_reader.seekTo(offset); + var br = it.file_reader.readable(&.{}); + try br.readSlice(@ptrCast(&phdr)); + if (it.elf_header.endian != native_endian) + mem.byteSwapAllFields(Elf32_Phdr, &phdr); + return .{ + .p_type = phdr.p_type, + .p_offset = phdr.p_offset, + .p_vaddr = phdr.p_vaddr, + .p_paddr = phdr.p_paddr, + .p_filesz = phdr.p_filesz, + .p_memsz = phdr.p_memsz, + .p_flags = phdr.p_flags, + .p_align = phdr.p_align, + }; + } +}; - pub fn next(self: *@This()) !?Elf64_Shdr { - if (self.index >= self.elf_header.shnum) return null; - defer self.index += 1; +pub const SectionHeaderIterator = struct { + elf_header: Header, + file_reader: *std.fs.File.Reader, + index: usize = 0, - if (self.elf_header.is_64) { - var shdr: Elf64_Shdr = undefined; - const offset = self.elf_header.shoff + @sizeOf(@TypeOf(shdr)) * self.index; - try self.parse_source.seekableStream().seekTo(offset); - try self.parse_source.reader().readNoEof(mem.asBytes(&shdr)); + pub fn next(it: *SectionHeaderIterator) !?Elf64_Shdr { + if (it.index >= it.elf_header.shnum) return null; + defer it.index += 1; - // ELF endianness matches native endianness. - if (self.elf_header.endian == native_endian) return shdr; - - // Convert fields to native endianness. + if (it.elf_header.is_64) { + var shdr: Elf64_Shdr = undefined; + const offset = it.elf_header.shoff + @sizeOf(@TypeOf(shdr)) * it.index; + try it.file_reader.seekTo(offset); + var br = it.file_reader.readable(&.{}); + try br.readSlice(@ptrCast(&shdr)); + if (it.elf_header.endian != native_endian) mem.byteSwapAllFields(Elf64_Shdr, &shdr); - return shdr; - } - - var shdr: Elf32_Shdr = undefined; - const offset = self.elf_header.shoff + @sizeOf(@TypeOf(shdr)) * self.index; - try self.parse_source.seekableStream().seekTo(offset); - try self.parse_source.reader().readNoEof(mem.asBytes(&shdr)); - - // ELF endianness does NOT match native endianness. - if (self.elf_header.endian != native_endian) { - // Convert fields to native endianness. - mem.byteSwapAllFields(Elf32_Shdr, &shdr); - } - - // Convert 32-bit header to 64-bit. 
- return Elf64_Shdr{ - .sh_name = shdr.sh_name, - .sh_type = shdr.sh_type, - .sh_flags = shdr.sh_flags, - .sh_addr = shdr.sh_addr, - .sh_offset = shdr.sh_offset, - .sh_size = shdr.sh_size, - .sh_link = shdr.sh_link, - .sh_info = shdr.sh_info, - .sh_addralign = shdr.sh_addralign, - .sh_entsize = shdr.sh_entsize, - }; + return shdr; } - }; -} + + var shdr: Elf32_Shdr = undefined; + const offset = it.elf_header.shoff + @sizeOf(@TypeOf(shdr)) * it.index; + try it.file_reader.seekTo(offset); + var br = it.file_reader.readable(&.{}); + try br.readSlice(@ptrCast(&shdr)); + if (it.elf_header.endian != native_endian) + mem.byteSwapAllFields(Elf32_Shdr, &shdr); + return .{ + .sh_name = shdr.sh_name, + .sh_type = shdr.sh_type, + .sh_flags = shdr.sh_flags, + .sh_addr = shdr.sh_addr, + .sh_offset = shdr.sh_offset, + .sh_size = shdr.sh_size, + .sh_link = shdr.sh_link, + .sh_info = shdr.sh_info, + .sh_addralign = shdr.sh_addralign, + .sh_entsize = shdr.sh_entsize, + }; + } +}; fn int(is_64: bool, need_bswap: bool, int_32: anytype, int_64: anytype) @TypeOf(int_64) { if (is_64) { diff --git a/lib/std/fmt.zig b/lib/std/fmt.zig index c241b51ab7..30abafc876 100644 --- a/lib/std/fmt.zig +++ b/lib/std/fmt.zig @@ -828,8 +828,7 @@ pub const BufPrintError = error{ NoSpaceLeft, }; -/// Print a Formatter string into `buf`. Actually just a thin wrapper around `format` and `fixedBufferStream`. -/// Returns a slice of the bytes printed to. +/// Print a Formatter string into `buf`. Returns a slice of the bytes printed. pub fn bufPrint(buf: []u8, comptime fmt: []const u8, args: anytype) BufPrintError![]u8 { var bw: std.io.BufferedWriter = undefined; bw.initFixed(buf); @@ -1015,18 +1014,18 @@ test "int.padded" { test "buffer" { { var buf1: [32]u8 = undefined; - var fbs = std.io.fixedBufferStream(&buf1); - var bw = fbs.writer(); + var bw: std.io.BufferedWriter = undefined; + bw.initFixed(&buf1); try bw.printValue("", .{}, 1234, std.options.fmt_max_depth); - try std.testing.expectEqualStrings("1234", fbs.getWritten()); + try std.testing.expectEqualStrings("1234", bw.getWritten()); - fbs.reset(); + bw.initFixed(&buf1); try bw.printValue("c", .{}, 'a', std.options.fmt_max_depth); - try std.testing.expectEqualStrings("a", fbs.getWritten()); + try std.testing.expectEqualStrings("a", bw.getWritten()); - fbs.reset(); + bw.initFixed(&buf1); try bw.printValue("b", .{}, 0b1100, std.options.fmt_max_depth); - try std.testing.expectEqualStrings("1100", fbs.getWritten()); + try std.testing.expectEqualStrings("1100", bw.getWritten()); } } diff --git a/lib/std/io.zig b/lib/std/io.zig index 3f214062e4..36dd3e681b 100644 --- a/lib/std/io.zig +++ b/lib/std/io.zig @@ -9,6 +9,66 @@ const assert = std.debug.assert; const Allocator = std.mem.Allocator; const Alignment = std.mem.Alignment; +pub const Limit = enum(usize) { + nothing = 0, + unlimited = std.math.maxInt(usize), + _, + + /// `std.math.maxInt(usize)` is interpreted to mean `.unlimited`. 
+ pub fn limited(n: usize) Limit { + return @enumFromInt(n); + } + + pub fn countVec(data: []const []const u8) Limit { + var total: usize = 0; + for (data) |d| total += d.len; + return .limited(total); + } + + pub fn min(a: Limit, b: Limit) Limit { + return @enumFromInt(@min(@intFromEnum(a), @intFromEnum(b))); + } + + pub fn minInt(l: Limit, n: usize) usize { + return @min(n, @intFromEnum(l)); + } + + pub fn slice(l: Limit, s: []u8) []u8 { + return s[0..l.minInt(s.len)]; + } + + pub fn sliceConst(l: Limit, s: []const u8) []const u8 { + return s[0..l.minInt(s.len)]; + } + + pub fn toInt(l: Limit) ?usize { + return switch (l) { + else => @intFromEnum(l), + .unlimited => null, + }; + } + + /// Reduces a slice to account for the limit, leaving room for one extra + /// byte above the limit, allowing for the use case of differentiating + /// between end-of-stream and reaching the limit. + pub fn slice1(l: Limit, non_empty_buffer: []u8) []u8 { + assert(non_empty_buffer.len >= 1); + return non_empty_buffer[0..@min(@intFromEnum(l) +| 1, non_empty_buffer.len)]; + } + + pub fn nonzero(l: Limit) bool { + return @intFromEnum(l) > 0; + } + + /// Return a new limit reduced by `amount` or return `null` indicating + /// limit would be exceeded. + pub fn subtract(l: Limit, amount: usize) ?Limit { + if (l == .unlimited) return .unlimited; + if (amount > @intFromEnum(l)) return null; + return @enumFromInt(@intFromEnum(l) - amount); + } +}; + pub const Reader = @import("io/Reader.zig"); pub const Writer = @import("io/Writer.zig"); diff --git a/lib/std/io/AllocatingWriter.zig b/lib/std/io/AllocatingWriter.zig index e22c95fcad..a14c7e5601 100644 --- a/lib/std/io/AllocatingWriter.zig +++ b/lib/std/io/AllocatingWriter.zig @@ -130,7 +130,7 @@ pub fn clearRetainingCapacity(aw: *AllocatingWriter) void { } fn writeSplat(context: ?*anyopaque, data: []const []const u8, splat: usize) std.io.Writer.Error!usize { - if (data.len == 0 and splat == 0) return 0; + assert(data.len != 0); const aw: *AllocatingWriter = @alignCast(@ptrCast(context)); const start_len = aw.written.len; const bw = &aw.buffered_writer; diff --git a/lib/std/io/BufferedReader.zig b/lib/std/io/BufferedReader.zig index 6d0870ddf4..135d67a3e7 100644 --- a/lib/std/io/BufferedReader.zig +++ b/lib/std/io/BufferedReader.zig @@ -253,6 +253,12 @@ pub fn toss(br: *BufferedReader, n: usize) void { assert(br.seek <= br.end); } +/// Equivalent to `toss(br.bufferedLen())`. +pub fn tossAll(br: *BufferedReader) void { + br.seek = 0; + br.end = 0; +} + /// Equivalent to `peek` followed by `toss`. /// /// The data returned is invalidated by the next call to `take`, `peek`, @@ -791,8 +797,9 @@ pub fn fill(br: *BufferedReader, n: usize) Reader.Error!void { } } -/// Fills the buffer with at least one more byte of data, without advancing the -/// seek position, doing exactly one underlying read. +/// Without advancing the seek position, does exactly one underlying read, filling the buffer as +/// much as possible. This may result in zero bytes added to the buffer, which is not an end of +/// stream condition. End of stream is communicated via returning `error.EndOfStream`. /// /// Asserts buffer capacity is at least 1. pub fn fillMore(br: *BufferedReader) Reader.Error!void { diff --git a/lib/std/io/Reader.zig b/lib/std/io/Reader.zig index b2763ddbfc..dce01d7c04 100644 --- a/lib/std/io/Reader.zig +++ b/lib/std/io/Reader.zig @@ -14,14 +14,17 @@ vtable: *const VTable, pub const VTable = struct { /// Writes bytes from the internally tracked stream position to `bw`. 
/// - /// Returns the number of bytes written, which will be at minimum `0` and at - /// most `limit`. The number of bytes read, including zero, does not - /// indicate end of stream. + /// Returns the number of bytes written, which will be at minimum `0` and + /// at most `limit`. The number returned, including zero, does not indicate + /// end of stream. `limit` is guaranteed to be at least as large as the + /// buffer capacity of `bw`. /// - /// If the reader has an internal seek position, it moves forward in - /// accordance with the number of bytes return from this function. + /// The reader's internal logical seek position moves forward in accordance + /// with the number of bytes returned from this function. /// - /// The implementation should do a maximum of one underlying read call. + /// Implementations are encouraged to utilize mandatory minimum buffer + /// sizes combined with short reads (returning a value less than `limit`) + /// in order to minimize complexity. read: *const fn (context: ?*anyopaque, bw: *BufferedWriter, limit: Limit) RwError!usize, /// Writes bytes from the internally tracked stream position to `data`. @@ -30,10 +33,12 @@ pub const VTable = struct { /// at most the sum of each data slice length. The number of bytes read, /// including zero, does not indicate end of stream. /// - /// If the reader has an internal seek position, it moves forward in - /// accordance with the number of bytes return from this function. + /// The reader's internal logical seek position moves forward in accordance + /// with the number of bytes returned from this function. /// - /// The implementation should do a maximum of one underlying read call. + /// Implementations are encouraged to utilize mandatory minimum buffer + /// sizes combined with short reads (returning a value less than the total + /// buffer capacity inside `data`) in order to minimize complexity. readVec: *const fn (context: ?*anyopaque, data: []const []u8) Error!usize, /// Consumes bytes from the internally tracked stream position without @@ -43,10 +48,12 @@ pub const VTable = struct { /// at most `limit`. The number of bytes returned, including zero, does not /// indicate end of stream. /// - /// If the reader has an internal seek position, it moves forward in - /// accordance with the number of bytes return from this function. + /// The reader's internal logical seek position moves forward in accordance + /// with the number of bytes returned from this function. /// - /// The implementation should do a maximum of one underlying read call. + /// Implementations are encouraged to utilize mandatory minimum buffer + /// sizes combined with short reads (returning a value less than `limit`) + /// in order to minimize complexity. discard: *const fn (context: ?*anyopaque, limit: Limit) Error!usize, }; @@ -78,59 +85,8 @@ pub const ShortError = error{ ReadFailed, }; -pub const Limit = enum(usize) { - nothing = 0, - unlimited = std.math.maxInt(usize), - _, - - /// `std.math.maxInt(usize)` is interpreted to mean `.unlimited`. 
- pub fn limited(n: usize) Limit { - return @enumFromInt(n); - } - - pub fn min(a: Limit, b: Limit) Limit { - return @enumFromInt(@min(@intFromEnum(a), @intFromEnum(b))); - } - - pub fn minInt(l: Limit, n: usize) usize { - return @min(n, @intFromEnum(l)); - } - - pub fn slice(l: Limit, s: []u8) []u8 { - return s[0..l.minInt(s.len)]; - } - - pub fn sliceConst(l: Limit, s: []const u8) []const u8 { - return s[0..l.minInt(s.len)]; - } - - pub fn toInt(l: Limit) ?usize { - return switch (l) { - else => @intFromEnum(l), - .unlimited => null, - }; - } - - /// Reduces a slice to account for the limit, leaving room for one extra - /// byte above the limit, allowing for the use case of differentiating - /// between end-of-stream and reaching the limit. - pub fn slice1(l: Limit, non_empty_buffer: []u8) []u8 { - assert(non_empty_buffer.len >= 1); - return non_empty_buffer[0..@min(@intFromEnum(l) +| 1, non_empty_buffer.len)]; - } - - pub fn nonzero(l: Limit) bool { - return @intFromEnum(l) > 0; - } - - /// Return a new limit reduced by `amount` or return `null` indicating - /// limit would be exceeded. - pub fn subtract(l: Limit, amount: usize) ?Limit { - if (l == .unlimited) return .unlimited; - if (amount > @intFromEnum(l)) return null; - return @enumFromInt(@intFromEnum(l) - amount); - } -}; +/// TODO: no pub +pub const Limit = std.io.Limit; pub fn read(r: Reader, bw: *BufferedWriter, limit: Limit) RwError!usize { const before = bw.count; diff --git a/lib/std/io/Reader/Limited.zig b/lib/std/io/Reader/Limited.zig index fd7197ae47..80c0e2d4c4 100644 --- a/lib/std/io/Reader/Limited.zig +++ b/lib/std/io/Reader/Limited.zig @@ -22,7 +22,7 @@ fn passthruRead(context: ?*anyopaque, bw: *BufferedWriter, limit: Reader.Limit) const l: *Limited = @alignCast(@ptrCast(context)); const combined_limit = limit.min(l.remaining); const n = try l.unlimited_reader.read(bw, combined_limit); - l.remaining.subtract(n); + l.remaining = l.remaining.subtract(n).?; return n; } @@ -30,24 +30,24 @@ fn passthruDiscard(context: ?*anyopaque, limit: Reader.Limit) Reader.Error!usize const l: *Limited = @alignCast(@ptrCast(context)); const combined_limit = limit.min(l.remaining); const n = try l.unlimited_reader.discard(combined_limit); - l.remaining.subtract(n); + l.remaining = l.remaining.subtract(n).?; return n; } fn passthruReadVec(context: ?*anyopaque, data: []const []u8) Reader.Error!usize { const l: *Limited = @alignCast(@ptrCast(context)); if (data.len == 0) return 0; - if (data[0].len >= @intFromEnum(l.limit)) { - const n = try l.unlimited_reader.readVec(&.{l.limit.slice(data[0])}); - l.remaining.subtract(n); + if (data[0].len >= @intFromEnum(l.remaining)) { + const n = try l.unlimited_reader.readVec(&.{l.remaining.slice(data[0])}); + l.remaining = l.remaining.subtract(n).?; return n; } var total: usize = 0; for (data, 0..) 
|buf, i| { total += buf.len; - if (total > @intFromEnum(l.limit)) { + if (total > @intFromEnum(l.remaining)) { const n = try l.unlimited_reader.readVec(data[0..i]); - l.remaining.subtract(n); + l.remaining = l.remaining.subtract(n).?; return n; } } diff --git a/lib/std/io/Writer.zig b/lib/std/io/Writer.zig index e89235c80f..f611f1e339 100644 --- a/lib/std/io/Writer.zig +++ b/lib/std/io/Writer.zig @@ -68,7 +68,8 @@ pub const FileError = std.fs.File.PReadError || error{ Unimplemented, }; -pub const Limit = std.io.Reader.Limit; +/// TODO: no pub +pub const Limit = std.io.Limit; pub const Offset = enum(u64) { zero = 0, diff --git a/lib/std/os/uefi/protocol/file.zig b/lib/std/os/uefi/protocol/file.zig index 2e9c251642..f8802fa64f 100644 --- a/lib/std/os/uefi/protocol/file.zig +++ b/lib/std/os/uefi/protocol/file.zig @@ -79,30 +79,6 @@ pub const File = extern struct { VolumeFull, }; - pub const SeekableStream = io.SeekableStream( - *File, - SeekError, - SeekError, - setPosition, - seekBy, - getPosition, - getEndPos, - ); - pub const Reader = io.Reader(*File, ReadError, read); - pub const Writer = io.Writer(*File, WriteError, write); - - pub fn seekableStream(self: *File) SeekableStream { - return .{ .context = self }; - } - - pub fn reader(self: *File) Reader { - return .{ .context = self }; - } - - pub fn writer(self: *File) Writer { - return .{ .context = self }; - } - pub fn open( self: *const File, file_name: [*:0]const u16, diff --git a/lib/std/testing.zig b/lib/std/testing.zig index c286a12798..edd7b93486 100644 --- a/lib/std/testing.zig +++ b/lib/std/testing.zig @@ -609,6 +609,11 @@ pub const TmpDir = struct { self.parent_dir.close(); self.* = undefined; } + + pub fn createFile(td: *TmpDir) std.fs.File { + _ = td; + @panic("TODO"); + } }; pub fn tmpDir(opts: std.fs.Dir.OpenOptions) TmpDir { diff --git a/lib/std/tz.zig b/lib/std/tz.zig index bff0101439..5ff58f1a5b 100644 --- a/lib/std/tz.zig +++ b/lib/std/tz.zig @@ -54,8 +54,8 @@ pub const Tz = struct { }, }; - pub fn parse(allocator: std.mem.Allocator, reader: anytype) !Tz { - var legacy_header = try reader.readStruct(Header); + pub fn parse(allocator: std.mem.Allocator, reader: *std.io.BufferedReader) !Tz { + var legacy_header = try reader.takeStruct(Header); if (!std.mem.eql(u8, &legacy_header.magic, "TZif")) return error.BadHeader; if (legacy_header.version != 0 and legacy_header.version != '2' and legacy_header.version != '3') return error.BadVersion; @@ -70,7 +70,7 @@ pub const Tz = struct { const skipv = legacy_header.counts.timecnt * 5 + legacy_header.counts.typecnt * 6 + legacy_header.counts.charcnt + legacy_header.counts.leapcnt * 8 + legacy_header.counts.isstdcnt + legacy_header.counts.isutcnt; try reader.skipBytes(skipv, .{}); - var header = try reader.readStruct(Header); + var header = try reader.takeStruct(Header); if (!std.mem.eql(u8, &header.magic, "TZif")) return error.BadHeader; if (header.version != '2' and header.version != '3') return error.BadVersion; if (builtin.target.cpu.arch.endian() != std.builtin.Endian.big) { @@ -215,9 +215,10 @@ pub const Tz = struct { test "slim" { const data = @embedFile("tz/asia_tokyo.tzif"); - var in_stream = std.io.fixedBufferStream(data); + var in_stream: std.io.BufferedReader = undefined; + in_stream.initFixed(data); - var tz = try std.Tz.parse(std.testing.allocator, in_stream.reader()); + var tz = try std.Tz.parse(std.testing.allocator, &in_stream); defer tz.deinit(); try std.testing.expectEqual(tz.transitions.len, 9); @@ -228,9 +229,10 @@ test "slim" { test "fat" { const data = 
@embedFile("tz/antarctica_davis.tzif"); - var in_stream = std.io.fixedBufferStream(data); + var in_stream: std.io.BufferedReader = undefined; + in_stream.initFixed(data); - var tz = try std.Tz.parse(std.testing.allocator, in_stream.reader()); + var tz = try std.Tz.parse(std.testing.allocator, &in_stream); defer tz.deinit(); try std.testing.expectEqual(tz.transitions.len, 8); @@ -241,9 +243,10 @@ test "fat" { test "legacy" { // Taken from Slackware 8.0, from 2001 const data = @embedFile("tz/europe_vatican.tzif"); - var in_stream = std.io.fixedBufferStream(data); + var in_stream: std.io.BufferedReader = undefined; + in_stream.initFixed(data); - var tz = try std.Tz.parse(std.testing.allocator, in_stream.reader()); + var tz = try std.Tz.parse(std.testing.allocator, &in_stream); defer tz.deinit(); try std.testing.expectEqual(tz.transitions.len, 170); diff --git a/lib/std/zip.zig b/lib/std/zip.zig index 5226fe6909..e042e00ee2 100644 --- a/lib/std/zip.zig +++ b/lib/std/zip.zig @@ -112,7 +112,7 @@ pub const EndRecord = extern struct { return record; } - pub const FindFileError = File.GetEndPosError || File.SeekError || error{ + pub const FindFileError = File.GetEndPosError || File.SeekError || File.ReadError || error{ ZipNoEndRecord, EndOfStream, }; @@ -138,6 +138,7 @@ pub const EndRecord = extern struct { var br = fr.interface().unbuffered(); br.readSlice(read_buf) catch |err| switch (err) { error.ReadFailed => return fr.err.?, + error.EndOfStream => return error.EndOfStream, }; loaded_len = new_loaded_len; } @@ -158,45 +159,83 @@ pub const EndRecord = extern struct { } }; -/// Decompresses the given data from `reader` into `writer`. Stops early if more -/// than `uncompressed_size` bytes are processed and verifies that exactly that -/// number of bytes are decompressed. Returns the CRC-32 of the uncompressed data. -/// `writer` can be anything with a `writeAll(self: *Self, chunk: []const u8) anyerror!void` method. 
-pub fn decompress( - method: CompressionMethod, - uncompressed_size: u64, - reader: *std.io.BufferedReader, - writer: *std.io.BufferedWriter, - compressed_remaining: *u64, -) !u32 { - var hash = std.hash.Crc32.init(); - var total_uncompressed: u64 = 0; - switch (method) { - .store => { - reader.writeAll(writer, .limited(compressed_remaining.*)) catch |err| switch (err) { - error.EndOfStream => return error.ZipDecompressTruncated, - else => |e| return e, - }; - total_uncompressed += compressed_remaining.*; - }, - .deflate => { - var decompressor: std.compress.flate.Decompressor = .init(reader); - while (try decompressor.next()) |chunk| { - try writer.writeAll(chunk); - hash.update(chunk); - total_uncompressed += @intCast(chunk.len); - if (total_uncompressed > uncompressed_size) - return error.ZipUncompressSizeTooSmall; - compressed_remaining.* -= chunk.len; - } - }, - _ => return error.UnsupportedCompressionMethod, - } - if (total_uncompressed != uncompressed_size) - return error.ZipUncompressSizeMismatch; +pub const Decompress = union { + inflate: std.compress.flate.Decompress, + store: *std.io.BufferedReader, - return hash.final(); -} + fn readable( + d: *Decompress, + reader: *std.io.BufferedReader, + method: CompressionMethod, + buffer: []u8, + ) std.io.BufferedReader { + switch (method) { + .store => { + d.* = .{ .store = reader }; + return .{ + .unbuffered_reader = .{ + .context = d, + .vtable = &.{ + .read = readStore, + .readVec = readVecUnimplemented, + .discard = discardUnimplemented, + }, + }, + .buffer = buffer, + .end = 0, + .seek = 0, + }; + }, + .deflate => { + d.* = .{ .inflate = .init(reader, .raw) }; + return .{ + .unbuffered_reader = .{ + .context = d, + .vtable = &.{ + .read = readDeflate, + .readVec = readVecUnimplemented, + .discard = discardUnimplemented, + }, + }, + .buffer = buffer, + .end = 0, + .seek = 0, + }; + }, + else => unreachable, + } + } + + fn readStore( + context: ?*anyopaque, + writer: *std.io.BufferedWriter, + limit: std.io.Limit, + ) std.io.Reader.RwError!usize { + const d: *Decompress = @ptrCast(@alignCast(context)); + return d.store.read(writer, limit); + } + + fn readDeflate( + context: ?*anyopaque, + writer: *std.io.BufferedWriter, + limit: std.io.Limit, + ) std.io.Reader.RwError!usize { + const d: *Decompress = @ptrCast(@alignCast(context)); + return std.compress.flate.Decompress.read(&d.inflate, writer, limit); + } + + fn readVecUnimplemented(context: ?*anyopaque, data: []const []u8) std.io.Reader.Error!usize { + _ = context; + _ = data; + @panic("TODO remove readVec primitive"); + } + + fn discardUnimplemented(context: ?*anyopaque, limit: std.io.Reader.Limit) std.io.Reader.Error!usize { + _ = context; + _ = limit; + @panic("TODO allow discard to be null"); + } +}; fn isBadFilename(filename: []const u8) bool { if (filename.len == 0 or filename[0] == '/') @@ -299,8 +338,9 @@ pub const Iterator = struct { return error.ZipTruncated; try input.seekTo(stream_len - locator_end_offset); var br = input.interface().unbuffered(); - const locator = br.readStructEndian(EndLocator64, .little) catch |err| switch (err) { + const locator = br.takeStructEndian(EndLocator64, .little) catch |err| switch (err) { error.ReadFailed => return input.err.?, + error.EndOfStream => return error.EndOfStream, }; if (!std.mem.eql(u8, &locator.signature, &end_locator64_sig)) return error.ZipBadLocatorSig; @@ -311,8 +351,9 @@ pub const Iterator = struct { try input.seekTo(locator.record_file_offset); - const record64 = br.readStructEndian(EndRecord64, .little) catch |err| 
switch (err) { + const record64 = br.takeStructEndian(EndRecord64, .little) catch |err| switch (err) { error.ReadFailed => return input.err.?, + error.EndOfStream => return error.EndOfStream, }; if (!std.mem.eql(u8, &record64.signature, &end_record64_sig)) @@ -367,8 +408,9 @@ pub const Iterator = struct { const input = self.input; try input.seekTo(header_zip_offset); var br = input.interface().unbuffered(); - const header = br.readStructEndian(CentralDirectoryFileHeader, .little) catch |err| switch (err) { + const header = br.takeStructEndian(CentralDirectoryFileHeader, .little) catch |err| switch (err) { error.ReadFailed => return input.err.?, + error.EndOfStream => return error.EndOfStream, }; if (!std.mem.eql(u8, &header.signature, ¢ral_file_header_sig)) return error.ZipBadCdOffset; @@ -399,6 +441,7 @@ pub const Iterator = struct { try input.seekTo(header_zip_offset + @sizeOf(CentralDirectoryFileHeader) + header.filename_len); br.readSlice(extra) catch |err| switch (err) { error.ReadFailed => return input.err.?, + error.EndOfStream => return error.EndOfStream, }; var extra_offset: usize = 0; @@ -454,20 +497,23 @@ pub const Iterator = struct { ) !u32 { if (filename_buf.len < self.filename_len) return error.ZipInsufficientBuffer; + switch (self.compression_method) { + .store, .deflate => {}, + else => return error.UnsupportedCompressionMethod, + } const filename = filename_buf[0..self.filename_len]; - - try stream.seekTo(self.header_zip_offset + @sizeOf(CentralDirectoryFileHeader)); - { - const len = try stream.context.reader().readAll(filename); - if (len != filename.len) - return error.ZipBadFileOffset; + try stream.seekTo(self.header_zip_offset + @sizeOf(CentralDirectoryFileHeader)); + var stream_br = stream.readable(&.{}); + try stream_br.readSlice(filename); } const local_data_header_offset: u64 = local_data_header_offset: { const local_header = blk: { try stream.seekTo(self.file_offset); - break :blk try stream.context.reader().readStructEndian(LocalFileHeader, .little); + var read_buffer: [@sizeOf(LocalFileHeader)]u8 = undefined; + var stream_br = stream.readable(&read_buffer); + break :blk try stream_br.takeStructEndian(LocalFileHeader, .little); }; if (!std.mem.eql(u8, &local_header.signature, &local_file_header_sig)) return error.ZipBadFileOffset; @@ -493,9 +539,8 @@ pub const Iterator = struct { { try stream.seekTo(self.file_offset + @sizeOf(LocalFileHeader) + local_header.filename_len); - const len = try stream.context.reader().readAll(extra); - if (len != extra.len) - return error.ZipTruncated; + var stream_br = stream.readable(&.{}); + try stream_br.readSlice(extra); } var extra_offset: usize = 0; @@ -557,21 +602,31 @@ pub const Iterator = struct { break :blk try dest.createFile(filename, .{ .exclusive = true }); }; defer out_file.close(); + var file_writer = out_file.writer(); + var file_bw = file_writer.writable(&.{}); const local_data_file_offset: u64 = @as(u64, self.file_offset) + @as(u64, @sizeOf(LocalFileHeader)) + local_data_header_offset; try stream.seekTo(local_data_file_offset); - var compressed_remaining: u64 = self.compressed_size; - const crc = try decompress( - self.compression_method, - self.uncompressed_size, - stream.context.reader(), - out_file.writer(), - &compressed_remaining, - ); - if (compressed_remaining != 0) return error.ZipDecompressTruncated; - return crc; + var limited_file_reader = stream.interface().limited(.limited(self.compressed_size)); + var file_read_buffer: [1000]u8 = undefined; + var decompress_read_buffer: [1000]u8 = undefined; + var 
limited_br = limited_file_reader.reader().buffered(&file_read_buffer); + var decompress: Decompress = undefined; + var decompress_br = decompress.readable(&limited_br, self.compression_method, &decompress_read_buffer); + const start_out = file_bw.count; + var hash_writer = file_bw.hashed(std.hash.Crc32.init()); + var hash_bw = hash_writer.writable(&.{}); + decompress_br.readAll(&hash_bw, .limited(self.uncompressed_size)) catch |err| switch (err) { + error.ReadFailed => return stream.err.?, + error.WriteFailed => return file_writer.err.?, + error.EndOfStream => return error.ZipDecompressTruncated, + }; + if (limited_file_reader.remaining.nonzero()) return error.ZipDecompressTruncated; + const written = file_bw.count - start_out; + if (written != self.uncompressed_size) return error.ZipUncompressSizeMismatch; + return hash_writer.hasher.final(); } }; }; diff --git a/lib/std/zip/test.zig b/lib/std/zip/test.zig index 8db603651d..49ebffe15b 100644 --- a/lib/std/zip/test.zig +++ b/lib/std/zip/test.zig @@ -33,8 +33,10 @@ fn expectFiles( std.mem.replaceScalar(u8, normalized_sub_path, '\\', '/'); var file = try dir.openFile(normalized_sub_path, .{}); defer file.close(); + var file_reader = file.reader(); + var file_br = file_reader.readable(&.{}); var content_buf: [4096]u8 = undefined; - const n = try file.reader().readAll(&content_buf); + const n = try file_br.readSliceShort(&content_buf); try testing.expectEqualStrings(test_file.content, content_buf[0..n]); } } @@ -49,24 +51,21 @@ const FileStore = struct { uncompressed_size: usize, }; -fn makeZip(buf: []u8, files: []const File, options: WriteZipOptions) !std.io.BufferedReader { +fn makeZip(file_writer: *std.fs.File.Writer, files: []const File, options: WriteZipOptions) !std.io.BufferedReader { const store = try std.testing.allocator.alloc(FileStore, files.len); defer std.testing.allocator.free(store); - return makeZipWithStore(buf, files, options, store); + return makeZipWithStore(file_writer, files, options, store); } fn makeZipWithStore( - buf: []u8, + file_writer: *std.fs.File.Writer, files: []const File, options: WriteZipOptions, store: []FileStore, -) !std.io.BufferedReader { - var out: std.io.BufferedWriter = undefined; - out.initFixed(buf); - try writeZip(&out, files, store, options); - var result: std.io.BufferedReader = undefined; - result.initFixed(buf[0..out.end]); - return result; +) !void { + var buffer: [200]u8 = undefined; + var bw = file_writer.writable(&buffer); + try writeZip(&bw, files, store, options); } const WriteZipOptions = struct { @@ -201,9 +200,12 @@ const Zipper = struct { const offset = writer.count; var br: std.io.BufferedReader = undefined; br.initFixed(@constCast(opt.content)); - try std.compress.flate.deflate.compress(.raw, &br, writer, .{}); + var compress: std.compress.flate.Compress = .init(&br, .{}); + var compress_br = compress.readable(&.{}); + const n = try compress_br.readRemaining(writer); assert(br.seek == opt.content.len); - compressed_size = @intCast(writer.count - offset); + try testing.expectEqual(n, writer.count - offset); + compressed_size = @intCast(n); }, else => unreachable, } @@ -306,21 +308,27 @@ fn testZipWithStore( write_opt: WriteZipOptions, store: []FileStore, ) !void { - var zip_buf: [4096]u8 = undefined; - var fbs = try makeZipWithStore(&zip_buf, test_files, write_opt, store); - var tmp = testing.tmpDir(.{ .no_follow = true }); defer tmp.cleanup(); - try zip.extract(tmp.dir, fbs.seekableStream(), options); + + var file = tmp.createFile(); + defer file.close(); + var file_writer = 
file.writer(); + try makeZipWithStore(&file_writer, test_files, write_opt, store); + var file_reader = file_writer.moveToReader(); + try zip.extract(tmp.dir, &file_reader, options); try expectFiles(test_files, tmp.dir, .{}); } fn testZipError(expected_error: anyerror, file: File, options: zip.ExtractOptions) !void { - var zip_buf: [4096]u8 = undefined; - var store: [1]FileStore = undefined; - var fbs = try makeZipWithStore(&zip_buf, &[_]File{file}, .{}, &store); var tmp = testing.tmpDir(.{ .no_follow = true }); defer tmp.cleanup(); - try testing.expectError(expected_error, zip.extract(tmp.dir, fbs.seekableStream(), options)); + const tmp_file = tmp.createFile(); + defer tmp_file.close(); + var file_writer = tmp_file.writer(); + var store: [1]FileStore = undefined; + try makeZipWithStore(&file_writer, &[_]File{file}, .{}, &store); + var file_reader = file_writer.moveToReader(); + try testing.expectError(expected_error, zip.extract(tmp.dir, &file_reader, options)); } test "zip one file" { @@ -416,53 +424,93 @@ test "zip64" { test "bad zip files" { var tmp = testing.tmpDir(.{ .no_follow = true }); defer tmp.cleanup(); - var zip_buf: [4096]u8 = undefined; + var buffer: [4096]u8 = undefined; const file_a = [_]File{.{ .name = "a", .content = "", .compression = .store }}; { - var fbs = try makeZip(&zip_buf, &.{}, .{ .end = .{ .sig = [_]u8{ 1, 2, 3, 4 } } }); - try testing.expectError(error.ZipNoEndRecord, zip.extract(tmp.dir, fbs.seekableStream(), .{})); + const tmp_file = tmp.createFile(); + defer tmp_file.close(); + var file_writer = tmp_file.writable(&buffer); + try makeZip(&file_writer, &.{}, .{ .end = .{ .sig = [_]u8{ 1, 2, 3, 4 } } }); + var file_reader = file_writer.moveToReader(); + try testing.expectError(error.ZipNoEndRecord, zip.extract(tmp.dir, &file_reader, .{})); } { - var fbs = try makeZip(&zip_buf, &.{}, .{ .end = .{ .comment_len = 1 } }); - try testing.expectError(error.ZipNoEndRecord, zip.extract(tmp.dir, fbs.seekableStream(), .{})); + const tmp_file = tmp.createFile(); + defer tmp_file.close(); + var file_writer = tmp_file.writable(&buffer); + try makeZip(&file_writer, &.{}, .{ .end = .{ .comment_len = 1 } }); + var file_reader = file_writer.moveToReader(); + try testing.expectError(error.ZipNoEndRecord, zip.extract(tmp.dir, &file_reader, .{})); } { - var fbs = try makeZip(&zip_buf, &.{}, .{ .end = .{ .comment = "a", .comment_len = 0 } }); - try testing.expectError(error.ZipNoEndRecord, zip.extract(tmp.dir, fbs.seekableStream(), .{})); + const tmp_file = tmp.createFile(); + defer tmp_file.close(); + var file_writer = tmp_file.writable(&buffer); + try makeZip(&file_writer, &.{}, .{ .end = .{ .comment = "a", .comment_len = 0 } }); + var file_reader = file_writer.moveToReader(); + try testing.expectError(error.ZipNoEndRecord, zip.extract(tmp.dir, &file_reader, .{})); } { - var fbs = try makeZip(&zip_buf, &.{}, .{ .end = .{ .disk_number = 1 } }); - try testing.expectError(error.ZipMultiDiskUnsupported, zip.extract(tmp.dir, fbs.seekableStream(), .{})); + const tmp_file = tmp.createFile(); + defer tmp_file.close(); + var file_writer = tmp_file.writable(&buffer); + try makeZip(&file_writer, &.{}, .{ .end = .{ .disk_number = 1 } }); + var file_reader = file_writer.moveToReader(); + try testing.expectError(error.ZipMultiDiskUnsupported, zip.extract(tmp.dir, &file_reader, .{})); } { - var fbs = try makeZip(&zip_buf, &.{}, .{ .end = .{ .central_directory_disk_number = 1 } }); - try testing.expectError(error.ZipMultiDiskUnsupported, zip.extract(tmp.dir, fbs.seekableStream(), .{})); + const 
tmp_file = tmp.createFile(); + defer tmp_file.close(); + var file_writer = tmp_file.writable(&buffer); + try makeZip(&file_writer, &.{}, .{ .end = .{ .central_directory_disk_number = 1 } }); + var file_reader = file_writer.moveToReader(); + try testing.expectError(error.ZipMultiDiskUnsupported, zip.extract(tmp.dir, &file_reader, .{})); } { - var fbs = try makeZip(&zip_buf, &.{}, .{ .end = .{ .record_count_disk = 1 } }); - try testing.expectError(error.ZipDiskRecordCountTooLarge, zip.extract(tmp.dir, fbs.seekableStream(), .{})); + const tmp_file = tmp.createFile(); + defer tmp_file.close(); + var file_writer = tmp_file.writable(&buffer); + try makeZip(&file_writer, &.{}, .{ .end = .{ .record_count_disk = 1 } }); + var file_reader = file_writer.moveToReader(); + try testing.expectError(error.ZipDiskRecordCountTooLarge, zip.extract(tmp.dir, &file_reader, .{})); } { - var fbs = try makeZip(&zip_buf, &.{}, .{ .end = .{ .central_directory_size = 1 } }); - try testing.expectError(error.ZipCdOversized, zip.extract(tmp.dir, fbs.seekableStream(), .{})); + const tmp_file = tmp.createFile(); + defer tmp_file.close(); + var file_writer = tmp_file.writable(&buffer); + try makeZip(&file_writer, &.{}, .{ .end = .{ .central_directory_size = 1 } }); + var file_reader = file_writer.moveToReader(); + try testing.expectError(error.ZipCdOversized, zip.extract(tmp.dir, &file_reader, .{})); } { - var fbs = try makeZip(&zip_buf, &file_a, .{ .end = .{ .central_directory_size = 0 } }); - try testing.expectError(error.ZipCdUndersized, zip.extract(tmp.dir, fbs.seekableStream(), .{})); + const tmp_file = tmp.createFile(); + defer tmp_file.close(); + var file_writer = tmp_file.writable(&buffer); + try makeZip(&file_writer, &file_a, .{ .end = .{ .central_directory_size = 0 } }); + var file_reader = file_writer.moveToReader(); + try testing.expectError(error.ZipCdUndersized, zip.extract(tmp.dir, &file_reader, .{})); } { - var fbs = try makeZip(&zip_buf, &file_a, .{ .end = .{ .central_directory_offset = 0 } }); - try testing.expectError(error.ZipBadCdOffset, zip.extract(tmp.dir, fbs.seekableStream(), .{})); + const tmp_file = tmp.createFile(); + defer tmp_file.close(); + var file_writer = tmp_file.writable(&buffer); + try makeZip(&file_writer, &file_a, .{ .end = .{ .central_directory_offset = 0 } }); + var file_reader = file_writer.moveToReader(); + try testing.expectError(error.ZipBadCdOffset, zip.extract(tmp.dir, &file_reader, .{})); } { - var fbs = try makeZip(&zip_buf, &file_a, .{ + const tmp_file = tmp.createFile(); + defer tmp_file.close(); + var file_writer = tmp_file.writable(&buffer); + try makeZip(&file_writer, &file_a, .{ .end = .{ .zip64 = .{ .locator_sig = [_]u8{ 1, 2, 3, 4 } }, .central_directory_size = std.math.maxInt(u32), // trigger 64 }, }); - try testing.expectError(error.ZipBadLocatorSig, zip.extract(tmp.dir, fbs.seekableStream(), .{})); + var file_reader = file_writer.moveToReader(); + try testing.expectError(error.ZipBadLocatorSig, zip.extract(tmp.dir, &file_reader, .{})); } } diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index 6f49cbdb3f..fbcc459443 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -1357,8 +1357,7 @@ fn unpackGitPack(f: *Fetch, out_dir: fs.Dir, resource: *Resource.Git) anyerror!U defer pack_dir.close(); var pack_file = try pack_dir.createFile("pkg.pack", .{ .read = true }); defer pack_file.close(); - var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init(); - try fifo.pump(resource.fetch_stream.reader(), pack_file.writer()); + _ = try 
resource.fetch_stream.reader().readRemaining(pack_file.writer()); try pack_file.sync(); var index_file = try pack_dir.createFile("pkg.idx", .{ .read = true });
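A minimal usage sketch of the `std.io.Limit` type introduced in lib/std/io.zig above. This is illustrative only and not part of the patch; it assumes the `Limit` declarations shown in that hunk (`limited`, `min`, `toInt`, `slice`, `subtract`) land as written and stay public as `std.io.Limit`.

const std = @import("std");
const Limit = std.io.Limit;

test "Limit composition (illustrative sketch)" {
    // Combine a caller-supplied limit with the bytes remaining in a stream;
    // the smaller of the two wins.
    const remaining: Limit = .limited(10);
    const caller: Limit = .limited(64);
    const combined = caller.min(remaining);
    try std.testing.expectEqual(@as(usize, 10), combined.toInt().?);

    // Clamp a buffer to the limit; `.unlimited` leaves it untouched.
    var buf: [32]u8 = undefined;
    try std.testing.expectEqual(@as(usize, 10), combined.slice(&buf).len);
    try std.testing.expectEqual(@as(usize, 32), Limit.unlimited.slice(&buf).len);

    // Account for bytes already consumed; `null` means the limit would be
    // exceeded, which is how Reader/Limited.zig uses it in passthruRead above.
    try std.testing.expectEqual(@as(usize, 4), @intFromEnum(combined.subtract(6).?));
    try std.testing.expectEqual(@as(?Limit, null), combined.subtract(11));
}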