From f2508abfa6461b4d28d6a8b1d72026810ff30a21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Sat, 2 Mar 2024 23:23:23 +0100 Subject: [PATCH] flate: use 4 bytes lookahead for zlib That ensures no bytes are left in the BitReader buffer after we reach end of the stream. --- lib/std/compress/flate.zig | 2 +- lib/std/compress/flate/container.zig | 1 + lib/std/compress/flate/inflate.zig | 28 ++++++++++++++++++++++------ lib/std/compress/gzip.zig | 2 +- lib/std/compress/zlib.zig | 2 +- 5 files changed, 26 insertions(+), 9 deletions(-) diff --git a/lib/std/compress/flate.zig b/lib/std/compress/flate.zig index 354f95b6cc..65af44b7b4 100644 --- a/lib/std/compress/flate.zig +++ b/lib/std/compress/flate.zig @@ -13,7 +13,7 @@ pub fn decompress(reader: anytype, writer: anytype) !void { /// Decompressor type pub fn Decompressor(comptime ReaderType: type) type { - return inflate.Inflate(.raw, ReaderType); + return inflate.Decompressor(.raw, ReaderType); } /// Create Decompressor which will read compressed data from reader. 
diff --git a/lib/std/compress/flate/container.zig b/lib/std/compress/flate/container.zig index 23eec920de..fe6dec446d 100644 --- a/lib/std/compress/flate/container.zig +++ b/lib/std/compress/flate/container.zig @@ -154,6 +154,7 @@ pub const Container = enum { pub fn parseFooter(comptime wrap: Container, hasher: *Hasher(wrap), reader: anytype) !void { switch (wrap) { .gzip => { + try reader.fill(0); if (try reader.read(u32) != hasher.chksum()) return error.WrongGzipChecksum; if (try reader.read(u32) != hasher.bytesRead()) return error.WrongGzipSize; }, diff --git a/lib/std/compress/flate/inflate.zig b/lib/std/compress/flate/inflate.zig index 7b28c52e6c..cf23961b21 100644 --- a/lib/std/compress/flate/inflate.zig +++ b/lib/std/compress/flate/inflate.zig @@ -3,7 +3,7 @@ const assert = std.debug.assert; const testing = std.testing; const hfd = @import("huffman_decoder.zig"); -const BitReader = @import("bit_reader.zig").BitReader64; +const BitReader = @import("bit_reader.zig").BitReader; const CircularBuffer = @import("CircularBuffer.zig"); const Container = @import("container.zig").Container; const Token = @import("Token.zig"); @@ -17,8 +17,16 @@ pub fn decompress(comptime container: Container, reader: anytype, writer: anytyp } /// Inflate decompressor for the reader type. -pub fn decompressor(comptime container: Container, reader: anytype) Inflate(container, @TypeOf(reader)) { - return Inflate(container, @TypeOf(reader)).init(reader); +pub fn decompressor(comptime container: Container, reader: anytype) Decompressor(container, @TypeOf(reader)) { + return Decompressor(container, @TypeOf(reader)).init(reader); +} + +pub fn Decompressor(comptime container: Container, comptime ReaderType: type) type { + // zlib has 4 bytes footer, lookahead of 4 bytes ensures that we will not overshoot. + // gzip has 8 bytes footer so we will not overshoot even with 8 bytes of lookahead. + // For raw deflate there is always a possibility of overshoot so we use 8 bytes lookahead.
+ const lookahead: type = if (container == .zlib) u32 else u64; + return Inflate(container, lookahead, ReaderType); } /// Inflate decompresses deflate bit stream. Reads compressed data from reader @@ -40,9 +48,12 @@ pub fn decompressor(comptime container: Container, reader: anytype) Inflate(cont /// * 64K for history (CircularBuffer) /// * ~10K huffman decoders (Literal and DistanceDecoder) /// -pub fn Inflate(comptime container: Container, comptime ReaderType: type) type { +pub fn Inflate(comptime container: Container, comptime LookaheadType: type, comptime ReaderType: type) type { + assert(LookaheadType == u32 or LookaheadType == u64); + const BitReaderType = BitReader(LookaheadType, ReaderType); + return struct { - const BitReaderType = BitReader(ReaderType); + //const BitReaderType = BitReader(ReaderType); const F = BitReaderType.flag; bits: BitReaderType = .{}, @@ -219,9 +230,14 @@ pub fn Inflate(comptime container: Container, comptime ReaderType: type) type { switch (sym.kind) { .literal => self.hist.write(sym.symbol), .match => { // Decode match backreference - try self.bits.fill(5 + 15 + 13); // so we can use buffered reads + // fill so we can use buffered reads + if (LookaheadType == u32) + try self.bits.fill(5 + 15) + else + try self.bits.fill(5 + 15 + 13); const length = try self.decodeLength(sym.symbol); const dsm = try self.decodeSymbol(&self.dst_dec); + if (LookaheadType == u32) try self.bits.fill(13); const distance = try self.decodeDistance(dsm.symbol); try self.hist.writeMatch(length, distance); }, diff --git a/lib/std/compress/gzip.zig b/lib/std/compress/gzip.zig index 8bb09c612a..e619b575de 100644 --- a/lib/std/compress/gzip.zig +++ b/lib/std/compress/gzip.zig @@ -8,7 +8,7 @@ pub fn decompress(reader: anytype, writer: anytype) !void { /// Decompressor type pub fn Decompressor(comptime ReaderType: type) type { - return inflate.Inflate(.gzip, ReaderType); + return inflate.Decompressor(.gzip, ReaderType); } /// Create Decompressor which will read 
compressed data from reader. diff --git a/lib/std/compress/zlib.zig b/lib/std/compress/zlib.zig index 33401ce845..722047812b 100644 --- a/lib/std/compress/zlib.zig +++ b/lib/std/compress/zlib.zig @@ -8,7 +8,7 @@ pub fn decompress(reader: anytype, writer: anytype) !void { /// Decompressor type pub fn Decompressor(comptime ReaderType: type) type { - return inflate.Inflate(.zlib, ReaderType); + return inflate.Decompressor(.zlib, ReaderType); } /// Create Decompressor which will read compressed data from reader.