diff --git a/build.zig b/build.zig index 5dcf8709e5..2d41df42d9 100644 --- a/build.zig +++ b/build.zig @@ -151,6 +151,7 @@ pub fn build(b: *std.Build) !void { "rfc1952.txt", "rfc8478.txt", // exclude files from lib/std/compress/deflate/testdata + // and lib/std/compress/flate/testdata ".expect", ".expect-noinput", ".golden", diff --git a/lib/std/compress.zig b/lib/std/compress.zig index e56008cefe..b2aeabcbee 100644 --- a/lib/std/compress.zig +++ b/lib/std/compress.zig @@ -1,13 +1,21 @@ const std = @import("std.zig"); -pub const deflate = @import("compress/deflate.zig"); -pub const gzip = @import("compress/gzip.zig"); pub const lzma = @import("compress/lzma.zig"); pub const lzma2 = @import("compress/lzma2.zig"); pub const xz = @import("compress/xz.zig"); -pub const zlib = @import("compress/zlib.zig"); pub const zstd = @import("compress/zstandard.zig"); +pub const flate = @import("compress/flate/root.zig").flate; +pub const gzip = @import("compress/flate/root.zig").gzip; +pub const zlib = @import("compress/flate/root.zig").zlib; + +// Version 1 interface +pub const v1 = struct { + pub const deflate = @import("compress/deflate.zig"); + pub const gzip = @import("compress/gzip.zig"); + pub const zlib = @import("compress/zlib.zig"); +}; + pub fn HashedReader( comptime ReaderType: anytype, comptime HasherType: anytype, @@ -69,11 +77,14 @@ pub fn hashedWriter( } test { - _ = deflate; - _ = gzip; + _ = v1.deflate; + _ = v1.gzip; _ = lzma; _ = lzma2; _ = xz; - _ = zlib; + _ = v1.zlib; _ = zstd; + _ = flate; + _ = gzip; + _ = zlib; } diff --git a/lib/std/compress/deflate/compressor.zig b/lib/std/compress/deflate/compressor.zig index 0326668793..5ce00dd6f1 100644 --- a/lib/std/compress/deflate/compressor.zig +++ b/lib/std/compress/deflate/compressor.zig @@ -327,7 +327,7 @@ pub fn Compressor(comptime WriterType: anytype) type { } } } - const n = std.compress.deflate.copy(self.window[self.window_end..], b); + const n = std.compress.v1.deflate.copy(self.window[self.window_end..], b); self.window_end += n; return @as(u32, @intCast(n)); } @@ -705,7 +705,7 @@ pub fn Compressor(comptime WriterType: anytype) type { } fn fillStore(self: *Self, b: []const u8) u32 { - const n = std.compress.deflate.copy(self.window[self.window_end..], b); + const n = std.compress.v1.deflate.copy(self.window[self.window_end..], b); self.window_end += n; return @as(u32, @intCast(n)); } diff --git a/lib/std/compress/deflate/decompressor.zig b/lib/std/compress/deflate/decompressor.zig index 896f931a66..616984364d 100644 --- a/lib/std/compress/deflate/decompressor.zig +++ b/lib/std/compress/deflate/decompressor.zig @@ -450,7 +450,7 @@ pub fn Decompressor(comptime ReaderType: type) type { pub fn read(self: *Self, output: []u8) Error!usize { while (true) { if (self.to_read.len > 0) { - const n = std.compress.deflate.copy(output, self.to_read); + const n = std.compress.v1.deflate.copy(output, self.to_read); self.to_read = self.to_read[n..]; if (self.to_read.len == 0 and self.err != null) diff --git a/lib/std/compress/flate/CircularBuffer.zig b/lib/std/compress/flate/CircularBuffer.zig new file mode 100644 index 0000000000..1e2f99dbb2 --- /dev/null +++ b/lib/std/compress/flate/CircularBuffer.zig @@ -0,0 +1,234 @@ +/// 64K buffer of uncompressed data created in inflate (decompression). Has enough +/// history to support writing match; copying length of bytes +/// from the position distance backward from current. +/// +/// Reads can return less than available bytes if they are spread across +/// different circles. 
So reads should be repeated until the required number of bytes is read,
+/// or until the returned slice has zero length.
+///
+/// Note on deflate limits:
+///  * non-compressible block is limited to 65,535 bytes.
+///  * backward pointer is limited in distance to 32K bytes and in length to 258 bytes.
+///
+/// A whole non-compressed block can be written without overlap. We always have
+/// a history of up to 64K, more than the 32K needed.
+///
+const std = @import("std");
+const assert = std.debug.assert;
+const testing = std.testing;
+
+const consts = @import("consts.zig").match;
+
+const mask = 0xffff; // 64K - 1
+const buffer_len = mask + 1; // 64K buffer
+
+const Self = @This();
+
+buffer: [buffer_len]u8 = undefined,
+wp: usize = 0, // write position
+rp: usize = 0, // read position
+
+fn writeAll(self: *Self, buf: []const u8) void {
+    for (buf) |c| self.write(c);
+}
+
+// Write a literal.
+pub fn write(self: *Self, b: u8) void {
+    assert(self.wp - self.rp < mask);
+    self.buffer[self.wp & mask] = b;
+    self.wp += 1;
+}
+
+// Write a match (back-reference to previously written data): copy `length` bytes
+// starting `distance` bytes back from the current write position.
+pub fn writeMatch(self: *Self, length: u16, distance: u16) !void {
+    if (self.wp < distance or
+        length < consts.base_length or length > consts.max_length or
+        distance < consts.min_distance or distance > consts.max_distance)
+    {
+        return error.InvalidMatch;
+    }
+    assert(self.wp - self.rp < mask);
+
+    var from: usize = self.wp - distance;
+    const from_end: usize = from + length;
+    var to: usize = self.wp;
+    const to_end: usize = to + length;
+
+    self.wp += length;
+
+    // Fast path using memcpy
+    if (length <= distance and // no overlapping buffers
+        (from >> 16 == from_end >> 16) and // start and end in the same circle
+        (to >> 16 == to_end >> 16))
+    {
+        @memcpy(self.buffer[to & mask .. to_end & mask], self.buffer[from & mask .. from_end & mask]);
+        return;
+    }
+
+    // Slow byte by byte
+    while (to < to_end) {
+        self.buffer[to & mask] = self.buffer[from & mask];
+        to += 1;
+        from += 1;
+    }
+}
+
+// Returns a writable part of the internal buffer of size `n` at most. Advances the
+// write pointer; assumes that the returned buffer will be filled with data.
+pub fn getWritable(self: *Self, n: usize) []u8 {
+    const wp = self.wp & mask;
+    const len = @min(n, buffer_len - wp);
+    self.wp += len;
+    return self.buffer[wp .. wp + len];
+}
+
+// Read available data. Can return part of the available data if it is
+// spread across two circles. So read until this returns a zero-length slice.
+pub fn read(self: *Self) []const u8 {
+    return self.readAtMost(buffer_len);
+}
+
+// Read part of the available data. Can return fewer than `limit` bytes even if
+// more decoded data is available.
+pub fn readAtMost(self: *Self, limit: usize) []const u8 {
+    const rb = self.readBlock(if (limit == 0) buffer_len else limit);
+    defer self.rp += rb.len;
+    return self.buffer[rb.head..rb.tail];
+}
+
+const ReadBlock = struct {
+    head: usize,
+    tail: usize,
+    len: usize,
+};
+
+// Returns the position of a contiguous block of readable data.
+fn readBlock(self: *Self, max: usize) ReadBlock {
+    const r = self.rp & mask;
+    const w = self.wp & mask;
+    const n = @min(
+        max,
+        if (w >= r) w - r else buffer_len - r,
+    );
+    return .{
+        .head = r,
+        .tail = r + n,
+        .len = n,
+    };
+}
+
+// Number of free bytes for write.
+pub fn free(self: *Self) usize {
+    return buffer_len - (self.wp - self.rp);
+}
+
+// Full if the largest match can't fit. 258 is the largest match length; that many
+// bytes can be produced in a single decode step.
+pub fn full(self: *Self) bool { + return self.free() < 258 + 1; +} + +// example from: https://youtu.be/SJPvNi4HrWQ?t=3558 +test "flate.CircularBuffer writeMatch" { + var cb: Self = .{}; + + cb.writeAll("a salad; "); + try cb.writeMatch(5, 9); + try cb.writeMatch(3, 3); + + try testing.expectEqualStrings("a salad; a salsal", cb.read()); +} + +test "flate.CircularBuffer writeMatch overlap" { + var cb: Self = .{}; + + cb.writeAll("a b c "); + try cb.writeMatch(8, 4); + cb.write('d'); + + try testing.expectEqualStrings("a b c b c b c d", cb.read()); +} + +test "flate.CircularBuffer readAtMost" { + var cb: Self = .{}; + + cb.writeAll("0123456789"); + try cb.writeMatch(50, 10); + + try testing.expectEqualStrings("0123456789" ** 6, cb.buffer[cb.rp..cb.wp]); + for (0..6) |i| { + try testing.expectEqual(i * 10, cb.rp); + try testing.expectEqualStrings("0123456789", cb.readAtMost(10)); + } + try testing.expectEqualStrings("", cb.readAtMost(10)); + try testing.expectEqualStrings("", cb.read()); +} + +test "flate.CircularBuffer" { + var cb: Self = .{}; + + const data = "0123456789abcdef" ** (1024 / 16); + cb.writeAll(data); + try testing.expectEqual(@as(usize, 0), cb.rp); + try testing.expectEqual(@as(usize, 1024), cb.wp); + try testing.expectEqual(@as(usize, 1024 * 63), cb.free()); + + for (0..62 * 4) |_| + try cb.writeMatch(256, 1024); // write 62K + + try testing.expectEqual(@as(usize, 0), cb.rp); + try testing.expectEqual(@as(usize, 63 * 1024), cb.wp); + try testing.expectEqual(@as(usize, 1024), cb.free()); + + cb.writeAll(data[0..200]); + _ = cb.readAtMost(1024); // make some space + cb.writeAll(data); // overflows write position + try testing.expectEqual(@as(usize, 200 + 65536), cb.wp); + try testing.expectEqual(@as(usize, 1024), cb.rp); + try testing.expectEqual(@as(usize, 1024 - 200), cb.free()); + + const rb = cb.readBlock(Self.buffer_len); + try testing.expectEqual(@as(usize, 65536 - 1024), rb.len); + try testing.expectEqual(@as(usize, 1024), rb.head); + try testing.expectEqual(@as(usize, 65536), rb.tail); + + try testing.expectEqual(@as(usize, 65536 - 1024), cb.read().len); // read to the end of the buffer + try testing.expectEqual(@as(usize, 200 + 65536), cb.wp); + try testing.expectEqual(@as(usize, 65536), cb.rp); + try testing.expectEqual(@as(usize, 65536 - 200), cb.free()); + + try testing.expectEqual(@as(usize, 200), cb.read().len); // read the rest +} + +test "flate.CircularBuffer write overlap" { + var cb: Self = .{}; + cb.wp = cb.buffer.len - 15; + cb.rp = cb.wp; + + cb.writeAll("0123456789"); + cb.writeAll("abcdefghij"); + + try testing.expectEqual(cb.buffer.len + 5, cb.wp); + try testing.expectEqual(cb.buffer.len - 15, cb.rp); + + try testing.expectEqualStrings("0123456789abcde", cb.read()); + try testing.expectEqualStrings("fghij", cb.read()); + + try testing.expect(cb.wp == cb.rp); +} + +test "flate.CircularBuffer writeMatch/read overlap" { + var cb: Self = .{}; + cb.wp = cb.buffer.len - 15; + cb.rp = cb.wp; + + cb.writeAll("0123456789"); + try cb.writeMatch(15, 5); + + try testing.expectEqualStrings("012345678956789", cb.read()); + try testing.expectEqualStrings("5678956789", cb.read()); + + try cb.writeMatch(20, 25); + try testing.expectEqualStrings("01234567895678956789", cb.read()); +} diff --git a/lib/std/compress/flate/Lookup.zig b/lib/std/compress/flate/Lookup.zig new file mode 100644 index 0000000000..b5d1fd3c97 --- /dev/null +++ b/lib/std/compress/flate/Lookup.zig @@ -0,0 +1,125 @@ +/// Lookup of the previous locations for the same 4 byte data. 
Works on hash of +/// 4 bytes data. Head contains position of the first match for each hash. Chain +/// points to the previous position of the same hash given the current location. +/// +const std = @import("std"); +const testing = std.testing; +const expect = testing.expect; +const consts = @import("consts.zig"); + +const Self = @This(); + +const prime4 = 0x9E3779B1; // 4 bytes prime number 2654435761 +const chain_len = 2 * consts.history.len; + +// Maps hash => first position +head: [consts.lookup.len]u16 = [_]u16{0} ** consts.lookup.len, +// Maps position => previous positions for the same hash value +chain: [chain_len]u16 = [_]u16{0} ** (chain_len), + +// Calculates hash of the 4 bytes from data. +// Inserts `pos` position of that hash in the lookup tables. +// Returns previous location with the same hash value. +pub fn add(self: *Self, data: []const u8, pos: u16) u16 { + if (data.len < 4) return 0; + const h = hash(data[0..4]); + return self.set(h, pos); +} + +// Retruns previous location with the same hash value given the current +// position. +pub fn prev(self: *Self, pos: u16) u16 { + return self.chain[pos]; +} + +fn set(self: *Self, h: u32, pos: u16) u16 { + const p = self.head[h]; + self.head[h] = pos; + self.chain[pos] = p; + return p; +} + +// Slide all positions in head and chain for `n` +pub fn slide(self: *Self, n: u16) void { + for (&self.head) |*v| { + v.* -|= n; + } + var i: usize = 0; + while (i < n) : (i += 1) { + self.chain[i] = self.chain[i + n] -| n; + } +} + +// Add `len` 4 bytes hashes from `data` into lookup. +// Position of the first byte is `pos`. +pub fn bulkAdd(self: *Self, data: []const u8, len: u16, pos: u16) void { + if (len == 0 or data.len < consts.match.min_length) { + return; + } + var hb = + @as(u32, data[3]) | + @as(u32, data[2]) << 8 | + @as(u32, data[1]) << 16 | + @as(u32, data[0]) << 24; + _ = self.set(hashu(hb), pos); + + var i = pos; + for (4..@min(len + 3, data.len)) |j| { + hb = (hb << 8) | @as(u32, data[j]); + i += 1; + _ = self.set(hashu(hb), i); + } +} + +// Calculates hash of the first 4 bytes of `b`. +fn hash(b: *const [4]u8) u32 { + return hashu(@as(u32, b[3]) | + @as(u32, b[2]) << 8 | + @as(u32, b[1]) << 16 | + @as(u32, b[0]) << 24); +} + +fn hashu(v: u32) u32 { + return @intCast((v *% prime4) >> consts.lookup.shift); +} + +test "flate.Lookup add/prev" { + const data = [_]u8{ + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0x01, 0x02, 0x03, + }; + + var h: Self = .{}; + for (data, 0..) |_, i| { + const p = h.add(data[i..], @intCast(i)); + if (i >= 8 and i < 24) { + try expect(p == i - 8); + } else { + try expect(p == 0); + } + } + + const v = Self.hash(data[2 .. 2 + 4]); + try expect(h.head[v] == 2 + 16); + try expect(h.chain[2 + 16] == 2 + 8); + try expect(h.chain[2 + 8] == 2); +} + +test "flate.Lookup bulkAdd" { + const data = "Lorem ipsum dolor sit amet, consectetur adipiscing elit."; + + // one by one + var h: Self = .{}; + for (data, 0..) 
|_, i| { + _ = h.add(data[i..], @intCast(i)); + } + + // in bulk + var bh: Self = .{}; + bh.bulkAdd(data, data.len, 0); + + try testing.expectEqualSlices(u16, &h.head, &bh.head); + try testing.expectEqualSlices(u16, &h.chain, &bh.chain); +} diff --git a/lib/std/compress/flate/SlidingWindow.zig b/lib/std/compress/flate/SlidingWindow.zig new file mode 100644 index 0000000000..ceab98c0d3 --- /dev/null +++ b/lib/std/compress/flate/SlidingWindow.zig @@ -0,0 +1,160 @@ +/// Used in deflate (compression), holds uncompressed data form which Tokens are +/// produces. In combination with Lookup it is used to find matches in history data. +/// +const std = @import("std"); +const consts = @import("consts.zig"); + +const expect = testing.expect; +const assert = std.debug.assert; +const testing = std.testing; + +const hist_len = consts.history.len; +const buffer_len = 2 * hist_len; +const min_lookahead = consts.match.min_length + consts.match.max_length; +const max_rp = buffer_len - min_lookahead; + +const Self = @This(); + +buffer: [buffer_len]u8 = undefined, +wp: usize = 0, // write position +rp: usize = 0, // read position +fp: isize = 0, // last flush position, tokens are build from fp..rp + +// Returns number of bytes written, or 0 if buffer is full and need to slide. +pub fn write(self: *Self, buf: []const u8) usize { + if (self.rp >= max_rp) return 0; // need to slide + + const n = @min(buf.len, buffer_len - self.wp); + @memcpy(self.buffer[self.wp .. self.wp + n], buf[0..n]); + self.wp += n; + return n; +} + +// Slide buffer for hist_len. +// Drops old history, preserves between hist_len and hist_len - min_lookahead. +// Returns number of bytes removed. +pub fn slide(self: *Self) u16 { + assert(self.rp >= max_rp and self.wp >= self.rp); + const n = self.wp - hist_len; + @memcpy(self.buffer[0..n], self.buffer[hist_len..self.wp]); + self.rp -= hist_len; + self.wp -= hist_len; + self.fp -= hist_len; + return @intCast(n); +} + +// Data from the current position (read position). Those part of the buffer is +// not converted to tokens yet. +fn lookahead(self: *Self) []const u8 { + assert(self.wp >= self.rp); + return self.buffer[self.rp..self.wp]; +} + +// Returns part of the lookahead buffer. If should_flush is set no lookahead is +// preserved otherwise preserves enough data for the longest match. Returns +// null if there is not enough data. +pub fn activeLookahead(self: *Self, should_flush: bool) ?[]const u8 { + const min: usize = if (should_flush) 0 else min_lookahead; + const lh = self.lookahead(); + return if (lh.len > min) lh else null; +} + +// Advances read position, shrinks lookahead. +pub fn advance(self: *Self, n: u16) void { + assert(self.wp >= self.rp + n); + self.rp += n; +} + +// Returns writable part of the buffer, where new uncompressed data can be +// written. +pub fn writable(self: *Self) []u8 { + return self.buffer[self.wp..]; +} + +// Notification of what part of writable buffer is filled with data. +pub fn written(self: *Self, n: usize) void { + self.wp += n; +} + +// Finds match length between previous and current position. +// Used in hot path! 
+pub fn match(self: *Self, prev_pos: u16, curr_pos: u16, min_len: u16) u16 {
+    const max_len: usize = @min(self.wp - curr_pos, consts.match.max_length);
+    // lookahead buffers from previous and current positions
+    const prev_lh = self.buffer[prev_pos..][0..max_len];
+    const curr_lh = self.buffer[curr_pos..][0..max_len];
+
+    // If we already have a match (min_len > 0),
+    // first test the byte just past the previous length
+    // (prev_lh[min_len] != curr_lh[min_len]) and then all the bytes from that
+    // position down to zero. Those are more likely places to find a difference
+    // than looping from the first byte.
+    var i: usize = min_len;
+    if (i > 0) {
+        if (max_len <= i) return 0;
+        while (true) {
+            if (prev_lh[i] != curr_lh[i]) return 0;
+            if (i == 0) break;
+            i -= 1;
+        }
+        i = min_len;
+    }
+    while (i < max_len) : (i += 1)
+        if (prev_lh[i] != curr_lh[i]) break;
+    return if (i >= consts.match.min_length) @intCast(i) else 0;
+}
+
+// Current position of non-compressed data. Data before rp is already converted
+// to tokens.
+pub fn pos(self: *Self) u16 {
+    return @intCast(self.rp);
+}
+
+// Notification that the token list is cleared.
+pub fn flush(self: *Self) void {
+    self.fp = @intCast(self.rp);
+}
+
+// Part of the buffer since the last flush, or null if there was a slide in
+// between (so fp becomes negative).
+pub fn tokensBuffer(self: *Self) ?[]const u8 {
+    assert(self.fp <= self.rp);
+    if (self.fp < 0) return null;
+    return self.buffer[@intCast(self.fp)..self.rp];
+}
+
+test "flate.SlidingWindow match" {
+    const data = "Blah blah blah blah blah!";
+    var win: Self = .{};
+    try expect(win.write(data) == data.len);
+    try expect(win.wp == data.len);
+    try expect(win.rp == 0);
+
+    // length between l symbols
+    try expect(win.match(1, 6, 0) == 18);
+    try expect(win.match(1, 11, 0) == 13);
+    try expect(win.match(1, 16, 0) == 8);
+    try expect(win.match(1, 21, 0) == 0);
+
+    // position 15 = "blah blah!"
+    // position 20 = "blah!"
+    try expect(win.match(15, 20, 0) == 4);
+    try expect(win.match(15, 20, 3) == 4);
+    try expect(win.match(15, 20, 4) == 0);
+}
+
+test "flate.SlidingWindow slide" {
+    var win: Self = .{};
+    win.wp = Self.buffer_len - 11;
+    win.rp = Self.buffer_len - 111;
+    win.buffer[win.rp] = 0xab;
+    try expect(win.lookahead().len == 100);
+    try expect(win.tokensBuffer().?.len == win.rp);
+
+    const n = win.slide();
+    try expect(n == 32757);
+    try expect(win.buffer[win.rp] == 0xab);
+    try expect(win.rp == Self.hist_len - 111);
+    try expect(win.wp == Self.hist_len - 11);
+    try expect(win.lookahead().len == 100);
+    try expect(win.tokensBuffer() == null);
+}
diff --git a/lib/std/compress/flate/Token.zig b/lib/std/compress/flate/Token.zig
new file mode 100644
index 0000000000..fd9dd1d1e3
--- /dev/null
+++ b/lib/std/compress/flate/Token.zig
@@ -0,0 +1,327 @@
+/// A token can be a literal: a single byte of data, or a match: a reference to a
+/// slice of earlier data in the same stream, represented with a (distance, length)
+/// pair, where length can be 3 - 258 bytes and distance 1 - 32768 bytes.
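+///
+/// For example, `initLiteral('a')` below encodes the single byte 'a', while
+/// `initMatch(100, 10)` encodes "copy 10 bytes starting 100 bytes back".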
+/// +const std = @import("std"); +const assert = std.debug.assert; +const print = std.debug.print; +const expect = std.testing.expect; +const consts = @import("consts.zig").match; + +const Token = @This(); + +pub const Kind = enum(u1) { + literal, + match, +}; + +// Distance range 1 - 32768, stored in dist as 0 - 32767 (fits u15) +dist: u15 = 0, +// Length range 3 - 258, stored in len_lit as 0 - 255 (fits u8) +len_lit: u8 = 0, +kind: Kind = .literal, + +pub fn literal(t: Token) u8 { + return t.len_lit; +} + +pub fn distance(t: Token) u16 { + return @as(u16, t.dist) + consts.min_distance; +} + +pub fn length(t: Token) u16 { + return @as(u16, t.len_lit) + consts.base_length; +} + +pub fn initLiteral(lit: u8) Token { + return .{ .kind = .literal, .len_lit = lit }; +} + +// distance range 1 - 32768, stored in dist as 0 - 32767 (u15) +// length range 3 - 258, stored in len_lit as 0 - 255 (u8) +pub fn initMatch(dist: u16, len: u16) Token { + assert(len >= consts.min_length and len <= consts.max_length); + assert(dist >= consts.min_distance and dist <= consts.max_distance); + return .{ + .kind = .match, + .dist = @intCast(dist - consts.min_distance), + .len_lit = @intCast(len - consts.base_length), + }; +} + +pub fn eql(t: Token, o: Token) bool { + return t.kind == o.kind and + t.dist == o.dist and + t.len_lit == o.len_lit; +} + +pub fn lengthCode(t: Token) u16 { + return match_lengths[match_lengths_index[t.len_lit]].code; +} + +pub fn lengthEncoding(t: Token) MatchLength { + var c = match_lengths[match_lengths_index[t.len_lit]]; + c.extra_length = t.len_lit - c.base_scaled; + return c; +} + +// Returns the distance code corresponding to a specific distance. +// Distance code is in range: 0 - 29. +pub fn distanceCode(t: Token) u8 { + var dist: u16 = t.dist; + if (dist < match_distances_index.len) { + return match_distances_index[dist]; + } + dist >>= 7; + if (dist < match_distances_index.len) { + return match_distances_index[dist] + 14; + } + dist >>= 7; + return match_distances_index[dist] + 28; +} + +pub fn distanceEncoding(t: Token) MatchDistance { + var c = match_distances[t.distanceCode()]; + c.extra_distance = t.dist - c.base_scaled; + return c; +} + +pub fn lengthExtraBits(code: u32) u8 { + return match_lengths[code - length_codes_start].extra_bits; +} + +pub fn matchLength(code: u8) MatchLength { + return match_lengths[code]; +} + +pub fn matchDistance(code: u8) MatchDistance { + return match_distances[code]; +} + +pub fn distanceExtraBits(code: u32) u8 { + return match_distances[code].extra_bits; +} + +pub fn show(t: Token) void { + if (t.kind == .literal) { + print("L('{c}'), ", .{t.literal()}); + } else { + print("M({d}, {d}), ", .{ t.distance(), t.length() }); + } +} + +// Retruns index in match_lengths table for each length in range 0-255. 
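+// For example, a match of length 18 is stored as len_lit = 15;
+// match_lengths_index[15] = 11, and match_lengths[11] is code 268
+// (base length 17, one extra bit), matching the RFC table below.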
+const match_lengths_index = [_]u8{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, + 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, + 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, + 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, + 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, + 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 28, +}; + +const MatchLength = struct { + code: u16, + base_scaled: u8, // base - 3, scaled to fit into u8 (0-255), same as lit_len field in Token. + base: u16, // 3-258 + extra_length: u8 = 0, + extra_bits: u4, +}; + +// match_lengths represents table from rfc (https://datatracker.ietf.org/doc/html/rfc1951#page-12) +// +// Extra Extra Extra +// Code Bits Length(s) Code Bits Lengths Code Bits Length(s) +// ---- ---- ------ ---- ---- ------- ---- ---- ------- +// 257 0 3 267 1 15,16 277 4 67-82 +// 258 0 4 268 1 17,18 278 4 83-98 +// 259 0 5 269 2 19-22 279 4 99-114 +// 260 0 6 270 2 23-26 280 4 115-130 +// 261 0 7 271 2 27-30 281 5 131-162 +// 262 0 8 272 2 31-34 282 5 163-194 +// 263 0 9 273 3 35-42 283 5 195-226 +// 264 0 10 274 3 43-50 284 5 227-257 +// 265 1 11,12 275 3 51-58 285 0 258 +// 266 1 13,14 276 3 59-66 +// +pub const length_codes_start = 257; + +const match_lengths = [_]MatchLength{ + .{ .extra_bits = 0, .base_scaled = 0, .base = 3, .code = 257 }, + .{ .extra_bits = 0, .base_scaled = 1, .base = 4, .code = 258 }, + .{ .extra_bits = 0, .base_scaled = 2, .base = 5, .code = 259 }, + .{ .extra_bits = 0, .base_scaled = 3, .base = 6, .code = 260 }, + .{ .extra_bits = 0, .base_scaled = 4, .base = 7, .code = 261 }, + .{ .extra_bits = 0, .base_scaled = 5, .base = 8, .code = 262 }, + .{ .extra_bits = 0, .base_scaled = 6, .base = 9, .code = 263 }, + .{ .extra_bits = 0, .base_scaled = 7, .base = 10, .code = 264 }, + .{ .extra_bits = 1, .base_scaled = 8, .base = 11, .code = 265 }, + .{ .extra_bits = 1, .base_scaled = 10, .base = 13, .code = 266 }, + .{ .extra_bits = 1, .base_scaled = 12, .base = 15, .code = 267 }, + .{ .extra_bits = 1, .base_scaled = 14, .base = 17, .code = 268 }, + .{ .extra_bits = 2, .base_scaled = 16, .base = 19, .code = 269 }, + .{ .extra_bits = 2, .base_scaled = 20, .base = 23, .code = 270 }, + .{ .extra_bits = 2, .base_scaled = 24, .base = 27, .code = 271 }, + .{ .extra_bits = 2, .base_scaled = 28, .base = 31, .code = 272 }, + .{ .extra_bits = 3, .base_scaled = 32, .base = 35, .code = 273 }, + .{ .extra_bits = 3, .base_scaled = 40, .base = 43, .code = 274 }, + .{ .extra_bits = 3, .base_scaled = 48, .base = 51, .code = 275 }, + .{ .extra_bits = 3, .base_scaled = 56, .base = 59, .code = 276 }, + .{ .extra_bits = 4, .base_scaled = 64, .base = 67, .code = 277 }, + .{ .extra_bits = 4, .base_scaled = 80, .base = 83, .code = 278 }, + .{ .extra_bits = 4, .base_scaled = 96, .base = 99, .code = 279 }, 
+ .{ .extra_bits = 4, .base_scaled = 112, .base = 115, .code = 280 }, + .{ .extra_bits = 5, .base_scaled = 128, .base = 131, .code = 281 }, + .{ .extra_bits = 5, .base_scaled = 160, .base = 163, .code = 282 }, + .{ .extra_bits = 5, .base_scaled = 192, .base = 195, .code = 283 }, + .{ .extra_bits = 5, .base_scaled = 224, .base = 227, .code = 284 }, + .{ .extra_bits = 0, .base_scaled = 255, .base = 258, .code = 285 }, +}; + +// Used in distanceCode fn to get index in match_distance table for each distance in range 0-32767. +const match_distances_index = [_]u8{ + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +}; + +const MatchDistance = struct { + base_scaled: u16, // base - 1, same as Token dist field + base: u16, + extra_distance: u16 = 0, + code: u8, + extra_bits: u4, +}; + +// match_distances represents table from rfc (https://datatracker.ietf.org/doc/html/rfc1951#page-12) +// +// Extra Extra Extra +// Code Bits Dist Code Bits Dist Code Bits Distance +// ---- ---- ---- ---- ---- ------ ---- ---- -------- +// 0 0 1 10 4 33-48 20 9 1025-1536 +// 1 0 2 11 4 49-64 21 9 1537-2048 +// 2 0 3 12 5 65-96 22 10 2049-3072 +// 3 0 4 13 5 97-128 23 10 3073-4096 +// 4 1 5,6 14 6 129-192 24 11 4097-6144 +// 5 1 7,8 15 6 193-256 25 11 6145-8192 +// 6 2 9-12 16 7 257-384 26 12 8193-12288 +// 7 2 13-16 17 7 385-512 27 12 12289-16384 +// 8 3 17-24 18 8 513-768 28 13 16385-24576 +// 9 3 25-32 19 8 769-1024 29 13 24577-32768 +// +const match_distances = [_]MatchDistance{ + .{ .extra_bits = 0, .base_scaled = 0x0000, .code = 0, .base = 1 }, + .{ .extra_bits = 0, .base_scaled = 0x0001, .code = 1, .base = 2 }, + .{ .extra_bits = 0, .base_scaled = 0x0002, .code = 2, .base = 3 }, + .{ .extra_bits = 0, .base_scaled = 0x0003, .code = 3, .base = 4 }, + .{ .extra_bits = 1, .base_scaled = 0x0004, .code = 4, .base = 5 }, + .{ .extra_bits = 1, .base_scaled = 0x0006, .code = 5, .base = 7 }, + .{ .extra_bits = 2, .base_scaled = 0x0008, .code = 6, .base = 9 }, + .{ .extra_bits = 2, .base_scaled = 0x000c, .code = 7, .base = 13 }, + .{ .extra_bits = 3, .base_scaled = 0x0010, .code = 8, .base = 17 }, + .{ .extra_bits = 3, .base_scaled = 0x0018, .code = 9, .base = 25 }, + .{ .extra_bits = 4, .base_scaled = 0x0020, .code = 10, .base = 33 }, + .{ .extra_bits = 4, .base_scaled = 0x0030, .code = 11, .base = 49 }, + .{ .extra_bits = 5, .base_scaled = 0x0040, .code = 12, .base = 65 }, + .{ .extra_bits = 5, .base_scaled = 0x0060, .code = 13, .base = 97 }, + .{ .extra_bits = 6, .base_scaled = 0x0080, .code = 14, .base = 129 }, + .{ .extra_bits = 6, .base_scaled = 0x00c0, .code = 
15, .base = 193 }, + .{ .extra_bits = 7, .base_scaled = 0x0100, .code = 16, .base = 257 }, + .{ .extra_bits = 7, .base_scaled = 0x0180, .code = 17, .base = 385 }, + .{ .extra_bits = 8, .base_scaled = 0x0200, .code = 18, .base = 513 }, + .{ .extra_bits = 8, .base_scaled = 0x0300, .code = 19, .base = 769 }, + .{ .extra_bits = 9, .base_scaled = 0x0400, .code = 20, .base = 1025 }, + .{ .extra_bits = 9, .base_scaled = 0x0600, .code = 21, .base = 1537 }, + .{ .extra_bits = 10, .base_scaled = 0x0800, .code = 22, .base = 2049 }, + .{ .extra_bits = 10, .base_scaled = 0x0c00, .code = 23, .base = 3073 }, + .{ .extra_bits = 11, .base_scaled = 0x1000, .code = 24, .base = 4097 }, + .{ .extra_bits = 11, .base_scaled = 0x1800, .code = 25, .base = 6145 }, + .{ .extra_bits = 12, .base_scaled = 0x2000, .code = 26, .base = 8193 }, + .{ .extra_bits = 12, .base_scaled = 0x3000, .code = 27, .base = 12289 }, + .{ .extra_bits = 13, .base_scaled = 0x4000, .code = 28, .base = 16385 }, + .{ .extra_bits = 13, .base_scaled = 0x6000, .code = 29, .base = 24577 }, +}; + +test "flate.Token size" { + try expect(@sizeOf(Token) == 4); +} + +// testing table https://datatracker.ietf.org/doc/html/rfc1951#page-12 +test "flate.Token MatchLength" { + var c = Token.initMatch(1, 4).lengthEncoding(); + try expect(c.code == 258); + try expect(c.extra_bits == 0); + try expect(c.extra_length == 0); + + c = Token.initMatch(1, 11).lengthEncoding(); + try expect(c.code == 265); + try expect(c.extra_bits == 1); + try expect(c.extra_length == 0); + + c = Token.initMatch(1, 12).lengthEncoding(); + try expect(c.code == 265); + try expect(c.extra_bits == 1); + try expect(c.extra_length == 1); + + c = Token.initMatch(1, 130).lengthEncoding(); + try expect(c.code == 280); + try expect(c.extra_bits == 4); + try expect(c.extra_length == 130 - 115); +} + +test "flate.Token MatchDistance" { + var c = Token.initMatch(1, 4).distanceEncoding(); + try expect(c.code == 0); + try expect(c.extra_bits == 0); + try expect(c.extra_distance == 0); + + c = Token.initMatch(192, 4).distanceEncoding(); + try expect(c.code == 14); + try expect(c.extra_bits == 6); + try expect(c.extra_distance == 192 - 129); +} + +test "flate.Token match_lengths" { + for (match_lengths, 0..) |ml, i| { + try expect(@as(u16, ml.base_scaled) + 3 == ml.base); + try expect(i + 257 == ml.code); + } + + for (match_distances, 0..) |mo, i| { + try expect(mo.base_scaled + 1 == mo.base); + try expect(i == mo.code); + } +} diff --git a/lib/std/compress/flate/bit_reader.zig b/lib/std/compress/flate/bit_reader.zig new file mode 100644 index 0000000000..e8717c8e17 --- /dev/null +++ b/lib/std/compress/flate/bit_reader.zig @@ -0,0 +1,333 @@ +const std = @import("std"); +const assert = std.debug.assert; +const testing = std.testing; + +pub fn bitReader(reader: anytype) BitReader(@TypeOf(reader)) { + return BitReader(@TypeOf(reader)).init(reader); +} + +/// Bit reader used during inflate (decompression). Has internal buffer of 64 +/// bits which shifts right after bits are consumed. Uses forward_reader to fill +/// that internal buffer when needed. +/// +/// readF is the core function. Supports few different ways of getting bits +/// controlled by flags. In hot path we try to avoid checking whether we need to +/// fill buffer from forward_reader by calling fill in advance and readF with +/// buffered flag set. 
+///
+pub fn BitReader(comptime ReaderType: type) type {
+    return struct {
+        // Underlying reader used for filling internal bits buffer
+        forward_reader: ReaderType = undefined,
+        // Internal buffer of 64 bits
+        bits: u64 = 0,
+        // Number of bits in the buffer
+        nbits: u32 = 0,
+
+        const Self = @This();
+
+        pub const Error = ReaderType.Error || error{EndOfStream};
+
+        pub fn init(rdr: ReaderType) Self {
+            var self = Self{ .forward_reader = rdr };
+            self.fill(1) catch {};
+            return self;
+        }
+
+        // Try to have `nice` bits available in the buffer. Reads from the
+        // forward reader if there are fewer than `nice` bits in the buffer.
+        // Returns an error only if the end of the forward stream is reached
+        // and the internal buffer is empty; it will not error if fewer than
+        // `nice` bits are in the buffer, only when all bits are exhausted.
+        // During inflate we usually know the maximum number of bits for the
+        // next step, but that step will usually need fewer bits to decode.
+        // So `nice` is not a hard limit; it will just try to have that number
+        // of bits available. If the end of the forward stream is reached
+        // there may be some extra zero bits in the buffer.
+        pub inline fn fill(self: *Self, nice: u6) !void {
+            if (self.nbits >= nice) {
+                return; // We have enough bits
+            }
+            // Read more bits from the forward reader
+
+            // Number of empty bytes in bits, round nbits to whole bytes.
+            const empty_bytes =
+                @as(u8, if (self.nbits & 0x7 == 0) 8 else 7) - // 8 for 8, 16, 24..., 7 otherwise
+                (self.nbits >> 3); // 0 for 0-7, 1 for 8-16, ... same as / 8
+
+            var buf: [8]u8 = [_]u8{0} ** 8;
+            const bytes_read = self.forward_reader.read(buf[0..empty_bytes]) catch 0;
+            if (bytes_read > 0) {
+                const u: u64 = std.mem.readInt(u64, buf[0..8], .little);
+                self.bits |= u << @as(u6, @intCast(self.nbits));
+                self.nbits += 8 * @as(u8, @intCast(bytes_read));
+                return;
+            }
+
+            if (self.nbits == 0)
+                return error.EndOfStream;
+        }
+
+        // Read exactly buf.len bytes into buf.
+        pub fn readAll(self: *Self, buf: []u8) !void {
+            assert(self.alignBits() == 0); // internal bits must be at byte boundary
+
+            // First read from internal bits buffer.
+            var n: usize = 0;
+            while (self.nbits > 0 and n < buf.len) {
+                buf[n] = try self.readF(u8, flag.buffered);
+                n += 1;
+            }
+            // Then use forward reader for all other bytes.
+            try self.forward_reader.readNoEof(buf[n..]);
+        }
+
+        pub const flag = struct {
+            pub const peek: u3 = 0b001; // don't advance internal buffer, just get bits, leave them in buffer
+            pub const buffered: u3 = 0b010; // assume that there is no need to fill, fill should be called before
+            pub const reverse: u3 = 0b100; // bit reverse the bits that are read
+        };
+
+        // Alias for readF(U, 0).
+        pub fn read(self: *Self, comptime U: type) !U {
+            return self.readF(U, 0);
+        }
+
+        // Alias for readF with flag.peek set.
+        pub inline fn peekF(self: *Self, comptime U: type, comptime how: u3) !U {
+            return self.readF(U, how | flag.peek);
+        }
+
+        // Read with the flags provided.
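+        // For example, `readF(u7, flag.buffered | flag.reverse)` returns the next
+        // 7 already-buffered bits, bit-reversed (as used in readFixedCode below).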
+ pub fn readF(self: *Self, comptime U: type, comptime how: u3) !U { + const n: u6 = @bitSizeOf(U); + switch (how) { + 0 => { // `normal` read + try self.fill(n); // ensure that there are n bits in the buffer + const u: U = @truncate(self.bits); // get n bits + try self.shift(n); // advance buffer for n + return u; + }, + (flag.peek) => { // no shift, leave bits in the buffer + try self.fill(n); + return @truncate(self.bits); + }, + flag.buffered => { // no fill, assume that buffer has enought bits + const u: U = @truncate(self.bits); + try self.shift(n); + return u; + }, + (flag.reverse) => { // same as 0 with bit reverse + try self.fill(n); + const u: U = @truncate(self.bits); + try self.shift(n); + return @bitReverse(u); + }, + (flag.peek | flag.reverse) => { + try self.fill(n); + return @bitReverse(@as(U, @truncate(self.bits))); + }, + (flag.buffered | flag.reverse) => { + const u: U = @truncate(self.bits); + try self.shift(n); + return @bitReverse(u); + }, + (flag.peek | flag.buffered) => { + return @truncate(self.bits); + }, + (flag.peek | flag.buffered | flag.reverse) => { + return @bitReverse(@as(U, @truncate(self.bits))); + }, + } + } + + // Read n number of bits. + // Only buffered flag can be used in how. + pub fn readN(self: *Self, n: u4, comptime how: u3) !u16 { + switch (how) { + 0 => { + try self.fill(n); + }, + flag.buffered => {}, + else => unreachable, + } + const mask: u16 = (@as(u16, 1) << n) - 1; + const u: u16 = @as(u16, @truncate(self.bits)) & mask; + try self.shift(n); + return u; + } + + // Advance buffer for n bits. + pub fn shift(self: *Self, n: u6) !void { + if (n > self.nbits) return error.EndOfStream; + self.bits >>= n; + self.nbits -= n; + } + + // Skip n bytes. + pub fn skipBytes(self: *Self, n: u16) !void { + for (0..n) |_| { + try self.fill(8); + try self.shift(8); + } + } + + // Number of bits to align stream to the byte boundary. + fn alignBits(self: *Self) u3 { + return @intCast(self.nbits & 0x7); + } + + // Align stream to the byte boundary. + pub fn alignToByte(self: *Self) void { + const ab = self.alignBits(); + if (ab > 0) self.shift(ab) catch unreachable; + } + + // Skip zero terminated string. + pub fn skipStringZ(self: *Self) !void { + while (true) { + if (try self.readF(u8, 0) == 0) break; + } + } + + // Read deflate fixed fixed code. + // Reads first 7 bits, and then mybe 1 or 2 more to get full 7,8 or 9 bit code. 
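+        // For example, literal 0 is coded 0011_0000: the first 7 bits (0011_000, i.e. 0x18)
+        // fall in the 8-bit range, so one more bit is read and the result is
+        // (0x18 << 1) + 0 - 0b0011_0000 = 0.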
+ // ref: https://datatracker.ietf.org/doc/html/rfc1951#page-12 + // Lit Value Bits Codes + // --------- ---- ----- + // 0 - 143 8 00110000 through + // 10111111 + // 144 - 255 9 110010000 through + // 111111111 + // 256 - 279 7 0000000 through + // 0010111 + // 280 - 287 8 11000000 through + // 11000111 + pub fn readFixedCode(self: *Self) !u16 { + try self.fill(7 + 2); + const code7 = try self.readF(u7, flag.buffered | flag.reverse); + if (code7 <= 0b0010_111) { // 7 bits, 256-279, codes 0000_000 - 0010_111 + return @as(u16, code7) + 256; + } else if (code7 <= 0b1011_111) { // 8 bits, 0-143, codes 0011_0000 through 1011_1111 + return (@as(u16, code7) << 1) + @as(u16, try self.readF(u1, flag.buffered)) - 0b0011_0000; + } else if (code7 <= 0b1100_011) { // 8 bit, 280-287, codes 1100_0000 - 1100_0111 + return (@as(u16, code7 - 0b1100000) << 1) + try self.readF(u1, flag.buffered) + 280; + } else { // 9 bit, 144-255, codes 1_1001_0000 - 1_1111_1111 + return (@as(u16, code7 - 0b1100_100) << 2) + @as(u16, try self.readF(u2, flag.buffered | flag.reverse)) + 144; + } + } + }; +} + +test "flate.BitReader" { + var fbs = std.io.fixedBufferStream(&[_]u8{ 0xf3, 0x48, 0xcd, 0xc9, 0x00, 0x00 }); + var br = bitReader(fbs.reader()); + const F = BitReader(@TypeOf(fbs.reader())).flag; + + try testing.expectEqual(@as(u8, 48), br.nbits); + try testing.expectEqual(@as(u64, 0xc9cd48f3), br.bits); + + try testing.expect(try br.readF(u1, 0) == 0b0000_0001); + try testing.expect(try br.readF(u2, 0) == 0b0000_0001); + try testing.expectEqual(@as(u8, 48 - 3), br.nbits); + try testing.expectEqual(@as(u3, 5), br.alignBits()); + + try testing.expect(try br.readF(u8, F.peek) == 0b0001_1110); + try testing.expect(try br.readF(u9, F.peek) == 0b1_0001_1110); + try br.shift(9); + try testing.expectEqual(@as(u8, 36), br.nbits); + try testing.expectEqual(@as(u3, 4), br.alignBits()); + + try testing.expect(try br.readF(u4, 0) == 0b0100); + try testing.expectEqual(@as(u8, 32), br.nbits); + try testing.expectEqual(@as(u3, 0), br.alignBits()); + + try br.shift(1); + try testing.expectEqual(@as(u3, 7), br.alignBits()); + try br.shift(1); + try testing.expectEqual(@as(u3, 6), br.alignBits()); + br.alignToByte(); + try testing.expectEqual(@as(u3, 0), br.alignBits()); + + try testing.expectEqual(@as(u64, 0xc9), br.bits); + try testing.expectEqual(@as(u16, 0x9), try br.readN(4, 0)); + try testing.expectEqual(@as(u16, 0xc), try br.readN(4, 0)); +} + +test "flate.BitReader read block type 1 data" { + const data = [_]u8{ + 0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x57, 0x28, 0xcf, // deflate data block type 1 + 0x2f, 0xca, 0x49, 0xe1, 0x02, 0x00, + 0x0c, 0x01, 0x02, 0x03, // + 0xaa, 0xbb, 0xcc, 0xdd, + }; + var fbs = std.io.fixedBufferStream(&data); + var br = bitReader(fbs.reader()); + const F = BitReader(@TypeOf(fbs.reader())).flag; + + try testing.expectEqual(@as(u1, 1), try br.readF(u1, 0)); // bfinal + try testing.expectEqual(@as(u2, 1), try br.readF(u2, 0)); // block_type + + for ("Hello world\n") |c| { + try testing.expectEqual(@as(u8, c), try br.readF(u8, F.reverse) - 0x30); + } + try testing.expectEqual(@as(u7, 0), try br.readF(u7, 0)); // end of block + br.alignToByte(); + try testing.expectEqual(@as(u32, 0x0302010c), try br.readF(u32, 0)); + try testing.expectEqual(@as(u16, 0xbbaa), try br.readF(u16, 0)); + try testing.expectEqual(@as(u16, 0xddcc), try br.readF(u16, 0)); +} + +test "flate.BitReader init" { + const data = [_]u8{ + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + }; + var 
fbs = std.io.fixedBufferStream(&data); + var br = bitReader(fbs.reader()); + + try testing.expectEqual(@as(u64, 0x08_07_06_05_04_03_02_01), br.bits); + try br.shift(8); + try testing.expectEqual(@as(u64, 0x00_08_07_06_05_04_03_02), br.bits); + try br.fill(60); // fill with 1 byte + try testing.expectEqual(@as(u64, 0x01_08_07_06_05_04_03_02), br.bits); + try br.shift(8 * 4 + 4); + try testing.expectEqual(@as(u64, 0x00_00_00_00_00_10_80_70), br.bits); + + try br.fill(60); // fill with 4 bytes (shift by 4) + try testing.expectEqual(@as(u64, 0x00_50_40_30_20_10_80_70), br.bits); + try testing.expectEqual(@as(u8, 8 * 7 + 4), br.nbits); + + try br.shift(@intCast(br.nbits)); // clear buffer + try br.fill(8); // refill with the rest of the bytes + try testing.expectEqual(@as(u64, 0x00_00_00_00_00_08_07_06), br.bits); +} + +test "flate.BitReader readAll" { + const data = [_]u8{ + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + }; + var fbs = std.io.fixedBufferStream(&data); + var br = bitReader(fbs.reader()); + + try testing.expectEqual(@as(u64, 0x08_07_06_05_04_03_02_01), br.bits); + + var out: [16]u8 = undefined; + try br.readAll(out[0..]); + try testing.expect(br.nbits == 0); + try testing.expect(br.bits == 0); + + try testing.expectEqualSlices(u8, data[0..16], &out); +} + +test "flate.BitReader readFixedCode" { + const fixed_codes = @import("huffman_encoder.zig").fixed_codes; + + var fbs = std.io.fixedBufferStream(&fixed_codes); + var rdr = bitReader(fbs.reader()); + + for (0..286) |c| { + try testing.expectEqual(c, try rdr.readFixedCode()); + } + try testing.expect(rdr.nbits == 0); +} diff --git a/lib/std/compress/flate/bit_writer.zig b/lib/std/compress/flate/bit_writer.zig new file mode 100644 index 0000000000..b5d84c7e2a --- /dev/null +++ b/lib/std/compress/flate/bit_writer.zig @@ -0,0 +1,99 @@ +const std = @import("std"); +const assert = std.debug.assert; + +/// Bit writer for use in deflate (compression). +/// +/// Has internal bits buffer of 64 bits and internal bytes buffer of 248 bytes. +/// When we accumulate 48 bits 6 bytes are moved to the bytes buffer. When we +/// accumulate 240 bytes they are flushed to the underlying inner_writer. +/// +pub fn BitWriter(comptime WriterType: type) type { + // buffer_flush_size indicates the buffer size + // after which bytes are flushed to the writer. + // Should preferably be a multiple of 6, since + // we accumulate 6 bytes between writes to the buffer. + const buffer_flush_size = 240; + + // buffer_size is the actual output byte buffer size. + // It must have additional headroom for a flush + // which can contain up to 8 bytes. + const buffer_size = buffer_flush_size + 8; + + return struct { + inner_writer: WriterType, + + // Data waiting to be written is bytes[0 .. nbytes] + // and then the low nbits of bits. Data is always written + // sequentially into the bytes array. 
+ bits: u64 = 0, + nbits: u32 = 0, // number of bits + bytes: [buffer_size]u8 = undefined, + nbytes: u32 = 0, // number of bytes + + const Self = @This(); + + pub const Error = WriterType.Error || error{UnfinishedBits}; + + pub fn init(writer: WriterType) Self { + return .{ .inner_writer = writer }; + } + + pub fn setWriter(self: *Self, new_writer: WriterType) void { + //assert(self.bits == 0 and self.nbits == 0 and self.nbytes == 0); + self.inner_writer = new_writer; + } + + pub fn flush(self: *Self) Error!void { + var n = self.nbytes; + while (self.nbits != 0) { + self.bytes[n] = @as(u8, @truncate(self.bits)); + self.bits >>= 8; + if (self.nbits > 8) { // Avoid underflow + self.nbits -= 8; + } else { + self.nbits = 0; + } + n += 1; + } + self.bits = 0; + _ = try self.inner_writer.write(self.bytes[0..n]); + self.nbytes = 0; + } + + pub fn writeBits(self: *Self, b: u32, nb: u32) Error!void { + self.bits |= @as(u64, @intCast(b)) << @as(u6, @intCast(self.nbits)); + self.nbits += nb; + if (self.nbits < 48) + return; + + var n = self.nbytes; + std.mem.writeInt(u64, self.bytes[n..][0..8], self.bits, .little); + n += 6; + if (n >= buffer_flush_size) { + _ = try self.inner_writer.write(self.bytes[0..n]); + n = 0; + } + self.nbytes = n; + self.bits >>= 48; + self.nbits -= 48; + } + + pub fn writeBytes(self: *Self, bytes: []const u8) Error!void { + var n = self.nbytes; + if (self.nbits & 7 != 0) { + return error.UnfinishedBits; + } + while (self.nbits != 0) { + self.bytes[n] = @as(u8, @truncate(self.bits)); + self.bits >>= 8; + self.nbits -= 8; + n += 1; + } + if (n != 0) { + _ = try self.inner_writer.write(self.bytes[0..n]); + } + self.nbytes = 0; + _ = try self.inner_writer.write(bytes); + } + }; +} diff --git a/lib/std/compress/flate/block_writer.zig b/lib/std/compress/flate/block_writer.zig new file mode 100644 index 0000000000..394f1cbec5 --- /dev/null +++ b/lib/std/compress/flate/block_writer.zig @@ -0,0 +1,706 @@ +const std = @import("std"); +const io = std.io; +const assert = std.debug.assert; + +const hc = @import("huffman_encoder.zig"); +const consts = @import("consts.zig").huffman; +const Token = @import("Token.zig"); +const BitWriter = @import("bit_writer.zig").BitWriter; + +pub fn blockWriter(writer: anytype) BlockWriter(@TypeOf(writer)) { + return BlockWriter(@TypeOf(writer)).init(writer); +} + +/// Accepts list of tokens, decides what is best block type to write. What block +/// type will provide best compression. Writes header and body of the block. 
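+/// A block can be written as stored (uncompressed), with fixed Huffman codes, or
+/// with dynamic Huffman codes, whichever encoding is the smallest.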
+/// +pub fn BlockWriter(comptime WriterType: type) type { + const BitWriterType = BitWriter(WriterType); + return struct { + const codegen_order = consts.codegen_order; + const end_code_mark = 255; + const Self = @This(); + + pub const Error = BitWriterType.Error; + bit_writer: BitWriterType, + + codegen_freq: [consts.codegen_code_count]u16 = undefined, + literal_freq: [consts.max_num_lit]u16 = undefined, + distance_freq: [consts.distance_code_count]u16 = undefined, + codegen: [consts.max_num_lit + consts.distance_code_count + 1]u8 = undefined, + literal_encoding: hc.LiteralEncoder = .{}, + distance_encoding: hc.DistanceEncoder = .{}, + codegen_encoding: hc.CodegenEncoder = .{}, + fixed_literal_encoding: hc.LiteralEncoder, + fixed_distance_encoding: hc.DistanceEncoder, + huff_distance: hc.DistanceEncoder, + + pub fn init(writer: WriterType) Self { + return .{ + .bit_writer = BitWriterType.init(writer), + .fixed_literal_encoding = hc.fixedLiteralEncoder(), + .fixed_distance_encoding = hc.fixedDistanceEncoder(), + .huff_distance = hc.huffmanDistanceEncoder(), + }; + } + + /// Flush intrenal bit buffer to the writer. + /// Should be called only when bit stream is at byte boundary. + /// + /// That is after final block; when last byte could be incomplete or + /// after stored block; which is aligned to the byte bounday (it has x + /// padding bits after first 3 bits). + pub fn flush(self: *Self) Error!void { + try self.bit_writer.flush(); + } + + pub fn setWriter(self: *Self, new_writer: WriterType) void { + self.bit_writer.setWriter(new_writer); + } + + fn writeCode(self: *Self, c: hc.HuffCode) Error!void { + try self.bit_writer.writeBits(c.code, c.len); + } + + // RFC 1951 3.2.7 specifies a special run-length encoding for specifying + // the literal and distance lengths arrays (which are concatenated into a single + // array). This method generates that run-length encoding. + // + // The result is written into the codegen array, and the frequencies + // of each code is written into the codegen_freq array. + // Codes 0-15 are single byte codes. Codes 16-18 are followed by additional + // information. Code bad_code is an end marker + // + // num_literals: The number of literals in literal_encoding + // num_distances: The number of distances in distance_encoding + // lit_enc: The literal encoder to use + // dist_enc: The distance encoder to use + fn generateCodegen( + self: *Self, + num_literals: u32, + num_distances: u32, + lit_enc: *hc.LiteralEncoder, + dist_enc: *hc.DistanceEncoder, + ) void { + for (self.codegen_freq, 0..) |_, i| { + self.codegen_freq[i] = 0; + } + + // Note that we are using codegen both as a temporary variable for holding + // a copy of the frequencies, and as the place where we put the result. + // This is fine because the output is always shorter than the input used + // so far. + var codegen = &self.codegen; // cache + // Copy the concatenated code sizes to codegen. Put a marker at the end. + var cgnl = codegen[0..num_literals]; + for (cgnl, 0..) |_, i| { + cgnl[i] = @as(u8, @intCast(lit_enc.codes[i].len)); + } + + cgnl = codegen[num_literals .. num_literals + num_distances]; + for (cgnl, 0..) |_, i| { + cgnl[i] = @as(u8, @intCast(dist_enc.codes[i].len)); + } + codegen[num_literals + num_distances] = end_code_mark; + + var size = codegen[0]; + var count: i32 = 1; + var out_index: u32 = 0; + var in_index: u32 = 1; + while (size != end_code_mark) : (in_index += 1) { + // INVARIANT: We have seen "count" copies of size that have not yet + // had output generated for them. 
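+                // For example, seven consecutive length-8 entries are emitted as a
+                // single 8 followed by code 16 with repeat value 6 (stored as 6 - 3 = 3).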
+ const next_size = codegen[in_index]; + if (next_size == size) { + count += 1; + continue; + } + // We need to generate codegen indicating "count" of size. + if (size != 0) { + codegen[out_index] = size; + out_index += 1; + self.codegen_freq[size] += 1; + count -= 1; + while (count >= 3) { + var n: i32 = 6; + if (n > count) { + n = count; + } + codegen[out_index] = 16; + out_index += 1; + codegen[out_index] = @as(u8, @intCast(n - 3)); + out_index += 1; + self.codegen_freq[16] += 1; + count -= n; + } + } else { + while (count >= 11) { + var n: i32 = 138; + if (n > count) { + n = count; + } + codegen[out_index] = 18; + out_index += 1; + codegen[out_index] = @as(u8, @intCast(n - 11)); + out_index += 1; + self.codegen_freq[18] += 1; + count -= n; + } + if (count >= 3) { + // 3 <= count <= 10 + codegen[out_index] = 17; + out_index += 1; + codegen[out_index] = @as(u8, @intCast(count - 3)); + out_index += 1; + self.codegen_freq[17] += 1; + count = 0; + } + } + count -= 1; + while (count >= 0) : (count -= 1) { + codegen[out_index] = size; + out_index += 1; + self.codegen_freq[size] += 1; + } + // Set up invariant for next time through the loop. + size = next_size; + count = 1; + } + // Marker indicating the end of the codegen. + codegen[out_index] = end_code_mark; + } + + const DynamicSize = struct { + size: u32, + num_codegens: u32, + }; + + // dynamicSize returns the size of dynamically encoded data in bits. + fn dynamicSize( + self: *Self, + lit_enc: *hc.LiteralEncoder, // literal encoder + dist_enc: *hc.DistanceEncoder, // distance encoder + extra_bits: u32, + ) DynamicSize { + var num_codegens = self.codegen_freq.len; + while (num_codegens > 4 and self.codegen_freq[codegen_order[num_codegens - 1]] == 0) { + num_codegens -= 1; + } + const header = 3 + 5 + 5 + 4 + (3 * num_codegens) + + self.codegen_encoding.bitLength(self.codegen_freq[0..]) + + self.codegen_freq[16] * 2 + + self.codegen_freq[17] * 3 + + self.codegen_freq[18] * 7; + const size = header + + lit_enc.bitLength(&self.literal_freq) + + dist_enc.bitLength(&self.distance_freq) + + extra_bits; + + return DynamicSize{ + .size = @as(u32, @intCast(size)), + .num_codegens = @as(u32, @intCast(num_codegens)), + }; + } + + // fixedSize returns the size of dynamically encoded data in bits. + fn fixedSize(self: *Self, extra_bits: u32) u32 { + return 3 + + self.fixed_literal_encoding.bitLength(&self.literal_freq) + + self.fixed_distance_encoding.bitLength(&self.distance_freq) + + extra_bits; + } + + const StoredSize = struct { + size: u32, + storable: bool, + }; + + // storedSizeFits calculates the stored size, including header. + // The function returns the size in bits and whether the block + // fits inside a single block. + fn storedSizeFits(in: ?[]const u8) StoredSize { + if (in == null) { + return .{ .size = 0, .storable = false }; + } + if (in.?.len <= consts.max_store_block_size) { + return .{ .size = @as(u32, @intCast((in.?.len + 5) * 8)), .storable = true }; + } + return .{ .size = 0, .storable = false }; + } + + // Write the header of a dynamic Huffman block to the output stream. + // + // num_literals: The number of literals specified in codegen + // num_distances: The number of distances specified in codegen + // num_codegens: The number of codegens used in codegen + // eof: Is it the end-of-file? 
(end of stream) + fn dynamicHeader( + self: *Self, + num_literals: u32, + num_distances: u32, + num_codegens: u32, + eof: bool, + ) Error!void { + const first_bits: u32 = if (eof) 5 else 4; + try self.bit_writer.writeBits(first_bits, 3); + try self.bit_writer.writeBits(num_literals - 257, 5); + try self.bit_writer.writeBits(num_distances - 1, 5); + try self.bit_writer.writeBits(num_codegens - 4, 4); + + var i: u32 = 0; + while (i < num_codegens) : (i += 1) { + const value = self.codegen_encoding.codes[codegen_order[i]].len; + try self.bit_writer.writeBits(value, 3); + } + + i = 0; + while (true) { + const code_word: u32 = @as(u32, @intCast(self.codegen[i])); + i += 1; + if (code_word == end_code_mark) { + break; + } + try self.writeCode(self.codegen_encoding.codes[@as(u32, @intCast(code_word))]); + + switch (code_word) { + 16 => { + try self.bit_writer.writeBits(self.codegen[i], 2); + i += 1; + }, + 17 => { + try self.bit_writer.writeBits(self.codegen[i], 3); + i += 1; + }, + 18 => { + try self.bit_writer.writeBits(self.codegen[i], 7); + i += 1; + }, + else => {}, + } + } + } + + fn storedHeader(self: *Self, length: usize, eof: bool) Error!void { + assert(length <= 65535); + const flag: u32 = if (eof) 1 else 0; + try self.bit_writer.writeBits(flag, 3); + try self.flush(); + const l: u16 = @intCast(length); + try self.bit_writer.writeBits(l, 16); + try self.bit_writer.writeBits(~l, 16); + } + + fn fixedHeader(self: *Self, eof: bool) Error!void { + // Indicate that we are a fixed Huffman block + var value: u32 = 2; + if (eof) { + value = 3; + } + try self.bit_writer.writeBits(value, 3); + } + + // Write a block of tokens with the smallest encoding. Will choose block type. + // The original input can be supplied, and if the huffman encoded data + // is larger than the original bytes, the data will be written as a + // stored block. + // If the input is null, the tokens will always be Huffman encoded. + pub fn write(self: *Self, tokens: []const Token, eof: bool, input: ?[]const u8) Error!void { + const lit_and_dist = self.indexTokens(tokens); + const num_literals = lit_and_dist.num_literals; + const num_distances = lit_and_dist.num_distances; + + var extra_bits: u32 = 0; + const ret = storedSizeFits(input); + const stored_size = ret.size; + const storable = ret.storable; + + if (storable) { + // We only bother calculating the costs of the extra bits required by + // the length of distance fields (which will be the same for both fixed + // and dynamic encoding), if we need to compare those two encodings + // against stored encoding. + var length_code: u16 = Token.length_codes_start + 8; + while (length_code < num_literals) : (length_code += 1) { + // First eight length codes have extra size = 0. + extra_bits += @as(u32, @intCast(self.literal_freq[length_code])) * + @as(u32, @intCast(Token.lengthExtraBits(length_code))); + } + var distance_code: u16 = 4; + while (distance_code < num_distances) : (distance_code += 1) { + // First four distance codes have extra size = 0. + extra_bits += @as(u32, @intCast(self.distance_freq[distance_code])) * + @as(u32, @intCast(Token.distanceExtraBits(distance_code))); + } + } + + // Figure out smallest code. + // Fixed Huffman baseline. + var literal_encoding = &self.fixed_literal_encoding; + var distance_encoding = &self.fixed_distance_encoding; + var size = self.fixedSize(extra_bits); + + // Dynamic Huffman? 
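+            // Next, build the dynamic Huffman encoding and compare its size against
+            // the fixed baseline above and the stored size; the smallest wins.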
+ var num_codegens: u32 = 0; + + // Generate codegen and codegenFrequencies, which indicates how to encode + // the literal_encoding and the distance_encoding. + self.generateCodegen( + num_literals, + num_distances, + &self.literal_encoding, + &self.distance_encoding, + ); + self.codegen_encoding.generate(self.codegen_freq[0..], 7); + const dynamic_size = self.dynamicSize( + &self.literal_encoding, + &self.distance_encoding, + extra_bits, + ); + const dyn_size = dynamic_size.size; + num_codegens = dynamic_size.num_codegens; + + if (dyn_size < size) { + size = dyn_size; + literal_encoding = &self.literal_encoding; + distance_encoding = &self.distance_encoding; + } + + // Stored bytes? + if (storable and stored_size < size) { + try self.storedBlock(input.?, eof); + return; + } + + // Huffman. + if (@intFromPtr(literal_encoding) == @intFromPtr(&self.fixed_literal_encoding)) { + try self.fixedHeader(eof); + } else { + try self.dynamicHeader(num_literals, num_distances, num_codegens, eof); + } + + // Write the tokens. + try self.writeTokens(tokens, &literal_encoding.codes, &distance_encoding.codes); + } + + pub fn storedBlock(self: *Self, input: []const u8, eof: bool) Error!void { + try self.storedHeader(input.len, eof); + try self.bit_writer.writeBytes(input); + } + + // writeBlockDynamic encodes a block using a dynamic Huffman table. + // This should be used if the symbols used have a disproportionate + // histogram distribution. + // If input is supplied and the compression savings are below 1/16th of the + // input size the block is stored. + fn dynamicBlock( + self: *Self, + tokens: []const Token, + eof: bool, + input: ?[]const u8, + ) Error!void { + const total_tokens = self.indexTokens(tokens); + const num_literals = total_tokens.num_literals; + const num_distances = total_tokens.num_distances; + + // Generate codegen and codegenFrequencies, which indicates how to encode + // the literal_encoding and the distance_encoding. + self.generateCodegen( + num_literals, + num_distances, + &self.literal_encoding, + &self.distance_encoding, + ); + self.codegen_encoding.generate(self.codegen_freq[0..], 7); + const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.distance_encoding, 0); + const size = dynamic_size.size; + const num_codegens = dynamic_size.num_codegens; + + // Store bytes, if we don't get a reasonable improvement. + + const stored_size = storedSizeFits(input); + const ssize = stored_size.size; + const storable = stored_size.storable; + if (storable and ssize < (size + (size >> 4))) { + try self.storedBlock(input.?, eof); + return; + } + + // Write Huffman table. + try self.dynamicHeader(num_literals, num_distances, num_codegens, eof); + + // Write the tokens. + try self.writeTokens(tokens, &self.literal_encoding.codes, &self.distance_encoding.codes); + } + + const TotalIndexedTokens = struct { + num_literals: u32, + num_distances: u32, + }; + + // Indexes a slice of tokens followed by an end_block_marker, and updates + // literal_freq and distance_freq, and generates literal_encoding + // and distance_encoding. + // The number of literal and distance tokens is returned. + fn indexTokens(self: *Self, tokens: []const Token) TotalIndexedTokens { + var num_literals: u32 = 0; + var num_distances: u32 = 0; + + for (self.literal_freq, 0..) |_, i| { + self.literal_freq[i] = 0; + } + for (self.distance_freq, 0..) 
|_, i| { + self.distance_freq[i] = 0; + } + + for (tokens) |t| { + if (t.kind == Token.Kind.literal) { + self.literal_freq[t.literal()] += 1; + continue; + } + self.literal_freq[t.lengthCode()] += 1; + self.distance_freq[t.distanceCode()] += 1; + } + // add end_block_marker token at the end + self.literal_freq[consts.end_block_marker] += 1; + + // get the number of literals + num_literals = @as(u32, @intCast(self.literal_freq.len)); + while (self.literal_freq[num_literals - 1] == 0) { + num_literals -= 1; + } + // get the number of distances + num_distances = @as(u32, @intCast(self.distance_freq.len)); + while (num_distances > 0 and self.distance_freq[num_distances - 1] == 0) { + num_distances -= 1; + } + if (num_distances == 0) { + // We haven't found a single match. If we want to go with the dynamic encoding, + // we should count at least one distance to be sure that the distance huffman tree could be encoded. + self.distance_freq[0] = 1; + num_distances = 1; + } + self.literal_encoding.generate(&self.literal_freq, 15); + self.distance_encoding.generate(&self.distance_freq, 15); + return TotalIndexedTokens{ + .num_literals = num_literals, + .num_distances = num_distances, + }; + } + + // Writes a slice of tokens to the output followed by and end_block_marker. + // codes for literal and distance encoding must be supplied. + fn writeTokens( + self: *Self, + tokens: []const Token, + le_codes: []hc.HuffCode, + oe_codes: []hc.HuffCode, + ) Error!void { + for (tokens) |t| { + if (t.kind == Token.Kind.literal) { + try self.writeCode(le_codes[t.literal()]); + continue; + } + + // Write the length + const le = t.lengthEncoding(); + try self.writeCode(le_codes[le.code]); + if (le.extra_bits > 0) { + try self.bit_writer.writeBits(le.extra_length, le.extra_bits); + } + + // Write the distance + const oe = t.distanceEncoding(); + try self.writeCode(oe_codes[oe.code]); + if (oe.extra_bits > 0) { + try self.bit_writer.writeBits(oe.extra_distance, oe.extra_bits); + } + } + // add end_block_marker at the end + try self.writeCode(le_codes[consts.end_block_marker]); + } + + // Encodes a block of bytes as either Huffman encoded literals or uncompressed bytes + // if the results only gains very little from compression. + pub fn huffmanBlock(self: *Self, input: []const u8, eof: bool) Error!void { + // Add everything as literals + histogram(input, &self.literal_freq); + + self.literal_freq[consts.end_block_marker] = 1; + + const num_literals = consts.end_block_marker + 1; + self.distance_freq[0] = 1; + const num_distances = 1; + + self.literal_encoding.generate(&self.literal_freq, 15); + + // Figure out smallest code. + // Always use dynamic Huffman or Store + var num_codegens: u32 = 0; + + // Generate codegen and codegenFrequencies, which indicates how to encode + // the literal_encoding and the distance_encoding. + self.generateCodegen( + num_literals, + num_distances, + &self.literal_encoding, + &self.huff_distance, + ); + self.codegen_encoding.generate(self.codegen_freq[0..], 7); + const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.huff_distance, 0); + const size = dynamic_size.size; + num_codegens = dynamic_size.num_codegens; + + // Store bytes, if we don't get a reasonable improvement. + const stored_size_ret = storedSizeFits(input); + const ssize = stored_size_ret.size; + const storable = stored_size_ret.storable; + + if (storable and ssize < (size + (size >> 4))) { + try self.storedBlock(input, eof); + return; + } + + // Huffman. 
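+            // Write the dynamic header and then emit every input byte as a
+            // literal code from the generated table; no match tokens are
+            // produced in this mode.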
+ try self.dynamicHeader(num_literals, num_distances, num_codegens, eof); + const encoding = self.literal_encoding.codes[0..257]; + + for (input) |t| { + const c = encoding[t]; + try self.bit_writer.writeBits(c.code, c.len); + } + try self.writeCode(encoding[consts.end_block_marker]); + } + + // histogram accumulates a histogram of b in h. + fn histogram(b: []const u8, h: *[286]u16) void { + // Clear histogram + for (h, 0..) |_, i| { + h[i] = 0; + } + + var lh = h.*[0..256]; + for (b) |t| { + lh[t] += 1; + } + } + }; +} + +// tests +const expect = std.testing.expect; +const fmt = std.fmt; +const testing = std.testing; +const ArrayList = std.ArrayList; + +const TestCase = @import("testdata/block_writer.zig").TestCase; +const testCases = @import("testdata/block_writer.zig").testCases; + +// tests if the writeBlock encoding has changed. +test "flate.BlockWriter write" { + inline for (0..testCases.len) |i| { + try testBlock(testCases[i], .write_block); + } +} + +// tests if the writeBlockDynamic encoding has changed. +test "flate.BlockWriter dynamicBlock" { + inline for (0..testCases.len) |i| { + try testBlock(testCases[i], .write_dyn_block); + } +} + +test "flate.BlockWriter huffmanBlock" { + inline for (0..testCases.len) |i| { + try testBlock(testCases[i], .write_huffman_block); + } + try testBlock(.{ + .tokens = &[_]Token{}, + .input = "huffman-rand-max.input", + .want = "huffman-rand-max.{s}.expect", + }, .write_huffman_block); +} + +const TestFn = enum { + write_block, + write_dyn_block, // write dynamic block + write_huffman_block, + + fn to_s(self: TestFn) []const u8 { + return switch (self) { + .write_block => "wb", + .write_dyn_block => "dyn", + .write_huffman_block => "huff", + }; + } + + fn write( + comptime self: TestFn, + bw: anytype, + tok: []const Token, + input: ?[]const u8, + final: bool, + ) !void { + switch (self) { + .write_block => try bw.write(tok, final, input), + .write_dyn_block => try bw.dynamicBlock(tok, final, input), + .write_huffman_block => try bw.huffmanBlock(input.?, final), + } + try bw.flush(); + } +}; + +// testBlock tests a block against its references +// +// size +// 64K [file-name].input - input non compressed file +// 8.1K [file-name].golden - +// 78 [file-name].dyn.expect - output with writeBlockDynamic +// 78 [file-name].wb.expect - output with writeBlock +// 8.1K [file-name].huff.expect - output with writeBlockHuff +// 78 [file-name].dyn.expect-noinput - output with writeBlockDynamic when input is null +// 78 [file-name].wb.expect-noinput - output with writeBlock when input is null +// +// wb - writeBlock +// dyn - writeBlockDynamic +// huff - writeBlockHuff +// +fn testBlock(comptime tc: TestCase, comptime tfn: TestFn) !void { + if (tc.input.len != 0 and tc.want.len != 0) { + const want_name = comptime fmt.comptimePrint(tc.want, .{tfn.to_s()}); + const input = @embedFile("testdata/block_writer/" ++ tc.input); + const want = @embedFile("testdata/block_writer/" ++ want_name); + try testWriteBlock(tfn, input, want, tc.tokens); + } + + if (tfn == .write_huffman_block) { + return; + } + + const want_name_no_input = comptime fmt.comptimePrint(tc.want_no_input, .{tfn.to_s()}); + const want = @embedFile("testdata/block_writer/" ++ want_name_no_input); + try testWriteBlock(tfn, null, want, tc.tokens); +} + +// Uses writer function `tfn` to write `tokens`, tests that we got `want` as output. 
+fn testWriteBlock(comptime tfn: TestFn, input: ?[]const u8, want: []const u8, tokens: []const Token) !void {
+    var buf = ArrayList(u8).init(testing.allocator);
+    var bw = blockWriter(buf.writer());
+    try tfn.write(&bw, tokens, input, false);
+    var got = buf.items;
+    try testing.expectEqualSlices(u8, want, got); // expect writeBlock to yield expected result
+    try expect(got[0] & 0b0000_0001 == 0); // bfinal is not set
+    //
+    // Test if the writer produces the same output after reset.
+    buf.deinit();
+    buf = ArrayList(u8).init(testing.allocator);
+    defer buf.deinit();
+    bw.setWriter(buf.writer());
+
+    try tfn.write(&bw, tokens, input, true);
+    try bw.flush();
+    got = buf.items;
+
+    try expect(got[0] & 1 == 1); // bfinal is set
+    buf.items[0] &= 0b1111_1110; // clear the bfinal bit so we can compare against the expected slice
+    try testing.expectEqualSlices(u8, want, got); // expect writeBlock to yield expected result
+}
diff --git a/lib/std/compress/flate/consts.zig b/lib/std/compress/flate/consts.zig
new file mode 100644
index 0000000000..c28b40f68e
--- /dev/null
+++ b/lib/std/compress/flate/consts.zig
@@ -0,0 +1,49 @@
+pub const deflate = struct {
+    // Number of tokens to accumulate in deflate before starting block encoding.
+    //
+    // In zlib this depends on memlevel: 6 + memlevel, where the default memlevel is
+    // 8 and the max is 9, giving 14 or 15 bits.
+    pub const tokens = 1 << 15;
+};
+
+pub const match = struct {
+    pub const base_length = 3; // smallest match length per the RFC section 3.2.5
+    pub const min_length = 4; // min length used in this algorithm
+    pub const max_length = 258;
+
+    pub const min_distance = 1;
+    pub const max_distance = 32768;
+};
+
+pub const history = struct {
+    pub const len = match.max_distance;
+};
+
+pub const lookup = struct {
+    pub const bits = 15;
+    pub const len = 1 << bits;
+    pub const shift = 32 - bits;
+};
+
+pub const huffman = struct {
+    // The odd order in which the codegen code sizes are written.
+    pub const codegen_order = [_]u32{ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
+    // The number of codegen codes.
+    pub const codegen_code_count = 19;
+
+    // The largest distance code.
+    pub const distance_code_count = 30;
+
+    // Maximum number of literals.
+    pub const max_num_lit = 286;
+
+    // Max number of frequencies used for a Huffman Code.
+    // Possible lengths are codegen_code_count (19), distance_code_count (30) and max_num_lit (286).
+    // The largest of these is max_num_lit.
+    pub const max_num_frequencies = max_num_lit;
+
+    // Biggest block size for uncompressed block.
+    pub const max_store_block_size = 65535;
+    // The special code used to mark the end of a block.
+    pub const end_block_marker = 256;
+};
diff --git a/lib/std/compress/flate/container.zig b/lib/std/compress/flate/container.zig
new file mode 100644
index 0000000000..9e6f742757
--- /dev/null
+++ b/lib/std/compress/flate/container.zig
@@ -0,0 +1,205 @@
+const std = @import("std");
+
+/// Container of the deflate bit stream body. The container adds a header before
+/// the deflate bit stream and a footer after it. It can be gzip, zlib or raw
+/// (no header, no footer, just the raw bit stream).
+///
+/// The zlib format is defined in rfc 1950. Its header has 2 bytes and its footer
+/// is a 4 byte Adler-32 checksum.
+///
+/// The gzip format is defined in rfc 1952. Its header has 10+ bytes and its footer
+/// is a 4 byte CRC-32 checksum followed by 4 bytes of uncompressed data length.
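+///
+/// A rough usage sketch (the real plumbing lives in deflate.zig; `writer` stands
+/// for any writer and `hasher` for the matching `Hasher(.gzip)` state):
+///
+///     try Container.gzip.writeHeader(writer);
+///     // ... deflate bit stream body, fed through hasher.update() ...
+///     try Container.gzip.writeFooter(&hasher, writer);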
+/// +/// +/// rfc 1950: https://datatracker.ietf.org/doc/html/rfc1950#page-4 +/// rfc 1952: https://datatracker.ietf.org/doc/html/rfc1952#page-5 +/// +pub const Container = enum { + raw, // no header or footer + gzip, // gzip header and footer + zlib, // zlib header and footer + + pub fn size(w: Container) usize { + return headerSize(w) + footerSize(w); + } + + pub fn headerSize(w: Container) usize { + return switch (w) { + .gzip => 10, + .zlib => 2, + .raw => 0, + }; + } + + pub fn footerSize(w: Container) usize { + return switch (w) { + .gzip => 8, + .zlib => 4, + .raw => 0, + }; + } + + pub const list = [_]Container{ .raw, .gzip, .zlib }; + + pub const Error = error{ + BadGzipHeader, + BadZlibHeader, + WrongGzipChecksum, + WrongGzipSize, + WrongZlibChecksum, + }; + + pub fn writeHeader(comptime wrap: Container, writer: anytype) !void { + switch (wrap) { + .gzip => { + // GZIP 10 byte header (https://datatracker.ietf.org/doc/html/rfc1952#page-5): + // - ID1 (IDentification 1), always 0x1f + // - ID2 (IDentification 2), always 0x8b + // - CM (Compression Method), always 8 = deflate + // - FLG (Flags), all set to 0 + // - 4 bytes, MTIME (Modification time), not used, all set to zero + // - XFL (eXtra FLags), all set to zero + // - OS (Operating System), 03 = Unix + const gzipHeader = [_]u8{ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03 }; + try writer.writeAll(&gzipHeader); + }, + .zlib => { + // ZLIB has a two-byte header (https://datatracker.ietf.org/doc/html/rfc1950#page-4): + // 1st byte: + // - First four bits is the CINFO (compression info), which is 7 for the default deflate window size. + // - The next four bits is the CM (compression method), which is 8 for deflate. + // 2nd byte: + // - Two bits is the FLEVEL (compression level). Values are: 0=fastest, 1=fast, 2=default, 3=best. + // - The next bit, FDICT, is set if a dictionary is given. + // - The final five FCHECK bits form a mod-31 checksum. + // + // CINFO = 7, CM = 8, FLEVEL = 0b10, FDICT = 0, FCHECK = 0b11100 + const zlibHeader = [_]u8{ 0x78, 0b10_0_11100 }; + try writer.writeAll(&zlibHeader); + }, + .raw => {}, + } + } + + pub fn writeFooter(comptime wrap: Container, hasher: *Hasher(wrap), writer: anytype) !void { + var bits: [4]u8 = undefined; + switch (wrap) { + .gzip => { + // GZIP 8 bytes footer + // - 4 bytes, CRC32 (CRC-32) + // - 4 bytes, ISIZE (Input SIZE) - size of the original (uncompressed) input data modulo 2^32 + std.mem.writeInt(u32, &bits, hasher.chksum(), .little); + try writer.writeAll(&bits); + + std.mem.writeInt(u32, &bits, hasher.bytesRead(), .little); + try writer.writeAll(&bits); + }, + .zlib => { + // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952). + // 4 bytes of ADLER32 (Adler-32 checksum) + // Checksum value of the uncompressed data (excluding any + // dictionary data) computed according to Adler-32 + // algorithm. 
+ std.mem.writeInt(u32, &bits, hasher.chksum(), .big); + try writer.writeAll(&bits); + }, + .raw => {}, + } + } + + pub fn parseHeader(comptime wrap: Container, reader: anytype) !void { + switch (wrap) { + .gzip => try parseGzipHeader(reader), + .zlib => try parseZlibHeader(reader), + .raw => {}, + } + } + + fn parseGzipHeader(reader: anytype) !void { + const magic1 = try reader.read(u8); + const magic2 = try reader.read(u8); + const method = try reader.read(u8); + const flags = try reader.read(u8); + try reader.skipBytes(6); // mtime(4), xflags, os + if (magic1 != 0x1f or magic2 != 0x8b or method != 0x08) + return error.BadGzipHeader; + // Flags description: https://www.rfc-editor.org/rfc/rfc1952.html#page-5 + if (flags != 0) { + if (flags & 0b0000_0100 != 0) { // FEXTRA + const extra_len = try reader.read(u16); + try reader.skipBytes(extra_len); + } + if (flags & 0b0000_1000 != 0) { // FNAME + try reader.skipStringZ(); + } + if (flags & 0b0001_0000 != 0) { // FCOMMENT + try reader.skipStringZ(); + } + if (flags & 0b0000_0010 != 0) { // FHCRC + try reader.skipBytes(2); + } + } + } + + fn parseZlibHeader(reader: anytype) !void { + const cinfo_cm = try reader.read(u8); + _ = try reader.read(u8); + if (cinfo_cm != 0x78) { + return error.BadZlibHeader; + } + } + + pub fn parseFooter(comptime wrap: Container, hasher: *Hasher(wrap), reader: anytype) !void { + switch (wrap) { + .gzip => { + if (try reader.read(u32) != hasher.chksum()) return error.WrongGzipChecksum; + if (try reader.read(u32) != hasher.bytesRead()) return error.WrongGzipSize; + }, + .zlib => { + const chksum: u32 = @byteSwap(hasher.chksum()); + if (try reader.read(u32) != chksum) return error.WrongZlibChecksum; + }, + .raw => {}, + } + } + + pub fn Hasher(comptime wrap: Container) type { + const HasherType = switch (wrap) { + .gzip => std.hash.Crc32, + .zlib => std.hash.Adler32, + .raw => struct { + pub fn init() @This() { + return .{}; + } + }, + }; + + return struct { + hasher: HasherType = HasherType.init(), + bytes: usize = 0, + + const Self = @This(); + + pub fn update(self: *Self, buf: []const u8) void { + switch (wrap) { + .raw => {}, + else => { + self.hasher.update(buf); + self.bytes += buf.len; + }, + } + } + + pub fn chksum(self: *Self) u32 { + switch (wrap) { + .raw => return 0, + else => return self.hasher.final(), + } + } + + pub fn bytesRead(self: *Self) u32 { + return @truncate(self.bytes); + } + }; + } +}; diff --git a/lib/std/compress/flate/deflate.zig b/lib/std/compress/flate/deflate.zig new file mode 100644 index 0000000000..eac24aa301 --- /dev/null +++ b/lib/std/compress/flate/deflate.zig @@ -0,0 +1,783 @@ +const std = @import("std"); +const io = std.io; +const assert = std.debug.assert; +const testing = std.testing; +const expect = testing.expect; +const print = std.debug.print; + +const Token = @import("Token.zig"); +const consts = @import("consts.zig"); +const BlockWriter = @import("block_writer.zig").BlockWriter; +const Container = @import("container.zig").Container; +const SlidingWindow = @import("SlidingWindow.zig"); +const Lookup = @import("Lookup.zig"); + +pub const Options = struct { + level: Level = .default, +}; + +/// Trades between speed and compression size. +/// Starts with level 4: in [zlib](https://github.com/madler/zlib/blob/abd3d1a28930f89375d4b41408b39f6c1be157b2/deflate.c#L115C1-L117C43) +/// levels 1-3 are using different algorithm to perform faster but with less +/// compression. That is not implemented here. 
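+///
+/// The level is selected through `Options`, for example (a sketch; `output`
+/// stands for any writer):
+///
+///     var cmp = try compressor(.raw, output, .{ .level = .best });
+///     _ = try cmp.write("data to compress");
+///     try cmp.finish();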
+pub const Level = enum(u4) { + // zig fmt: off + fast = 0xb, level_4 = 4, + level_5 = 5, + default = 0xc, level_6 = 6, + level_7 = 7, + level_8 = 8, + best = 0xd, level_9 = 9, + // zig fmt: on +}; + +/// Algorithm knobs for each level. +const LevelArgs = struct { + good: u16, // Do less lookups if we already have match of this length. + nice: u16, // Stop looking for better match if we found match with at least this length. + lazy: u16, // Don't do lazy match find if got match with at least this length. + chain: u16, // How many lookups for previous match to perform. + + pub fn get(level: Level) LevelArgs { + // zig fmt: off + return switch (level) { + .fast, .level_4 => .{ .good = 4, .lazy = 4, .nice = 16, .chain = 16 }, + .level_5 => .{ .good = 8, .lazy = 16, .nice = 32, .chain = 32 }, + .default, .level_6 => .{ .good = 8, .lazy = 16, .nice = 128, .chain = 128 }, + .level_7 => .{ .good = 8, .lazy = 32, .nice = 128, .chain = 256 }, + .level_8 => .{ .good = 32, .lazy = 128, .nice = 258, .chain = 1024 }, + .best, .level_9 => .{ .good = 32, .lazy = 258, .nice = 258, .chain = 4096 }, + }; + // zig fmt: on + } +}; + +/// Compress plain data from reader into compressed stream written to writer. +pub fn compress(comptime container: Container, reader: anytype, writer: anytype, options: Options) !void { + var c = try compressor(container, writer, options); + try c.compress(reader); + try c.finish(); +} + +/// Create compressor for writer type. +pub fn compressor(comptime container: Container, writer: anytype, options: Options) !Compressor( + container, + @TypeOf(writer), +) { + return try Compressor(container, @TypeOf(writer)).init(writer, options); +} + +/// Compressor type. +pub fn Compressor(comptime container: Container, comptime WriterType: type) type { + const TokenWriterType = BlockWriter(WriterType); + return Deflate(container, WriterType, TokenWriterType); +} + +/// Default compression algorithm. Has two steps: tokenization and token +/// encoding. +/// +/// Tokenization takes uncompressed input stream and produces list of tokens. +/// Each token can be literal (byte of data) or match (backrefernce to previous +/// data with length and distance). Tokenization accumulators 32K tokens, when +/// full or `flush` is called tokens are passed to the `block_writer`. Level +/// defines how hard (how slow) it tries to find match. +/// +/// Block writer will decide which type of deflate block to write (stored, fixed, +/// dynamic) and encode tokens to the output byte stream. Client has to call +/// `finish` to write block with the final bit set. +/// +/// Container defines type of header and footer which can be gzip, zlib or raw. +/// They all share same deflate body. Raw has no header or footer just deflate +/// body. +/// +/// Compression algorithm explained in rfc-1951 (slightly edited for this case): +/// +/// The compressor uses a chained hash table `lookup` to find duplicated +/// strings, using a hash function that operates on 4-byte sequences. At any +/// given point during compression, let XYZW be the next 4 input bytes +/// (lookahead) to be examined (not necessarily all different, of course). +/// First, the compressor examines the hash chain for XYZW. If the chain is +/// empty, the compressor simply writes out X as a literal byte and advances +/// one byte in the input. 
If the hash chain is not empty, indicating that the +/// sequence XYZW (or, if we are unlucky, some other 4 bytes with the same +/// hash function value) has occurred recently, the compressor compares all +/// strings on the XYZW hash chain with the actual input data sequence +/// starting at the current point, and selects the longest match. +/// +/// To improve overall compression, the compressor defers the selection of +/// matches ("lazy matching"): after a match of length N has been found, the +/// compressor searches for a longer match starting at the next input byte. If +/// it finds a longer match, it truncates the previous match to a length of +/// one (thus producing a single literal byte) and then emits the longer +/// match. Otherwise, it emits the original match, and, as described above, +/// advances N bytes before continuing. +/// +/// +/// Allocates statically ~400K (192K lookup, 128K tokens, 64K window). +/// +/// Deflate function accepts BlockWriterType so we can change that in test to test +/// just tokenization part. +/// +fn Deflate(comptime container: Container, comptime WriterType: type, comptime BlockWriterType: type) type { + return struct { + lookup: Lookup = .{}, + win: SlidingWindow = .{}, + tokens: Tokens = .{}, + wrt: WriterType, + block_writer: BlockWriterType, + level: LevelArgs, + hasher: container.Hasher() = .{}, + + // Match and literal at the previous position. + // Used for lazy match finding in processWindow. + prev_match: ?Token = null, + prev_literal: ?u8 = null, + + const Self = @This(); + + pub fn init(wrt: WriterType, options: Options) !Self { + const self = Self{ + .wrt = wrt, + .block_writer = BlockWriterType.init(wrt), + .level = LevelArgs.get(options.level), + }; + try container.writeHeader(self.wrt); + return self; + } + + const FlushOption = enum { none, flush, final }; + + // Process data in window and create tokens. If token buffer is full + // flush tokens to the token writer. In the case of `flush` or `final` + // option it will process all data from the window. In the `none` case + // it will preserve some data for the next match. + fn tokenize(self: *Self, flush_opt: FlushOption) !void { + // flush - process all data from window + const should_flush = (flush_opt != .none); + + // While there is data in active lookahead buffer. + while (self.win.activeLookahead(should_flush)) |lh| { + var step: u16 = 1; // 1 in the case of literal, match length otherwise + const pos: u16 = self.win.pos(); + const literal = lh[0]; // literal at current position + const min_len: u16 = if (self.prev_match) |m| m.length() else 0; + + // Try to find match at least min_len long. + if (self.findMatch(pos, lh, min_len)) |match| { + // Found better match than previous. + try self.addPrevLiteral(); + + // Is found match length good enough? + if (match.length() >= self.level.lazy) { + // Don't try to lazy find better match, use this. + step = try self.addMatch(match); + } else { + // Store this match. + self.prev_literal = literal; + self.prev_match = match; + } + } else { + // There is no better match at current pos then it was previous. + // Write previous match or literal. + if (self.prev_match) |m| { + // Write match from previous position. + step = try self.addMatch(m) - 1; // we already advanced 1 from previous position + } else { + // No match at previous postition. + // Write previous literal if any, and remember this literal. + try self.addPrevLiteral(); + self.prev_literal = literal; + } + } + // Advance window and add hashes. 
+ self.windowAdvance(step, lh, pos); + } + + if (should_flush) { + // In the case of flushing, last few lookahead buffers were smaller then min match len. + // So only last literal can be unwritten. + assert(self.prev_match == null); + try self.addPrevLiteral(); + self.prev_literal = null; + + try self.flushTokens(flush_opt); + } + } + + fn windowAdvance(self: *Self, step: u16, lh: []const u8, pos: u16) void { + // current position is already added in findMatch + self.lookup.bulkAdd(lh[1..], step - 1, pos + 1); + self.win.advance(step); + } + + // Add previous literal (if any) to the tokens list. + fn addPrevLiteral(self: *Self) !void { + if (self.prev_literal) |l| try self.addToken(Token.initLiteral(l)); + } + + // Add match to the tokens list, reset prev pointers. + // Returns length of the added match. + fn addMatch(self: *Self, m: Token) !u16 { + try self.addToken(m); + self.prev_literal = null; + self.prev_match = null; + return m.length(); + } + + fn addToken(self: *Self, token: Token) !void { + self.tokens.add(token); + if (self.tokens.full()) try self.flushTokens(.none); + } + + // Finds largest match in the history window with the data at current pos. + fn findMatch(self: *Self, pos: u16, lh: []const u8, min_len: u16) ?Token { + var len: u16 = min_len; + // Previous location with the same hash (same 4 bytes). + var prev_pos = self.lookup.add(lh, pos); + // Last found match. + var match: ?Token = null; + + // How much back-references to try, performance knob. + var chain: usize = self.level.chain; + if (len >= self.level.good) { + // If we've got a match that's good enough, only look in 1/4 the chain. + chain >>= 2; + } + + // Hot path loop! + while (prev_pos > 0 and chain > 0) : (chain -= 1) { + const distance = pos - prev_pos; + if (distance > consts.match.max_distance) + break; + + const new_len = self.win.match(prev_pos, pos, len); + if (new_len > len) { + match = Token.initMatch(@intCast(distance), new_len); + if (new_len >= self.level.nice) { + // The match is good enough that we don't try to find a better one. + return match; + } + len = new_len; + } + prev_pos = self.lookup.prev(prev_pos); + } + + return match; + } + + fn flushTokens(self: *Self, flush_opt: FlushOption) !void { + // Pass tokens to the token writer + try self.block_writer.write(self.tokens.tokens(), flush_opt == .final, self.win.tokensBuffer()); + // Stored block ensures byte aligment. + // It has 3 bits (final, block_type) and then padding until byte boundary. + // After that everyting is aligned to the boundary in the stored block. + // Empty stored block is Ob000 + (0-7) bits of padding + 0x00 0x00 0xFF 0xFF. + // Last 4 bytes are byte aligned. + if (flush_opt == .flush) { + try self.block_writer.storedBlock("", false); + } + if (flush_opt != .none) { + // Safe to call only when byte aligned or it is OK to add + // padding bits (on last byte of the final block). + try self.block_writer.flush(); + } + // Reset internal tokens store. + self.tokens.reset(); + // Notify win that tokens are flushed. + self.win.flush(); + } + + // Slide win and if needed lookup tables. + fn slide(self: *Self) void { + const n = self.win.slide(); + self.lookup.slide(n); + } + + /// Compresses as much data as possible, stops when the reader becomes + /// empty. It will introduce some output latency (reading input without + /// producing all output) because some data are still in internal + /// buffers. 
+ /// + /// It is up to the caller to call flush (if needed) or finish (required) + /// when is need to output any pending data or complete stream. + /// + pub fn compress(self: *Self, reader: anytype) !void { + while (true) { + // Fill window from reader + const buf = self.win.writable(); + if (buf.len == 0) { + try self.tokenize(.none); + self.slide(); + continue; + } + const n = try reader.readAll(buf); + self.hasher.update(buf[0..n]); + self.win.written(n); + // Process window + try self.tokenize(.none); + // Exit when no more data in reader + if (n < buf.len) break; + } + } + + /// Flushes internal buffers to the output writer. Outputs empty stored + /// block to sync bit stream to the byte boundary, so that the + /// decompressor can get all input data available so far. + /// + /// It is useful mainly in compressed network protocols, to ensure that + /// deflate bit stream can be used as byte stream. May degrade + /// compression so it should be used only when necessary. + /// + /// Completes the current deflate block and follows it with an empty + /// stored block that is three zero bits plus filler bits to the next + /// byte, followed by four bytes (00 00 ff ff). + /// + pub fn flush(self: *Self) !void { + try self.tokenize(.flush); + } + + /// Completes deflate bit stream by writing any pending data as deflate + /// final deflate block. HAS to be called once all data are written to + /// the compressor as a signal that next block has to have final bit + /// set. + /// + pub fn finish(self: *Self) !void { + try self.tokenize(.final); + try container.writeFooter(&self.hasher, self.wrt); + } + + /// Use another writer while preserving history. Most probably flush + /// should be called on old writer before setting new. + pub fn setWriter(self: *Self, new_writer: WriterType) void { + self.block_writer.setWriter(new_writer); + self.wrt = new_writer; + } + + // Writer interface + + pub const Writer = io.Writer(*Self, Error, write); + pub const Error = BlockWriterType.Error; + + /// Write `input` of uncompressed data. + /// See compress. + pub fn write(self: *Self, input: []const u8) !usize { + var fbs = io.fixedBufferStream(input); + try self.compress(fbs.reader()); + return input.len; + } + + pub fn writer(self: *Self) Writer { + return .{ .context = self }; + } + }; +} + +// Tokens store +const Tokens = struct { + list: [consts.deflate.tokens]Token = undefined, + pos: usize = 0, + + fn add(self: *Tokens, t: Token) void { + self.list[self.pos] = t; + self.pos += 1; + } + + fn full(self: *Tokens) bool { + return self.pos == self.list.len; + } + + fn reset(self: *Tokens) void { + self.pos = 0; + } + + fn tokens(self: *Tokens) []const Token { + return self.list[0..self.pos]; + } +}; + +/// Creates huffman only deflate blocks. Disables Lempel-Ziv match searching and +/// only performs Huffman entropy encoding. Results in faster compression, much +/// less memory requirements during compression but bigger compressed sizes. 
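+///
+/// For example (a sketch using the gzip container; `input` and `output` stand
+/// for any reader/writer pair):
+///
+///     try huffman.compress(.gzip, input, output);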
+pub const huffman = struct { + pub fn compress(comptime container: Container, reader: anytype, writer: anytype) !void { + var c = try huffman.compressor(container, writer); + try c.compress(reader); + try c.finish(); + } + + pub fn Compressor(comptime container: Container, comptime WriterType: type) type { + return SimpleCompressor(.huffman, container, WriterType); + } + + pub fn compressor(comptime container: Container, writer: anytype) !huffman.Compressor(container, @TypeOf(writer)) { + return try huffman.Compressor(container, @TypeOf(writer)).init(writer); + } +}; + +/// Creates store blocks only. Data are not compressed only packed into deflate +/// store blocks. That adds 9 bytes of header for each block. Max stored block +/// size is 64K. Block is emitted when flush is called on on finish. +pub const store = struct { + pub fn compress(comptime container: Container, reader: anytype, writer: anytype) !void { + var c = try store.compressor(container, writer); + try c.compress(reader); + try c.finish(); + } + + pub fn Compressor(comptime container: Container, comptime WriterType: type) type { + return SimpleCompressor(.store, container, WriterType); + } + + pub fn compressor(comptime container: Container, writer: anytype) !store.Compressor(container, @TypeOf(writer)) { + return try store.Compressor(container, @TypeOf(writer)).init(writer); + } +}; + +const SimpleCompressorKind = enum { + huffman, + store, +}; + +fn simpleCompressor( + comptime kind: SimpleCompressorKind, + comptime container: Container, + writer: anytype, +) !SimpleCompressor(kind, container, @TypeOf(writer)) { + return try SimpleCompressor(kind, container, @TypeOf(writer)).init(writer); +} + +fn SimpleCompressor( + comptime kind: SimpleCompressorKind, + comptime container: Container, + comptime WriterType: type, +) type { + const BlockWriterType = BlockWriter(WriterType); + return struct { + buffer: [65535]u8 = undefined, // because store blocks are limited to 65535 bytes + wp: usize = 0, + + wrt: WriterType, + block_writer: BlockWriterType, + hasher: container.Hasher() = .{}, + + const Self = @This(); + + pub fn init(wrt: WriterType) !Self { + const self = Self{ + .wrt = wrt, + .block_writer = BlockWriterType.init(wrt), + }; + try container.writeHeader(self.wrt); + return self; + } + + pub fn flush(self: *Self) !void { + try self.flushBuffer(false); + try self.block_writer.storedBlock("", false); + try self.block_writer.flush(); + } + + pub fn finish(self: *Self) !void { + try self.flushBuffer(true); + try self.block_writer.flush(); + try container.writeFooter(&self.hasher, self.wrt); + } + + fn flushBuffer(self: *Self, final: bool) !void { + const buf = self.buffer[0..self.wp]; + switch (kind) { + .huffman => try self.block_writer.huffmanBlock(buf, final), + .store => try self.block_writer.storedBlock(buf, final), + } + self.wp = 0; + } + + // Writes all data from the input reader of uncompressed data. + // It is up to the caller to call flush or finish if there is need to + // output compressed blocks. + pub fn compress(self: *Self, reader: anytype) !void { + while (true) { + // read from rdr into buffer + const buf = self.buffer[self.wp..]; + if (buf.len == 0) { + try self.flushBuffer(false); + continue; + } + const n = try reader.readAll(buf); + self.hasher.update(buf[0..n]); + self.wp += n; + if (n < buf.len) break; // no more data in reader + } + } + + // Writer interface + + pub const Writer = io.Writer(*Self, Error, write); + pub const Error = BlockWriterType.Error; + + // Write `input` of uncompressed data. 
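+        // Returns input.len. Data may remain in the internal 64K buffer until
+        // `flush` or `finish` emits it as a block.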
+ pub fn write(self: *Self, input: []const u8) !usize { + var fbs = io.fixedBufferStream(input); + try self.compress(fbs.reader()); + return input.len; + } + + pub fn writer(self: *Self) Writer { + return .{ .context = self }; + } + }; +} + +test "flate.Deflate tokenization" { + const L = Token.initLiteral; + const M = Token.initMatch; + + const cases = [_]struct { + data: []const u8, + tokens: []const Token, + }{ + .{ + .data = "Blah blah blah blah blah!", + .tokens = &[_]Token{ L('B'), L('l'), L('a'), L('h'), L(' '), L('b'), M(5, 18), L('!') }, + }, + .{ + .data = "ABCDEABCD ABCDEABCD", + .tokens = &[_]Token{ + L('A'), L('B'), L('C'), L('D'), L('E'), L('A'), L('B'), L('C'), L('D'), L(' '), + L('A'), M(10, 8), + }, + }, + }; + + for (cases) |c| { + inline for (Container.list) |container| { // for each wrapping + var cw = io.countingWriter(io.null_writer); + const cww = cw.writer(); + var df = try Deflate(container, @TypeOf(cww), TestTokenWriter).init(cww, .{}); + + _ = try df.write(c.data); + try df.flush(); + + // df.token_writer.show(); + try expect(df.block_writer.pos == c.tokens.len); // number of tokens written + try testing.expectEqualSlices(Token, df.block_writer.get(), c.tokens); // tokens match + + try testing.expectEqual(container.headerSize(), cw.bytes_written); + try df.finish(); + try testing.expectEqual(container.size(), cw.bytes_written); + } + } +} + +// Tests that tokens writen are equal to expected token list. +const TestTokenWriter = struct { + const Self = @This(); + //expected: []const Token, + pos: usize = 0, + actual: [1024]Token = undefined, + + pub fn init(_: anytype) Self { + return .{}; + } + pub fn write(self: *Self, tokens: []const Token, _: bool, _: ?[]const u8) !void { + for (tokens) |t| { + self.actual[self.pos] = t; + self.pos += 1; + } + } + + pub fn storedBlock(_: *Self, _: []const u8, _: bool) !void {} + + pub fn get(self: *Self) []Token { + return self.actual[0..self.pos]; + } + + pub fn show(self: *Self) void { + print("\n", .{}); + for (self.get()) |t| { + t.show(); + } + } + + pub fn flush(_: *Self) !void {} +}; + +test "flate.Deflate struct sizes" { + try expect(@sizeOf(Token) == 4); + + // list: (1 << 15) * 4 = 128k + pos: 8 + const tokens_size = 128 * 1024 + 8; + try expect(@sizeOf(Tokens) == tokens_size); + + // head: (1 << 15) * 2 = 64k, chain: (32768 * 2) * 2 = 128k = 192k + const lookup_size = 192 * 1024; + try expect(@sizeOf(Lookup) == lookup_size); + + // buffer: (32k * 2), wp: 8, rp: 8, fp: 8 + const window_size = 64 * 1024 + 8 + 8 + 8; + try expect(@sizeOf(SlidingWindow) == window_size); + + const Bw = BlockWriter(@TypeOf(io.null_writer)); + // huffman bit writer internal: 11480 + const hbw_size = 11472; // 11.2k + try expect(@sizeOf(Bw) == hbw_size); + + const D = Deflate(.raw, @TypeOf(io.null_writer), Bw); + // 404744, 395.26K + // ?Token: 6, ?u8: 2, level: 8 + try expect(@sizeOf(D) == tokens_size + lookup_size + window_size + hbw_size + 24); + //print("Delfate size: {d} {d}\n", .{ @sizeOf(D), tokens_size + lookup_size + hbw_size + window_size }); + + // current std lib deflate allocation: + // 797_901, 779.2k + // measured with: + // var la = std.heap.logToWriterAllocator(testing.allocator, io.getStdOut().writer()); + // const allocator = la.allocator(); + // var cmp = try std.compress.deflate.compressor(allocator, io.null_writer, .{}); + // defer cmp.deinit(); + + const HC = huffman.Compressor(.raw, @TypeOf(io.null_writer)); + //print("size of HOC {d}\n", .{@sizeOf(HOC)}); + try expect(@sizeOf(HC) == 77024); + // 64K buffer + // 11480 
huffman_encoded + // 8 buffer write pointer +} + +test "flate deflate file tokenization" { + const levels = [_]Level{ .level_4, .level_5, .level_6, .level_7, .level_8, .level_9 }; + const cases = [_]struct { + data: []const u8, // uncompressed content + // expected number of tokens producet in deflate tokenization + tokens_count: [levels.len]usize = .{0} ** levels.len, + }{ + .{ + .data = @embedFile("testdata/rfc1951.txt"), + .tokens_count = .{ 7675, 7672, 7599, 7594, 7598, 7599 }, + }, + + .{ + .data = @embedFile("testdata/block_writer/huffman-null-max.input"), + .tokens_count = .{ 257, 257, 257, 257, 257, 257 }, + }, + .{ + .data = @embedFile("testdata/block_writer/huffman-pi.input"), + .tokens_count = .{ 2570, 2564, 2564, 2564, 2564, 2564 }, + }, + .{ + .data = @embedFile("testdata/block_writer/huffman-text.input"), + .tokens_count = .{ 235, 234, 234, 234, 234, 234 }, + }, + .{ + .data = @embedFile("testdata/fuzz/roundtrip1.input"), + .tokens_count = .{ 333, 331, 331, 331, 331, 331 }, + }, + .{ + .data = @embedFile("testdata/fuzz/roundtrip2.input"), + .tokens_count = .{ 334, 334, 334, 334, 334, 334 }, + }, + }; + + for (cases) |case| { // for each case + const data = case.data; + + for (levels, 0..) |level, i| { // for each compression level + var original = io.fixedBufferStream(data); + + // buffer for decompressed data + var al = std.ArrayList(u8).init(testing.allocator); + defer al.deinit(); + const writer = al.writer(); + + // create compressor + const WriterType = @TypeOf(writer); + const TokenWriter = TokenDecoder(@TypeOf(writer)); + var cmp = try Deflate(.raw, WriterType, TokenWriter).init(writer, .{ .level = level }); + + // Stream uncompressed `orignal` data to the compressor. It will + // produce tokens list and pass that list to the TokenDecoder. This + // TokenDecoder uses CircularBuffer from inflate to convert list of + // tokens back to the uncompressed stream. 
+ try cmp.compress(original.reader()); + try cmp.flush(); + const expected_count = case.tokens_count[i]; + const actual = cmp.block_writer.tokens_count; + if (expected_count == 0) { + print("actual token count {d}\n", .{actual}); + } else { + try testing.expectEqual(expected_count, actual); + } + + try testing.expectEqual(data.len, al.items.len); + try testing.expectEqualSlices(u8, data, al.items); + } + } +} + +fn TokenDecoder(comptime WriterType: type) type { + return struct { + const CircularBuffer = @import("CircularBuffer.zig"); + hist: CircularBuffer = .{}, + wrt: WriterType, + tokens_count: usize = 0, + + const Self = @This(); + + pub fn init(wrt: WriterType) Self { + return .{ .wrt = wrt }; + } + + pub fn write(self: *Self, tokens: []const Token, _: bool, _: ?[]const u8) !void { + self.tokens_count += tokens.len; + for (tokens) |t| { + switch (t.kind) { + .literal => self.hist.write(t.literal()), + .match => try self.hist.writeMatch(t.length(), t.distance()), + } + if (self.hist.free() < 285) try self.flushWin(); + } + try self.flushWin(); + } + + pub fn storedBlock(_: *Self, _: []const u8, _: bool) !void {} + + fn flushWin(self: *Self) !void { + while (true) { + const buf = self.hist.read(); + if (buf.len == 0) break; + try self.wrt.writeAll(buf); + } + } + + pub fn flush(_: *Self) !void {} + }; +} + +test "flate.Deflate store simple compressor" { + const data = "Hello world!"; + const expected = [_]u8{ + 0x1, // block type 0, final bit set + 0xc, 0x0, // len = 12 + 0xf3, 0xff, // ~len + 'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '!', // + //0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64, 0x21, + }; + + var fbs = std.io.fixedBufferStream(data); + var al = std.ArrayList(u8).init(testing.allocator); + defer al.deinit(); + + var cmp = try store.compressor(.raw, al.writer()); + try cmp.compress(fbs.reader()); + try cmp.finish(); + try testing.expectEqualSlices(u8, &expected, al.items); + + fbs.reset(); + try al.resize(0); + + // huffman only compresoor will also emit store block for this small sample + var hc = try huffman.compressor(.raw, al.writer()); + try hc.compress(fbs.reader()); + try hc.finish(); + try testing.expectEqualSlices(u8, &expected, al.items); +} diff --git a/lib/std/compress/flate/flate.zig b/lib/std/compress/flate/flate.zig new file mode 100644 index 0000000000..4c0977adde --- /dev/null +++ b/lib/std/compress/flate/flate.zig @@ -0,0 +1,236 @@ +/// Deflate is a lossless data compression file format that uses a combination +/// of LZ77 and Huffman coding. +pub const deflate = @import("deflate.zig"); + +/// Inflate is the decoding process that takes a Deflate bitstream for +/// decompression and correctly produces the original full-size data or file. +pub const inflate = @import("inflate.zig"); + +/// Decompress compressed data from reader and write plain data to the writer. +pub fn decompress(reader: anytype, writer: anytype) !void { + try inflate.decompress(.raw, reader, writer); +} + +/// Decompressor type +pub fn Decompressor(comptime ReaderType: type) type { + return inflate.Inflate(.raw, ReaderType); +} + +/// Create Decompressor which will read compressed data from reader. +pub fn decompressor(reader: anytype) Decompressor(@TypeOf(reader)) { + return inflate.decompressor(.raw, reader); +} + +/// Compression level, trades between speed and compression size. +pub const Options = deflate.Options; + +/// Compress plain data from reader and write compressed data to the writer. 
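+///
+/// For example (a sketch; `out` stands for any writer, e.g. an
+/// `std.ArrayList(u8)` writer):
+///
+///     var in = std.io.fixedBufferStream("data to compress");
+///     try compress(in.reader(), out, .{ .level = .default });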
+pub fn compress(reader: anytype, writer: anytype, options: Options) !void { + try deflate.compress(.raw, reader, writer, options); +} + +/// Compressor type +pub fn Compressor(comptime WriterType: type) type { + return deflate.Compressor(.raw, WriterType); +} + +/// Create Compressor which outputs compressed data to the writer. +pub fn compressor(writer: anytype, options: Options) !Compressor(@TypeOf(writer)) { + return try deflate.compressor(.raw, writer, options); +} + +/// Huffman only compression. Without Lempel-Ziv match searching. Faster +/// compression, less memory requirements but bigger compressed sizes. +pub const huffman = struct { + pub fn compress(reader: anytype, writer: anytype) !void { + try deflate.huffman.compress(.raw, reader, writer); + } + + pub fn Compressor(comptime WriterType: type) type { + return deflate.huffman.Compressor(.raw, WriterType); + } + + pub fn compressor(writer: anytype) !huffman.Compressor(@TypeOf(writer)) { + return deflate.huffman.compressor(.raw, writer); + } +}; + +// No compression store only. Compressed size is slightly bigger than plain. +pub const store = struct { + pub fn compress(reader: anytype, writer: anytype) !void { + try deflate.store.compress(.raw, reader, writer); + } + + pub fn Compressor(comptime WriterType: type) type { + return deflate.store.Compressor(.raw, WriterType); + } + + pub fn compressor(writer: anytype) !store.Compressor(@TypeOf(writer)) { + return deflate.store.compressor(.raw, writer); + } +}; + +/// Container defines header/footer arround deflate bit stream. Gzip and zlib +/// compression algorithms are containers arround deflate bit stream body. +const Container = @import("container.zig").Container; +const std = @import("std"); +const testing = std.testing; +const fixedBufferStream = std.io.fixedBufferStream; +const print = std.debug.print; + +test "flate compress/decompress" { + var cmp_buf: [64 * 1024]u8 = undefined; // compressed data buffer + var dcm_buf: [64 * 1024]u8 = undefined; // decompressed data buffer + + const levels = [_]deflate.Level{ .level_4, .level_5, .level_6, .level_7, .level_8, .level_9 }; + const cases = [_]struct { + data: []const u8, // uncompressed content + // compressed data sizes per level 4-9 + gzip_sizes: [levels.len]usize = [_]usize{0} ** levels.len, + huffman_only_size: usize = 0, + store_size: usize = 0, + }{ + .{ + .data = @embedFile("testdata/rfc1951.txt"), + .gzip_sizes = [_]usize{ 11513, 11217, 11139, 11126, 11122, 11119 }, + .huffman_only_size = 20287, + .store_size = 36967, + }, + .{ + .data = @embedFile("testdata/fuzz/roundtrip1.input"), + .gzip_sizes = [_]usize{ 373, 370, 370, 370, 370, 370 }, + .huffman_only_size = 393, + .store_size = 393, + }, + .{ + .data = @embedFile("testdata/fuzz/roundtrip2.input"), + .gzip_sizes = [_]usize{ 373, 373, 373, 373, 373, 373 }, + .huffman_only_size = 394, + .store_size = 394, + }, + .{ + .data = @embedFile("testdata/fuzz/deflate-stream.expect"), + .gzip_sizes = [_]usize{ 351, 347, 347, 347, 347, 347 }, + .huffman_only_size = 498, + .store_size = 747, + }, + }; + + for (cases, 0..) |case, case_no| { // for each case + const data = case.data; + + for (levels, 0..) 
|level, i| { // for each compression level + + inline for (Container.list) |container| { // for each wrapping + var compressed_size: usize = if (case.gzip_sizes[i] > 0) + case.gzip_sizes[i] - Container.gzip.size() + container.size() + else + 0; + + // compress original stream to compressed stream + { + var original = fixedBufferStream(data); + var compressed = fixedBufferStream(&cmp_buf); + try deflate.compress(container, original.reader(), compressed.writer(), .{ .level = level }); + if (compressed_size == 0) { + if (container == .gzip) + print("case {d} gzip level {} compressed size: {d}\n", .{ case_no, level, compressed.pos }); + compressed_size = compressed.pos; + } + try testing.expectEqual(compressed_size, compressed.pos); + } + // decompress compressed stream to decompressed stream + { + var compressed = fixedBufferStream(cmp_buf[0..compressed_size]); + var decompressed = fixedBufferStream(&dcm_buf); + try inflate.decompress(container, compressed.reader(), decompressed.writer()); + try testing.expectEqualSlices(u8, data, decompressed.getWritten()); + } + + // compressor writer interface + { + var compressed = fixedBufferStream(&cmp_buf); + var cmp = try deflate.compressor(container, compressed.writer(), .{ .level = level }); + var cmp_wrt = cmp.writer(); + try cmp_wrt.writeAll(data); + try cmp.finish(); + + try testing.expectEqual(compressed_size, compressed.pos); + } + // decompressor reader interface + { + var compressed = fixedBufferStream(cmp_buf[0..compressed_size]); + var dcm = inflate.decompressor(container, compressed.reader()); + var dcm_rdr = dcm.reader(); + const n = try dcm_rdr.readAll(&dcm_buf); + try testing.expectEqual(data.len, n); + try testing.expectEqualSlices(u8, data, dcm_buf[0..n]); + } + } + } + // huffman only compression + { + inline for (Container.list) |container| { // for each wrapping + var compressed_size: usize = if (case.huffman_only_size > 0) + case.huffman_only_size - Container.gzip.size() + container.size() + else + 0; + + // compress original stream to compressed stream + { + var original = fixedBufferStream(data); + var compressed = fixedBufferStream(&cmp_buf); + var cmp = try deflate.huffman.compressor(container, compressed.writer()); + try cmp.compress(original.reader()); + try cmp.finish(); + if (compressed_size == 0) { + if (container == .gzip) + print("case {d} huffman only compressed size: {d}\n", .{ case_no, compressed.pos }); + compressed_size = compressed.pos; + } + try testing.expectEqual(compressed_size, compressed.pos); + } + // decompress compressed stream to decompressed stream + { + var compressed = fixedBufferStream(cmp_buf[0..compressed_size]); + var decompressed = fixedBufferStream(&dcm_buf); + try inflate.decompress(container, compressed.reader(), decompressed.writer()); + try testing.expectEqualSlices(u8, data, decompressed.getWritten()); + } + } + } + + // store only + { + inline for (Container.list) |container| { // for each wrapping + var compressed_size: usize = if (case.store_size > 0) + case.store_size - Container.gzip.size() + container.size() + else + 0; + + // compress original stream to compressed stream + { + var original = fixedBufferStream(data); + var compressed = fixedBufferStream(&cmp_buf); + var cmp = try deflate.store.compressor(container, compressed.writer()); + try cmp.compress(original.reader()); + try cmp.finish(); + if (compressed_size == 0) { + if (container == .gzip) + print("case {d} store only compressed size: {d}\n", .{ case_no, compressed.pos }); + compressed_size = compressed.pos; + } + + try 
testing.expectEqual(compressed_size, compressed.pos); + } + // decompress compressed stream to decompressed stream + { + var compressed = fixedBufferStream(cmp_buf[0..compressed_size]); + var decompressed = fixedBufferStream(&dcm_buf); + try inflate.decompress(container, compressed.reader(), decompressed.writer()); + try testing.expectEqualSlices(u8, data, decompressed.getWritten()); + } + } + } + } +} diff --git a/lib/std/compress/flate/gzip.zig b/lib/std/compress/flate/gzip.zig new file mode 100644 index 0000000000..feb9ae07b5 --- /dev/null +++ b/lib/std/compress/flate/gzip.zig @@ -0,0 +1,66 @@ +const deflate = @import("deflate.zig"); +const inflate = @import("inflate.zig"); + +/// Decompress compressed data from reader and write plain data to the writer. +pub fn decompress(reader: anytype, writer: anytype) !void { + try inflate.decompress(.gzip, reader, writer); +} + +/// Decompressor type +pub fn Decompressor(comptime ReaderType: type) type { + return inflate.Inflate(.gzip, ReaderType); +} + +/// Create Decompressor which will read compressed data from reader. +pub fn decompressor(reader: anytype) Decompressor(@TypeOf(reader)) { + return inflate.decompressor(.gzip, reader); +} + +/// Compression level, trades between speed and compression size. +pub const Options = deflate.Options; + +/// Compress plain data from reader and write compressed data to the writer. +pub fn compress(reader: anytype, writer: anytype, options: Options) !void { + try deflate.compress(.gzip, reader, writer, options); +} + +/// Compressor type +pub fn Compressor(comptime WriterType: type) type { + return deflate.Compressor(.gzip, WriterType); +} + +/// Create Compressor which outputs compressed data to the writer. +pub fn compressor(writer: anytype, options: Options) !Compressor(@TypeOf(writer)) { + return try deflate.compressor(.gzip, writer, options); +} + +/// Huffman only compression. Without Lempel-Ziv match searching. Faster +/// compression, less memory requirements but bigger compressed sizes. +pub const huffman = struct { + pub fn compress(reader: anytype, writer: anytype) !void { + try deflate.huffman.compress(.gzip, reader, writer); + } + + pub fn Compressor(comptime WriterType: type) type { + return deflate.huffman.Compressor(.gzip, WriterType); + } + + pub fn compressor(writer: anytype) !huffman.Compressor(@TypeOf(writer)) { + return deflate.huffman.compressor(.gzip, writer); + } +}; + +// No compression store only. Compressed size is slightly bigger than plain. 
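+// For example, `try store.compress(reader, writer)` wraps the data in the gzip
+// header/footer without compressing it.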
+pub const store = struct { + pub fn compress(reader: anytype, writer: anytype) !void { + try deflate.store.compress(.gzip, reader, writer); + } + + pub fn Compressor(comptime WriterType: type) type { + return deflate.store.Compressor(.gzip, WriterType); + } + + pub fn compressor(writer: anytype) !store.Compressor(@TypeOf(writer)) { + return deflate.store.compressor(.gzip, writer); + } +}; diff --git a/lib/std/compress/flate/huffman_decoder.zig b/lib/std/compress/flate/huffman_decoder.zig new file mode 100644 index 0000000000..5bfe9242c7 --- /dev/null +++ b/lib/std/compress/flate/huffman_decoder.zig @@ -0,0 +1,308 @@ +const std = @import("std"); +const testing = std.testing; + +pub const Symbol = packed struct { + pub const Kind = enum(u2) { + literal, + end_of_block, + match, + }; + + symbol: u8 = 0, // symbol from alphabet + code_bits: u4 = 0, // number of bits in code 0-15 + kind: Kind = .literal, + + code: u16 = 0, // huffman code of the symbol + next: u16 = 0, // pointer to the next symbol in linked list + // it is safe to use 0 as null pointer, when sorted 0 has shortest code and fits into lookup + + // Sorting less than function. + pub fn asc(_: void, a: Symbol, b: Symbol) bool { + if (a.code_bits == b.code_bits) { + if (a.kind == b.kind) { + return a.symbol < b.symbol; + } + return @intFromEnum(a.kind) < @intFromEnum(b.kind); + } + return a.code_bits < b.code_bits; + } +}; + +pub const LiteralDecoder = HuffmanDecoder(286, 15, 9); +pub const DistanceDecoder = HuffmanDecoder(30, 15, 9); +pub const CodegenDecoder = HuffmanDecoder(19, 7, 7); + +pub const Error = error{ + InvalidCode, + OversubscribedHuffmanTree, + IncompleteHuffmanTree, + MissingEndOfBlockCode, +}; + +/// Creates huffman tree codes from list of code lengths (in `build`). +/// +/// `find` then finds symbol for code bits. Code can be any length between 1 and +/// 15 bits. When calling `find` we don't know how many bits will be used to +/// find symbol. When symbol is returned it has code_bits field which defines +/// how much we should advance in bit stream. +/// +/// Lookup table is used to map 15 bit int to symbol. Same symbol is written +/// many times in this table; 32K places for 286 (at most) symbols. +/// Small lookup table is optimization for faster search. +/// It is variation of the algorithm explained in [zlib](https://github.com/madler/zlib/blob/643e17b7498d12ab8d15565662880579692f769d/doc/algorithm.txt#L92) +/// with difference that we here use statically allocated arrays. +/// +fn HuffmanDecoder( + comptime alphabet_size: u16, + comptime max_code_bits: u4, + comptime lookup_bits: u4, +) type { + const lookup_shift = max_code_bits - lookup_bits; + + return struct { + // all symbols in alaphabet, sorted by code_len, symbol + symbols: [alphabet_size]Symbol = undefined, + // lookup table code -> symbol + lookup: [1 << lookup_bits]Symbol = undefined, + + const Self = @This(); + + /// Generates symbols and lookup tables from list of code lens for each symbol. + pub fn generate(self: *Self, lens: []const u4) !void { + try checkCompletnes(lens); + + // init alphabet with code_bits + for (self.symbols, 0..) 
|_, i| { + const cb: u4 = if (i < lens.len) lens[i] else 0; + self.symbols[i] = if (i < 256) + .{ .kind = .literal, .symbol = @intCast(i), .code_bits = cb } + else if (i == 256) + .{ .kind = .end_of_block, .symbol = 0xff, .code_bits = cb } + else + .{ .kind = .match, .symbol = @intCast(i - 257), .code_bits = cb }; + } + std.sort.heap(Symbol, &self.symbols, {}, Symbol.asc); + + // reset lookup table + for (0..self.lookup.len) |i| { + self.lookup[i] = .{}; + } + + // assign code to symbols + // reference: https://youtu.be/9_YEGLe33NA?list=PLU4IQLU9e_OrY8oASHx0u3IXAL9TOdidm&t=2639 + var code: u16 = 0; + var idx: u16 = 0; + for (&self.symbols, 0..) |*sym, pos| { + //print("sym: {}\n", .{sym}); + if (sym.code_bits == 0) continue; // skip unused + sym.code = code; + + const next_code = code + (@as(u16, 1) << (max_code_bits - sym.code_bits)); + const next_idx = next_code >> lookup_shift; + + if (next_idx > self.lookup.len or idx >= self.lookup.len) break; + if (sym.code_bits <= lookup_bits) { + // fill small lookup table + for (idx..next_idx) |j| + self.lookup[j] = sym.*; + } else { + // insert into linked table starting at root + const root = &self.lookup[idx]; + const root_next = root.next; + root.next = @intCast(pos); + sym.next = root_next; + } + + idx = next_idx; + code = next_code; + } + //print("decoder generate, code: {d}, idx: {d}\n", .{ code, idx }); + } + + /// Given the list of code lengths check that it represents a canonical + /// Huffman code for n symbols. + /// + /// Reference: https://github.com/madler/zlib/blob/5c42a230b7b468dff011f444161c0145b5efae59/contrib/puff/puff.c#L340 + fn checkCompletnes(lens: []const u4) !void { + if (alphabet_size == 286) + if (lens[256] == 0) return error.MissingEndOfBlockCode; + + var count = [_]u16{0} ** (@as(usize, max_code_bits) + 1); + var max: usize = 0; + for (lens) |n| { + if (n == 0) continue; + if (n > max) max = n; + count[n] += 1; + } + if (max == 0) // emtpy tree + return; + + // check for an over-subscribed or incomplete set of lengths + var left: usize = 1; // one possible code of zero length + for (1..count.len) |len| { + left <<= 1; // one more bit, double codes left + if (count[len] > left) + return error.OversubscribedHuffmanTree; + left -= count[len]; // deduct count from possible codes + } + if (left > 0) { // left > 0 means incomplete + // incomplete code ok only for single length 1 code + if (max_code_bits > 7 and max == count[0] + count[1]) return; + return error.IncompleteHuffmanTree; + } + } + + /// Finds symbol for lookup table code. 
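+        /// `code` holds `max_code_bits` of input, most significant bit first. Codes
+        /// short enough for the small lookup table are resolved directly; longer
+        /// codes fall back to the linked list of symbols sharing that table prefix.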
+ pub fn find(self: *Self, code: u16) !Symbol { + // try to find in lookup table + const idx = code >> lookup_shift; + const sym = self.lookup[idx]; + if (sym.code_bits != 0) return sym; + // if not use linked list of symbols with same prefix + return self.findLinked(code, sym.next); + } + + inline fn findLinked(self: *Self, code: u16, start: u16) !Symbol { + var pos = start; + while (pos > 0) { + const sym = self.symbols[pos]; + const shift = max_code_bits - sym.code_bits; + // compare code_bits number of upper bits + if ((code ^ sym.code) >> shift == 0) return sym; + pos = sym.next; + } + return error.InvalidCode; + } + }; +} + +test "flate.HuffmanDecoder init/find" { + // example data from: https://youtu.be/SJPvNi4HrWQ?t=8423 + const code_lens = [_]u4{ 4, 3, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 3, 2 }; + var h: CodegenDecoder = .{}; + try h.generate(&code_lens); + + const expected = [_]struct { + sym: Symbol, + code: u16, + }{ + .{ + .code = 0b00_00000, + .sym = .{ .symbol = 3, .code_bits = 2 }, + }, + .{ + .code = 0b01_00000, + .sym = .{ .symbol = 18, .code_bits = 2 }, + }, + .{ + .code = 0b100_0000, + .sym = .{ .symbol = 1, .code_bits = 3 }, + }, + .{ + .code = 0b101_0000, + .sym = .{ .symbol = 4, .code_bits = 3 }, + }, + .{ + .code = 0b110_0000, + .sym = .{ .symbol = 17, .code_bits = 3 }, + }, + .{ + .code = 0b1110_000, + .sym = .{ .symbol = 0, .code_bits = 4 }, + }, + .{ + .code = 0b1111_000, + .sym = .{ .symbol = 16, .code_bits = 4 }, + }, + }; + + // unused symbols + for (0..12) |i| { + try testing.expectEqual(0, h.symbols[i].code_bits); + } + // used, from index 12 + for (expected, 12..) |e, i| { + try testing.expectEqual(e.sym.symbol, h.symbols[i].symbol); + try testing.expectEqual(e.sym.code_bits, h.symbols[i].code_bits); + const sym_from_code = try h.find(e.code); + try testing.expectEqual(e.sym.symbol, sym_from_code.symbol); + } + + // All possible codes for each symbol. + // Lookup table has 126 elements, to cover all possible 7 bit codes. + for (0b0000_000..0b0100_000) |c| // 0..32 (32) + try testing.expectEqual(3, (try h.find(@intCast(c))).symbol); + + for (0b0100_000..0b1000_000) |c| // 32..64 (32) + try testing.expectEqual(18, (try h.find(@intCast(c))).symbol); + + for (0b1000_000..0b1010_000) |c| // 64..80 (16) + try testing.expectEqual(1, (try h.find(@intCast(c))).symbol); + + for (0b1010_000..0b1100_000) |c| // 80..96 (16) + try testing.expectEqual(4, (try h.find(@intCast(c))).symbol); + + for (0b1100_000..0b1110_000) |c| // 96..112 (16) + try testing.expectEqual(17, (try h.find(@intCast(c))).symbol); + + for (0b1110_000..0b1111_000) |c| // 112..120 (8) + try testing.expectEqual(0, (try h.find(@intCast(c))).symbol); + + for (0b1111_000..0b1_0000_000) |c| // 120...128 (8) + try testing.expectEqual(16, (try h.find(@intCast(c))).symbol); +} + +const print = std.debug.print; +const assert = std.debug.assert; +const expect = std.testing.expect; + +test "flate.HuffmanDecoder encode/decode literals" { + const LiteralEncoder = @import("huffman_encoder.zig").LiteralEncoder; + + for (1..286) |j| { // for all different number of codes + var enc: LiteralEncoder = .{}; + // create freqencies + var freq = [_]u16{0} ** 286; + freq[256] = 1; // ensure we have end of block code + for (&freq, 1..) |*f, i| { + if (i % j == 0) + f.* = @intCast(i); + } + + // encoder from freqencies + enc.generate(&freq, 15); + + // get code_lens from encoder + var code_lens = [_]u4{0} ** 286; + for (code_lens, 0..) 
|_, i| { + code_lens[i] = @intCast(enc.codes[i].len); + } + // generate decoder from code lens + var dec: LiteralDecoder = .{}; + try dec.generate(&code_lens); + + // expect decoder code to match original encoder code + for (dec.symbols) |s| { + if (s.code_bits == 0) continue; + const c_code: u16 = @bitReverse(@as(u15, @intCast(s.code))); + const symbol: u16 = switch (s.kind) { + .literal => s.symbol, + .end_of_block => 256, + .match => @as(u16, s.symbol) + 257, + }; + + const c = enc.codes[symbol]; + try expect(c.code == c_code); + } + + // find each symbol by code + for (enc.codes) |c| { + if (c.len == 0) continue; + + const s_code: u15 = @bitReverse(@as(u15, @intCast(c.code))); + const s = try dec.find(s_code); + try expect(s.code == s_code); + try expect(s.code_bits == c.len); + } + } +} diff --git a/lib/std/compress/flate/huffman_encoder.zig b/lib/std/compress/flate/huffman_encoder.zig new file mode 100644 index 0000000000..a8553ebb5e --- /dev/null +++ b/lib/std/compress/flate/huffman_encoder.zig @@ -0,0 +1,536 @@ +const std = @import("std"); +const assert = std.debug.assert; +const math = std.math; +const mem = std.mem; +const sort = std.sort; +const testing = std.testing; + +const consts = @import("consts.zig").huffman; + +const LiteralNode = struct { + literal: u16, + freq: u16, +}; + +// Describes the state of the constructed tree for a given depth. +const LevelInfo = struct { + // Our level. for better printing + level: u32, + + // The frequency of the last node at this level + last_freq: u32, + + // The frequency of the next character to add to this level + next_char_freq: u32, + + // The frequency of the next pair (from level below) to add to this level. + // Only valid if the "needed" value of the next lower level is 0. + next_pair_freq: u32, + + // The number of chains remaining to generate for this level before moving + // up to the next level + needed: u32, +}; + +// hcode is a huffman code with a bit code and bit length. +pub const HuffCode = struct { + code: u16 = 0, + len: u16 = 0, + + // set sets the code and length of an hcode. + fn set(self: *HuffCode, code: u16, length: u16) void { + self.len = length; + self.code = code; + } +}; + +pub fn HuffmanEncoder(comptime size: usize) type { + return struct { + codes: [size]HuffCode = undefined, + // Reusable buffer with the longest possible frequency table. + freq_cache: [consts.max_num_frequencies + 1]LiteralNode = undefined, + bit_count: [17]u32 = undefined, + lns: []LiteralNode = undefined, // sorted by literal, stored to avoid repeated allocation in generate + lfs: []LiteralNode = undefined, // sorted by frequency, stored to avoid repeated allocation in generate + + const Self = @This(); + + // Update this Huffman Code object to be the minimum code for the specified frequency count. + // + // freq An array of frequencies, in which frequency[i] gives the frequency of literal i. + // max_bits The maximum number of bits to use for any literal. + pub fn generate(self: *Self, freq: []u16, max_bits: u32) void { + var list = self.freq_cache[0 .. freq.len + 1]; + // Number of non-zero literals + var count: u32 = 0; + // Set list to be the set of all non-zero literals and their frequencies + for (freq, 0..) 
|f, i| { + if (f != 0) { + list[count] = LiteralNode{ .literal = @as(u16, @intCast(i)), .freq = f }; + count += 1; + } else { + list[count] = LiteralNode{ .literal = 0x00, .freq = 0 }; + self.codes[i].len = 0; + } + } + list[freq.len] = LiteralNode{ .literal = 0x00, .freq = 0 }; + + list = list[0..count]; + if (count <= 2) { + // Handle the small cases here, because they are awkward for the general case code. With + // two or fewer literals, everything has bit length 1. + for (list, 0..) |node, i| { + // "list" is in order of increasing literal value. + self.codes[node.literal].set(@as(u16, @intCast(i)), 1); + } + return; + } + self.lfs = list; + mem.sort(LiteralNode, self.lfs, {}, byFreq); + + // Get the number of literals for each bit count + const bit_count = self.bitCounts(list, max_bits); + // And do the assignment + self.assignEncodingAndSize(bit_count, list); + } + + pub fn bitLength(self: *Self, freq: []u16) u32 { + var total: u32 = 0; + for (freq, 0..) |f, i| { + if (f != 0) { + total += @as(u32, @intCast(f)) * @as(u32, @intCast(self.codes[i].len)); + } + } + return total; + } + + // Return the number of literals assigned to each bit size in the Huffman encoding + // + // This method is only called when list.len >= 3 + // The cases of 0, 1, and 2 literals are handled by special case code. + // + // list: An array of the literals with non-zero frequencies + // and their associated frequencies. The array is in order of increasing + // frequency, and has as its last element a special element with frequency + // std.math.maxInt(i32) + // + // max_bits: The maximum number of bits that should be used to encode any literal. + // Must be less than 16. + // + // Returns an integer array in which array[i] indicates the number of literals + // that should be encoded in i bits. + fn bitCounts(self: *Self, list: []LiteralNode, max_bits_to_use: usize) []u32 { + var max_bits = max_bits_to_use; + const n = list.len; + const max_bits_limit = 16; + + assert(max_bits < max_bits_limit); + + // The tree can't have greater depth than n - 1, no matter what. This + // saves a little bit of work in some small cases + max_bits = @min(max_bits, n - 1); + + // Create information about each of the levels. + // A bogus "Level 0" whose sole purpose is so that + // level1.prev.needed == 0. This makes level1.next_pair_freq + // be a legitimate value that never gets chosen. + var levels: [max_bits_limit]LevelInfo = mem.zeroes([max_bits_limit]LevelInfo); + // leaf_counts[i] counts the number of literals at the left + // of ancestors of the rightmost node at level i. + // leaf_counts[i][j] is the number of literals at the left + // of the level j ancestor. + var leaf_counts: [max_bits_limit][max_bits_limit]u32 = mem.zeroes([max_bits_limit][max_bits_limit]u32); + + { + var level = @as(u32, 1); + while (level <= max_bits) : (level += 1) { + // For every level, the first two items are the first two characters. + // We initialize the levels as if we had already figured this out. + levels[level] = LevelInfo{ + .level = level, + .last_freq = list[1].freq, + .next_char_freq = list[2].freq, + .next_pair_freq = list[0].freq + list[1].freq, + .needed = 0, + }; + leaf_counts[level][level] = 2; + if (level == 1) { + levels[level].next_pair_freq = math.maxInt(i32); + } + } + } + + // We need a total of 2*n - 2 items at top level and have already generated 2. 
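+            // That leaves 2*n - 4 chains still needed at the top level, which is
+            // what the assignment below sets.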
+ levels[max_bits].needed = 2 * @as(u32, @intCast(n)) - 4; + + { + var level = max_bits; + while (true) { + var l = &levels[level]; + if (l.next_pair_freq == math.maxInt(i32) and l.next_char_freq == math.maxInt(i32)) { + // We've run out of both leafs and pairs. + // End all calculations for this level. + // To make sure we never come back to this level or any lower level, + // set next_pair_freq impossibly large. + l.needed = 0; + levels[level + 1].next_pair_freq = math.maxInt(i32); + level += 1; + continue; + } + + const prev_freq = l.last_freq; + if (l.next_char_freq < l.next_pair_freq) { + // The next item on this row is a leaf node. + const next = leaf_counts[level][level] + 1; + l.last_freq = l.next_char_freq; + // Lower leaf_counts are the same of the previous node. + leaf_counts[level][level] = next; + if (next >= list.len) { + l.next_char_freq = maxNode().freq; + } else { + l.next_char_freq = list[next].freq; + } + } else { + // The next item on this row is a pair from the previous row. + // next_pair_freq isn't valid until we generate two + // more values in the level below + l.last_freq = l.next_pair_freq; + // Take leaf counts from the lower level, except counts[level] remains the same. + @memcpy(leaf_counts[level][0..level], leaf_counts[level - 1][0..level]); + levels[l.level - 1].needed = 2; + } + + l.needed -= 1; + if (l.needed == 0) { + // We've done everything we need to do for this level. + // Continue calculating one level up. Fill in next_pair_freq + // of that level with the sum of the two nodes we've just calculated on + // this level. + if (l.level == max_bits) { + // All done! + break; + } + levels[l.level + 1].next_pair_freq = prev_freq + l.last_freq; + level += 1; + } else { + // If we stole from below, move down temporarily to replenish it. + while (levels[level - 1].needed > 0) { + level -= 1; + if (level == 0) { + break; + } + } + } + } + } + + // Somethings is wrong if at the end, the top level is null or hasn't used + // all of the leaves. + assert(leaf_counts[max_bits][max_bits] == n); + + var bit_count = self.bit_count[0 .. max_bits + 1]; + var bits: u32 = 1; + const counts = &leaf_counts[max_bits]; + { + var level = max_bits; + while (level > 0) : (level -= 1) { + // counts[level] gives the number of literals requiring at least "bits" + // bits to encode. + bit_count[bits] = counts[level] - counts[level - 1]; + bits += 1; + if (level == 0) { + break; + } + } + } + return bit_count; + } + + // Look at the leaves and assign them a bit count and an encoding as specified + // in RFC 1951 3.2.2 + fn assignEncodingAndSize(self: *Self, bit_count: []u32, list_arg: []LiteralNode) void { + var code = @as(u16, 0); + var list = list_arg; + + for (bit_count, 0..) |bits, n| { + code <<= 1; + if (n == 0 or bits == 0) { + continue; + } + // The literals list[list.len-bits] .. list[list.len-bits] + // are encoded using "bits" bits, and get the values + // code, code + 1, .... The code values are + // assigned in literal order (not frequency order). + const chunk = list[list.len - @as(u32, @intCast(bits)) ..]; + + self.lns = chunk; + mem.sort(LiteralNode, self.lns, {}, byLiteral); + + for (chunk) |node| { + self.codes[node.literal] = HuffCode{ + .code = bitReverse(u16, code, @as(u5, @intCast(n))), + .len = @as(u16, @intCast(n)), + }; + code += 1; + } + list = list[0 .. 
list.len - @as(u32, @intCast(bits))]; + } + } + }; +} + +fn maxNode() LiteralNode { + return LiteralNode{ + .literal = math.maxInt(u16), + .freq = math.maxInt(u16), + }; +} + +pub fn huffmanEncoder(comptime size: u32) HuffmanEncoder(size) { + return .{}; +} + +pub const LiteralEncoder = HuffmanEncoder(consts.max_num_frequencies); +pub const DistanceEncoder = HuffmanEncoder(consts.distance_code_count); +pub const CodegenEncoder = HuffmanEncoder(19); + +// Generates a HuffmanCode corresponding to the fixed literal table +pub fn fixedLiteralEncoder() LiteralEncoder { + var h: LiteralEncoder = undefined; + var ch: u16 = 0; + + while (ch < consts.max_num_frequencies) : (ch += 1) { + var bits: u16 = undefined; + var size: u16 = undefined; + switch (ch) { + 0...143 => { + // size 8, 000110000 .. 10111111 + bits = ch + 48; + size = 8; + }, + 144...255 => { + // size 9, 110010000 .. 111111111 + bits = ch + 400 - 144; + size = 9; + }, + 256...279 => { + // size 7, 0000000 .. 0010111 + bits = ch - 256; + size = 7; + }, + else => { + // size 8, 11000000 .. 11000111 + bits = ch + 192 - 280; + size = 8; + }, + } + h.codes[ch] = HuffCode{ .code = bitReverse(u16, bits, @as(u5, @intCast(size))), .len = size }; + } + return h; +} + +pub fn fixedDistanceEncoder() DistanceEncoder { + var h: DistanceEncoder = undefined; + for (h.codes, 0..) |_, ch| { + h.codes[ch] = HuffCode{ .code = bitReverse(u16, @as(u16, @intCast(ch)), 5), .len = 5 }; + } + return h; +} + +pub fn huffmanDistanceEncoder() DistanceEncoder { + var distance_freq = [1]u16{0} ** consts.distance_code_count; + distance_freq[0] = 1; + // huff_distance is a static distance encoder used for huffman only encoding. + // It can be reused since we will not be encoding distance values. + var h: DistanceEncoder = .{}; + h.generate(distance_freq[0..], 15); + return h; +} + +fn byLiteral(context: void, a: LiteralNode, b: LiteralNode) bool { + _ = context; + return a.literal < b.literal; +} + +fn byFreq(context: void, a: LiteralNode, b: LiteralNode) bool { + _ = context; + if (a.freq == b.freq) { + return a.literal < b.literal; + } + return a.freq < b.freq; +} + +test "flate.HuffmanEncoder generate a Huffman code from an array of frequencies" { + var freqs: [19]u16 = [_]u16{ + 8, // 0 + 1, // 1 + 1, // 2 + 2, // 3 + 5, // 4 + 10, // 5 + 9, // 6 + 1, // 7 + 0, // 8 + 0, // 9 + 0, // 10 + 0, // 11 + 0, // 12 + 0, // 13 + 0, // 14 + 0, // 15 + 1, // 16 + 3, // 17 + 5, // 18 + }; + + var enc = huffmanEncoder(19); + enc.generate(freqs[0..], 7); + + try testing.expectEqual(@as(u32, 141), enc.bitLength(freqs[0..])); + + try testing.expectEqual(@as(usize, 3), enc.codes[0].len); + try testing.expectEqual(@as(usize, 6), enc.codes[1].len); + try testing.expectEqual(@as(usize, 6), enc.codes[2].len); + try testing.expectEqual(@as(usize, 5), enc.codes[3].len); + try testing.expectEqual(@as(usize, 3), enc.codes[4].len); + try testing.expectEqual(@as(usize, 2), enc.codes[5].len); + try testing.expectEqual(@as(usize, 2), enc.codes[6].len); + try testing.expectEqual(@as(usize, 6), enc.codes[7].len); + try testing.expectEqual(@as(usize, 0), enc.codes[8].len); + try testing.expectEqual(@as(usize, 0), enc.codes[9].len); + try testing.expectEqual(@as(usize, 0), enc.codes[10].len); + try testing.expectEqual(@as(usize, 0), enc.codes[11].len); + try testing.expectEqual(@as(usize, 0), enc.codes[12].len); + try testing.expectEqual(@as(usize, 0), enc.codes[13].len); + try testing.expectEqual(@as(usize, 0), enc.codes[14].len); + try testing.expectEqual(@as(usize, 0), enc.codes[15].len); 
+ try testing.expectEqual(@as(usize, 6), enc.codes[16].len); + try testing.expectEqual(@as(usize, 5), enc.codes[17].len); + try testing.expectEqual(@as(usize, 3), enc.codes[18].len); + + try testing.expectEqual(@as(u16, 0x0), enc.codes[5].code); + try testing.expectEqual(@as(u16, 0x2), enc.codes[6].code); + try testing.expectEqual(@as(u16, 0x1), enc.codes[0].code); + try testing.expectEqual(@as(u16, 0x5), enc.codes[4].code); + try testing.expectEqual(@as(u16, 0x3), enc.codes[18].code); + try testing.expectEqual(@as(u16, 0x7), enc.codes[3].code); + try testing.expectEqual(@as(u16, 0x17), enc.codes[17].code); + try testing.expectEqual(@as(u16, 0x0f), enc.codes[1].code); + try testing.expectEqual(@as(u16, 0x2f), enc.codes[2].code); + try testing.expectEqual(@as(u16, 0x1f), enc.codes[7].code); + try testing.expectEqual(@as(u16, 0x3f), enc.codes[16].code); +} + +test "flate.HuffmanEncoder generate a Huffman code for the fixed literal table specific to Deflate" { + const enc = fixedLiteralEncoder(); + for (enc.codes) |c| { + switch (c.len) { + 7 => { + const v = @bitReverse(@as(u7, @intCast(c.code))); + try testing.expect(v <= 0b0010111); + }, + 8 => { + const v = @bitReverse(@as(u8, @intCast(c.code))); + try testing.expect((v >= 0b000110000 and v <= 0b10111111) or + (v >= 0b11000000 and v <= 11000111)); + }, + 9 => { + const v = @bitReverse(@as(u9, @intCast(c.code))); + try testing.expect(v >= 0b110010000 and v <= 0b111111111); + }, + else => unreachable, + } + } +} + +test "flate.HuffmanEncoder generate a Huffman code for the 30 possible relative distances (LZ77 distances) of Deflate" { + const enc = fixedDistanceEncoder(); + for (enc.codes) |c| { + const v = @bitReverse(@as(u5, @intCast(c.code))); + try testing.expect(v <= 29); + try testing.expect(c.len == 5); + } +} + +// Reverse bit-by-bit a N-bit code. 
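+// For example, reversing the 5-bit code 0b11101 (29) gives 0b10111 (23); see the
+// bitReverse test table below.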
+fn bitReverse(comptime T: type, value: T, n: usize) T { + const r = @bitReverse(value); + return r >> @as(math.Log2Int(T), @intCast(@typeInfo(T).Int.bits - n)); +} + +test "flate bitReverse" { + const ReverseBitsTest = struct { + in: u16, + bit_count: u5, + out: u16, + }; + + const reverse_bits_tests = [_]ReverseBitsTest{ + .{ .in = 1, .bit_count = 1, .out = 1 }, + .{ .in = 1, .bit_count = 2, .out = 2 }, + .{ .in = 1, .bit_count = 3, .out = 4 }, + .{ .in = 1, .bit_count = 4, .out = 8 }, + .{ .in = 1, .bit_count = 5, .out = 16 }, + .{ .in = 17, .bit_count = 5, .out = 17 }, + .{ .in = 257, .bit_count = 9, .out = 257 }, + .{ .in = 29, .bit_count = 5, .out = 23 }, + }; + + for (reverse_bits_tests) |h| { + const v = bitReverse(u16, h.in, h.bit_count); + try std.testing.expectEqual(h.out, v); + } +} + +test "flate.HuffmanEncoder fixedLiteralEncoder codes" { + var al = std.ArrayList(u8).init(testing.allocator); + defer al.deinit(); + var bw = std.io.bitWriter(.little, al.writer()); + + const f = fixedLiteralEncoder(); + for (f.codes) |c| { + try bw.writeBits(c.code, c.len); + } + try testing.expectEqualSlices(u8, &fixed_codes, al.items); +} + +pub const fixed_codes = [_]u8{ + 0b00001100, 0b10001100, 0b01001100, 0b11001100, 0b00101100, 0b10101100, 0b01101100, 0b11101100, + 0b00011100, 0b10011100, 0b01011100, 0b11011100, 0b00111100, 0b10111100, 0b01111100, 0b11111100, + 0b00000010, 0b10000010, 0b01000010, 0b11000010, 0b00100010, 0b10100010, 0b01100010, 0b11100010, + 0b00010010, 0b10010010, 0b01010010, 0b11010010, 0b00110010, 0b10110010, 0b01110010, 0b11110010, + 0b00001010, 0b10001010, 0b01001010, 0b11001010, 0b00101010, 0b10101010, 0b01101010, 0b11101010, + 0b00011010, 0b10011010, 0b01011010, 0b11011010, 0b00111010, 0b10111010, 0b01111010, 0b11111010, + 0b00000110, 0b10000110, 0b01000110, 0b11000110, 0b00100110, 0b10100110, 0b01100110, 0b11100110, + 0b00010110, 0b10010110, 0b01010110, 0b11010110, 0b00110110, 0b10110110, 0b01110110, 0b11110110, + 0b00001110, 0b10001110, 0b01001110, 0b11001110, 0b00101110, 0b10101110, 0b01101110, 0b11101110, + 0b00011110, 0b10011110, 0b01011110, 0b11011110, 0b00111110, 0b10111110, 0b01111110, 0b11111110, + 0b00000001, 0b10000001, 0b01000001, 0b11000001, 0b00100001, 0b10100001, 0b01100001, 0b11100001, + 0b00010001, 0b10010001, 0b01010001, 0b11010001, 0b00110001, 0b10110001, 0b01110001, 0b11110001, + 0b00001001, 0b10001001, 0b01001001, 0b11001001, 0b00101001, 0b10101001, 0b01101001, 0b11101001, + 0b00011001, 0b10011001, 0b01011001, 0b11011001, 0b00111001, 0b10111001, 0b01111001, 0b11111001, + 0b00000101, 0b10000101, 0b01000101, 0b11000101, 0b00100101, 0b10100101, 0b01100101, 0b11100101, + 0b00010101, 0b10010101, 0b01010101, 0b11010101, 0b00110101, 0b10110101, 0b01110101, 0b11110101, + 0b00001101, 0b10001101, 0b01001101, 0b11001101, 0b00101101, 0b10101101, 0b01101101, 0b11101101, + 0b00011101, 0b10011101, 0b01011101, 0b11011101, 0b00111101, 0b10111101, 0b01111101, 0b11111101, + 0b00010011, 0b00100110, 0b01001110, 0b10011010, 0b00111100, 0b01100101, 0b11101010, 0b10110100, + 0b11101001, 0b00110011, 0b01100110, 0b11001110, 0b10011010, 0b00111101, 0b01100111, 0b11101110, + 0b10111100, 0b11111001, 0b00001011, 0b00010110, 0b00101110, 0b01011010, 0b10111100, 0b01100100, + 0b11101001, 0b10110010, 0b11100101, 0b00101011, 0b01010110, 0b10101110, 0b01011010, 0b10111101, + 0b01100110, 0b11101101, 0b10111010, 0b11110101, 0b00011011, 0b00110110, 0b01101110, 0b11011010, + 0b10111100, 0b01100101, 0b11101011, 0b10110110, 0b11101101, 0b00111011, 0b01110110, 0b11101110, + 0b11011010, 
0b10111101, 0b01100111, 0b11101111, 0b10111110, 0b11111101, 0b00000111, 0b00001110, + 0b00011110, 0b00111010, 0b01111100, 0b11100100, 0b11101000, 0b10110001, 0b11100011, 0b00100111, + 0b01001110, 0b10011110, 0b00111010, 0b01111101, 0b11100110, 0b11101100, 0b10111001, 0b11110011, + 0b00010111, 0b00101110, 0b01011110, 0b10111010, 0b01111100, 0b11100101, 0b11101010, 0b10110101, + 0b11101011, 0b00110111, 0b01101110, 0b11011110, 0b10111010, 0b01111101, 0b11100111, 0b11101110, + 0b10111101, 0b11111011, 0b00001111, 0b00011110, 0b00111110, 0b01111010, 0b11111100, 0b11100100, + 0b11101001, 0b10110011, 0b11100111, 0b00101111, 0b01011110, 0b10111110, 0b01111010, 0b11111101, + 0b11100110, 0b11101101, 0b10111011, 0b11110111, 0b00011111, 0b00111110, 0b01111110, 0b11111010, + 0b11111100, 0b11100101, 0b11101011, 0b10110111, 0b11101111, 0b00111111, 0b01111110, 0b11111110, + 0b11111010, 0b11111101, 0b11100111, 0b11101111, 0b10111111, 0b11111111, 0b00000000, 0b00100000, + 0b00001000, 0b00001100, 0b10000001, 0b11000010, 0b11100000, 0b00001000, 0b00100100, 0b00001010, + 0b10001101, 0b11000001, 0b11100010, 0b11110000, 0b00000100, 0b00100010, 0b10001001, 0b01001100, + 0b10100001, 0b11010010, 0b11101000, 0b00000011, 0b10000011, 0b01000011, 0b11000011, 0b00100011, + 0b10100011, +}; diff --git a/lib/std/compress/flate/inflate.zig b/lib/std/compress/flate/inflate.zig new file mode 100644 index 0000000000..d87a888d11 --- /dev/null +++ b/lib/std/compress/flate/inflate.zig @@ -0,0 +1,546 @@ +const std = @import("std"); +const assert = std.debug.assert; +const testing = std.testing; + +const hfd = @import("huffman_decoder.zig"); +const BitReader = @import("bit_reader.zig").BitReader; +const CircularBuffer = @import("CircularBuffer.zig"); +const Container = @import("container.zig").Container; +const Token = @import("Token.zig"); +const codegen_order = @import("consts.zig").huffman.codegen_order; + +/// Decompresses deflate bit stream `reader` and writes uncompressed data to the +/// `writer` stream. +pub fn decompress(comptime container: Container, reader: anytype, writer: anytype) !void { + var d = decompressor(container, reader); + try d.decompress(writer); +} + +/// Inflate decompressor for the reader type. +pub fn decompressor(comptime container: Container, reader: anytype) Inflate(container, @TypeOf(reader)) { + return Inflate(container, @TypeOf(reader)).init(reader); +} + +/// Inflate decompresses deflate bit stream. Reads compressed data from reader +/// provided in init. Decompressed data are stored in internal hist buffer and +/// can be accesses iterable `next` or reader interface. +/// +/// Container defines header/footer wrapper around deflate bit stream. Can be +/// gzip or zlib. +/// +/// Deflate bit stream consists of multiple blocks. Block can be one of three types: +/// * stored, non compressed, max 64k in size +/// * fixed, huffman codes are predefined +/// * dynamic, huffman code tables are encoded at the block start +/// +/// `step` function runs decoder until internal `hist` buffer is full. Client +/// than needs to read that data in order to proceed with decoding. 
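+///
+/// Illustrative iterator usage (a sketch; `compressed_reader` stands for any
+/// source reader):
+///
+///     var inf = decompressor(.raw, compressed_reader);
+///     while (try inf.next()) |buf| {
+///         // consume decompressed bytes in buf
+///     }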
+///
+/// Allocates 74.5K of internal buffers, the most important being:
+/// * 64K for history (CircularBuffer)
+/// * ~10K for the huffman decoders (Literal and DistanceDecoder)
+///
+pub fn Inflate(comptime container: Container, comptime ReaderType: type) type {
+    return struct {
+        const BitReaderType = BitReader(ReaderType);
+        const F = BitReaderType.flag;
+
+        bits: BitReaderType = .{},
+        hist: CircularBuffer = .{},
+        // Hasher produces the checksum of the uncompressed data for the gzip/zlib footer.
+        hasher: container.Hasher() = .{},
+
+        // dynamic block huffman code decoders
+        lit_dec: hfd.LiteralDecoder = .{}, // literals
+        dst_dec: hfd.DistanceDecoder = .{}, // distances
+
+        // current read state
+        bfinal: u1 = 0,
+        block_type: u2 = 0b11,
+        state: ReadState = .protocol_header,
+
+        const ReadState = enum {
+            protocol_header,
+            block_header,
+            block,
+            protocol_footer,
+            end,
+        };
+
+        const Self = @This();
+
+        pub const Error = BitReaderType.Error || Container.Error || hfd.Error || error{
+            InvalidCode,
+            InvalidMatch,
+            InvalidBlockType,
+            WrongStoredBlockNlen,
+            InvalidDynamicBlockHeader,
+        };
+
+        pub fn init(rt: ReaderType) Self {
+            return .{ .bits = BitReaderType.init(rt) };
+        }
+
+        fn blockHeader(self: *Self) !void {
+            self.bfinal = try self.bits.read(u1);
+            self.block_type = try self.bits.read(u2);
+        }
+
+        fn storedBlock(self: *Self) !bool {
+            self.bits.alignToByte(); // skip padding until byte boundary
+            // everything after this is byte aligned in the stored block
+            var len = try self.bits.read(u16);
+            const nlen = try self.bits.read(u16);
+            if (len != ~nlen) return error.WrongStoredBlockNlen;
+
+            while (len > 0) {
+                const buf = self.hist.getWritable(len);
+                try self.bits.readAll(buf);
+                len -= @intCast(buf.len);
+            }
+            return true;
+        }
+
+        fn fixedBlock(self: *Self) !bool {
+            while (!self.hist.full()) {
+                const code = try self.bits.readFixedCode();
+                switch (code) {
+                    0...255 => self.hist.write(@intCast(code)),
+                    256 => return true, // end of block
+                    257...285 => try self.fixedDistanceCode(@intCast(code - 257)),
+                    else => return error.InvalidCode,
+                }
+            }
+            return false;
+        }
+
+        // Handles a fixed block non-literal (length) code.
+        // The length code is followed by 5 bits of distance code.
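+        // The fill(5 + 5 + 13) below buffers the worst case: up to 5 extra length
+        // bits, the 5-bit distance code, and up to 13 extra distance bits, so the
+        // following reads can use the buffered fast path.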
+ fn fixedDistanceCode(self: *Self, code: u8) !void { + try self.bits.fill(5 + 5 + 13); + const length = try self.decodeLength(code); + const distance = try self.decodeDistance(try self.bits.readF(u5, F.buffered | F.reverse)); + try self.hist.writeMatch(length, distance); + } + + inline fn decodeLength(self: *Self, code: u8) !u16 { + if (code > 28) return error.InvalidCode; + const ml = Token.matchLength(code); + return if (ml.extra_bits == 0) // 0 - 5 extra bits + ml.base + else + ml.base + try self.bits.readN(ml.extra_bits, F.buffered); + } + + fn decodeDistance(self: *Self, code: u8) !u16 { + if (code > 29) return error.InvalidCode; + const md = Token.matchDistance(code); + return if (md.extra_bits == 0) // 0 - 13 extra bits + md.base + else + md.base + try self.bits.readN(md.extra_bits, F.buffered); + } + + fn dynamicBlockHeader(self: *Self) !void { + const hlit: u16 = @as(u16, try self.bits.read(u5)) + 257; // number of ll code entries present - 257 + const hdist: u16 = @as(u16, try self.bits.read(u5)) + 1; // number of distance code entries - 1 + const hclen: u8 = @as(u8, try self.bits.read(u4)) + 4; // hclen + 4 code lenths are encoded + + if (hlit > 286 or hdist > 30) + return error.InvalidDynamicBlockHeader; + + // lengths for code lengths + var cl_lens = [_]u4{0} ** 19; + for (0..hclen) |i| { + cl_lens[codegen_order[i]] = try self.bits.read(u3); + } + var cl_dec: hfd.CodegenDecoder = .{}; + try cl_dec.generate(&cl_lens); + + // literal code lengths + var lit_lens = [_]u4{0} ** (286); + var pos: usize = 0; + while (pos < hlit) { + const sym = try cl_dec.find(try self.bits.peekF(u7, F.reverse)); + try self.bits.shift(sym.code_bits); + pos += try self.dynamicCodeLength(sym.symbol, &lit_lens, pos); + } + if (pos > hlit) + return error.InvalidDynamicBlockHeader; + + // distance code lenths + var dst_lens = [_]u4{0} ** (30); + pos = 0; + while (pos < hdist) { + const sym = try cl_dec.find(try self.bits.peekF(u7, F.reverse)); + try self.bits.shift(sym.code_bits); + pos += try self.dynamicCodeLength(sym.symbol, &dst_lens, pos); + } + if (pos > hdist) + return error.InvalidDynamicBlockHeader; + + try self.lit_dec.generate(&lit_lens); + try self.dst_dec.generate(&dst_lens); + } + + // Decode code length symbol to code length. Writes decoded length into + // lens slice starting at position pos. Returns number of positions + // advanced. + fn dynamicCodeLength(self: *Self, code: u16, lens: []u4, pos: usize) !usize { + if (pos >= lens.len) + return error.InvalidDynamicBlockHeader; + + switch (code) { + 0...15 => { + // Represent code lengths of 0 - 15 + lens[pos] = @intCast(code); + return 1; + }, + 16 => { + // Copy the previous code length 3 - 6 times. + // The next 2 bits indicate repeat length + const n: u8 = @as(u8, try self.bits.read(u2)) + 3; + if (pos == 0 or pos + n > lens.len) + return error.InvalidDynamicBlockHeader; + for (0..n) |i| { + lens[pos + i] = lens[pos + i - 1]; + } + return n; + }, + // Repeat a code length of 0 for 3 - 10 times. (3 bits of length) + 17 => return @as(u8, try self.bits.read(u3)) + 3, + // Repeat a code length of 0 for 11 - 138 times (7 bits of length) + 18 => return @as(u8, try self.bits.read(u7)) + 11, + else => return error.InvalidDynamicBlockHeader, + } + } + + // In larger archives most blocks are usually dynamic, so decompression + // performance depends on this function. + fn dynamicBlock(self: *Self) !bool { + // Hot path loop! 
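+            // Each pass decodes one symbol: a literal byte is written to history,
+            // a length/distance pair is copied from earlier history, and
+            // end_of_block finishes the block.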
+ while (!self.hist.full()) { + try self.bits.fill(15); // optimization so other bit reads can be buffered (avoiding one `if` in hot path) + const sym = try self.decodeSymbol(&self.lit_dec); + + switch (sym.kind) { + .literal => self.hist.write(sym.symbol), + .match => { // Decode match backreference + try self.bits.fill(5 + 15 + 13); // so we can use buffered reads + const length = try self.decodeLength(sym.symbol); + const dsm = try self.decodeSymbol(&self.dst_dec); + const distance = try self.decodeDistance(dsm.symbol); + try self.hist.writeMatch(length, distance); + }, + .end_of_block => return true, + } + } + return false; + } + + // Peek 15 bits from bits reader (maximum code len is 15 bits). Use + // decoder to find symbol for that code. We then know how many bits is + // used. Shift bit reader for that much bits, those bits are used. And + // return symbol. + fn decodeSymbol(self: *Self, decoder: anytype) !hfd.Symbol { + const sym = try decoder.find(try self.bits.peekF(u15, F.buffered | F.reverse)); + try self.bits.shift(sym.code_bits); + return sym; + } + + fn step(self: *Self) !void { + switch (self.state) { + .protocol_header => { + try container.parseHeader(&self.bits); + self.state = .block_header; + }, + .block_header => { + try self.blockHeader(); + self.state = .block; + if (self.block_type == 2) try self.dynamicBlockHeader(); + }, + .block => { + const done = switch (self.block_type) { + 0 => try self.storedBlock(), + 1 => try self.fixedBlock(), + 2 => try self.dynamicBlock(), + else => return error.InvalidBlockType, + }; + if (done) { + self.state = if (self.bfinal == 1) .protocol_footer else .block_header; + } + }, + .protocol_footer => { + self.bits.alignToByte(); + try container.parseFooter(&self.hasher, &self.bits); + self.state = .end; + }, + .end => {}, + } + } + + /// Replaces the inner reader with new reader. + pub fn setReader(self: *Self, new_reader: ReaderType) void { + self.bits.forward_reader = new_reader; + if (self.state == .end or self.state == .protocol_footer) { + self.state = .protocol_header; + } + } + + // Reads all compressed data from the internal reader and outputs plain + // (uncompressed) data to the provided writer. + pub fn decompress(self: *Self, writer: anytype) !void { + while (try self.next()) |buf| { + try writer.writeAll(buf); + } + } + + // Iterator interface + + /// Can be used in iterator like loop without memcpy to another buffer: + /// while (try inflate.next()) |buf| { ... } + pub fn next(self: *Self) Error!?[]const u8 { + const out = try self.get(0); + if (out.len == 0) return null; + return out; + } + + /// Returns decompressed data from internal sliding window buffer. + /// Returned buffer can be any length between 0 and `limit` bytes. + /// 0 returned bytes means end of stream reached. + /// With limit=0 returns as much data can. It newer will be more + /// than 65536 bytes, which is limit of internal buffer. + pub fn get(self: *Self, limit: usize) Error![]const u8 { + while (true) { + const out = self.hist.readAtMost(limit); + if (out.len > 0) { + self.hasher.update(out); + return out; + } + if (self.state == .end) return out; + try self.step(); + } + } + + // Reader interface + + pub const Reader = std.io.Reader(*Self, Error, read); + + /// Returns the number of bytes read. It may be less than buffer.len. + /// If the number of bytes read is 0, it means end of stream. + /// End of stream is not an error condition. 
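+        ///
+        /// Sketch of use through the std.io.Reader interface (`compressed_reader`
+        /// and `buf` are assumed names):
+        ///
+        ///     var dcp = decompressor(.gzip, compressed_reader);
+        ///     const n = try dcp.reader().read(&buf);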
+ pub fn read(self: *Self, buffer: []u8) Error!usize { + const out = try self.get(buffer.len); + @memcpy(buffer[0..out.len], out); + return out.len; + } + + pub fn reader(self: *Self) Reader { + return .{ .context = self }; + } + }; +} + +test "flate.Inflate struct sizes" { + var fbs = std.io.fixedBufferStream(""); + const ReaderType = @TypeOf(fbs.reader()); + const inflate_size = @sizeOf(Inflate(.gzip, ReaderType)); + + try testing.expectEqual(76320, inflate_size); + try testing.expectEqual( + @sizeOf(CircularBuffer) + @sizeOf(hfd.LiteralDecoder) + @sizeOf(hfd.DistanceDecoder) + 48, + inflate_size, + ); + try testing.expectEqual(65536 + 8 + 8, @sizeOf(CircularBuffer)); + try testing.expectEqual(8, @sizeOf(Container.raw.Hasher())); + try testing.expectEqual(24, @sizeOf(BitReader(ReaderType))); + try testing.expectEqual(6384, @sizeOf(hfd.LiteralDecoder)); + try testing.expectEqual(4336, @sizeOf(hfd.DistanceDecoder)); +} + +test "flate.Inflate decompress" { + const cases = [_]struct { + in: []const u8, + out: []const u8, + }{ + // non compressed block (type 0) + .{ + .in = &[_]u8{ + 0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // deflate fixed buffer header len, nlen + 'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a, // non compressed data + }, + .out = "Hello world\n", + }, + // fixed code block (type 1) + .{ + .in = &[_]u8{ + 0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x57, 0x28, 0xcf, // deflate data block type 1 + 0x2f, 0xca, 0x49, 0xe1, 0x02, 0x00, + }, + .out = "Hello world\n", + }, + // dynamic block (type 2) + .{ + .in = &[_]u8{ + 0x3d, 0xc6, 0x39, 0x11, 0x00, 0x00, 0x0c, 0x02, // deflate data block type 2 + 0x30, 0x2b, 0xb5, 0x52, 0x1e, 0xff, 0x96, 0x38, + 0x16, 0x96, 0x5c, 0x1e, 0x94, 0xcb, 0x6d, 0x01, + }, + .out = "ABCDEABCD ABCDEABCD", + }, + }; + for (cases) |c| { + var fb = std.io.fixedBufferStream(c.in); + var al = std.ArrayList(u8).init(testing.allocator); + defer al.deinit(); + + try decompress(.raw, fb.reader(), al.writer()); + try testing.expectEqualStrings(c.out, al.items); + } +} + +test "flate.Inflate gzip decompress" { + const cases = [_]struct { + in: []const u8, + out: []const u8, + }{ + // non compressed block (type 0) + .{ + .in = &[_]u8{ + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, // gzip header (10 bytes) + 0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // deflate fixed buffer header len, nlen + 'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a, // non compressed data + 0xd5, 0xe0, 0x39, 0xb7, // gzip footer: checksum + 0x0c, 0x00, 0x00, 0x00, // gzip footer: size + }, + .out = "Hello world\n", + }, + // fixed code block (type 1) + .{ + .in = &[_]u8{ + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x03, // gzip header (10 bytes) + 0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x57, 0x28, 0xcf, // deflate data block type 1 + 0x2f, 0xca, 0x49, 0xe1, 0x02, 0x00, + 0xd5, 0xe0, 0x39, 0xb7, 0x0c, 0x00, 0x00, 0x00, // gzip footer (chksum, len) + }, + .out = "Hello world\n", + }, + // dynamic block (type 2) + .{ + .in = &[_]u8{ + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, // gzip header (10 bytes) + 0x3d, 0xc6, 0x39, 0x11, 0x00, 0x00, 0x0c, 0x02, // deflate data block type 2 + 0x30, 0x2b, 0xb5, 0x52, 0x1e, 0xff, 0x96, 0x38, + 0x16, 0x96, 0x5c, 0x1e, 0x94, 0xcb, 0x6d, 0x01, + 0x17, 0x1c, 0x39, 0xb4, 0x13, 0x00, 0x00, 0x00, // gzip footer (chksum, len) + }, + .out = "ABCDEABCD ABCDEABCD", + }, + // gzip header with name + .{ + .in = &[_]u8{ + 0x1f, 0x8b, 0x08, 0x08, 0xe5, 0x70, 0xb1, 0x65, 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 
0x6f, 0x2e, + 0x74, 0x78, 0x74, 0x00, 0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1, + 0x02, 0x00, 0xd5, 0xe0, 0x39, 0xb7, 0x0c, 0x00, 0x00, 0x00, + }, + .out = "Hello world\n", + }, + }; + for (cases) |c| { + var fb = std.io.fixedBufferStream(c.in); + var al = std.ArrayList(u8).init(testing.allocator); + defer al.deinit(); + + try decompress(.gzip, fb.reader(), al.writer()); + try testing.expectEqualStrings(c.out, al.items); + } +} + +test "flate.Inflate zlib decompress" { + const cases = [_]struct { + in: []const u8, + out: []const u8, + }{ + // non compressed block (type 0) + .{ + .in = &[_]u8{ + 0x78, 0b10_0_11100, // zlib header (2 bytes) + 0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // deflate fixed buffer header len, nlen + 'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a, // non compressed data + 0x1c, 0xf2, 0x04, 0x47, // zlib footer: checksum + }, + .out = "Hello world\n", + }, + }; + for (cases) |c| { + var fb = std.io.fixedBufferStream(c.in); + var al = std.ArrayList(u8).init(testing.allocator); + defer al.deinit(); + + try decompress(.zlib, fb.reader(), al.writer()); + try testing.expectEqualStrings(c.out, al.items); + } +} + +test "flate.Inflate fuzzing tests" { + const cases = [_]struct { + input: []const u8, + out: []const u8 = "", + err: ?anyerror = null, + }{ + .{ .input = "deflate-stream", .out = @embedFile("testdata/fuzz/deflate-stream.expect") }, // 0 + .{ .input = "empty-distance-alphabet01" }, + .{ .input = "empty-distance-alphabet02" }, + .{ .input = "end-of-stream", .err = error.EndOfStream }, + .{ .input = "invalid-distance", .err = error.InvalidMatch }, + .{ .input = "invalid-tree01", .err = error.IncompleteHuffmanTree }, // 5 + .{ .input = "invalid-tree02", .err = error.IncompleteHuffmanTree }, + .{ .input = "invalid-tree03", .err = error.IncompleteHuffmanTree }, + .{ .input = "lengths-overflow", .err = error.InvalidDynamicBlockHeader }, + .{ .input = "out-of-codes", .err = error.InvalidCode }, + .{ .input = "puff01", .err = error.WrongStoredBlockNlen }, // 10 + .{ .input = "puff02", .err = error.EndOfStream }, + .{ .input = "puff03", .out = &[_]u8{0xa} }, + .{ .input = "puff04", .err = error.InvalidCode }, + .{ .input = "puff05", .err = error.EndOfStream }, + .{ .input = "puff06", .err = error.EndOfStream }, + .{ .input = "puff08", .err = error.InvalidCode }, + .{ .input = "puff09", .out = "P" }, + .{ .input = "puff10", .err = error.InvalidCode }, + .{ .input = "puff11", .err = error.InvalidMatch }, + .{ .input = "puff12", .err = error.InvalidDynamicBlockHeader }, // 20 + .{ .input = "puff13", .err = error.IncompleteHuffmanTree }, + .{ .input = "puff14", .err = error.EndOfStream }, + .{ .input = "puff15", .err = error.IncompleteHuffmanTree }, + .{ .input = "puff16", .err = error.InvalidDynamicBlockHeader }, + .{ .input = "puff17", .err = error.InvalidDynamicBlockHeader }, // 25 + .{ .input = "fuzz1", .err = error.InvalidDynamicBlockHeader }, + .{ .input = "fuzz2", .err = error.InvalidDynamicBlockHeader }, + .{ .input = "fuzz3", .err = error.InvalidMatch }, + .{ .input = "fuzz4", .err = error.OversubscribedHuffmanTree }, + .{ .input = "puff18", .err = error.OversubscribedHuffmanTree }, // 30 + .{ .input = "puff19", .err = error.OversubscribedHuffmanTree }, + .{ .input = "puff20", .err = error.OversubscribedHuffmanTree }, + .{ .input = "puff21", .err = error.OversubscribedHuffmanTree }, + .{ .input = "puff22", .err = error.OversubscribedHuffmanTree }, + .{ .input = "puff23", .err = error.InvalidDynamicBlockHeader }, // 
35 + .{ .input = "puff24", .err = error.InvalidDynamicBlockHeader }, + .{ .input = "puff25", .err = error.OversubscribedHuffmanTree }, + .{ .input = "puff26", .err = error.InvalidDynamicBlockHeader }, + .{ .input = "puff27", .err = error.InvalidDynamicBlockHeader }, + }; + + inline for (cases, 0..) |c, case_no| { + var in = std.io.fixedBufferStream(@embedFile("testdata/fuzz/" ++ c.input ++ ".input")); + var out = std.ArrayList(u8).init(testing.allocator); + defer out.deinit(); + errdefer std.debug.print("test case failed {}\n", .{case_no}); + + if (c.err) |expected_err| { + try testing.expectError(expected_err, decompress(.raw, in.reader(), out.writer())); + } else { + try decompress(.raw, in.reader(), out.writer()); + try testing.expectEqualStrings(c.out, out.items); + } + } +} diff --git a/lib/std/compress/flate/root.zig b/lib/std/compress/flate/root.zig new file mode 100644 index 0000000000..da385b21b0 --- /dev/null +++ b/lib/std/compress/flate/root.zig @@ -0,0 +1,133 @@ +pub const flate = @import("flate.zig"); +pub const gzip = @import("gzip.zig"); +pub const zlib = @import("zlib.zig"); + +test "flate" { + _ = @import("deflate.zig"); + _ = @import("inflate.zig"); +} + +test "flate public interface" { + const plain_data = [_]u8{ 'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a }; + + // deflate final stored block, header + plain (stored) data + const deflate_block = [_]u8{ + 0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // deflate fixed buffer header len, nlen + } ++ plain_data; + + // gzip header/footer + deflate block + const gzip_data = + [_]u8{ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03 } ++ // gzip header (10 bytes) + deflate_block ++ + [_]u8{ 0xd5, 0xe0, 0x39, 0xb7, 0x0c, 0x00, 0x00, 0x00 }; // gzip footer checksum (4 byte), size (4 bytes) + + // zlib header/footer + deflate block + const zlib_data = [_]u8{ 0x78, 0b10_0_11100 } ++ // zlib header (2 bytes)} + deflate_block ++ + [_]u8{ 0x1c, 0xf2, 0x04, 0x47 }; // zlib footer: checksum + + try testInterface(gzip, &gzip_data, &plain_data); + try testInterface(zlib, &zlib_data, &plain_data); + try testInterface(flate, &deflate_block, &plain_data); +} + +fn testInterface(comptime pkg: type, gzip_data: []const u8, plain_data: []const u8) !void { + const std = @import("std"); + const testing = std.testing; + const fixedBufferStream = std.io.fixedBufferStream; + + var buffer1: [64]u8 = undefined; + var buffer2: [64]u8 = undefined; + + var compressed = fixedBufferStream(&buffer1); + var plain = fixedBufferStream(&buffer2); + + // decompress + { + var in = fixedBufferStream(gzip_data); + try pkg.decompress(in.reader(), plain.writer()); + try testing.expectEqualSlices(u8, plain_data, plain.getWritten()); + } + plain.reset(); + compressed.reset(); + + // compress/decompress + { + var in = fixedBufferStream(plain_data); + try pkg.compress(in.reader(), compressed.writer(), .{}); + compressed.reset(); + try pkg.decompress(compressed.reader(), plain.writer()); + try testing.expectEqualSlices(u8, plain_data, plain.getWritten()); + } + plain.reset(); + compressed.reset(); + + // compressor/decompressor + { + var in = fixedBufferStream(plain_data); + var cmp = try pkg.compressor(compressed.writer(), .{}); + try cmp.compress(in.reader()); + try cmp.finish(); + + compressed.reset(); + var dcp = pkg.decompressor(compressed.reader()); + try dcp.decompress(plain.writer()); + try testing.expectEqualSlices(u8, plain_data, plain.getWritten()); + } + plain.reset(); + compressed.reset(); + + // huffman + { + // huffman 
compress/decompress + { + var in = fixedBufferStream(plain_data); + try pkg.huffman.compress(in.reader(), compressed.writer()); + compressed.reset(); + try pkg.decompress(compressed.reader(), plain.writer()); + try testing.expectEqualSlices(u8, plain_data, plain.getWritten()); + } + plain.reset(); + compressed.reset(); + + // huffman compressor/decompressor + { + var in = fixedBufferStream(plain_data); + var cmp = try pkg.huffman.compressor(compressed.writer()); + try cmp.compress(in.reader()); + try cmp.finish(); + + compressed.reset(); + try pkg.decompress(compressed.reader(), plain.writer()); + try testing.expectEqualSlices(u8, plain_data, plain.getWritten()); + } + } + plain.reset(); + compressed.reset(); + + // store + { + // store compress/decompress + { + var in = fixedBufferStream(plain_data); + try pkg.store.compress(in.reader(), compressed.writer()); + compressed.reset(); + try pkg.decompress(compressed.reader(), plain.writer()); + try testing.expectEqualSlices(u8, plain_data, plain.getWritten()); + } + plain.reset(); + compressed.reset(); + + // store compressor/decompressor + { + var in = fixedBufferStream(plain_data); + var cmp = try pkg.store.compressor(compressed.writer()); + try cmp.compress(in.reader()); + try cmp.finish(); + + compressed.reset(); + try pkg.decompress(compressed.reader(), plain.writer()); + try testing.expectEqualSlices(u8, plain_data, plain.getWritten()); + } + } +} diff --git a/lib/std/compress/flate/testdata/block_writer.zig b/lib/std/compress/flate/testdata/block_writer.zig new file mode 100644 index 0000000000..cb8f3028d1 --- /dev/null +++ b/lib/std/compress/flate/testdata/block_writer.zig @@ -0,0 +1,606 @@ +const Token = @import("../Token.zig"); + +pub const TestCase = struct { + tokens: []const Token, + input: []const u8 = "", // File name of input data matching the tokens. + want: []const u8 = "", // File name of data with the expected output with input available. + want_no_input: []const u8 = "", // File name of the expected output when no input is available. +}; + +pub const testCases = blk: { + @setEvalBranchQuota(4096 * 2); + + const L = Token.initLiteral; + const M = Token.initMatch; + const ml = M(1, 258); // Maximum length token. 
Used to reduce the size of writeBlockTests + + break :blk &[_]TestCase{ + TestCase{ + .input = "huffman-null-max.input", + .want = "huffman-null-max.{s}.expect", + .want_no_input = "huffman-null-max.{s}.expect-noinput", + .tokens = &[_]Token{ + L(0x0), ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, L(0x0), L(0x0), + }, + }, + TestCase{ + .input = "huffman-pi.input", + .want = "huffman-pi.{s}.expect", + .want_no_input = "huffman-pi.{s}.expect-noinput", + .tokens = &[_]Token{ + L('3'), L('.'), L('1'), L('4'), L('1'), L('5'), L('9'), L('2'), + L('6'), L('5'), L('3'), L('5'), L('8'), L('9'), L('7'), L('9'), + L('3'), L('2'), L('3'), L('8'), L('4'), L('6'), L('2'), L('6'), + L('4'), L('3'), L('3'), L('8'), L('3'), L('2'), L('7'), L('9'), + L('5'), L('0'), L('2'), L('8'), L('8'), L('4'), L('1'), L('9'), + L('7'), L('1'), L('6'), L('9'), L('3'), L('9'), L('9'), L('3'), + L('7'), L('5'), L('1'), L('0'), L('5'), L('8'), L('2'), L('0'), + L('9'), L('7'), L('4'), L('9'), L('4'), L('4'), L('5'), L('9'), + L('2'), L('3'), L('0'), L('7'), L('8'), L('1'), L('6'), L('4'), + L('0'), L('6'), L('2'), L('8'), L('6'), L('2'), L('0'), L('8'), + L('9'), L('9'), L('8'), L('6'), L('2'), L('8'), L('0'), L('3'), + L('4'), L('8'), L('2'), L('5'), L('3'), L('4'), L('2'), L('1'), + L('1'), L('7'), L('0'), L('6'), L('7'), L('9'), L('8'), L('2'), + L('1'), L('4'), L('8'), L('0'), L('8'), L('6'), L('5'), L('1'), + L('3'), L('2'), L('8'), L('2'), L('3'), L('0'), L('6'), L('6'), + L('4'), L('7'), L('0'), L('9'), L('3'), L('8'), L('4'), L('4'), + L('6'), L('0'), L('9'), L('5'), L('5'), L('0'), L('5'), L('8'), + L('2'), L('2'), L('3'), L('1'), L('7'), L('2'), L('5'), L('3'), + L('5'), L('9'), L('4'), L('0'), L('8'), L('1'), L('2'), L('8'), + L('4'), L('8'), L('1'), L('1'), L('1'), L('7'), L('4'), M(127, 4), + L('4'), L('1'), L('0'), L('2'), L('7'), L('0'), L('1'), L('9'), + L('3'), L('8'), L('5'), L('2'), L('1'), L('1'), L('0'), L('5'), + L('5'), L('5'), L('9'), L('6'), L('4'), L('4'), L('6'), L('2'), + L('2'), L('9'), L('4'), L('8'), L('9'), L('5'), L('4'), L('9'), + L('3'), L('0'), L('3'), L('8'), L('1'), M(19, 4), L('2'), L('8'), + L('8'), L('1'), L('0'), L('9'), L('7'), L('5'), L('6'), L('6'), + L('5'), L('9'), L('3'), L('3'), L('4'), L('4'), L('6'), M(72, 4), + L('7'), L('5'), L('6'), L('4'), L('8'), L('2'), L('3'), L('3'), + L('7'), L('8'), L('6'), L('7'), L('8'), L('3'), L('1'), L('6'), + L('5'), L('2'), L('7'), L('1'), L('2'), L('0'), L('1'), L('9'), + L('0'), L('9'), L('1'), L('4'), M(27, 4), L('5'), L('6'), L('6'), + L('9'), L('2'), L('3'), 
L('4'), L('6'), M(179, 4), L('6'), L('1'), + L('0'), L('4'), L('5'), L('4'), L('3'), L('2'), L('6'), M(51, 4), + L('1'), L('3'), L('3'), L('9'), L('3'), L('6'), L('0'), L('7'), + L('2'), L('6'), L('0'), L('2'), L('4'), L('9'), L('1'), L('4'), + L('1'), L('2'), L('7'), L('3'), L('7'), L('2'), L('4'), L('5'), + L('8'), L('7'), L('0'), L('0'), L('6'), L('6'), L('0'), L('6'), + L('3'), L('1'), L('5'), L('5'), L('8'), L('8'), L('1'), L('7'), + L('4'), L('8'), L('8'), L('1'), L('5'), L('2'), L('0'), L('9'), + L('2'), L('0'), L('9'), L('6'), L('2'), L('8'), L('2'), L('9'), + L('2'), L('5'), L('4'), L('0'), L('9'), L('1'), L('7'), L('1'), + L('5'), L('3'), L('6'), L('4'), L('3'), L('6'), L('7'), L('8'), + L('9'), L('2'), L('5'), L('9'), L('0'), L('3'), L('6'), L('0'), + L('0'), L('1'), L('1'), L('3'), L('3'), L('0'), L('5'), L('3'), + L('0'), L('5'), L('4'), L('8'), L('8'), L('2'), L('0'), L('4'), + L('6'), L('6'), L('5'), L('2'), L('1'), L('3'), L('8'), L('4'), + L('1'), L('4'), L('6'), L('9'), L('5'), L('1'), L('9'), L('4'), + L('1'), L('5'), L('1'), L('1'), L('6'), L('0'), L('9'), L('4'), + L('3'), L('3'), L('0'), L('5'), L('7'), L('2'), L('7'), L('0'), + L('3'), L('6'), L('5'), L('7'), L('5'), L('9'), L('5'), L('9'), + L('1'), L('9'), L('5'), L('3'), L('0'), L('9'), L('2'), L('1'), + L('8'), L('6'), L('1'), L('1'), L('7'), M(234, 4), L('3'), L('2'), + M(10, 4), L('9'), L('3'), L('1'), L('0'), L('5'), L('1'), L('1'), + L('8'), L('5'), L('4'), L('8'), L('0'), L('7'), M(271, 4), L('3'), + L('7'), L('9'), L('9'), L('6'), L('2'), L('7'), L('4'), L('9'), + L('5'), L('6'), L('7'), L('3'), L('5'), L('1'), L('8'), L('8'), + L('5'), L('7'), L('5'), L('2'), L('7'), L('2'), L('4'), L('8'), + L('9'), L('1'), L('2'), L('2'), L('7'), L('9'), L('3'), L('8'), + L('1'), L('8'), L('3'), L('0'), L('1'), L('1'), L('9'), L('4'), + L('9'), L('1'), L('2'), L('9'), L('8'), L('3'), L('3'), L('6'), + L('7'), L('3'), L('3'), L('6'), L('2'), L('4'), L('4'), L('0'), + L('6'), L('5'), L('6'), L('6'), L('4'), L('3'), L('0'), L('8'), + L('6'), L('0'), L('2'), L('1'), L('3'), L('9'), L('4'), L('9'), + L('4'), L('6'), L('3'), L('9'), L('5'), L('2'), L('2'), L('4'), + L('7'), L('3'), L('7'), L('1'), L('9'), L('0'), L('7'), L('0'), + L('2'), L('1'), L('7'), L('9'), L('8'), M(154, 5), L('7'), L('0'), + L('2'), L('7'), L('7'), L('0'), L('5'), L('3'), L('9'), L('2'), + L('1'), L('7'), L('1'), L('7'), L('6'), L('2'), L('9'), L('3'), + L('1'), L('7'), L('6'), L('7'), L('5'), M(563, 5), L('7'), L('4'), + L('8'), L('1'), M(7, 4), L('6'), L('6'), L('9'), L('4'), L('0'), + M(488, 4), L('0'), L('0'), L('0'), L('5'), L('6'), L('8'), L('1'), + L('2'), L('7'), L('1'), L('4'), L('5'), L('2'), L('6'), L('3'), + L('5'), L('6'), L('0'), L('8'), L('2'), L('7'), L('7'), L('8'), + L('5'), L('7'), L('7'), L('1'), L('3'), L('4'), L('2'), L('7'), + L('5'), L('7'), L('7'), L('8'), L('9'), L('6'), M(298, 4), L('3'), + L('6'), L('3'), L('7'), L('1'), L('7'), L('8'), L('7'), L('2'), + L('1'), L('4'), L('6'), L('8'), L('4'), L('4'), L('0'), L('9'), + L('0'), L('1'), L('2'), L('2'), L('4'), L('9'), L('5'), L('3'), + L('4'), L('3'), L('0'), L('1'), L('4'), L('6'), L('5'), L('4'), + L('9'), L('5'), L('8'), L('5'), L('3'), L('7'), L('1'), L('0'), + L('5'), L('0'), L('7'), L('9'), M(203, 4), L('6'), M(340, 4), L('8'), + L('9'), L('2'), L('3'), L('5'), L('4'), M(458, 4), L('9'), L('5'), + L('6'), L('1'), L('1'), L('2'), L('1'), L('2'), L('9'), L('0'), + L('2'), L('1'), L('9'), L('6'), L('0'), L('8'), L('6'), L('4'), + L('0'), L('3'), L('4'), L('4'), L('1'), 
L('8'), L('1'), L('5'), + L('9'), L('8'), L('1'), L('3'), L('6'), L('2'), L('9'), L('7'), + L('7'), L('4'), M(117, 4), L('0'), L('9'), L('9'), L('6'), L('0'), + L('5'), L('1'), L('8'), L('7'), L('0'), L('7'), L('2'), L('1'), + L('1'), L('3'), L('4'), L('9'), M(1, 5), L('8'), L('3'), L('7'), + L('2'), L('9'), L('7'), L('8'), L('0'), L('4'), L('9'), L('9'), + M(731, 4), L('9'), L('7'), L('3'), L('1'), L('7'), L('3'), L('2'), + L('8'), M(395, 4), L('6'), L('3'), L('1'), L('8'), L('5'), M(770, 4), + M(745, 4), L('4'), L('5'), L('5'), L('3'), L('4'), L('6'), L('9'), + L('0'), L('8'), L('3'), L('0'), L('2'), L('6'), L('4'), L('2'), + L('5'), L('2'), L('2'), L('3'), L('0'), M(740, 4), M(616, 4), L('8'), + L('5'), L('0'), L('3'), L('5'), L('2'), L('6'), L('1'), L('9'), + L('3'), L('1'), L('1'), M(531, 4), L('1'), L('0'), L('1'), L('0'), + L('0'), L('0'), L('3'), L('1'), L('3'), L('7'), L('8'), L('3'), + L('8'), L('7'), L('5'), L('2'), L('8'), L('8'), L('6'), L('5'), + L('8'), L('7'), L('5'), L('3'), L('3'), L('2'), L('0'), L('8'), + L('3'), L('8'), L('1'), L('4'), L('2'), L('0'), L('6'), M(321, 4), + M(300, 4), L('1'), L('4'), L('7'), L('3'), L('0'), L('3'), L('5'), + L('9'), M(815, 5), L('9'), L('0'), L('4'), L('2'), L('8'), L('7'), + L('5'), L('5'), L('4'), L('6'), L('8'), L('7'), L('3'), L('1'), + L('1'), L('5'), L('9'), L('5'), M(854, 4), L('3'), L('8'), L('8'), + L('2'), L('3'), L('5'), L('3'), L('7'), L('8'), L('7'), L('5'), + M(896, 5), L('9'), M(315, 4), L('1'), M(329, 4), L('8'), L('0'), L('5'), + L('3'), M(395, 4), L('2'), L('2'), L('6'), L('8'), L('0'), L('6'), + L('6'), L('1'), L('3'), L('0'), L('0'), L('1'), L('9'), L('2'), + L('7'), L('8'), L('7'), L('6'), L('6'), L('1'), L('1'), L('1'), + L('9'), L('5'), L('9'), M(568, 4), L('6'), M(293, 5), L('8'), L('9'), + L('3'), L('8'), L('0'), L('9'), L('5'), L('2'), L('5'), L('7'), + L('2'), L('0'), L('1'), L('0'), L('6'), L('5'), L('4'), L('8'), + L('5'), L('8'), L('6'), L('3'), L('2'), L('7'), M(155, 4), L('9'), + L('3'), L('6'), L('1'), L('5'), L('3'), M(545, 4), M(349, 5), L('2'), + L('3'), L('0'), L('3'), L('0'), L('1'), L('9'), L('5'), L('2'), + L('0'), L('3'), L('5'), L('3'), L('0'), L('1'), L('8'), L('5'), + L('2'), M(370, 4), M(118, 4), L('3'), L('6'), L('2'), L('2'), L('5'), + L('9'), L('9'), L('4'), L('1'), L('3'), M(597, 4), L('4'), L('9'), + L('7'), L('2'), L('1'), L('7'), M(223, 4), L('3'), L('4'), L('7'), + L('9'), L('1'), L('3'), L('1'), L('5'), L('1'), L('5'), L('5'), + L('7'), L('4'), L('8'), L('5'), L('7'), L('2'), L('4'), L('2'), + L('4'), L('5'), L('4'), L('1'), L('5'), L('0'), L('6'), L('9'), + M(320, 4), L('8'), L('2'), L('9'), L('5'), L('3'), L('3'), L('1'), + L('1'), L('6'), L('8'), L('6'), L('1'), L('7'), L('2'), L('7'), + L('8'), M(824, 4), L('9'), L('0'), L('7'), L('5'), L('0'), L('9'), + M(270, 4), L('7'), L('5'), L('4'), L('6'), L('3'), L('7'), L('4'), + L('6'), L('4'), L('9'), L('3'), L('9'), L('3'), L('1'), L('9'), + L('2'), L('5'), L('5'), L('0'), L('6'), L('0'), L('4'), L('0'), + L('0'), L('9'), M(620, 4), L('1'), L('6'), L('7'), L('1'), L('1'), + L('3'), L('9'), L('0'), L('0'), L('9'), L('8'), M(822, 4), L('4'), + L('0'), L('1'), L('2'), L('8'), L('5'), L('8'), L('3'), L('6'), + L('1'), L('6'), L('0'), L('3'), L('5'), L('6'), L('3'), L('7'), + L('0'), L('7'), L('6'), L('6'), L('0'), L('1'), L('0'), L('4'), + M(371, 4), L('8'), L('1'), L('9'), L('4'), L('2'), L('9'), M(1055, 5), + M(240, 4), M(652, 4), L('7'), L('8'), L('3'), L('7'), L('4'), M(1193, 4), + L('8'), L('2'), L('5'), L('5'), L('3'), L('7'), M(522, 
5), L('2'), + L('6'), L('8'), M(47, 4), L('4'), L('0'), L('4'), L('7'), M(466, 4), + L('4'), M(1206, 4), M(910, 4), L('8'), L('4'), M(937, 4), L('6'), M(800, 6), + L('3'), L('3'), L('1'), L('3'), L('6'), L('7'), L('7'), L('0'), + L('2'), L('8'), L('9'), L('8'), L('9'), L('1'), L('5'), L('2'), + M(99, 4), L('5'), L('2'), L('1'), L('6'), L('2'), L('0'), L('5'), + L('6'), L('9'), L('6'), M(1042, 4), L('0'), L('5'), L('8'), M(1144, 4), + L('5'), M(1177, 4), L('5'), L('1'), L('1'), M(522, 4), L('8'), L('2'), + L('4'), L('3'), L('0'), L('0'), L('3'), L('5'), L('5'), L('8'), + L('7'), L('6'), L('4'), L('0'), L('2'), L('4'), L('7'), L('4'), + L('9'), L('6'), L('4'), L('7'), L('3'), L('2'), L('6'), L('3'), + M(1087, 4), L('9'), L('9'), L('2'), M(1100, 4), L('4'), L('2'), L('6'), + L('9'), M(710, 6), L('7'), M(471, 4), L('4'), M(1342, 4), M(1054, 4), L('9'), + L('3'), L('4'), L('1'), L('7'), M(430, 4), L('1'), L('2'), M(43, 4), + L('4'), M(415, 4), L('1'), L('5'), L('0'), L('3'), L('0'), L('2'), + L('8'), L('6'), L('1'), L('8'), L('2'), L('9'), L('7'), L('4'), + L('5'), L('5'), L('5'), L('7'), L('0'), L('6'), L('7'), L('4'), + M(310, 4), L('5'), L('0'), L('5'), L('4'), L('9'), L('4'), L('5'), + L('8'), M(454, 4), L('9'), M(82, 4), L('5'), L('6'), M(493, 4), L('7'), + L('2'), L('1'), L('0'), L('7'), L('9'), M(346, 4), L('3'), L('0'), + M(267, 4), L('3'), L('2'), L('1'), L('1'), L('6'), L('5'), L('3'), + L('4'), L('4'), L('9'), L('8'), L('7'), L('2'), L('0'), L('2'), + L('7'), M(284, 4), L('0'), L('2'), L('3'), L('6'), L('4'), M(559, 4), + L('5'), L('4'), L('9'), L('9'), L('1'), L('1'), L('9'), L('8'), + M(1049, 4), L('4'), M(284, 4), L('5'), L('3'), L('5'), L('6'), L('6'), + L('3'), L('6'), L('9'), M(1105, 4), L('2'), L('6'), L('5'), M(741, 4), + L('7'), L('8'), L('6'), L('2'), L('5'), L('5'), L('1'), M(987, 4), + L('1'), L('7'), L('5'), L('7'), L('4'), L('6'), L('7'), L('2'), + L('8'), L('9'), L('0'), L('9'), L('7'), L('7'), L('7'), L('7'), + M(1108, 5), L('0'), L('0'), L('0'), M(1534, 4), L('7'), L('0'), M(1248, 4), + L('6'), M(1002, 4), L('4'), L('9'), L('1'), M(1055, 4), M(664, 4), L('2'), + L('1'), L('4'), L('7'), L('7'), L('2'), L('3'), L('5'), L('0'), + L('1'), L('4'), L('1'), L('4'), M(1604, 4), L('3'), L('5'), L('6'), + M(1200, 4), L('1'), L('6'), L('1'), L('3'), L('6'), L('1'), L('1'), + L('5'), L('7'), L('3'), L('5'), L('2'), L('5'), M(1285, 4), L('3'), + L('4'), M(92, 4), L('1'), L('8'), M(1148, 4), L('8'), L('4'), M(1512, 4), + L('3'), L('3'), L('2'), L('3'), L('9'), L('0'), L('7'), L('3'), + L('9'), L('4'), L('1'), L('4'), L('3'), L('3'), L('3'), L('4'), + L('5'), L('4'), L('7'), L('7'), L('6'), L('2'), L('4'), M(579, 4), + L('2'), L('5'), L('1'), L('8'), L('9'), L('8'), L('3'), L('5'), + L('6'), L('9'), L('4'), L('8'), L('5'), L('5'), L('6'), L('2'), + L('0'), L('9'), L('9'), L('2'), L('1'), L('9'), L('2'), L('2'), + L('2'), L('1'), L('8'), L('4'), L('2'), L('7'), M(575, 4), L('2'), + M(187, 4), L('6'), L('8'), L('8'), L('7'), L('6'), L('7'), L('1'), + L('7'), L('9'), L('0'), M(86, 4), L('0'), M(263, 5), L('6'), L('6'), + M(1000, 4), L('8'), L('8'), L('6'), L('2'), L('7'), L('2'), M(1757, 4), + L('1'), L('7'), L('8'), L('6'), L('0'), L('8'), L('5'), L('7'), + M(116, 4), L('3'), M(765, 5), L('7'), L('9'), L('7'), L('6'), L('6'), + L('8'), L('1'), M(702, 4), L('0'), L('0'), L('9'), L('5'), L('3'), + L('8'), L('8'), M(1593, 4), L('3'), M(1702, 4), L('0'), L('6'), L('8'), + L('0'), L('0'), L('6'), L('4'), L('2'), L('2'), L('5'), L('1'), + L('2'), L('5'), L('2'), M(1404, 4), L('7'), L('3'), 
L('9'), L('2'), + M(664, 4), M(1141, 4), L('4'), M(1716, 5), L('8'), L('6'), L('2'), L('6'), + L('9'), L('4'), L('5'), M(486, 4), L('4'), L('1'), L('9'), L('6'), + L('5'), L('2'), L('8'), L('5'), L('0'), M(154, 4), M(925, 4), L('1'), + L('8'), L('6'), L('3'), M(447, 4), L('4'), M(341, 5), L('2'), L('0'), + L('3'), L('9'), M(1420, 4), L('4'), L('5'), M(701, 4), L('2'), L('3'), + L('7'), M(1069, 4), L('6'), M(1297, 4), L('5'), L('6'), M(1593, 4), L('7'), + L('1'), L('9'), L('1'), L('7'), L('2'), L('8'), M(370, 4), L('7'), + L('6'), L('4'), L('6'), L('5'), L('7'), L('5'), L('7'), L('3'), + L('9'), M(258, 4), L('3'), L('8'), L('9'), M(1865, 4), L('8'), L('3'), + L('2'), L('6'), L('4'), L('5'), L('9'), L('9'), L('5'), L('8'), + M(1704, 4), L('0'), L('4'), L('7'), L('8'), M(479, 4), M(809, 4), L('9'), + M(46, 4), L('6'), L('4'), L('0'), L('7'), L('8'), L('9'), L('5'), + L('1'), M(143, 4), L('6'), L('8'), L('3'), M(304, 4), L('2'), L('5'), + L('9'), L('5'), L('7'), L('0'), M(1129, 4), L('8'), L('2'), L('2'), + M(713, 4), L('2'), M(1564, 4), L('4'), L('0'), L('7'), L('7'), L('2'), + L('6'), L('7'), L('1'), L('9'), L('4'), L('7'), L('8'), M(794, 4), + L('8'), L('2'), L('6'), L('0'), L('1'), L('4'), L('7'), L('6'), + L('9'), L('9'), L('0'), L('9'), M(1257, 4), L('0'), L('1'), L('3'), + L('6'), L('3'), L('9'), L('4'), L('4'), L('3'), M(640, 4), L('3'), + L('0'), M(262, 4), L('2'), L('0'), L('3'), L('4'), L('9'), L('6'), + L('2'), L('5'), L('2'), L('4'), L('5'), L('1'), L('7'), M(950, 4), + L('9'), L('6'), L('5'), L('1'), L('4'), L('3'), L('1'), L('4'), + L('2'), L('9'), L('8'), L('0'), L('9'), L('1'), L('9'), L('0'), + L('6'), L('5'), L('9'), L('2'), M(643, 4), L('7'), L('2'), L('2'), + L('1'), L('6'), L('9'), L('6'), L('4'), L('6'), M(1050, 4), M(123, 4), + L('5'), M(1295, 4), L('4'), M(1382, 5), L('8'), M(1370, 4), L('9'), L('7'), + M(1404, 4), L('5'), L('4'), M(1182, 4), M(575, 4), L('7'), M(1627, 4), L('8'), + L('4'), L('6'), L('8'), L('1'), L('3'), M(141, 4), L('6'), L('8'), + L('3'), L('8'), L('6'), L('8'), L('9'), L('4'), L('2'), L('7'), + L('7'), L('4'), L('1'), L('5'), L('5'), L('9'), L('9'), L('1'), + L('8'), L('5'), M(91, 4), L('2'), L('4'), L('5'), L('9'), L('5'), + L('3'), L('9'), L('5'), L('9'), L('4'), L('3'), L('1'), M(1464, 4), + L('7'), M(19, 4), L('6'), L('8'), L('0'), L('8'), L('4'), L('5'), + M(744, 4), L('7'), L('3'), M(2079, 4), L('9'), L('5'), L('8'), L('4'), + L('8'), L('6'), L('5'), L('3'), L('8'), M(1769, 4), L('6'), L('2'), + M(243, 4), L('6'), L('0'), L('9'), M(1207, 4), L('6'), L('0'), L('8'), + L('0'), L('5'), L('1'), L('2'), L('4'), L('3'), L('8'), L('8'), + L('4'), M(315, 4), M(12, 4), L('4'), L('1'), L('3'), M(784, 4), L('7'), + L('6'), L('2'), L('7'), L('8'), M(834, 4), L('7'), L('1'), L('5'), + M(1436, 4), L('3'), L('5'), L('9'), L('9'), L('7'), L('7'), L('0'), + L('0'), L('1'), L('2'), L('9'), M(1139, 4), L('8'), L('9'), L('4'), + L('4'), L('1'), M(632, 4), L('6'), L('8'), L('5'), L('5'), M(96, 4), + L('4'), L('0'), L('6'), L('3'), M(2279, 4), L('2'), L('0'), L('7'), + L('2'), L('2'), M(345, 4), M(516, 5), L('4'), L('8'), L('1'), L('5'), + L('8'), M(518, 4), M(511, 4), M(635, 4), M(665, 4), L('3'), L('9'), L('4'), + L('5'), L('2'), L('2'), L('6'), L('7'), M(1175, 6), L('8'), M(1419, 4), + L('2'), L('1'), M(747, 4), L('2'), M(904, 4), L('5'), L('4'), L('6'), + L('6'), L('6'), M(1308, 4), L('2'), L('3'), L('9'), L('8'), L('6'), + L('4'), L('5'), L('6'), M(1221, 4), L('1'), L('6'), L('3'), L('5'), + M(596, 5), M(2066, 4), L('7'), M(2222, 4), L('9'), L('8'), M(1119, 
4), L('9'), + L('3'), L('6'), L('3'), L('4'), M(1884, 4), L('7'), L('4'), L('3'), + L('2'), L('4'), M(1148, 4), L('1'), L('5'), L('0'), L('7'), L('6'), + M(1212, 4), L('7'), L('9'), L('4'), L('5'), L('1'), L('0'), L('9'), + M(63, 4), L('0'), L('9'), L('4'), L('0'), M(1703, 4), L('8'), L('8'), + L('7'), L('9'), L('7'), L('1'), L('0'), L('8'), L('9'), L('3'), + M(2289, 4), L('6'), L('9'), L('1'), L('3'), L('6'), L('8'), L('6'), + L('7'), L('2'), M(604, 4), M(511, 4), L('5'), M(1344, 4), M(1129, 4), M(2050, 4), + L('1'), L('7'), L('9'), L('2'), L('8'), L('6'), L('8'), M(2253, 4), + L('8'), L('7'), L('4'), L('7'), M(1951, 5), L('8'), L('2'), L('4'), + M(2427, 4), L('8'), M(604, 4), L('7'), L('1'), L('4'), L('9'), L('0'), + L('9'), L('6'), L('7'), L('5'), L('9'), L('8'), M(1776, 4), L('3'), + L('6'), L('5'), M(309, 4), L('8'), L('1'), M(93, 4), M(1862, 4), M(2359, 4), + L('6'), L('8'), L('2'), L('9'), M(1407, 4), L('8'), L('7'), L('2'), + L('2'), L('6'), L('5'), L('8'), L('8'), L('0'), M(1554, 4), L('5'), + M(586, 4), L('4'), L('2'), L('7'), L('0'), L('4'), L('7'), L('7'), + L('5'), L('5'), M(2079, 4), L('3'), L('7'), L('9'), L('6'), L('4'), + L('1'), L('4'), L('5'), L('1'), L('5'), L('2'), M(1534, 4), L('2'), + L('3'), L('4'), L('3'), L('6'), L('4'), L('5'), L('4'), M(1503, 4), + L('4'), L('4'), L('4'), L('7'), L('9'), L('5'), M(61, 4), M(1316, 4), + M(2279, 5), L('4'), L('1'), M(1323, 4), L('3'), M(773, 4), L('5'), L('2'), + L('3'), L('1'), M(2114, 5), L('1'), L('6'), L('6'), L('1'), M(2227, 4), + L('5'), L('9'), L('6'), L('9'), L('5'), L('3'), L('6'), L('2'), + L('3'), L('1'), L('4'), M(1536, 4), L('2'), L('4'), L('8'), L('4'), + L('9'), L('3'), L('7'), L('1'), L('8'), L('7'), L('1'), L('1'), + L('0'), L('1'), L('4'), L('5'), L('7'), L('6'), L('5'), L('4'), + M(1890, 4), L('0'), L('2'), L('7'), L('9'), L('9'), L('3'), L('4'), + L('4'), L('0'), L('3'), L('7'), L('4'), L('2'), L('0'), L('0'), + L('7'), M(2368, 4), L('7'), L('8'), L('5'), L('3'), L('9'), L('0'), + L('6'), L('2'), L('1'), L('9'), M(666, 5), M(838, 4), L('8'), L('4'), + L('7'), M(979, 5), L('8'), L('3'), L('3'), L('2'), L('1'), L('4'), + L('4'), L('5'), L('7'), L('1'), M(645, 4), M(1911, 4), L('4'), L('3'), + L('5'), L('0'), M(2345, 4), M(1129, 4), L('5'), L('3'), L('1'), L('9'), + L('1'), L('0'), L('4'), L('8'), L('4'), L('8'), L('1'), L('0'), + L('0'), L('5'), L('3'), L('7'), L('0'), L('6'), M(2237, 4), M(1438, 5), + M(1922, 5), L('1'), M(1370, 4), L('7'), M(796, 4), L('5'), M(2029, 4), M(1037, 4), + L('6'), L('3'), M(2013, 5), L('4'), M(2418, 4), M(847, 5), M(1014, 5), L('8'), + M(1326, 5), M(2184, 5), L('9'), M(392, 4), L('9'), L('1'), M(2255, 4), L('8'), + L('1'), L('4'), L('6'), L('7'), L('5'), L('1'), M(1580, 4), L('1'), + L('2'), L('3'), L('9'), M(426, 6), L('9'), L('0'), L('7'), L('1'), + L('8'), L('6'), L('4'), L('9'), L('4'), L('2'), L('3'), L('1'), + L('9'), L('6'), L('1'), L('5'), L('6'), M(493, 4), M(1725, 4), L('9'), + L('5'), M(2343, 4), M(1130, 4), M(284, 4), L('6'), L('0'), L('3'), L('8'), + M(2598, 4), M(368, 4), M(901, 4), L('6'), L('2'), M(1115, 4), L('5'), M(2125, 4), + L('6'), L('3'), L('8'), L('9'), L('3'), L('7'), L('7'), L('8'), + L('7'), M(2246, 4), M(249, 4), L('9'), L('7'), L('9'), L('2'), L('0'), + L('7'), L('7'), L('3'), M(1496, 4), L('2'), L('1'), L('8'), L('2'), + L('5'), L('6'), M(2016, 4), L('6'), L('6'), M(1751, 4), L('4'), L('2'), + M(1663, 5), L('6'), M(1767, 4), L('4'), L('4'), M(37, 4), L('5'), L('4'), + L('9'), L('2'), L('0'), L('2'), L('6'), L('0'), L('5'), M(2740, 4), + M(997, 5), 
L('2'), L('0'), L('1'), L('4'), L('9'), M(1235, 4), L('8'), + L('5'), L('0'), L('7'), L('3'), M(1434, 4), L('6'), L('6'), L('6'), + L('0'), M(405, 4), L('2'), L('4'), L('3'), L('4'), L('0'), M(136, 4), + L('0'), M(1900, 4), L('8'), L('6'), L('3'), M(2391, 4), M(2021, 4), M(1068, 4), + M(373, 4), L('5'), L('7'), L('9'), L('6'), L('2'), L('6'), L('8'), + L('5'), L('6'), M(321, 4), L('5'), L('0'), L('8'), M(1316, 4), L('5'), + L('8'), L('7'), L('9'), L('6'), L('9'), L('9'), M(1810, 4), L('5'), + L('7'), L('4'), M(2585, 4), L('8'), L('4'), L('0'), M(2228, 4), L('1'), + L('4'), L('5'), L('9'), L('1'), M(1933, 4), L('7'), L('0'), M(565, 4), + L('0'), L('1'), M(3048, 4), L('1'), L('2'), M(3189, 4), L('0'), M(964, 4), + L('3'), L('9'), M(2859, 4), M(275, 4), L('7'), L('1'), L('5'), M(945, 4), + L('4'), L('2'), L('0'), M(3059, 5), L('9'), M(3011, 4), L('0'), L('7'), + M(834, 4), M(1942, 4), M(2736, 4), M(3171, 4), L('2'), L('1'), M(2401, 4), L('2'), + L('5'), L('1'), M(1404, 4), M(2373, 4), L('9'), L('2'), M(435, 4), L('8'), + L('2'), L('6'), M(2919, 4), L('2'), M(633, 4), L('3'), L('2'), L('1'), + L('5'), L('7'), L('9'), L('1'), L('9'), L('8'), L('4'), L('1'), + L('4'), M(2172, 5), L('9'), L('1'), L('6'), L('4'), M(1769, 5), L('9'), + M(2905, 5), M(2268, 4), L('7'), L('2'), L('2'), M(802, 4), L('5'), M(2213, 4), + M(322, 4), L('9'), L('1'), L('0'), M(189, 4), M(3164, 4), L('5'), L('2'), + L('8'), L('0'), L('1'), L('7'), M(562, 4), L('7'), L('1'), L('2'), + M(2325, 4), L('8'), L('3'), L('2'), M(884, 4), L('1'), M(1418, 4), L('0'), + L('9'), L('3'), L('5'), L('3'), L('9'), L('6'), L('5'), L('7'), + M(1612, 4), L('1'), L('0'), L('8'), L('3'), M(106, 4), L('5'), L('1'), + M(1915, 4), M(3419, 4), L('1'), L('4'), L('4'), L('4'), L('2'), L('1'), + L('0'), L('0'), M(515, 4), L('0'), L('3'), M(413, 4), L('1'), L('1'), + L('0'), L('3'), M(3202, 4), M(10, 4), M(39, 4), M(1539, 6), L('5'), L('1'), + L('6'), M(1498, 4), M(2180, 5), M(2347, 4), L('5'), M(3139, 5), L('8'), L('5'), + L('1'), L('7'), L('1'), L('4'), L('3'), L('7'), M(1542, 4), M(110, 4), + L('1'), L('5'), L('5'), L('6'), L('5'), L('0'), L('8'), L('8'), + M(954, 4), L('9'), L('8'), L('9'), L('8'), L('5'), L('9'), L('9'), + L('8'), L('2'), L('3'), L('8'), M(464, 4), M(2491, 4), L('3'), M(365, 4), + M(1087, 4), M(2500, 4), L('8'), M(3590, 5), L('3'), L('2'), M(264, 4), L('5'), + M(774, 4), L('3'), M(459, 4), L('9'), M(1052, 4), L('9'), L('8'), M(2174, 4), + L('4'), M(3257, 4), L('7'), M(1612, 4), L('0'), L('7'), M(230, 4), L('4'), + L('8'), L('1'), L('4'), L('1'), M(1338, 4), L('8'), L('5'), L('9'), + L('4'), L('6'), L('1'), M(3018, 4), L('8'), L('0'), + }, + }, + TestCase{ + .input = "huffman-rand-1k.input", + .want = "huffman-rand-1k.{s}.expect", + .want_no_input = "huffman-rand-1k.{s}.expect-noinput", + .tokens = &[_]Token{ + L(0xf8), L(0x8b), L(0x96), L(0x76), L(0x48), L(0xd), L(0x85), L(0x94), L(0x25), L(0x80), L(0xaf), L(0xc2), L(0xfe), L(0x8d), + L(0xe8), L(0x20), L(0xeb), L(0x17), L(0x86), L(0xc9), L(0xb7), L(0xc5), L(0xde), L(0x6), L(0xea), L(0x7d), L(0x18), L(0x8b), + L(0xe7), L(0x3e), L(0x7), L(0xda), L(0xdf), L(0xff), L(0x6c), L(0x73), L(0xde), L(0xcc), L(0xe7), L(0x6d), L(0x8d), L(0x4), + L(0x19), L(0x49), L(0x7f), L(0x47), L(0x1f), L(0x48), L(0x15), L(0xb0), L(0xe8), L(0x9e), L(0xf2), L(0x31), L(0x59), L(0xde), + L(0x34), L(0xb4), L(0x5b), L(0xe5), L(0xe0), L(0x9), L(0x11), L(0x30), L(0xc2), L(0x88), L(0x5b), L(0x7c), L(0x5d), L(0x14), + L(0x13), L(0x6f), L(0x23), L(0xa9), L(0xd), L(0xbc), L(0x2d), L(0x23), L(0xbe), L(0xd9), L(0xed), 
L(0x75), L(0x4), L(0x6c), + L(0x99), L(0xdf), L(0xfd), L(0x70), L(0x66), L(0xe6), L(0xee), L(0xd9), L(0xb1), L(0x9e), L(0x6e), L(0x83), L(0x59), L(0xd5), + L(0xd4), L(0x80), L(0x59), L(0x98), L(0x77), L(0x89), L(0x43), L(0x38), L(0xc9), L(0xaf), L(0x30), L(0x32), L(0x9a), L(0x20), + L(0x1b), L(0x46), L(0x3d), L(0x67), L(0x6e), L(0xd7), L(0x72), L(0x9e), L(0x4e), L(0x21), L(0x4f), L(0xc6), L(0xe0), L(0xd4), + L(0x7b), L(0x4), L(0x8d), L(0xa5), L(0x3), L(0xf6), L(0x5), L(0x9b), L(0x6b), L(0xdc), L(0x2a), L(0x93), L(0x77), L(0x28), + L(0xfd), L(0xb4), L(0x62), L(0xda), L(0x20), L(0xe7), L(0x1f), L(0xab), L(0x6b), L(0x51), L(0x43), L(0x39), L(0x2f), L(0xa0), + L(0x92), L(0x1), L(0x6c), L(0x75), L(0x3e), L(0xf4), L(0x35), L(0xfd), L(0x43), L(0x2e), L(0xf7), L(0xa4), L(0x75), L(0xda), + L(0xea), L(0x9b), L(0xa), L(0x64), L(0xb), L(0xe0), L(0x23), L(0x29), L(0xbd), L(0xf7), L(0xe7), L(0x83), L(0x3c), L(0xfb), + L(0xdf), L(0xb3), L(0xae), L(0x4f), L(0xa4), L(0x47), L(0x55), L(0x99), L(0xde), L(0x2f), L(0x96), L(0x6e), L(0x1c), L(0x43), + L(0x4c), L(0x87), L(0xe2), L(0x7c), L(0xd9), L(0x5f), L(0x4c), L(0x7c), L(0xe8), L(0x90), L(0x3), L(0xdb), L(0x30), L(0x95), + L(0xd6), L(0x22), L(0xc), L(0x47), L(0xb8), L(0x4d), L(0x6b), L(0xbd), L(0x24), L(0x11), L(0xab), L(0x2c), L(0xd7), L(0xbe), + L(0x6e), L(0x7a), L(0xd6), L(0x8), L(0xa3), L(0x98), L(0xd8), L(0xdd), L(0x15), L(0x6a), L(0xfa), L(0x93), L(0x30), L(0x1), + L(0x25), L(0x1d), L(0xa2), L(0x74), L(0x86), L(0x4b), L(0x6a), L(0x95), L(0xe8), L(0xe1), L(0x4e), L(0xe), L(0x76), L(0xb9), + L(0x49), L(0xa9), L(0x5f), L(0xa0), L(0xa6), L(0x63), L(0x3c), L(0x7e), L(0x7e), L(0x20), L(0x13), L(0x4f), L(0xbb), L(0x66), + L(0x92), L(0xb8), L(0x2e), L(0xa4), L(0xfa), L(0x48), L(0xcb), L(0xae), L(0xb9), L(0x3c), L(0xaf), L(0xd3), L(0x1f), L(0xe1), + L(0xd5), L(0x8d), L(0x42), L(0x6d), L(0xf0), L(0xfc), L(0x8c), L(0xc), L(0x0), L(0xde), L(0x40), L(0xab), L(0x8b), L(0x47), + L(0x97), L(0x4e), L(0xa8), L(0xcf), L(0x8e), L(0xdb), L(0xa6), L(0x8b), L(0x20), L(0x9), L(0x84), L(0x7a), L(0x66), L(0xe5), + L(0x98), L(0x29), L(0x2), L(0x95), L(0xe6), L(0x38), L(0x32), L(0x60), L(0x3), L(0xe3), L(0x9a), L(0x1e), L(0x54), L(0xe8), + L(0x63), L(0x80), L(0x48), L(0x9c), L(0xe7), L(0x63), L(0x33), L(0x6e), L(0xa0), L(0x65), L(0x83), L(0xfa), L(0xc6), L(0xba), + L(0x7a), L(0x43), L(0x71), L(0x5), L(0xf5), L(0x68), L(0x69), L(0x85), L(0x9c), L(0xba), L(0x45), L(0xcd), L(0x6b), L(0xb), + L(0x19), L(0xd1), L(0xbb), L(0x7f), L(0x70), L(0x85), L(0x92), L(0xd1), L(0xb4), L(0x64), L(0x82), L(0xb1), L(0xe4), L(0x62), + L(0xc5), L(0x3c), L(0x46), L(0x1f), L(0x92), L(0x31), L(0x1c), L(0x4e), L(0x41), L(0x77), L(0xf7), L(0xe7), L(0x87), L(0xa2), + L(0xf), L(0x6e), L(0xe8), L(0x92), L(0x3), L(0x6b), L(0xa), L(0xe7), L(0xa9), L(0x3b), L(0x11), L(0xda), L(0x66), L(0x8a), + L(0x29), L(0xda), L(0x79), L(0xe1), L(0x64), L(0x8d), L(0xe3), L(0x54), L(0xd4), L(0xf5), L(0xef), L(0x64), L(0x87), L(0x3b), + L(0xf4), L(0xc2), L(0xf4), L(0x71), L(0x13), L(0xa9), L(0xe9), L(0xe0), L(0xa2), L(0x6), L(0x14), L(0xab), L(0x5d), L(0xa7), + L(0x96), L(0x0), L(0xd6), L(0xc3), L(0xcc), L(0x57), L(0xed), L(0x39), L(0x6a), L(0x25), L(0xcd), L(0x76), L(0xea), L(0xba), + L(0x3a), L(0xf2), L(0xa1), L(0x95), L(0x5d), L(0xe5), L(0x71), L(0xcf), L(0x9c), L(0x62), L(0x9e), L(0x6a), L(0xfa), L(0xd5), + L(0x31), L(0xd1), L(0xa8), L(0x66), L(0x30), L(0x33), L(0xaa), L(0x51), L(0x17), L(0x13), L(0x82), L(0x99), L(0xc8), L(0x14), + L(0x60), L(0x9f), L(0x4d), L(0x32), L(0x6d), L(0xda), L(0x19), L(0x26), L(0x21), L(0xdc), 
L(0x7e), L(0x2e), L(0x25), L(0x67), + L(0x72), L(0xca), L(0xf), L(0x92), L(0xcd), L(0xf6), L(0xd6), L(0xcb), L(0x97), L(0x8a), L(0x33), L(0x58), L(0x73), L(0x70), + L(0x91), L(0x1d), L(0xbf), L(0x28), L(0x23), L(0xa3), L(0xc), L(0xf1), L(0x83), L(0xc3), L(0xc8), L(0x56), L(0x77), L(0x68), + L(0xe3), L(0x82), L(0xba), L(0xb9), L(0x57), L(0x56), L(0x57), L(0x9c), L(0xc3), L(0xd6), L(0x14), L(0x5), L(0x3c), L(0xb1), + L(0xaf), L(0x93), L(0xc8), L(0x8a), L(0x57), L(0x7f), L(0x53), L(0xfa), L(0x2f), L(0xaa), L(0x6e), L(0x66), L(0x83), L(0xfa), + L(0x33), L(0xd1), L(0x21), L(0xab), L(0x1b), L(0x71), L(0xb4), L(0x7c), L(0xda), L(0xfd), L(0xfb), L(0x7f), L(0x20), L(0xab), + L(0x5e), L(0xd5), L(0xca), L(0xfd), L(0xdd), L(0xe0), L(0xee), L(0xda), L(0xba), L(0xa8), L(0x27), L(0x99), L(0x97), L(0x69), + L(0xc1), L(0x3c), L(0x82), L(0x8c), L(0xa), L(0x5c), L(0x2d), L(0x5b), L(0x88), L(0x3e), L(0x34), L(0x35), L(0x86), L(0x37), + L(0x46), L(0x79), L(0xe1), L(0xaa), L(0x19), L(0xfb), L(0xaa), L(0xde), L(0x15), L(0x9), L(0xd), L(0x1a), L(0x57), L(0xff), + L(0xb5), L(0xf), L(0xf3), L(0x2b), L(0x5a), L(0x6a), L(0x4d), L(0x19), L(0x77), L(0x71), L(0x45), L(0xdf), L(0x4f), L(0xb3), + L(0xec), L(0xf1), L(0xeb), L(0x18), L(0x53), L(0x3e), L(0x3b), L(0x47), L(0x8), L(0x9a), L(0x73), L(0xa0), L(0x5c), L(0x8c), + L(0x5f), L(0xeb), L(0xf), L(0x3a), L(0xc2), L(0x43), L(0x67), L(0xb4), L(0x66), L(0x67), L(0x80), L(0x58), L(0xe), L(0xc1), + L(0xec), L(0x40), L(0xd4), L(0x22), L(0x94), L(0xca), L(0xf9), L(0xe8), L(0x92), L(0xe4), L(0x69), L(0x38), L(0xbe), L(0x67), + L(0x64), L(0xca), L(0x50), L(0xc7), L(0x6), L(0x67), L(0x42), L(0x6e), L(0xa3), L(0xf0), L(0xb7), L(0x6c), L(0xf2), L(0xe8), + L(0x5f), L(0xb1), L(0xaf), L(0xe7), L(0xdb), L(0xbb), L(0x77), L(0xb5), L(0xf8), L(0xcb), L(0x8), L(0xc4), L(0x75), L(0x7e), + L(0xc0), L(0xf9), L(0x1c), L(0x7f), L(0x3c), L(0x89), L(0x2f), L(0xd2), L(0x58), L(0x3a), L(0xe2), L(0xf8), L(0x91), L(0xb6), + L(0x7b), L(0x24), L(0x27), L(0xe9), L(0xae), L(0x84), L(0x8b), L(0xde), L(0x74), L(0xac), L(0xfd), L(0xd9), L(0xb7), L(0x69), + L(0x2a), L(0xec), L(0x32), L(0x6f), L(0xf0), L(0x92), L(0x84), L(0xf1), L(0x40), L(0xc), L(0x8a), L(0xbc), L(0x39), L(0x6e), + L(0x2e), L(0x73), L(0xd4), L(0x6e), L(0x8a), L(0x74), L(0x2a), L(0xdc), L(0x60), L(0x1f), L(0xa3), L(0x7), L(0xde), L(0x75), + L(0x8b), L(0x74), L(0xc8), L(0xfe), L(0x63), L(0x75), L(0xf6), L(0x3d), L(0x63), L(0xac), L(0x33), L(0x89), L(0xc3), L(0xf0), + L(0xf8), L(0x2d), L(0x6b), L(0xb4), L(0x9e), L(0x74), L(0x8b), L(0x5c), L(0x33), L(0xb4), L(0xca), L(0xa8), L(0xe4), L(0x99), + L(0xb6), L(0x90), L(0xa1), L(0xef), L(0xf), L(0xd3), L(0x61), L(0xb2), L(0xc6), L(0x1a), L(0x94), L(0x7c), L(0x44), L(0x55), + L(0xf4), L(0x45), L(0xff), L(0x9e), L(0xa5), L(0x5a), L(0xc6), L(0xa0), L(0xe8), L(0x2a), L(0xc1), L(0x8d), L(0x6f), L(0x34), + L(0x11), L(0xb9), L(0xbe), L(0x4e), L(0xd9), L(0x87), L(0x97), L(0x73), L(0xcf), L(0x3d), L(0x23), L(0xae), L(0xd5), L(0x1a), + L(0x5e), L(0xae), L(0x5d), L(0x6a), L(0x3), L(0xf9), L(0x22), L(0xd), L(0x10), L(0xd9), L(0x47), L(0x69), L(0x15), L(0x3f), + L(0xee), L(0x52), L(0xa3), L(0x8), L(0xd2), L(0x3c), L(0x51), L(0xf4), L(0xf8), L(0x9d), L(0xe4), L(0x98), L(0x89), L(0xc8), + L(0x67), L(0x39), L(0xd5), L(0x5e), L(0x35), L(0x78), L(0x27), L(0xe8), L(0x3c), L(0x80), L(0xae), L(0x79), L(0x71), L(0xd2), + L(0x93), L(0xf4), L(0xaa), L(0x51), L(0x12), L(0x1c), L(0x4b), L(0x1b), L(0xe5), L(0x6e), L(0x15), L(0x6f), L(0xe4), L(0xbb), + L(0x51), L(0x9b), L(0x45), L(0x9f), L(0xf9), L(0xc4), L(0x8c), L(0x2a), 
L(0xfb), L(0x1a), L(0xdf), L(0x55), L(0xd3), L(0x48), + L(0x93), L(0x27), L(0x1), L(0x26), L(0xc2), L(0x6b), L(0x55), L(0x6d), L(0xa2), L(0xfb), L(0x84), L(0x8b), L(0xc9), L(0x9e), + L(0x28), L(0xc2), L(0xef), L(0x1a), L(0x24), L(0xec), L(0x9b), L(0xae), L(0xbd), L(0x60), L(0xe9), L(0x15), L(0x35), L(0xee), + L(0x42), L(0xa4), L(0x33), L(0x5b), L(0xfa), L(0xf), L(0xb6), L(0xf7), L(0x1), L(0xa6), L(0x2), L(0x4c), L(0xca), L(0x90), + L(0x58), L(0x3a), L(0x96), L(0x41), L(0xe7), L(0xcb), L(0x9), L(0x8c), L(0xdb), L(0x85), L(0x4d), L(0xa8), L(0x89), L(0xf3), + L(0xb5), L(0x8e), L(0xfd), L(0x75), L(0x5b), L(0x4f), L(0xed), L(0xde), L(0x3f), L(0xeb), L(0x38), L(0xa3), L(0xbe), L(0xb0), + L(0x73), L(0xfc), L(0xb8), L(0x54), L(0xf7), L(0x4c), L(0x30), L(0x67), L(0x2e), L(0x38), L(0xa2), L(0x54), L(0x18), L(0xba), + L(0x8), L(0xbf), L(0xf2), L(0x39), L(0xd5), L(0xfe), L(0xa5), L(0x41), L(0xc6), L(0x66), L(0x66), L(0xba), L(0x81), L(0xef), + L(0x67), L(0xe4), L(0xe6), L(0x3c), L(0xc), L(0xca), L(0xa4), L(0xa), L(0x79), L(0xb3), L(0x57), L(0x8b), L(0x8a), L(0x75), + L(0x98), L(0x18), L(0x42), L(0x2f), L(0x29), L(0xa3), L(0x82), L(0xef), L(0x9f), L(0x86), L(0x6), L(0x23), L(0xe1), L(0x75), + L(0xfa), L(0x8), L(0xb1), L(0xde), L(0x17), L(0x4a), + }, + }, + TestCase{ + .input = "huffman-rand-limit.input", + .want = "huffman-rand-limit.{s}.expect", + .want_no_input = "huffman-rand-limit.{s}.expect-noinput", + .tokens = &[_]Token{ + L(0x61), M(1, 74), L(0xa), L(0xf8), L(0x8b), L(0x96), L(0x76), L(0x48), L(0xa), L(0x85), L(0x94), L(0x25), L(0x80), + L(0xaf), L(0xc2), L(0xfe), L(0x8d), L(0xe8), L(0x20), L(0xeb), L(0x17), L(0x86), L(0xc9), L(0xb7), L(0xc5), L(0xde), + L(0x6), L(0xea), L(0x7d), L(0x18), L(0x8b), L(0xe7), L(0x3e), L(0x7), L(0xda), L(0xdf), L(0xff), L(0x6c), L(0x73), + L(0xde), L(0xcc), L(0xe7), L(0x6d), L(0x8d), L(0x4), L(0x19), L(0x49), L(0x7f), L(0x47), L(0x1f), L(0x48), L(0x15), + L(0xb0), L(0xe8), L(0x9e), L(0xf2), L(0x31), L(0x59), L(0xde), L(0x34), L(0xb4), L(0x5b), L(0xe5), L(0xe0), L(0x9), + L(0x11), L(0x30), L(0xc2), L(0x88), L(0x5b), L(0x7c), L(0x5d), L(0x14), L(0x13), L(0x6f), L(0x23), L(0xa9), L(0xa), + L(0xbc), L(0x2d), L(0x23), L(0xbe), L(0xd9), L(0xed), L(0x75), L(0x4), L(0x6c), L(0x99), L(0xdf), L(0xfd), L(0x70), + L(0x66), L(0xe6), L(0xee), L(0xd9), L(0xb1), L(0x9e), L(0x6e), L(0x83), L(0x59), L(0xd5), L(0xd4), L(0x80), L(0x59), + L(0x98), L(0x77), L(0x89), L(0x43), L(0x38), L(0xc9), L(0xaf), L(0x30), L(0x32), L(0x9a), L(0x20), L(0x1b), L(0x46), + L(0x3d), L(0x67), L(0x6e), L(0xd7), L(0x72), L(0x9e), L(0x4e), L(0x21), L(0x4f), L(0xc6), L(0xe0), L(0xd4), L(0x7b), + L(0x4), L(0x8d), L(0xa5), L(0x3), L(0xf6), L(0x5), L(0x9b), L(0x6b), L(0xdc), L(0x2a), L(0x93), L(0x77), L(0x28), + L(0xfd), L(0xb4), L(0x62), L(0xda), L(0x20), L(0xe7), L(0x1f), L(0xab), L(0x6b), L(0x51), L(0x43), L(0x39), L(0x2f), + L(0xa0), L(0x92), L(0x1), L(0x6c), L(0x75), L(0x3e), L(0xf4), L(0x35), L(0xfd), L(0x43), L(0x2e), L(0xf7), L(0xa4), + L(0x75), L(0xda), L(0xea), L(0x9b), L(0xa), + }, + }, + TestCase{ + .input = "huffman-shifts.input", + .want = "huffman-shifts.{s}.expect", + .want_no_input = "huffman-shifts.{s}.expect-noinput", + .tokens = &[_]Token{ + L('1'), L('0'), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), + M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), + M(2, 258), M(2, 76), L(0xd), L(0xa), L('2'), L('3'), M(2, 258), M(2, 258), + M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 256), + }, + }, + 
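+            // Note (inferred from the test data, not stated in the source): L(c) is a
+            // literal byte token, and M(d, n) appears to encode a back-reference of
+            // length n at distance d; the repeated M(2, 258) tokens above reproduce the
+            // alternating "10" / "23" runs of huffman-shifts.input, and `ml` appears to
+            // be the maximal-length match M(1, 258).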
TestCase{ + .input = "huffman-text-shift.input", + .want = "huffman-text-shift.{s}.expect", + .want_no_input = "huffman-text-shift.{s}.expect-noinput", + .tokens = &[_]Token{ + L('/'), L('/'), L('C'), L('o'), L('p'), L('y'), L('r'), L('i'), + L('g'), L('h'), L('t'), L('2'), L('0'), L('0'), L('9'), L('T'), + L('h'), L('G'), L('o'), L('A'), L('u'), L('t'), L('h'), L('o'), + L('r'), L('.'), L('A'), L('l'), L('l'), M(23, 5), L('r'), L('r'), + L('v'), L('d'), L('.'), L(0xd), L(0xa), L('/'), L('/'), L('U'), + L('o'), L('f'), L('t'), L('h'), L('i'), L('o'), L('u'), L('r'), + L('c'), L('c'), L('o'), L('d'), L('i'), L('g'), L('o'), L('v'), + L('r'), L('n'), L('d'), L('b'), L('y'), L('B'), L('S'), L('D'), + L('-'), L('t'), L('y'), L('l'), M(33, 4), L('l'), L('i'), L('c'), + L('n'), L('t'), L('h'), L('t'), L('c'), L('n'), L('b'), L('f'), + L('o'), L('u'), L('n'), L('d'), L('i'), L('n'), L('t'), L('h'), + L('L'), L('I'), L('C'), L('E'), L('N'), L('S'), L('E'), L('f'), + L('i'), L('l'), L('.'), L(0xd), L(0xa), L(0xd), L(0xa), L('p'), + L('c'), L('k'), L('g'), L('m'), L('i'), L('n'), M(11, 4), L('i'), + L('m'), L('p'), L('o'), L('r'), L('t'), L('"'), L('o'), L('"'), + M(13, 4), L('f'), L('u'), L('n'), L('c'), L('m'), L('i'), L('n'), + L('('), L(')'), L('{'), L(0xd), L(0xa), L(0x9), L('v'), L('r'), + L('b'), L('='), L('m'), L('k'), L('('), L('['), L(']'), L('b'), + L('y'), L('t'), L(','), L('6'), L('5'), L('5'), L('3'), L('5'), + L(')'), L(0xd), L(0xa), L(0x9), L('f'), L(','), L('_'), L(':'), + L('='), L('o'), L('.'), L('C'), L('r'), L('t'), L('('), L('"'), + L('h'), L('u'), L('f'), L('f'), L('m'), L('n'), L('-'), L('n'), + L('u'), L('l'), L('l'), L('-'), L('m'), L('x'), L('.'), L('i'), + L('n'), L('"'), M(34, 5), L('.'), L('W'), L('r'), L('i'), L('t'), + L('('), L('b'), L(')'), L(0xd), L(0xa), L('}'), L(0xd), L(0xa), + L('A'), L('B'), L('C'), L('D'), L('E'), L('F'), L('G'), L('H'), + L('I'), L('J'), L('K'), L('L'), L('M'), L('N'), L('O'), L('P'), + L('Q'), L('R'), L('S'), L('T'), L('U'), L('V'), L('X'), L('x'), + L('y'), L('z'), L('!'), L('"'), L('#'), L(0xc2), L(0xa4), L('%'), + L('&'), L('/'), L('?'), L('"'), + }, + }, + TestCase{ + .input = "huffman-text.input", + .want = "huffman-text.{s}.expect", + .want_no_input = "huffman-text.{s}.expect-noinput", + .tokens = &[_]Token{ + L('/'), L('/'), L(' '), L('z'), L('i'), L('g'), L(' '), L('v'), + L('0'), L('.'), L('1'), L('0'), L('.'), L('0'), L(0xa), L('/'), + L('/'), L(' '), L('c'), L('r'), L('e'), L('a'), L('t'), L('e'), + L(' '), L('a'), L(' '), L('f'), L('i'), L('l'), L('e'), M(5, 4), + L('l'), L('e'), L('d'), L(' '), L('w'), L('i'), L('t'), L('h'), + L(' '), L('0'), L('x'), L('0'), L('0'), L(0xa), L('c'), L('o'), + L('n'), L('s'), L('t'), L(' '), L('s'), L('t'), L('d'), L(' '), + L('='), L(' '), L('@'), L('i'), L('m'), L('p'), L('o'), L('r'), + L('t'), L('('), L('"'), L('s'), L('t'), L('d'), L('"'), L(')'), + L(';'), L(0xa), L(0xa), L('p'), L('u'), L('b'), L(' '), L('f'), + L('n'), L(' '), L('m'), L('a'), L('i'), L('n'), L('('), L(')'), + L(' '), L('!'), L('v'), L('o'), L('i'), L('d'), L(' '), L('{'), + L(0xa), L(' '), L(' '), L(' '), L(' '), L('v'), L('a'), L('r'), + L(' '), L('b'), L(' '), L('='), L(' '), L('['), L('1'), L(']'), + L('u'), L('8'), L('{'), L('0'), L('}'), L(' '), L('*'), L('*'), + L(' '), L('6'), L('5'), L('5'), L('3'), L('5'), L(';'), M(31, 5), + M(86, 6), L('f'), L(' '), L('='), L(' '), L('t'), L('r'), L('y'), + M(94, 4), L('.'), L('f'), L('s'), L('.'), L('c'), L('w'), L('d'), + L('('), L(')'), L('.'), M(144, 6), L('F'), L('i'), 
L('l'), L('e'), + L('('), M(43, 5), M(1, 4), L('"'), L('h'), L('u'), L('f'), L('f'), + L('m'), L('a'), L('n'), L('-'), L('n'), L('u'), L('l'), L('l'), + L('-'), L('m'), L('a'), L('x'), L('.'), L('i'), L('n'), L('"'), + L(','), M(31, 9), L('.'), L('{'), L(' '), L('.'), L('r'), L('e'), + L('a'), L('d'), M(79, 5), L('u'), L('e'), L(' '), L('}'), M(27, 6), + L(')'), M(108, 6), L('d'), L('e'), L('f'), L('e'), L('r'), L(' '), + L('f'), L('.'), L('c'), L('l'), L('o'), L('s'), L('e'), L('('), + M(183, 4), M(22, 4), L('_'), M(124, 7), L('f'), L('.'), L('w'), L('r'), + L('i'), L('t'), L('e'), L('A'), L('l'), L('l'), L('('), L('b'), + L('['), L('0'), L('.'), L('.'), L(']'), L(')'), L(';'), L(0xa), + L('}'), L(0xa), + }, + }, + TestCase{ + .input = "huffman-zero.input", + .want = "huffman-zero.{s}.expect", + .want_no_input = "huffman-zero.{s}.expect-noinput", + .tokens = &[_]Token{ L(0x30), ml, M(1, 49) }, + }, + TestCase{ + .input = "", + .want = "", + .want_no_input = "null-long-match.{s}.expect-noinput", + .tokens = &[_]Token{ + L(0x0), ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, 
ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, + ml, ml, ml, M(1, 8), + }, + }, + }; +}; diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-null-max.dyn.expect b/lib/std/compress/flate/testdata/block_writer/huffman-null-max.dyn.expect new file mode 100644 index 0000000000..c08165143f Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-null-max.dyn.expect differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-null-max.dyn.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-null-max.dyn.expect-noinput new file mode 100644 index 0000000000..c08165143f Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-null-max.dyn.expect-noinput differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-null-max.huff.expect b/lib/std/compress/flate/testdata/block_writer/huffman-null-max.huff.expect new file mode 100644 index 0000000000..db422ca398 Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-null-max.huff.expect differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-null-max.input b/lib/std/compress/flate/testdata/block_writer/huffman-null-max.input new file mode 100644 index 0000000000..5dfddf075b Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-null-max.input differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-null-max.wb.expect b/lib/std/compress/flate/testdata/block_writer/huffman-null-max.wb.expect new file mode 100644 index 0000000000..c08165143f Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-null-max.wb.expect differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-null-max.wb.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-null-max.wb.expect-noinput new file mode 100644 index 0000000000..c08165143f Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-null-max.wb.expect-noinput differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-pi.dyn.expect b/lib/std/compress/flate/testdata/block_writer/huffman-pi.dyn.expect new file mode 100644 index 0000000000..e4396ac6fe Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-pi.dyn.expect differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-pi.dyn.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-pi.dyn.expect-noinput new file mode 100644 index 0000000000..e4396ac6fe Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-pi.dyn.expect-noinput differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-pi.huff.expect b/lib/std/compress/flate/testdata/block_writer/huffman-pi.huff.expect new file mode 100644 index 0000000000..23d8f7f98b Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-pi.huff.expect differ diff --git 
a/lib/std/compress/flate/testdata/block_writer/huffman-pi.input b/lib/std/compress/flate/testdata/block_writer/huffman-pi.input new file mode 100644 index 0000000000..efaed43431 --- /dev/null +++ b/lib/std/compress/flate/testdata/block_writer/huffman-pi.input @@ -0,0 +1 @@ +3.14159265358979323846264338327950288419716939937510582097494459230781640628620899862803482534211706798214808651328230664709384460955058223172535940812848111745028410270193852110555964462294895493038196442881097566593344612847564823378678316527120190914564856692346034861045432664821339360726024914127372458700660631558817488152092096282925409171536436789259036001133053054882046652138414695194151160943305727036575959195309218611738193261179310511854807446237996274956735188575272489122793818301194912983367336244065664308602139494639522473719070217986094370277053921717629317675238467481846766940513200056812714526356082778577134275778960917363717872146844090122495343014654958537105079227968925892354201995611212902196086403441815981362977477130996051870721134999999837297804995105973173281609631859502445945534690830264252230825334468503526193118817101000313783875288658753320838142061717766914730359825349042875546873115956286388235378759375195778185778053217122680661300192787661119590921642019893809525720106548586327886593615338182796823030195203530185296899577362259941389124972177528347913151557485724245415069595082953311686172785588907509838175463746493931925506040092770167113900984882401285836160356370766010471018194295559619894676783744944825537977472684710404753464620804668425906949129331367702898915210475216205696602405803815019351125338243003558764024749647326391419927260426992279678235478163600934172164121992458631503028618297455570674983850549458858692699569092721079750930295532116534498720275596023648066549911988183479775356636980742654252786255181841757467289097777279380008164706001614524919217321721477235014144197356854816136115735255213347574184946843852332390739414333454776241686251898356948556209921922218427255025425688767179049460165346680498862723279178608578438382796797668145410095388378636095068006422512520511739298489608412848862694560424196528502221066118630674427862203919494504712371378696095636437191728746776465757396241389086583264599581339047802759009946576407895126946839835259570982582262052248940772671947826848260147699090264013639443745530506820349625245174939965143142980919065925093722169646151570985838741059788595977297549893016175392846813826868386894277415599185592524595395943104997252468084598727364469584865383673622262609912460805124388439045124413654976278079771569143599770012961608944169486855584840635342207222582848864815845602850601684273945226746767889525213852254995466672782398645659611635488623057745649803559363456817432411251507606947945109659609402522887971089314566913686722874894056010150330861792868092087476091782493858900971490967598526136554978189312978482168299894872265880485756401427047755513237964145152374623436454285844479526586782105114135473573952311342716610213596953623144295248493718711014576540359027993440374200731057853906219838744780847848968332144571386875194350643021845319104848100537061468067491927819119793995206141966342875444064374512371819217999839101591956181467514269123974894090718649423196156794520809514655022523160388193014209376213785595663893778708303906979207734672218256259966150142150306803844773454920260541466592520149744285073251866600213243408819071048633173464965145390579626856100550810665879699816357473638405257145910289706414011097120628043903975951567
7157700420337869936007230558763176359421873125147120532928191826186125867321579198414848829164470609575270695722091756711672291098169091528017350671274858322287183520935396572512108357915136988209144421006751033467110314126711136990865851639831501970165151168517143765761835155650884909989859982387345528331635507647918535893226185489632132933089857064204675259070915481416549859461637180 \ No newline at end of file diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-pi.wb.expect b/lib/std/compress/flate/testdata/block_writer/huffman-pi.wb.expect new file mode 100644 index 0000000000..e4396ac6fe Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-pi.wb.expect differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-pi.wb.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-pi.wb.expect-noinput new file mode 100644 index 0000000000..e4396ac6fe Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-pi.wb.expect-noinput differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.dyn.expect b/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.dyn.expect new file mode 100644 index 0000000000..09dc798ee3 Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.dyn.expect differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.dyn.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.dyn.expect-noinput new file mode 100644 index 0000000000..0c24742fde Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.dyn.expect-noinput differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.huff.expect b/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.huff.expect new file mode 100644 index 0000000000..09dc798ee3 Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.huff.expect differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.input b/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.input new file mode 100644 index 0000000000..ce038ebb5b Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.input differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.wb.expect b/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.wb.expect new file mode 100644 index 0000000000..09dc798ee3 Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.wb.expect differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.wb.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.wb.expect-noinput new file mode 100644 index 0000000000..0c24742fde Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.wb.expect-noinput differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.dyn.expect b/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.dyn.expect new file mode 100644 index 0000000000..2d6527934e Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.dyn.expect differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.dyn.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.dyn.expect-noinput new file mode 100644 index 0000000000..2d6527934e Binary files /dev/null and 
b/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.dyn.expect-noinput differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.huff.expect b/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.huff.expect new file mode 100644 index 0000000000..57e59322e9 Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.huff.expect differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.input b/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.input new file mode 100644 index 0000000000..fb5b1be619 --- /dev/null +++ b/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.input @@ -0,0 +1,4 @@ +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +ø‹–vH +…”%€¯Âþè ë†É·ÅÞê}‹ç>ÚßÿlsÞÌçmIGH°èžò1YÞ4´[åà 0ˆ[|]o#© +¼-#¾Ùíul™ßýpfæîÙ±žnƒYÕÔ€Y˜w‰C8ɯ02š F=gn×ržN!OÆàÔ{¥ö›kÜ*“w(ý´bÚ ç«kQC9/ ’lu>ô5ýC.÷¤uÚê› diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.wb.expect b/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.wb.expect new file mode 100644 index 0000000000..881e59c9ab Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.wb.expect differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.wb.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.wb.expect-noinput new file mode 100644 index 0000000000..881e59c9ab Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.wb.expect-noinput differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-rand-max.huff.expect b/lib/std/compress/flate/testdata/block_writer/huffman-rand-max.huff.expect new file mode 100644 index 0000000000..47d53c89c0 Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-rand-max.huff.expect differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-rand-max.input b/lib/std/compress/flate/testdata/block_writer/huffman-rand-max.input new file mode 100644 index 0000000000..8418633d2a Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-rand-max.input differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-shifts.dyn.expect b/lib/std/compress/flate/testdata/block_writer/huffman-shifts.dyn.expect new file mode 100644 index 0000000000..7812c1c62d Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-shifts.dyn.expect differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-shifts.dyn.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-shifts.dyn.expect-noinput new file mode 100644 index 0000000000..7812c1c62d Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-shifts.dyn.expect-noinput differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-shifts.huff.expect b/lib/std/compress/flate/testdata/block_writer/huffman-shifts.huff.expect new file mode 100644 index 0000000000..f5133778e1 Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-shifts.huff.expect differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-shifts.input b/lib/std/compress/flate/testdata/block_writer/huffman-shifts.input new file mode 100644 index 0000000000..7c7a50d158 --- /dev/null +++ b/lib/std/compress/flate/testdata/block_writer/huffman-shifts.input @@ -0,0 +1,2 @@ 
+10101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010
1010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010 +232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323232323 \ No newline at end of file diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-shifts.wb.expect b/lib/std/compress/flate/testdata/block_writer/huffman-shifts.wb.expect new file mode 100644 index 0000000000..7812c1c62d Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-shifts.wb.expect differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-shifts.wb.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-shifts.wb.expect-noinput new file mode 100644 index 0000000000..7812c1c62d Binary files 
/dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-shifts.wb.expect-noinput differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.dyn.expect b/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.dyn.expect new file mode 100644 index 0000000000..71ce3aeb75 Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.dyn.expect differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.dyn.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.dyn.expect-noinput new file mode 100644 index 0000000000..71ce3aeb75 Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.dyn.expect-noinput differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.huff.expect b/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.huff.expect new file mode 100644 index 0000000000..ff023114bb Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.huff.expect differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.input b/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.input new file mode 100644 index 0000000000..cc5c3ad69d --- /dev/null +++ b/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.input @@ -0,0 +1,14 @@ +//Copyright2009ThGoAuthor.Allrightrrvd. +//UofthiourccodigovrndbyBSD-tyl +//licnthtcnbfoundinthLICENSEfil. + +pckgmin + +import"o" + +funcmin(){ + vrb=mk([]byt,65535) + f,_:=o.Crt("huffmn-null-mx.in") + f.Writ(b) +} +ABCDEFGHIJKLMNOPQRSTUVXxyz!"#¤%&/?" \ No newline at end of file diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.wb.expect b/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.wb.expect new file mode 100644 index 0000000000..71ce3aeb75 Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.wb.expect differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.wb.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.wb.expect-noinput new file mode 100644 index 0000000000..71ce3aeb75 Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.wb.expect-noinput differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-text.dyn.expect b/lib/std/compress/flate/testdata/block_writer/huffman-text.dyn.expect new file mode 100644 index 0000000000..fbffc3f36b Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-text.dyn.expect differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-text.dyn.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-text.dyn.expect-noinput new file mode 100644 index 0000000000..fbffc3f36b Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-text.dyn.expect-noinput differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-text.huff.expect b/lib/std/compress/flate/testdata/block_writer/huffman-text.huff.expect new file mode 100644 index 0000000000..46fa51fdad Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-text.huff.expect differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-text.input b/lib/std/compress/flate/testdata/block_writer/huffman-text.input new file mode 100644 index 0000000000..df97174253 --- /dev/null +++ 
b/lib/std/compress/flate/testdata/block_writer/huffman-text.input @@ -0,0 +1,14 @@ +// zig v0.10.0 +// create a file filled with 0x00 +const std = @import("std"); + +pub fn main() !void { + var b = [1]u8{0} ** 65535; + const f = try std.fs.cwd().createFile( + "huffman-null-max.in", + .{ .read = true }, + ); + defer f.close(); + + _ = try f.writeAll(b[0..]); +} diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-text.wb.expect b/lib/std/compress/flate/testdata/block_writer/huffman-text.wb.expect new file mode 100644 index 0000000000..fbffc3f36b Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-text.wb.expect differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-text.wb.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-text.wb.expect-noinput new file mode 100644 index 0000000000..fbffc3f36b Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-text.wb.expect-noinput differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-zero.dyn.expect b/lib/std/compress/flate/testdata/block_writer/huffman-zero.dyn.expect new file mode 100644 index 0000000000..830348a79a Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-zero.dyn.expect differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-zero.dyn.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-zero.dyn.expect-noinput new file mode 100644 index 0000000000..830348a79a Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-zero.dyn.expect-noinput differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-zero.huff.expect b/lib/std/compress/flate/testdata/block_writer/huffman-zero.huff.expect new file mode 100644 index 0000000000..5abdbaff9a Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-zero.huff.expect differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-zero.input b/lib/std/compress/flate/testdata/block_writer/huffman-zero.input new file mode 100644 index 0000000000..349be0e6ec --- /dev/null +++ b/lib/std/compress/flate/testdata/block_writer/huffman-zero.input @@ -0,0 +1 @@ +00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 \ No newline at end of file diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-zero.wb.expect b/lib/std/compress/flate/testdata/block_writer/huffman-zero.wb.expect new file mode 100644 index 0000000000..dbe401c54c Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-zero.wb.expect differ diff --git a/lib/std/compress/flate/testdata/block_writer/huffman-zero.wb.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-zero.wb.expect-noinput new file mode 100644 index 0000000000..dbe401c54c Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/huffman-zero.wb.expect-noinput differ diff --git a/lib/std/compress/flate/testdata/block_writer/null-long-match.dyn.expect-noinput b/lib/std/compress/flate/testdata/block_writer/null-long-match.dyn.expect-noinput new file mode 100644 index 0000000000..8b92d9fc20 Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/null-long-match.dyn.expect-noinput differ 
diff --git a/lib/std/compress/flate/testdata/block_writer/null-long-match.wb.expect-noinput b/lib/std/compress/flate/testdata/block_writer/null-long-match.wb.expect-noinput new file mode 100644 index 0000000000..8b92d9fc20 Binary files /dev/null and b/lib/std/compress/flate/testdata/block_writer/null-long-match.wb.expect-noinput differ diff --git a/lib/std/compress/flate/testdata/fuzz/deflate-stream.expect b/lib/std/compress/flate/testdata/fuzz/deflate-stream.expect new file mode 100644 index 0000000000..ab73c04108 --- /dev/null +++ b/lib/std/compress/flate/testdata/fuzz/deflate-stream.expect @@ -0,0 +1,22 @@ +[ + { id: "brieflz", + name: "BriefLZ", + libraryUrl: "https://github.com/jibsen/brieflz", + license: "MIT", + revision: "bcaa6a1ee7ccf005512b5c23aa92b40cf75f9ed1", + codecs: [ { name: "brieflz" } ], }, + { id: "brotli", + name: "Brotli", + libraryUrl: "https://github.com/google/brotli", + license: "Apache 2.0", + revision: "1dd66ef114fd244778d9dcb5da09c28b49a0df33", + codecs: [ { name: "brotli", + levels: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], + streaming: true } ], }, + { id: "bsc", + name: "bsc", + libraryUrl: "http://libbsc.com/", + license: "Apache 2.0", + revision: "b2b07421381b19b2fada8b291f3cdead10578abc", + codecs: [ { name: "bsc" } ] } +] diff --git a/lib/std/compress/flate/testdata/fuzz/deflate-stream.input b/lib/std/compress/flate/testdata/fuzz/deflate-stream.input new file mode 100644 index 0000000000..a0ed06f872 --- /dev/null +++ b/lib/std/compress/flate/testdata/fuzz/deflate-stream.input @@ -0,0 +1,3 @@ +•=oÂ0†çò+Ṉ„ÄÙÚ­R»µKƒ}>W!AI@j+þ{ +¨|ˆª–¼<çóÝóÎFÎx[ÀØ´ž\õ9f;Pë%ü0à§·#®¼iuûñÚV¡¸èûUWDQéûÅÚL°YFïÞtTGç¿U©î†Ÿ_Ž´¥ï|SãQë©D¢ã. + +Abstract + + This specification defines a lossless compressed data format that + compresses data using a combination of the LZ77 algorithm and Huffman + coding, with efficiency comparable to the best currently available + general-purpose compression methods. The data can be produced or + consumed, even for an arbitrarily long sequentially presented input + data stream, using only an a priori bounded amount of intermediate + storage. The format can be implemented readily in a manner not + covered by patents. + + + + + + + + +Deutsch Informational [Page 1] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + +Table of Contents + + 1. Introduction ................................................... 2 + 1.1. Purpose ................................................... 2 + 1.2. Intended audience ......................................... 3 + 1.3. Scope ..................................................... 3 + 1.4. Compliance ................................................ 3 + 1.5. Definitions of terms and conventions used ................ 3 + 1.6. Changes from previous versions ............................ 4 + 2. Compressed representation overview ............................. 4 + 3. Detailed specification ......................................... 5 + 3.1. Overall conventions ....................................... 5 + 3.1.1. Packing into bytes .................................. 5 + 3.2. Compressed block format ................................... 6 + 3.2.1. Synopsis of prefix and Huffman coding ............... 6 + 3.2.2. Use of Huffman coding in the "deflate" format ....... 7 + 3.2.3. Details of block format ............................. 9 + 3.2.4. Non-compressed blocks (BTYPE=00) ................... 11 + 3.2.5. Compressed blocks (length and distance codes) ...... 11 + 3.2.6. Compression with fixed Huffman codes (BTYPE=01) .... 12 + 3.2.7. 
Compression with dynamic Huffman codes (BTYPE=10) .. 13 + 3.3. Compliance ............................................... 14 + 4. Compression algorithm details ................................. 14 + 5. References .................................................... 16 + 6. Security Considerations ....................................... 16 + 7. Source code ................................................... 16 + 8. Acknowledgements .............................................. 16 + 9. Author's Address .............................................. 17 + +1. Introduction + + 1.1. Purpose + + The purpose of this specification is to define a lossless + compressed data format that: + * Is independent of CPU type, operating system, file system, + and character set, and hence can be used for interchange; + * Can be produced or consumed, even for an arbitrarily long + sequentially presented input data stream, using only an a + priori bounded amount of intermediate storage, and hence + can be used in data communications or similar structures + such as Unix filters; + * Compresses data with efficiency comparable to the best + currently available general-purpose compression methods, + and in particular considerably better than the "compress" + program; + * Can be implemented readily in a manner not covered by + patents, and hence can be practiced freely; + + + +Deutsch Informational [Page 2] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + * Is compatible with the file format produced by the current + widely used gzip utility, in that conforming decompressors + will be able to read data produced by the existing gzip + compressor. + + The data format defined by this specification does not attempt to: + + * Allow random access to compressed data; + * Compress specialized data (e.g., raster graphics) as well + as the best currently available specialized algorithms. + + A simple counting argument shows that no lossless compression + algorithm can compress every possible input data set. For the + format defined here, the worst case expansion is 5 bytes per 32K- + byte block, i.e., a size increase of 0.015% for large data sets. + English text usually compresses by a factor of 2.5 to 3; + executable files usually compress somewhat less; graphical data + such as raster images may compress much more. + + 1.2. Intended audience + + This specification is intended for use by implementors of software + to compress data into "deflate" format and/or decompress data from + "deflate" format. + + The text of the specification assumes a basic background in + programming at the level of bits and other primitive data + representations. Familiarity with the technique of Huffman coding + is helpful but not required. + + 1.3. Scope + + The specification specifies a method for representing a sequence + of bytes as a (usually shorter) sequence of bits, and a method for + packing the latter bit sequence into bytes. + + 1.4. Compliance + + Unless otherwise indicated below, a compliant decompressor must be + able to accept and decompress any data set that conforms to all + the specifications presented here; a compliant compressor must + produce data sets that conform to all the specifications presented + here. + + 1.5. Definitions of terms and conventions used + + Byte: 8 bits stored or transmitted as a unit (same as an octet). 
+ For this specification, a byte is exactly 8 bits, even on machines + + + +Deutsch Informational [Page 3] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + which store a character on a number of bits different from eight. + See below, for the numbering of bits within a byte. + + String: a sequence of arbitrary bytes. + + 1.6. Changes from previous versions + + There have been no technical changes to the deflate format since + version 1.1 of this specification. In version 1.2, some + terminology was changed. Version 1.3 is a conversion of the + specification to RFC style. + +2. Compressed representation overview + + A compressed data set consists of a series of blocks, corresponding + to successive blocks of input data. The block sizes are arbitrary, + except that non-compressible blocks are limited to 65,535 bytes. + + Each block is compressed using a combination of the LZ77 algorithm + and Huffman coding. The Huffman trees for each block are independent + of those for previous or subsequent blocks; the LZ77 algorithm may + use a reference to a duplicated string occurring in a previous block, + up to 32K input bytes before. + + Each block consists of two parts: a pair of Huffman code trees that + describe the representation of the compressed data part, and a + compressed data part. (The Huffman trees themselves are compressed + using Huffman encoding.) The compressed data consists of a series of + elements of two types: literal bytes (of strings that have not been + detected as duplicated within the previous 32K input bytes), and + pointers to duplicated strings, where a pointer is represented as a + pair . The representation used in the + "deflate" format limits distances to 32K bytes and lengths to 258 + bytes, but does not limit the size of a block, except for + uncompressible blocks, which are limited as noted above. + + Each type of value (literals, distances, and lengths) in the + compressed data is represented using a Huffman code, using one code + tree for literals and lengths and a separate code tree for distances. + The code trees for each block appear in a compact form just before + the compressed data for that block. + + + + + + + + + + +Deutsch Informational [Page 4] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + +3. Detailed specification + + 3.1. Overall conventions In the diagrams below, a box like this: + + +---+ + | | <-- the vertical bars might be missing + +---+ + + represents one byte; a box like this: + + +==============+ + | | + +==============+ + + represents a variable number of bytes. + + Bytes stored within a computer do not have a "bit order", since + they are always treated as a unit. However, a byte considered as + an integer between 0 and 255 does have a most- and least- + significant bit, and since we write numbers with the most- + significant digit on the left, we also write bytes with the most- + significant bit on the left. In the diagrams below, we number the + bits of a byte so that bit 0 is the least-significant bit, i.e., + the bits are numbered: + + +--------+ + |76543210| + +--------+ + + Within a computer, a number may occupy multiple bytes. All + multi-byte numbers in the format described here are stored with + the least-significant byte first (at the lower memory address). + For example, the decimal number 520 is stored as: + + 0 1 + +--------+--------+ + |00001000|00000010| + +--------+--------+ + ^ ^ + | | + | + more significant byte = 2 x 256 + + less significant byte = 8 + + 3.1.1. 
Packing into bytes + + This document does not address the issue of the order in which + bits of a byte are transmitted on a bit-sequential medium, + since the final data format described here is byte- rather than + + + +Deutsch Informational [Page 5] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + bit-oriented. However, we describe the compressed block format + in below, as a sequence of data elements of various bit + lengths, not a sequence of bytes. We must therefore specify + how to pack these data elements into bytes to form the final + compressed byte sequence: + + * Data elements are packed into bytes in order of + increasing bit number within the byte, i.e., starting + with the least-significant bit of the byte. + * Data elements other than Huffman codes are packed + starting with the least-significant bit of the data + element. + * Huffman codes are packed starting with the most- + significant bit of the code. + + In other words, if one were to print out the compressed data as + a sequence of bytes, starting with the first byte at the + *right* margin and proceeding to the *left*, with the most- + significant bit of each byte on the left as usual, one would be + able to parse the result from right to left, with fixed-width + elements in the correct MSB-to-LSB order and Huffman codes in + bit-reversed order (i.e., with the first bit of the code in the + relative LSB position). + + 3.2. Compressed block format + + 3.2.1. Synopsis of prefix and Huffman coding + + Prefix coding represents symbols from an a priori known + alphabet by bit sequences (codes), one code for each symbol, in + a manner such that different symbols may be represented by bit + sequences of different lengths, but a parser can always parse + an encoded string unambiguously symbol-by-symbol. + + We define a prefix code in terms of a binary tree in which the + two edges descending from each non-leaf node are labeled 0 and + 1 and in which the leaf nodes correspond one-for-one with (are + labeled with) the symbols of the alphabet; then the code for a + symbol is the sequence of 0's and 1's on the edges leading from + the root to the leaf labeled with that symbol. For example: + + + + + + + + + + + +Deutsch Informational [Page 6] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + /\ Symbol Code + 0 1 ------ ---- + / \ A 00 + /\ B B 1 + 0 1 C 011 + / \ D 010 + A /\ + 0 1 + / \ + D C + + A parser can decode the next symbol from an encoded input + stream by walking down the tree from the root, at each step + choosing the edge corresponding to the next input bit. + + Given an alphabet with known symbol frequencies, the Huffman + algorithm allows the construction of an optimal prefix code + (one which represents strings with those symbol frequencies + using the fewest bits of any possible prefix codes for that + alphabet). Such a code is called a Huffman code. (See + reference [1] in Chapter 5, references for additional + information on Huffman codes.) + + Note that in the "deflate" format, the Huffman codes for the + various alphabets must not exceed certain maximum code lengths. + This constraint complicates the algorithm for computing code + lengths from symbol frequencies. Again, see Chapter 5, + references for details. + + 3.2.2. 
Use of Huffman coding in the "deflate" format + + The Huffman codes used for each alphabet in the "deflate" + format have two additional rules: + + * All codes of a given bit length have lexicographically + consecutive values, in the same order as the symbols + they represent; + + * Shorter codes lexicographically precede longer codes. + + + + + + + + + + + + +Deutsch Informational [Page 7] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + We could recode the example above to follow this rule as + follows, assuming that the order of the alphabet is ABCD: + + Symbol Code + ------ ---- + A 10 + B 0 + C 110 + D 111 + + I.e., 0 precedes 10 which precedes 11x, and 110 and 111 are + lexicographically consecutive. + + Given this rule, we can define the Huffman code for an alphabet + just by giving the bit lengths of the codes for each symbol of + the alphabet in order; this is sufficient to determine the + actual codes. In our example, the code is completely defined + by the sequence of bit lengths (2, 1, 3, 3). The following + algorithm generates the codes as integers, intended to be read + from most- to least-significant bit. The code lengths are + initially in tree[I].Len; the codes are produced in + tree[I].Code. + + 1) Count the number of codes for each code length. Let + bl_count[N] be the number of codes of length N, N >= 1. + + 2) Find the numerical value of the smallest code for each + code length: + + code = 0; + bl_count[0] = 0; + for (bits = 1; bits <= MAX_BITS; bits++) { + code = (code + bl_count[bits-1]) << 1; + next_code[bits] = code; + } + + 3) Assign numerical values to all codes, using consecutive + values for all codes of the same length with the base + values determined at step 2. Codes that are never used + (which have a bit length of zero) must not be assigned a + value. + + for (n = 0; n <= max_code; n++) { + len = tree[n].Len; + if (len != 0) { + tree[n].Code = next_code[len]; + next_code[len]++; + } + + + +Deutsch Informational [Page 8] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + } + + Example: + + Consider the alphabet ABCDEFGH, with bit lengths (3, 3, 3, 3, + 3, 2, 4, 4). After step 1, we have: + + N bl_count[N] + - ----------- + 2 1 + 3 5 + 4 2 + + Step 2 computes the following next_code values: + + N next_code[N] + - ------------ + 1 0 + 2 0 + 3 2 + 4 14 + + Step 3 produces the following code values: + + Symbol Length Code + ------ ------ ---- + A 3 010 + B 3 011 + C 3 100 + D 3 101 + E 3 110 + F 2 00 + G 4 1110 + H 4 1111 + + 3.2.3. Details of block format + + Each block of compressed data begins with 3 header bits + containing the following data: + + first bit BFINAL + next 2 bits BTYPE + + Note that the header bits do not necessarily begin on a byte + boundary, since a block does not necessarily occupy an integral + number of bytes. + + + + + +Deutsch Informational [Page 9] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + BFINAL is set if and only if this is the last block of the data + set. + + BTYPE specifies how the data are compressed, as follows: + + 00 - no compression + 01 - compressed with fixed Huffman codes + 10 - compressed with dynamic Huffman codes + 11 - reserved (error) + + The only difference between the two compressed cases is how the + Huffman codes for the literal/length and distance alphabets are + defined. + + In all cases, the decoding algorithm for the actual data is as + follows: + + do + read block header from input stream. 
+ if stored with no compression + skip any remaining bits in current partially + processed byte + read LEN and NLEN (see next section) + copy LEN bytes of data to output + otherwise + if compressed with dynamic Huffman codes + read representation of code trees (see + subsection below) + loop (until end of block code recognized) + decode literal/length value from input stream + if value < 256 + copy value (literal byte) to output stream + otherwise + if value = end of block (256) + break from loop + otherwise (value = 257..285) + decode distance from input stream + + move backwards distance bytes in the output + stream, and copy length bytes from this + position to the output stream. + end loop + while not last block + + Note that a duplicated string reference may refer to a string + in a previous block; i.e., the backward distance may cross one + or more block boundaries. However a distance cannot refer past + the beginning of the output stream. (An application using a + + + +Deutsch Informational [Page 10] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + preset dictionary might discard part of the output stream; a + distance can refer to that part of the output stream anyway) + Note also that the referenced string may overlap the current + position; for example, if the last 2 bytes decoded have values + X and Y, a string reference with + adds X,Y,X,Y,X to the output stream. + + We now specify each compression method in turn. + + 3.2.4. Non-compressed blocks (BTYPE=00) + + Any bits of input up to the next byte boundary are ignored. + The rest of the block consists of the following information: + + 0 1 2 3 4... + +---+---+---+---+================================+ + | LEN | NLEN |... LEN bytes of literal data...| + +---+---+---+---+================================+ + + LEN is the number of data bytes in the block. NLEN is the + one's complement of LEN. + + 3.2.5. Compressed blocks (length and distance codes) + + As noted above, encoded data blocks in the "deflate" format + consist of sequences of symbols drawn from three conceptually + distinct alphabets: either literal bytes, from the alphabet of + byte values (0..255), or pairs, + where the length is drawn from (3..258) and the distance is + drawn from (1..32,768). In fact, the literal and length + alphabets are merged into a single alphabet (0..285), where + values 0..255 represent literal bytes, the value 256 indicates + end-of-block, and values 257..285 represent length codes + (possibly in conjunction with extra bits following the symbol + code) as follows: + + + + + + + + + + + + + + + + +Deutsch Informational [Page 11] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + Extra Extra Extra + Code Bits Length(s) Code Bits Lengths Code Bits Length(s) + ---- ---- ------ ---- ---- ------- ---- ---- ------- + 257 0 3 267 1 15,16 277 4 67-82 + 258 0 4 268 1 17,18 278 4 83-98 + 259 0 5 269 2 19-22 279 4 99-114 + 260 0 6 270 2 23-26 280 4 115-130 + 261 0 7 271 2 27-30 281 5 131-162 + 262 0 8 272 2 31-34 282 5 163-194 + 263 0 9 273 3 35-42 283 5 195-226 + 264 0 10 274 3 43-50 284 5 227-257 + 265 1 11,12 275 3 51-58 285 0 258 + 266 1 13,14 276 3 59-66 + + The extra bits should be interpreted as a machine integer + stored with the most-significant bit first, e.g., bits 1110 + represent the value 14. 
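
The length-code table above is what an inflate implementation typically flattens into two small arrays: a base length and an extra-bit count per symbol, indexed by code - 257. Below is a minimal Zig sketch of that mapping, as a reading aid only; the names base_length, extra_length_bits, lengthExtraBits and matchLength are illustrative and not taken from this patch:

    const std = @import("std");

    // Base match lengths and extra-bit counts for length codes 257..285
    // (RFC 1951, section 3.2.5), indexed by code - 257.
    const base_length = [29]u16{
        3,  4,  5,  6,   7,   8,   9,   10,  11, 13,
        15, 17, 19, 23,  27,  31,  35,  43,  51, 59,
        67, 83, 99, 115, 131, 163, 195, 227, 258,
    };
    const extra_length_bits = [29]u4{
        0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
        1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
        4, 4, 4, 4, 5, 5, 5, 5, 0,
    };

    /// Number of extra bits to read from the stream after a length code.
    fn lengthExtraBits(code: u16) u4 {
        return extra_length_bits[code - 257];
    }

    /// Match length for a length code plus the integer value of its extra bits.
    fn matchLength(code: u16, extra: u16) u16 {
        return base_length[code - 257] + extra;
    }

    test "length code 269 covers lengths 19-22 using 2 extra bits" {
        try std.testing.expectEqual(@as(u4, 2), lengthExtraBits(269));
        try std.testing.expectEqual(@as(u16, 19), matchLength(269, 0));
        try std.testing.expectEqual(@as(u16, 22), matchLength(269, 3));
        // Code 285 is the special case: no extra bits, fixed length 258.
        try std.testing.expectEqual(@as(u16, 258), matchLength(285, 0));
    }

The distance table below works the same way, just with a wider range of extra bits (0 to 13) and base distances from 1 to 24577.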
+ + Extra Extra Extra + Code Bits Dist Code Bits Dist Code Bits Distance + ---- ---- ---- ---- ---- ------ ---- ---- -------- + 0 0 1 10 4 33-48 20 9 1025-1536 + 1 0 2 11 4 49-64 21 9 1537-2048 + 2 0 3 12 5 65-96 22 10 2049-3072 + 3 0 4 13 5 97-128 23 10 3073-4096 + 4 1 5,6 14 6 129-192 24 11 4097-6144 + 5 1 7,8 15 6 193-256 25 11 6145-8192 + 6 2 9-12 16 7 257-384 26 12 8193-12288 + 7 2 13-16 17 7 385-512 27 12 12289-16384 + 8 3 17-24 18 8 513-768 28 13 16385-24576 + 9 3 25-32 19 8 769-1024 29 13 24577-32768 + + 3.2.6. Compression with fixed Huffman codes (BTYPE=01) + + The Huffman codes for the two alphabets are fixed, and are not + represented explicitly in the data. The Huffman code lengths + for the literal/length alphabet are: + + Lit Value Bits Codes + --------- ---- ----- + 0 - 143 8 00110000 through + 10111111 + 144 - 255 9 110010000 through + 111111111 + 256 - 279 7 0000000 through + 0010111 + 280 - 287 8 11000000 through + 11000111 + + + +Deutsch Informational [Page 12] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + The code lengths are sufficient to generate the actual codes, + as described above; we show the codes in the table for added + clarity. Literal/length values 286-287 will never actually + occur in the compressed data, but participate in the code + construction. + + Distance codes 0-31 are represented by (fixed-length) 5-bit + codes, with possible additional bits as shown in the table + shown in Paragraph 3.2.5, above. Note that distance codes 30- + 31 will never actually occur in the compressed data. + + 3.2.7. Compression with dynamic Huffman codes (BTYPE=10) + + The Huffman codes for the two alphabets appear in the block + immediately after the header bits and before the actual + compressed data, first the literal/length code and then the + distance code. Each code is defined by a sequence of code + lengths, as discussed in Paragraph 3.2.2, above. For even + greater compactness, the code length sequences themselves are + compressed using a Huffman code. The alphabet for code lengths + is as follows: + + 0 - 15: Represent code lengths of 0 - 15 + 16: Copy the previous code length 3 - 6 times. + The next 2 bits indicate repeat length + (0 = 3, ... , 3 = 6) + Example: Codes 8, 16 (+2 bits 11), + 16 (+2 bits 10) will expand to + 12 code lengths of 8 (1 + 6 + 5) + 17: Repeat a code length of 0 for 3 - 10 times. + (3 bits of length) + 18: Repeat a code length of 0 for 11 - 138 times + (7 bits of length) + + A code length of 0 indicates that the corresponding symbol in + the literal/length or distance alphabet will not occur in the + block, and should not participate in the Huffman code + construction algorithm given earlier. If only one distance + code is used, it is encoded using one bit, not zero bits; in + this case there is a single code length of one, with one unused + code. One distance code of zero bits means that there are no + distance codes used at all (the data is all literals). 
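
Both the fixed (3.2.6) and dynamic (3.2.7) cases ultimately hand the decoder nothing but lists of code lengths, which the canonical construction of section 3.2.2 above turns into actual codes. The following is a minimal Zig sketch of that construction, using the RFC's own worked examples as tests; the function name and the flat codes output are illustrative, and a real decoder would normally build a lookup table from the lengths instead:

    const std = @import("std");

    /// Canonical Huffman codes from code lengths, following section 3.2.2:
    /// count codes per length, compute the smallest code for each length,
    /// then hand out consecutive values. Codes are integers to be read from
    /// most- to least-significant bit.
    fn canonicalCodes(comptime max_bits: usize, lens: []const u4, codes: []u16) void {
        std.debug.assert(codes.len == lens.len);

        // Step 1: count the number of codes for each code length.
        var bl_count = [_]u16{0} ** (max_bits + 1);
        for (lens) |len| bl_count[len] += 1;
        bl_count[0] = 0; // symbols with length 0 get no code

        // Step 2: numerical value of the smallest code for each length.
        var next_code = [_]u16{0} ** (max_bits + 1);
        var code: u16 = 0;
        var bits: usize = 1;
        while (bits <= max_bits) : (bits += 1) {
            code = (code + bl_count[bits - 1]) << 1;
            next_code[bits] = code;
        }

        // Step 3: consecutive values for all codes of the same length.
        for (lens, 0..) |len, n| {
            if (len == 0) continue;
            codes[n] = next_code[len];
            next_code[len] += 1;
        }
    }

    test "section 3.2.2 example: lengths (2, 1, 3, 3) for ABCD" {
        const lens = [_]u4{ 2, 1, 3, 3 };
        var codes: [4]u16 = undefined;
        canonicalCodes(3, &lens, &codes);
        // A = 10, B = 0, C = 110, D = 111
        try std.testing.expectEqualSlices(u16, &[_]u16{ 2, 0, 6, 7 }, &codes);
    }

    test "section 3.2.2 example: lengths (3, 3, 3, 3, 3, 2, 4, 4) for ABCDEFGH" {
        const lens = [_]u4{ 3, 3, 3, 3, 3, 2, 4, 4 };
        var codes: [8]u16 = undefined;
        canonicalCodes(4, &lens, &codes);
        // A = 010, B = 011, C = 100, D = 101, E = 110, F = 00, G = 1110, H = 1111
        try std.testing.expectEqualSlices(u16, &[_]u16{ 2, 3, 4, 5, 6, 0, 14, 15 }, &codes);
    }

This is also why transmitting only code lengths, never the codes themselves, is enough in the dynamic case: given the same lengths, every conforming decoder reconstructs exactly the same codes.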
+ + We can now define the format of the block: + + 5 Bits: HLIT, # of Literal/Length codes - 257 (257 - 286) + 5 Bits: HDIST, # of Distance codes - 1 (1 - 32) + 4 Bits: HCLEN, # of Code Length codes - 4 (4 - 19) + + + +Deutsch Informational [Page 13] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + (HCLEN + 4) x 3 bits: code lengths for the code length + alphabet given just above, in the order: 16, 17, 18, + 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 + + These code lengths are interpreted as 3-bit integers + (0-7); as above, a code length of 0 means the + corresponding symbol (literal/length or distance code + length) is not used. + + HLIT + 257 code lengths for the literal/length alphabet, + encoded using the code length Huffman code + + HDIST + 1 code lengths for the distance alphabet, + encoded using the code length Huffman code + + The actual compressed data of the block, + encoded using the literal/length and distance Huffman + codes + + The literal/length symbol 256 (end of data), + encoded using the literal/length Huffman code + + The code length repeat codes can cross from HLIT + 257 to the + HDIST + 1 code lengths. In other words, all code lengths form + a single sequence of HLIT + HDIST + 258 values. + + 3.3. Compliance + + A compressor may limit further the ranges of values specified in + the previous section and still be compliant; for example, it may + limit the range of backward pointers to some value smaller than + 32K. Similarly, a compressor may limit the size of blocks so that + a compressible block fits in memory. + + A compliant decompressor must accept the full range of possible + values defined in the previous section, and must accept blocks of + arbitrary size. + +4. Compression algorithm details + + While it is the intent of this document to define the "deflate" + compressed data format without reference to any particular + compression algorithm, the format is related to the compressed + formats produced by LZ77 (Lempel-Ziv 1977, see reference [2] below); + since many variations of LZ77 are patented, it is strongly + recommended that the implementor of a compressor follow the general + algorithm presented here, which is known not to be patented per se. + The material in this section is not part of the definition of the + + + +Deutsch Informational [Page 14] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + specification per se, and a compressor need not follow it in order to + be compliant. + + The compressor terminates a block when it determines that starting a + new block with fresh trees would be useful, or when the block size + fills up the compressor's block buffer. + + The compressor uses a chained hash table to find duplicated strings, + using a hash function that operates on 3-byte sequences. At any + given point during compression, let XYZ be the next 3 input bytes to + be examined (not necessarily all different, of course). First, the + compressor examines the hash chain for XYZ. If the chain is empty, + the compressor simply writes out X as a literal byte and advances one + byte in the input. If the hash chain is not empty, indicating that + the sequence XYZ (or, if we are unlucky, some other 3 bytes with the + same hash function value) has occurred recently, the compressor + compares all strings on the XYZ hash chain with the actual input data + sequence starting at the current point, and selects the longest + match. 
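
The chained hash table described here boils down to two arrays: head, which maps a hash of the next three bytes to the most recent position where that hash was seen, and prev, which links each position to the previous one with the same hash. Below is a minimal Zig sketch of the idea; the table sizes, the multiplicative hash, and the use of position 0 as an implicit empty marker are simplifications for illustration, not the actual layout used by the compressor in this patch:

    const std = @import("std");

    const window_size = 32768; // maximum backward distance in deflate
    const hash_bits = 15;
    const hash_size = 1 << hash_bits;

    const HashChain = struct {
        // Most recent position per hash value, and per-position link to the
        // previous position with the same hash. A longer match search keeps
        // following prev[candidate] to reach older occurrences.
        head: [hash_size]u16 = [_]u16{0} ** hash_size,
        prev: [window_size]u16 = [_]u16{0} ** window_size,

        fn hash(bytes: *const [3]u8) u16 {
            const v = (@as(u32, bytes[0]) << 16) | (@as(u32, bytes[1]) << 8) | bytes[2];
            return @intCast((v *% 2654435761) >> (32 - hash_bits));
        }

        /// Record the 3-byte sequence starting at pos and return the previous
        /// position with the same hash: the first candidate the compressor
        /// would compare against the current input.
        fn insert(self: *HashChain, data: []const u8, pos: u16) u16 {
            const h = hash(data[pos..][0..3]);
            const candidate = self.head[h];
            self.prev[pos % window_size] = candidate;
            self.head[h] = pos;
            return candidate;
        }
    };

    test "a repeated 3-byte sequence is found through the chain" {
        var chain = HashChain{};
        const data = "abcxabc";
        _ = chain.insert(data, 0); // "abc"
        _ = chain.insert(data, 1); // "bcx"
        _ = chain.insert(data, 2); // "cxa"
        // "abc" again: the chain points back at position 0.
        try std.testing.expectEqual(@as(u16, 0), chain.insert(data, 4));
    }

From such a candidate position the compressor then compares bytes forward to measure the actual match length before deciding how to encode it.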
+ + The compressor searches the hash chains starting with the most recent + strings, to favor small distances and thus take advantage of the + Huffman encoding. The hash chains are singly linked. There are no + deletions from the hash chains; the algorithm simply discards matches + that are too old. To avoid a worst-case situation, very long hash + chains are arbitrarily truncated at a certain length, determined by a + run-time parameter. + + To improve overall compression, the compressor optionally defers the + selection of matches ("lazy matching"): after a match of length N has + been found, the compressor searches for a longer match starting at + the next input byte. If it finds a longer match, it truncates the + previous match to a length of one (thus producing a single literal + byte) and then emits the longer match. Otherwise, it emits the + original match, and, as described above, advances N bytes before + continuing. + + Run-time parameters also control this "lazy match" procedure. If + compression ratio is most important, the compressor attempts a + complete second search regardless of the length of the first match. + In the normal case, if the current match is "long enough", the + compressor reduces the search for a longer match, thus speeding up + the process. If speed is most important, the compressor inserts new + strings in the hash table only when no match was found, or when the + match is not "too long". This degrades the compression ratio but + saves time since there are both fewer insertions and fewer searches. + + + + + +Deutsch Informational [Page 15] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + +5. References + + [1] Huffman, D. A., "A Method for the Construction of Minimum + Redundancy Codes", Proceedings of the Institute of Radio + Engineers, September 1952, Volume 40, Number 9, pp. 1098-1101. + + [2] Ziv J., Lempel A., "A Universal Algorithm for Sequential Data + Compression", IEEE Transactions on Information Theory, Vol. 23, + No. 3, pp. 337-343. + + [3] Gailly, J.-L., and Adler, M., ZLIB documentation and sources, + available in ftp://ftp.uu.net/pub/archiving/zip/doc/ + + [4] Gailly, J.-L., and Adler, M., GZIP documentation and sources, + available as gzip-*.tar in ftp://prep.ai.mit.edu/pub/gnu/ + + [5] Schwartz, E. S., and Kallick, B. "Generating a canonical prefix + encoding." Comm. ACM, 7,3 (Mar. 1964), pp. 166-169. + + [6] Hirschberg and Lelewer, "Efficient decoding of prefix codes," + Comm. ACM, 33,4, April 1990, pp. 449-459. + +6. Security Considerations + + Any data compression method involves the reduction of redundancy in + the data. Consequently, any corruption of the data is likely to have + severe effects and be difficult to correct. Uncompressed text, on + the other hand, will probably still be readable despite the presence + of some corrupted bytes. + + It is recommended that systems using this data format provide some + means of validating the integrity of the compressed data. See + reference [3], for example. + +7. Source code + + Source code for a C language implementation of a "deflate" compliant + compressor and decompressor is available within the zlib package at + ftp://ftp.uu.net/pub/archiving/zip/zlib/. + +8. Acknowledgements + + Trademarks cited in this document are the property of their + respective owners. + + Phil Katz designed the deflate format. Jean-Loup Gailly and Mark + Adler wrote the related software described in this specification. + Glenn Randers-Pehrson converted this document to RFC and HTML format. 
+ + + +Deutsch Informational [Page 16] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + +9. Author's Address + + L. Peter Deutsch + Aladdin Enterprises + 203 Santa Margarita Ave. + Menlo Park, CA 94025 + + Phone: (415) 322-0103 (AM only) + FAX: (415) 322-1734 + EMail: + + Questions about the technical content of this specification can be + sent by email to: + + Jean-Loup Gailly and + Mark Adler + + Editorial comments on this specification can be sent by email to: + + L. Peter Deutsch and + Glenn Randers-Pehrson + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Deutsch Informational [Page 17] + diff --git a/lib/std/compress/flate/zlib.zig b/lib/std/compress/flate/zlib.zig new file mode 100644 index 0000000000..328e625119 --- /dev/null +++ b/lib/std/compress/flate/zlib.zig @@ -0,0 +1,66 @@ +const deflate = @import("deflate.zig"); +const inflate = @import("inflate.zig"); + +/// Decompress compressed data from reader and write plain data to the writer. +pub fn decompress(reader: anytype, writer: anytype) !void { + try inflate.decompress(.zlib, reader, writer); +} + +/// Decompressor type +pub fn Decompressor(comptime ReaderType: type) type { + return inflate.Inflate(.zlib, ReaderType); +} + +/// Create Decompressor which will read compressed data from reader. +pub fn decompressor(reader: anytype) Decompressor(@TypeOf(reader)) { + return inflate.decompressor(.zlib, reader); +} + +/// Compression level, trades between speed and compression size. +pub const Options = deflate.Options; + +/// Compress plain data from reader and write compressed data to the writer. +pub fn compress(reader: anytype, writer: anytype, options: Options) !void { + try deflate.compress(.zlib, reader, writer, options); +} + +/// Compressor type +pub fn Compressor(comptime WriterType: type) type { + return deflate.Compressor(.zlib, WriterType); +} + +/// Create Compressor which outputs compressed data to the writer. +pub fn compressor(writer: anytype, options: Options) !Compressor(@TypeOf(writer)) { + return try deflate.compressor(.zlib, writer, options); +} + +/// Huffman only compression. Without Lempel-Ziv match searching. Faster +/// compression, less memory requirements but bigger compressed sizes. +pub const huffman = struct { + pub fn compress(reader: anytype, writer: anytype) !void { + try deflate.huffman.compress(.zlib, reader, writer); + } + + pub fn Compressor(comptime WriterType: type) type { + return deflate.huffman.Compressor(.zlib, WriterType); + } + + pub fn compressor(writer: anytype) !huffman.Compressor(@TypeOf(writer)) { + return deflate.huffman.compressor(.zlib, writer); + } +}; + +// No compression store only. Compressed size is slightly bigger than plain. 
+pub const store = struct { + pub fn compress(reader: anytype, writer: anytype) !void { + try deflate.store.compress(.zlib, reader, writer); + } + + pub fn Compressor(comptime WriterType: type) type { + return deflate.store.Compressor(.zlib, WriterType); + } + + pub fn compressor(writer: anytype) !store.Compressor(@TypeOf(writer)) { + return deflate.store.compressor(.zlib, writer); + } +}; diff --git a/lib/std/compress/gzip.zig b/lib/std/compress/gzip.zig index 0576812a09..9d9bad7184 100644 --- a/lib/std/compress/gzip.zig +++ b/lib/std/compress/gzip.zig @@ -6,7 +6,7 @@ const io = std.io; const fs = std.fs; const testing = std.testing; const mem = std.mem; -const deflate = std.compress.deflate; +const deflate = @import("deflate.zig"); const magic = &[2]u8{ 0x1f, 0x8b }; diff --git a/lib/std/compress/zlib.zig b/lib/std/compress/zlib.zig index 6708875930..811f714286 100644 --- a/lib/std/compress/zlib.zig +++ b/lib/std/compress/zlib.zig @@ -6,7 +6,7 @@ const io = std.io; const fs = std.fs; const testing = std.testing; const mem = std.mem; -const deflate = std.compress.deflate; +const deflate = @import("deflate.zig"); // Zlib header format as specified in RFC1950 const ZLibHeader = packed struct { diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 601a949ecf..3b9e46be92 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -1212,8 +1212,7 @@ pub fn readElfDebugInfo( const chdr = section_reader.readStruct(elf.Chdr) catch continue; if (chdr.ch_type != .ZLIB) continue; - var zlib_stream = std.compress.zlib.decompressStream(allocator, section_stream.reader()) catch continue; - defer zlib_stream.deinit(); + var zlib_stream = std.compress.zlib.decompressor(section_stream.reader()); const decompressed_section = try allocator.alloc(u8, chdr.ch_size); errdefer allocator.free(decompressed_section); diff --git a/lib/std/http/Client.zig b/lib/std/http/Client.zig index ed6aec55aa..a50e814fd4 100644 --- a/lib/std/http/Client.zig +++ b/lib/std/http/Client.zig @@ -404,8 +404,8 @@ pub const RequestTransfer = union(enum) { /// The decompressor for response messages. 
pub const Compression = union(enum) { - pub const DeflateDecompressor = std.compress.zlib.DecompressStream(Request.TransferReader); - pub const GzipDecompressor = std.compress.gzip.Decompress(Request.TransferReader); + pub const DeflateDecompressor = std.compress.zlib.Decompressor(Request.TransferReader); + pub const GzipDecompressor = std.compress.gzip.Decompressor(Request.TransferReader); pub const ZstdDecompressor = std.compress.zstd.DecompressStream(Request.TransferReader, .{}); deflate: DeflateDecompressor, @@ -601,8 +601,8 @@ pub const Request = struct { pub fn deinit(req: *Request) void { switch (req.response.compression) { .none => {}, - .deflate => |*deflate| deflate.deinit(), - .gzip => |*gzip| gzip.deinit(), + .deflate => {}, + .gzip => {}, .zstd => |*zstd| zstd.deinit(), } @@ -632,8 +632,8 @@ pub const Request = struct { switch (req.response.compression) { .none => {}, - .deflate => |*deflate| deflate.deinit(), - .gzip => |*gzip| gzip.deinit(), + .deflate => {}, + .gzip => {}, .zstd => |*zstd| zstd.deinit(), } @@ -941,10 +941,10 @@ pub const Request = struct { .identity => req.response.compression = .none, .compress, .@"x-compress" => return error.CompressionNotSupported, .deflate => req.response.compression = .{ - .deflate = std.compress.zlib.decompressStream(req.client.allocator, req.transferReader()) catch return error.CompressionInitializationFailed, + .deflate = std.compress.zlib.decompressor(req.transferReader()), }, .gzip, .@"x-gzip" => req.response.compression = .{ - .gzip = std.compress.gzip.decompress(req.client.allocator, req.transferReader()) catch return error.CompressionInitializationFailed, + .gzip = std.compress.gzip.decompressor(req.transferReader()), }, .zstd => req.response.compression = .{ .zstd = std.compress.zstd.decompressStream(req.client.allocator, req.transferReader()), diff --git a/lib/std/http/Server.zig b/lib/std/http/Server.zig index 48c4e2cbfb..4659041779 100644 --- a/lib/std/http/Server.zig +++ b/lib/std/http/Server.zig @@ -195,8 +195,8 @@ pub const ResponseTransfer = union(enum) { /// The decompressor for request messages. 
pub const Compression = union(enum) { - pub const DeflateDecompressor = std.compress.zlib.DecompressStream(Response.TransferReader); - pub const GzipDecompressor = std.compress.gzip.Decompress(Response.TransferReader); + pub const DeflateDecompressor = std.compress.zlib.Decompressor(Response.TransferReader); + pub const GzipDecompressor = std.compress.gzip.Decompressor(Response.TransferReader); pub const ZstdDecompressor = std.compress.zstd.DecompressStream(Response.TransferReader, .{}); deflate: DeflateDecompressor, @@ -420,8 +420,8 @@ pub const Response = struct { switch (res.request.compression) { .none => {}, - .deflate => |*deflate| deflate.deinit(), - .gzip => |*gzip| gzip.deinit(), + .deflate => {}, + .gzip => {}, .zstd => |*zstd| zstd.deinit(), } @@ -605,10 +605,10 @@ pub const Response = struct { .identity => res.request.compression = .none, .compress, .@"x-compress" => return error.CompressionNotSupported, .deflate => res.request.compression = .{ - .deflate = std.compress.zlib.decompressStream(res.allocator, res.transferReader()) catch return error.CompressionInitializationFailed, + .deflate = std.compress.zlib.decompressor(res.transferReader()), }, .gzip, .@"x-gzip" => res.request.compression = .{ - .gzip = std.compress.gzip.decompress(res.allocator, res.transferReader()) catch return error.CompressionInitializationFailed, + .gzip = std.compress.gzip.decompressor(res.transferReader()), }, .zstd => res.request.compression = .{ .zstd = std.compress.zstd.decompressStream(res.allocator, res.transferReader()), diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index fb9d7c823c..ed3c6b099f 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -1099,7 +1099,12 @@ fn unpackResource( switch (file_type) { .tar => try unpackTarball(f, tmp_directory.handle, resource.reader()), - .@"tar.gz" => try unpackTarballCompressed(f, tmp_directory.handle, resource, std.compress.gzip), + .@"tar.gz" => { + const reader = resource.reader(); + var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, reader); + var dcp = std.compress.gzip.decompressor(br.reader()); + try unpackTarball(f, tmp_directory.handle, dcp.reader()); + }, .@"tar.xz" => try unpackTarballCompressed(f, tmp_directory.handle, resource, std.compress.xz), .@"tar.zst" => try unpackTarballCompressed(f, tmp_directory.handle, resource, ZstdWrapper), .git_pack => unpackGitPack(f, tmp_directory.handle, resource) catch |err| switch (err) { diff --git a/src/Package/Fetch/git.zig b/src/Package/Fetch/git.zig index df5332d41d..abbb031948 100644 --- a/src/Package/Fetch/git.zig +++ b/src/Package/Fetch/git.zig @@ -1115,8 +1115,7 @@ fn indexPackFirstPass( const entry_header = try EntryHeader.read(entry_crc32_reader.reader()); switch (entry_header) { inline .commit, .tree, .blob, .tag => |object, tag| { - var entry_decompress_stream = try std.compress.zlib.decompressStream(allocator, entry_crc32_reader.reader()); - defer entry_decompress_stream.deinit(); + var entry_decompress_stream = std.compress.zlib.decompressor(entry_crc32_reader.reader()); var entry_counting_reader = std.io.countingReader(entry_decompress_stream.reader()); var entry_hashed_writer = hashedWriter(std.io.null_writer, Sha1.init(.{})); const entry_writer = entry_hashed_writer.writer(); @@ -1135,8 +1134,7 @@ fn indexPackFirstPass( }); }, inline .ofs_delta, .ref_delta => |delta| { - var entry_decompress_stream = try std.compress.zlib.decompressStream(allocator, entry_crc32_reader.reader()); - defer entry_decompress_stream.deinit(); + var 
entry_decompress_stream = std.compress.zlib.decompressor(entry_crc32_reader.reader()); var entry_counting_reader = std.io.countingReader(entry_decompress_stream.reader()); var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init(); try fifo.pump(entry_counting_reader.reader(), std.io.null_writer); @@ -1257,8 +1255,7 @@ fn resolveDeltaChain( fn readObjectRaw(allocator: Allocator, reader: anytype, size: u64) ![]u8 { const alloc_size = std.math.cast(usize, size) orelse return error.ObjectTooLarge; var buffered_reader = std.io.bufferedReader(reader); - var decompress_stream = try std.compress.zlib.decompressStream(allocator, buffered_reader.reader()); - defer decompress_stream.deinit(); + var decompress_stream = std.compress.zlib.decompressor(buffered_reader.reader()); const data = try allocator.alloc(u8, alloc_size); errdefer allocator.free(data); try decompress_stream.reader().readNoEof(data); diff --git a/src/link/Elf/Object.zig b/src/link/Elf/Object.zig index 395e6680a1..b29de3fc59 100644 --- a/src/link/Elf/Object.zig +++ b/src/link/Elf/Object.zig @@ -902,9 +902,7 @@ pub fn codeDecompressAlloc(self: Object, elf_file: *Elf, atom_index: Atom.Index) switch (chdr.ch_type) { .ZLIB => { var stream = std.io.fixedBufferStream(data[@sizeOf(elf.Elf64_Chdr)..]); - var zlib_stream = std.compress.zlib.decompressStream(gpa, stream.reader()) catch - return error.InputOutput; - defer zlib_stream.deinit(); + var zlib_stream = std.compress.zlib.decompressor(stream.reader()); const size = std.math.cast(usize, chdr.ch_size) orelse return error.Overflow; const decomp = try gpa.alloc(u8, size); const nread = zlib_stream.reader().readAll(decomp) catch return error.InputOutput; diff --git a/src/objcopy.zig b/src/objcopy.zig index 8ea0eaac59..81638d4af0 100644 --- a/src/objcopy.zig +++ b/src/objcopy.zig @@ -1298,8 +1298,7 @@ const ElfFileHelper = struct { try compressed_stream.writer().writeAll(prefix); { - var compressor = try std.compress.zlib.compressStream(allocator, compressed_stream.writer(), .{}); - defer compressor.deinit(); + var compressor = try std.compress.zlib.compressor(compressed_stream.writer(), .{}); var buf: [8000]u8 = undefined; while (true) {