From 6c48aad991f64f7e5bb92af498cc4cbddca9895e Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 4 Apr 2025 18:33:42 -0700 Subject: [PATCH] update some more std lib API to new Reader/Writer std.compress needs an audit, I see some problems --- lib/std/compress/flate/BitWriter.zig | 93 +++ lib/std/compress/flate/BlockWriter.zig | 696 +++++++++++++++++++ lib/std/compress/flate/bit_writer.zig | 99 --- lib/std/compress/flate/block_writer.zig | 706 -------------------- lib/std/compress/flate/deflate.zig | 24 +- lib/std/compress/flate/inflate.zig | 25 +- lib/std/compress/lzma/decode.zig | 168 ++++- lib/std/compress/lzma/decode/rangecoder.zig | 181 ----- lib/std/compress/lzma2.zig | 2 +- lib/std/compress/lzma2/decode.zig | 32 +- lib/std/compress/zlib.zig | 49 +- lib/std/debug/Dwarf.zig | 2 +- lib/std/debug/SelfInfo.zig | 6 +- lib/std/fs/File.zig | 4 +- lib/std/io/bit_reader.zig | 6 +- 15 files changed, 1012 insertions(+), 1081 deletions(-) create mode 100644 lib/std/compress/flate/BitWriter.zig create mode 100644 lib/std/compress/flate/BlockWriter.zig delete mode 100644 lib/std/compress/flate/bit_writer.zig delete mode 100644 lib/std/compress/flate/block_writer.zig delete mode 100644 lib/std/compress/lzma/decode/rangecoder.zig diff --git a/lib/std/compress/flate/BitWriter.zig b/lib/std/compress/flate/BitWriter.zig new file mode 100644 index 0000000000..9f7d242a03 --- /dev/null +++ b/lib/std/compress/flate/BitWriter.zig @@ -0,0 +1,93 @@ +//! Bit writer for use in deflate (compression). +//! +//! Has internal bits buffer of 64 bits and internal bytes buffer of 248 bytes. +//! When we accumulate 48 bits 6 bytes are moved to the bytes buffer. When we +//! accumulate 240 bytes they are flushed to the underlying inner_writer. + +const std = @import("std"); +const assert = std.debug.assert; +const BitWriter = @This(); + +// buffer_flush_size indicates the buffer size +// after which bytes are flushed to the writer. 
+// Should preferably be a multiple of 6, since +// we accumulate 6 bytes between writes to the buffer. +const buffer_flush_size = 240; + +// buffer_size is the actual output byte buffer size. +// It must have additional headroom for a flush +// which can contain up to 8 bytes. +const buffer_size = buffer_flush_size + 8; + +inner_writer: *std.io.BufferedWriter, + +// Data waiting to be written is bytes[0 .. nbytes] +// and then the low nbits of bits. Data is always written +// sequentially into the bytes array. +bits: u64 = 0, +nbits: u32 = 0, // number of bits +bytes: [buffer_size]u8 = undefined, +nbytes: u32 = 0, // number of bytes + +const Self = @This(); + +pub fn init(bw: *std.io.BufferedWriter) Self { + return .{ .inner_writer = bw }; +} + +pub fn setWriter(self: *Self, new_writer: *std.io.BufferedWriter) void { + self.inner_writer = new_writer; +} + +pub fn flush(self: *Self) anyerror!void { + var n = self.nbytes; + while (self.nbits != 0) { + self.bytes[n] = @as(u8, @truncate(self.bits)); + self.bits >>= 8; + if (self.nbits > 8) { // Avoid underflow + self.nbits -= 8; + } else { + self.nbits = 0; + } + n += 1; + } + self.bits = 0; + _ = try self.inner_writer.write(self.bytes[0..n]); + self.nbytes = 0; +} + +pub fn writeBits(self: *Self, b: u32, nb: u32) anyerror!void { + self.bits |= @as(u64, @intCast(b)) << @as(u6, @intCast(self.nbits)); + self.nbits += nb; + if (self.nbits < 48) + return; + + var n = self.nbytes; + std.mem.writeInt(u64, self.bytes[n..][0..8], self.bits, .little); + n += 6; + if (n >= buffer_flush_size) { + _ = try self.inner_writer.write(self.bytes[0..n]); + n = 0; + } + self.nbytes = n; + self.bits >>= 48; + self.nbits -= 48; +} + +pub fn writeBytes(self: *Self, bytes: []const u8) anyerror!void { + var n = self.nbytes; + if (self.nbits & 7 != 0) { + return error.UnfinishedBits; + } + while (self.nbits != 0) { + self.bytes[n] = @as(u8, @truncate(self.bits)); + self.bits >>= 8; + self.nbits -= 8; + n += 1; + } + if (n != 0) { + _ = try 
self.inner_writer.write(self.bytes[0..n]); + } + self.nbytes = 0; + _ = try self.inner_writer.write(bytes); +} diff --git a/lib/std/compress/flate/BlockWriter.zig b/lib/std/compress/flate/BlockWriter.zig new file mode 100644 index 0000000000..721bd59f2a --- /dev/null +++ b/lib/std/compress/flate/BlockWriter.zig @@ -0,0 +1,696 @@ +//! Accepts list of tokens, decides what is best block type to write. What block +//! type will provide best compression. Writes header and body of the block. +const std = @import("std"); +const io = std.io; +const assert = std.debug.assert; + +const hc = @import("huffman_encoder.zig"); +const consts = @import("consts.zig").huffman; +const Token = @import("Token.zig"); +const BitWriter = @import("BitWriter.zig"); +const BlockWriter = @This(); + +const codegen_order = consts.codegen_order; +const end_code_mark = 255; +const Self = @This(); + +bit_writer: BitWriter, + +codegen_freq: [consts.codegen_code_count]u16 = undefined, +literal_freq: [consts.max_num_lit]u16 = undefined, +distance_freq: [consts.distance_code_count]u16 = undefined, +codegen: [consts.max_num_lit + consts.distance_code_count + 1]u8 = undefined, +literal_encoding: hc.LiteralEncoder = .{}, +distance_encoding: hc.DistanceEncoder = .{}, +codegen_encoding: hc.CodegenEncoder = .{}, +fixed_literal_encoding: hc.LiteralEncoder, +fixed_distance_encoding: hc.DistanceEncoder, +huff_distance: hc.DistanceEncoder, + +pub fn init(writer: *std.io.BufferedWriter) Self { + return .{ + .bit_writer = BitWriter.init(writer), + .fixed_literal_encoding = hc.fixedLiteralEncoder(), + .fixed_distance_encoding = hc.fixedDistanceEncoder(), + .huff_distance = hc.huffmanDistanceEncoder(), + }; +} + +/// Flush internal bit buffer to the writer. +/// Should be called only when bit stream is at byte boundary. +/// +/// That is after final block; when last byte could be incomplete or +/// after stored block; which is aligned to the byte boundary (it has x +/// padding bits after first 3 bits).
+pub fn flush(self: *Self) anyerror!void { + try self.bit_writer.flush(); +} + +pub fn setWriter(self: *Self, new_writer: *std.io.BufferedWriter) void { + self.bit_writer.setWriter(new_writer); +} + +fn writeCode(self: *Self, c: hc.HuffCode) anyerror!void { + try self.bit_writer.writeBits(c.code, c.len); +} + +// RFC 1951 3.2.7 specifies a special run-length encoding for specifying +// the literal and distance lengths arrays (which are concatenated into a single +// array). This method generates that run-length encoding. +// +// The result is written into the codegen array, and the frequencies +// of each code are written into the codegen_freq array. +// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional +// information. Code end_code_mark is an end marker. +// +// num_literals: The number of literals in literal_encoding +// num_distances: The number of distances in distance_encoding +// lit_enc: The literal encoder to use +// dist_enc: The distance encoder to use +fn generateCodegen( + self: *Self, + num_literals: u32, + num_distances: u32, + lit_enc: *hc.LiteralEncoder, + dist_enc: *hc.DistanceEncoder, +) void { + for (self.codegen_freq, 0..) |_, i| { + self.codegen_freq[i] = 0; + } + + // Note that we are using codegen both as a temporary variable for holding + // a copy of the frequencies, and as the place where we put the result. + // This is fine because the output is always shorter than the input used + // so far. + var codegen = &self.codegen; // cache + // Copy the concatenated code sizes to codegen. Put a marker at the end. + var cgnl = codegen[0..num_literals]; + for (cgnl, 0..) |_, i| { + cgnl[i] = @as(u8, @intCast(lit_enc.codes[i].len)); + } + + cgnl = codegen[num_literals .. num_literals + num_distances]; + for (cgnl, 0..)
|_, i| { + cgnl[i] = @as(u8, @intCast(dist_enc.codes[i].len)); + } + codegen[num_literals + num_distances] = end_code_mark; + + var size = codegen[0]; + var count: i32 = 1; + var out_index: u32 = 0; + var in_index: u32 = 1; + while (size != end_code_mark) : (in_index += 1) { + // INVARIANT: We have seen "count" copies of size that have not yet + // had output generated for them. + const next_size = codegen[in_index]; + if (next_size == size) { + count += 1; + continue; + } + // We need to generate codegen indicating "count" of size. + if (size != 0) { + codegen[out_index] = size; + out_index += 1; + self.codegen_freq[size] += 1; + count -= 1; + while (count >= 3) { + var n: i32 = 6; + if (n > count) { + n = count; + } + codegen[out_index] = 16; + out_index += 1; + codegen[out_index] = @as(u8, @intCast(n - 3)); + out_index += 1; + self.codegen_freq[16] += 1; + count -= n; + } + } else { + while (count >= 11) { + var n: i32 = 138; + if (n > count) { + n = count; + } + codegen[out_index] = 18; + out_index += 1; + codegen[out_index] = @as(u8, @intCast(n - 11)); + out_index += 1; + self.codegen_freq[18] += 1; + count -= n; + } + if (count >= 3) { + // 3 <= count <= 10 + codegen[out_index] = 17; + out_index += 1; + codegen[out_index] = @as(u8, @intCast(count - 3)); + out_index += 1; + self.codegen_freq[17] += 1; + count = 0; + } + } + count -= 1; + while (count >= 0) : (count -= 1) { + codegen[out_index] = size; + out_index += 1; + self.codegen_freq[size] += 1; + } + // Set up invariant for next time through the loop. + size = next_size; + count = 1; + } + // Marker indicating the end of the codegen. + codegen[out_index] = end_code_mark; +} + +const DynamicSize = struct { + size: u32, + num_codegens: u32, +}; + +// dynamicSize returns the size of dynamically encoded data in bits. 
+fn dynamicSize( + self: *Self, + lit_enc: *hc.LiteralEncoder, // literal encoder + dist_enc: *hc.DistanceEncoder, // distance encoder + extra_bits: u32, +) DynamicSize { + var num_codegens = self.codegen_freq.len; + while (num_codegens > 4 and self.codegen_freq[codegen_order[num_codegens - 1]] == 0) { + num_codegens -= 1; + } + const header = 3 + 5 + 5 + 4 + (3 * num_codegens) + + self.codegen_encoding.bitLength(self.codegen_freq[0..]) + + self.codegen_freq[16] * 2 + + self.codegen_freq[17] * 3 + + self.codegen_freq[18] * 7; + const size = header + + lit_enc.bitLength(&self.literal_freq) + + dist_enc.bitLength(&self.distance_freq) + + extra_bits; + + return DynamicSize{ + .size = @as(u32, @intCast(size)), + .num_codegens = @as(u32, @intCast(num_codegens)), + }; +} + +// fixedSize returns the size of fixed Huffman encoded data in bits. +fn fixedSize(self: *Self, extra_bits: u32) u32 { + return 3 + + self.fixed_literal_encoding.bitLength(&self.literal_freq) + + self.fixed_distance_encoding.bitLength(&self.distance_freq) + + extra_bits; +} + +const StoredSize = struct { + size: u32, + storable: bool, +}; + +// storedSizeFits calculates the stored size, including header. +// The function returns the size in bits and whether the input +// fits inside a single stored block. +fn storedSizeFits(in: ?[]const u8) StoredSize { + if (in == null) { + return .{ .size = 0, .storable = false }; + } + if (in.?.len <= consts.max_store_block_size) { + return .{ .size = @as(u32, @intCast((in.?.len + 5) * 8)), .storable = true }; + } + return .{ .size = 0, .storable = false }; +} + +// Write the header of a dynamic Huffman block to the output stream. +// +// num_literals: The number of literals specified in codegen +// num_distances: The number of distances specified in codegen +// num_codegens: The number of codegens used in codegen +// eof: Is it the end-of-file?
(end of stream) +fn dynamicHeader( + self: *Self, + num_literals: u32, + num_distances: u32, + num_codegens: u32, + eof: bool, +) anyerror!void { + const first_bits: u32 = if (eof) 5 else 4; + try self.bit_writer.writeBits(first_bits, 3); + try self.bit_writer.writeBits(num_literals - 257, 5); + try self.bit_writer.writeBits(num_distances - 1, 5); + try self.bit_writer.writeBits(num_codegens - 4, 4); + + var i: u32 = 0; + while (i < num_codegens) : (i += 1) { + const value = self.codegen_encoding.codes[codegen_order[i]].len; + try self.bit_writer.writeBits(value, 3); + } + + i = 0; + while (true) { + const code_word: u32 = @as(u32, @intCast(self.codegen[i])); + i += 1; + if (code_word == end_code_mark) { + break; + } + try self.writeCode(self.codegen_encoding.codes[@as(u32, @intCast(code_word))]); + + switch (code_word) { + 16 => { + try self.bit_writer.writeBits(self.codegen[i], 2); + i += 1; + }, + 17 => { + try self.bit_writer.writeBits(self.codegen[i], 3); + i += 1; + }, + 18 => { + try self.bit_writer.writeBits(self.codegen[i], 7); + i += 1; + }, + else => {}, + } + } +} + +fn storedHeader(self: *Self, length: usize, eof: bool) anyerror!void { + assert(length <= 65535); + const flag: u32 = if (eof) 1 else 0; + try self.bit_writer.writeBits(flag, 3); + try self.flush(); + const l: u16 = @intCast(length); + try self.bit_writer.writeBits(l, 16); + try self.bit_writer.writeBits(~l, 16); +} + +fn fixedHeader(self: *Self, eof: bool) anyerror!void { + // Indicate that we are a fixed Huffman block + var value: u32 = 2; + if (eof) { + value = 3; + } + try self.bit_writer.writeBits(value, 3); +} + +// Write a block of tokens with the smallest encoding. Will choose block type. +// The original input can be supplied, and if the huffman encoded data +// is larger than the original bytes, the data will be written as a +// stored block. +// If the input is null, the tokens will always be Huffman encoded. 
+pub fn write(self: *Self, tokens: []const Token, eof: bool, input: ?[]const u8) anyerror!void { + const lit_and_dist = self.indexTokens(tokens); + const num_literals = lit_and_dist.num_literals; + const num_distances = lit_and_dist.num_distances; + + var extra_bits: u32 = 0; + const ret = storedSizeFits(input); + const stored_size = ret.size; + const storable = ret.storable; + + if (storable) { + // We only bother calculating the costs of the extra bits required by + // the length of distance fields (which will be the same for both fixed + // and dynamic encoding), if we need to compare those two encodings + // against stored encoding. + var length_code: u16 = Token.length_codes_start + 8; + while (length_code < num_literals) : (length_code += 1) { + // First eight length codes have extra size = 0. + extra_bits += @as(u32, @intCast(self.literal_freq[length_code])) * + @as(u32, @intCast(Token.lengthExtraBits(length_code))); + } + var distance_code: u16 = 4; + while (distance_code < num_distances) : (distance_code += 1) { + // First four distance codes have extra size = 0. + extra_bits += @as(u32, @intCast(self.distance_freq[distance_code])) * + @as(u32, @intCast(Token.distanceExtraBits(distance_code))); + } + } + + // Figure out smallest code. + // Fixed Huffman baseline. + var literal_encoding = &self.fixed_literal_encoding; + var distance_encoding = &self.fixed_distance_encoding; + var size = self.fixedSize(extra_bits); + + // Dynamic Huffman? + var num_codegens: u32 = 0; + + // Generate codegen and codegenFrequencies, which indicates how to encode + // the literal_encoding and the distance_encoding. 
+ self.generateCodegen( + num_literals, + num_distances, + &self.literal_encoding, + &self.distance_encoding, + ); + self.codegen_encoding.generate(self.codegen_freq[0..], 7); + const dynamic_size = self.dynamicSize( + &self.literal_encoding, + &self.distance_encoding, + extra_bits, + ); + const dyn_size = dynamic_size.size; + num_codegens = dynamic_size.num_codegens; + + if (dyn_size < size) { + size = dyn_size; + literal_encoding = &self.literal_encoding; + distance_encoding = &self.distance_encoding; + } + + // Stored bytes? + if (storable and stored_size < size) { + try self.storedBlock(input.?, eof); + return; + } + + // Huffman. + if (@intFromPtr(literal_encoding) == @intFromPtr(&self.fixed_literal_encoding)) { + try self.fixedHeader(eof); + } else { + try self.dynamicHeader(num_literals, num_distances, num_codegens, eof); + } + + // Write the tokens. + try self.writeTokens(tokens, &literal_encoding.codes, &distance_encoding.codes); +} + +pub fn storedBlock(self: *Self, input: []const u8, eof: bool) anyerror!void { + try self.storedHeader(input.len, eof); + try self.bit_writer.writeBytes(input); +} + +// writeBlockDynamic encodes a block using a dynamic Huffman table. +// This should be used if the symbols used have a disproportionate +// histogram distribution. +// If input is supplied and the compression savings are below 1/16th of the +// input size the block is stored. +fn dynamicBlock( + self: *Self, + tokens: []const Token, + eof: bool, + input: ?[]const u8, +) anyerror!void { + const total_tokens = self.indexTokens(tokens); + const num_literals = total_tokens.num_literals; + const num_distances = total_tokens.num_distances; + + // Generate codegen and codegenFrequencies, which indicates how to encode + // the literal_encoding and the distance_encoding. 
+ self.generateCodegen( + num_literals, + num_distances, + &self.literal_encoding, + &self.distance_encoding, + ); + self.codegen_encoding.generate(self.codegen_freq[0..], 7); + const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.distance_encoding, 0); + const size = dynamic_size.size; + const num_codegens = dynamic_size.num_codegens; + + // Store bytes, if we don't get a reasonable improvement. + + const stored_size = storedSizeFits(input); + const ssize = stored_size.size; + const storable = stored_size.storable; + if (storable and ssize < (size + (size >> 4))) { + try self.storedBlock(input.?, eof); + return; + } + + // Write Huffman table. + try self.dynamicHeader(num_literals, num_distances, num_codegens, eof); + + // Write the tokens. + try self.writeTokens(tokens, &self.literal_encoding.codes, &self.distance_encoding.codes); +} + +const TotalIndexedTokens = struct { + num_literals: u32, + num_distances: u32, +}; + +// Indexes a slice of tokens followed by an end_block_marker, and updates +// literal_freq and distance_freq, and generates literal_encoding +// and distance_encoding. +// The number of literal and distance tokens is returned. +fn indexTokens(self: *Self, tokens: []const Token) TotalIndexedTokens { + var num_literals: u32 = 0; + var num_distances: u32 = 0; + + for (self.literal_freq, 0..) |_, i| { + self.literal_freq[i] = 0; + } + for (self.distance_freq, 0..) 
|_, i| { + self.distance_freq[i] = 0; + } + + for (tokens) |t| { + if (t.kind == Token.Kind.literal) { + self.literal_freq[t.literal()] += 1; + continue; + } + self.literal_freq[t.lengthCode()] += 1; + self.distance_freq[t.distanceCode()] += 1; + } + // add end_block_marker token at the end + self.literal_freq[consts.end_block_marker] += 1; + + // get the number of literals + num_literals = @as(u32, @intCast(self.literal_freq.len)); + while (self.literal_freq[num_literals - 1] == 0) { + num_literals -= 1; + } + // get the number of distances + num_distances = @as(u32, @intCast(self.distance_freq.len)); + while (num_distances > 0 and self.distance_freq[num_distances - 1] == 0) { + num_distances -= 1; + } + if (num_distances == 0) { + // We haven't found a single match. If we want to go with the dynamic encoding, + // we should count at least one distance to be sure that the distance huffman tree could be encoded. + self.distance_freq[0] = 1; + num_distances = 1; + } + self.literal_encoding.generate(&self.literal_freq, 15); + self.distance_encoding.generate(&self.distance_freq, 15); + return TotalIndexedTokens{ + .num_literals = num_literals, + .num_distances = num_distances, + }; +} + +// Writes a slice of tokens to the output followed by an end_block_marker. +// Codes for literal and distance encoding must be supplied.
+fn writeTokens( + self: *Self, + tokens: []const Token, + le_codes: []hc.HuffCode, + oe_codes: []hc.HuffCode, +) anyerror!void { + for (tokens) |t| { + if (t.kind == Token.Kind.literal) { + try self.writeCode(le_codes[t.literal()]); + continue; + } + + // Write the length + const le = t.lengthEncoding(); + try self.writeCode(le_codes[le.code]); + if (le.extra_bits > 0) { + try self.bit_writer.writeBits(le.extra_length, le.extra_bits); + } + + // Write the distance + const oe = t.distanceEncoding(); + try self.writeCode(oe_codes[oe.code]); + if (oe.extra_bits > 0) { + try self.bit_writer.writeBits(oe.extra_distance, oe.extra_bits); + } + } + // add end_block_marker at the end + try self.writeCode(le_codes[consts.end_block_marker]); +} + +// Encodes a block of bytes as either Huffman encoded literals or uncompressed bytes +// if the results only gains very little from compression. +pub fn huffmanBlock(self: *Self, input: []const u8, eof: bool) anyerror!void { + // Add everything as literals + histogram(input, &self.literal_freq); + + self.literal_freq[consts.end_block_marker] = 1; + + const num_literals = consts.end_block_marker + 1; + self.distance_freq[0] = 1; + const num_distances = 1; + + self.literal_encoding.generate(&self.literal_freq, 15); + + // Figure out smallest code. + // Always use dynamic Huffman or Store + var num_codegens: u32 = 0; + + // Generate codegen and codegenFrequencies, which indicates how to encode + // the literal_encoding and the distance_encoding. + self.generateCodegen( + num_literals, + num_distances, + &self.literal_encoding, + &self.huff_distance, + ); + self.codegen_encoding.generate(self.codegen_freq[0..], 7); + const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.huff_distance, 0); + const size = dynamic_size.size; + num_codegens = dynamic_size.num_codegens; + + // Store bytes, if we don't get a reasonable improvement. 
+ const stored_size_ret = storedSizeFits(input); + const ssize = stored_size_ret.size; + const storable = stored_size_ret.storable; + + if (storable and ssize < (size + (size >> 4))) { + try self.storedBlock(input, eof); + return; + } + + // Huffman. + try self.dynamicHeader(num_literals, num_distances, num_codegens, eof); + const encoding = self.literal_encoding.codes[0..257]; + + for (input) |t| { + const c = encoding[t]; + try self.bit_writer.writeBits(c.code, c.len); + } + try self.writeCode(encoding[consts.end_block_marker]); +} + +// histogram accumulates a histogram of b in h. +fn histogram(b: []const u8, h: *[286]u16) void { + // Clear histogram + for (h, 0..) |_, i| { + h[i] = 0; + } + + var lh = h.*[0..256]; + for (b) |t| { + lh[t] += 1; + } +} + +// tests +const expect = std.testing.expect; +const fmt = std.fmt; +const testing = std.testing; +const ArrayList = std.ArrayList; + +const TestCase = @import("testdata/block_writer.zig").TestCase; +const testCases = @import("testdata/block_writer.zig").testCases; + +// tests if the writeBlock encoding has changed. +test "write" { + inline for (0..testCases.len) |i| { + try testBlock(testCases[i], .write_block); + } +} + +// tests if the writeBlockDynamic encoding has changed. 
+test "dynamicBlock" { + inline for (0..testCases.len) |i| { + try testBlock(testCases[i], .write_dyn_block); + } +} + +test "huffmanBlock" { + inline for (0..testCases.len) |i| { + try testBlock(testCases[i], .write_huffman_block); + } + try testBlock(.{ + .tokens = &[_]Token{}, + .input = "huffman-rand-max.input", + .want = "huffman-rand-max.{s}.expect", + }, .write_huffman_block); +} + +const TestFn = enum { + write_block, + write_dyn_block, // write dynamic block + write_huffman_block, + + fn to_s(self: TestFn) []const u8 { + return switch (self) { + .write_block => "wb", + .write_dyn_block => "dyn", + .write_huffman_block => "huff", + }; + } + + fn write( + comptime self: TestFn, + bw: anytype, + tok: []const Token, + input: ?[]const u8, + final: bool, + ) !void { + switch (self) { + .write_block => try bw.write(tok, final, input), + .write_dyn_block => try bw.dynamicBlock(tok, final, input), + .write_huffman_block => try bw.huffmanBlock(input.?, final), + } + try bw.flush(); + } +}; + +// testBlock tests a block against its references +// +// size +// 64K [file-name].input - input non compressed file +// 8.1K [file-name].golden - +// 78 [file-name].dyn.expect - output with writeBlockDynamic +// 78 [file-name].wb.expect - output with writeBlock +// 8.1K [file-name].huff.expect - output with writeBlockHuff +// 78 [file-name].dyn.expect-noinput - output with writeBlockDynamic when input is null +// 78 [file-name].wb.expect-noinput - output with writeBlock when input is null +// +// wb - writeBlock +// dyn - writeBlockDynamic +// huff - writeBlockHuff +// +fn testBlock(comptime tc: TestCase, comptime tfn: TestFn) !void { + if (tc.input.len != 0 and tc.want.len != 0) { + const want_name = comptime fmt.comptimePrint(tc.want, .{tfn.to_s()}); + const input = @embedFile("testdata/block_writer/" ++ tc.input); + const want = @embedFile("testdata/block_writer/" ++ want_name); + try testWriteBlock(tfn, input, want, tc.tokens); + } + + if (tfn == .write_huffman_block) { + 
return; + } + + const want_name_no_input = comptime fmt.comptimePrint(tc.want_no_input, .{tfn.to_s()}); + const want = @embedFile("testdata/block_writer/" ++ want_name_no_input); + try testWriteBlock(tfn, null, want, tc.tokens); +} + +// Uses writer function `tfn` to write `tokens`, tests that we got `want` as output. +fn testWriteBlock(comptime tfn: TestFn, input: ?[]const u8, want: []const u8, tokens: []const Token) !void { + var buf = ArrayList(u8).init(testing.allocator); + var bw: BlockWriter = .init(buf.writer()); + try tfn.write(&bw, tokens, input, false); + var got = buf.items; + try testing.expectEqualSlices(u8, want, got); // expect writeBlock to yield expected result + try expect(got[0] & 0b0000_0001 == 0); // bfinal is not set + // + // Test if the writer produces the same output after reset. + buf.deinit(); + buf = ArrayList(u8).init(testing.allocator); + defer buf.deinit(); + bw.setWriter(buf.writer()); + + try tfn.write(&bw, tokens, input, true); + try bw.flush(); + got = buf.items; + + try expect(got[0] & 1 == 1); // bfinal is set + buf.items[0] &= 0b1111_1110; // remove bfinal bit, so we can run test slices + try testing.expectEqualSlices(u8, want, got); // expect writeBlock to yield expected result +} diff --git a/lib/std/compress/flate/bit_writer.zig b/lib/std/compress/flate/bit_writer.zig deleted file mode 100644 index b5d84c7e2a..0000000000 --- a/lib/std/compress/flate/bit_writer.zig +++ /dev/null @@ -1,99 +0,0 @@ -const std = @import("std"); -const assert = std.debug.assert; - -/// Bit writer for use in deflate (compression). -/// -/// Has internal bits buffer of 64 bits and internal bytes buffer of 248 bytes. -/// When we accumulate 48 bits 6 bytes are moved to the bytes buffer. When we -/// accumulate 240 bytes they are flushed to the underlying inner_writer. -/// -pub fn BitWriter(comptime WriterType: type) type { - // buffer_flush_size indicates the buffer size - // after which bytes are flushed to the writer. 
- // Should preferably be a multiple of 6, since - // we accumulate 6 bytes between writes to the buffer. - const buffer_flush_size = 240; - - // buffer_size is the actual output byte buffer size. - // It must have additional headroom for a flush - // which can contain up to 8 bytes. - const buffer_size = buffer_flush_size + 8; - - return struct { - inner_writer: WriterType, - - // Data waiting to be written is bytes[0 .. nbytes] - // and then the low nbits of bits. Data is always written - // sequentially into the bytes array. - bits: u64 = 0, - nbits: u32 = 0, // number of bits - bytes: [buffer_size]u8 = undefined, - nbytes: u32 = 0, // number of bytes - - const Self = @This(); - - pub const Error = WriterType.Error || error{UnfinishedBits}; - - pub fn init(writer: WriterType) Self { - return .{ .inner_writer = writer }; - } - - pub fn setWriter(self: *Self, new_writer: WriterType) void { - //assert(self.bits == 0 and self.nbits == 0 and self.nbytes == 0); - self.inner_writer = new_writer; - } - - pub fn flush(self: *Self) Error!void { - var n = self.nbytes; - while (self.nbits != 0) { - self.bytes[n] = @as(u8, @truncate(self.bits)); - self.bits >>= 8; - if (self.nbits > 8) { // Avoid underflow - self.nbits -= 8; - } else { - self.nbits = 0; - } - n += 1; - } - self.bits = 0; - _ = try self.inner_writer.write(self.bytes[0..n]); - self.nbytes = 0; - } - - pub fn writeBits(self: *Self, b: u32, nb: u32) Error!void { - self.bits |= @as(u64, @intCast(b)) << @as(u6, @intCast(self.nbits)); - self.nbits += nb; - if (self.nbits < 48) - return; - - var n = self.nbytes; - std.mem.writeInt(u64, self.bytes[n..][0..8], self.bits, .little); - n += 6; - if (n >= buffer_flush_size) { - _ = try self.inner_writer.write(self.bytes[0..n]); - n = 0; - } - self.nbytes = n; - self.bits >>= 48; - self.nbits -= 48; - } - - pub fn writeBytes(self: *Self, bytes: []const u8) Error!void { - var n = self.nbytes; - if (self.nbits & 7 != 0) { - return error.UnfinishedBits; - } - while 
(self.nbits != 0) { - self.bytes[n] = @as(u8, @truncate(self.bits)); - self.bits >>= 8; - self.nbits -= 8; - n += 1; - } - if (n != 0) { - _ = try self.inner_writer.write(self.bytes[0..n]); - } - self.nbytes = 0; - _ = try self.inner_writer.write(bytes); - } - }; -} diff --git a/lib/std/compress/flate/block_writer.zig b/lib/std/compress/flate/block_writer.zig deleted file mode 100644 index fa0d299e84..0000000000 --- a/lib/std/compress/flate/block_writer.zig +++ /dev/null @@ -1,706 +0,0 @@ -const std = @import("std"); -const io = std.io; -const assert = std.debug.assert; - -const hc = @import("huffman_encoder.zig"); -const consts = @import("consts.zig").huffman; -const Token = @import("Token.zig"); -const BitWriter = @import("bit_writer.zig").BitWriter; - -pub fn blockWriter(writer: anytype) BlockWriter(@TypeOf(writer)) { - return BlockWriter(@TypeOf(writer)).init(writer); -} - -/// Accepts list of tokens, decides what is best block type to write. What block -/// type will provide best compression. Writes header and body of the block. 
-/// -pub fn BlockWriter(comptime WriterType: type) type { - const BitWriterType = BitWriter(WriterType); - return struct { - const codegen_order = consts.codegen_order; - const end_code_mark = 255; - const Self = @This(); - - pub const Error = BitWriterType.Error; - bit_writer: BitWriterType, - - codegen_freq: [consts.codegen_code_count]u16 = undefined, - literal_freq: [consts.max_num_lit]u16 = undefined, - distance_freq: [consts.distance_code_count]u16 = undefined, - codegen: [consts.max_num_lit + consts.distance_code_count + 1]u8 = undefined, - literal_encoding: hc.LiteralEncoder = .{}, - distance_encoding: hc.DistanceEncoder = .{}, - codegen_encoding: hc.CodegenEncoder = .{}, - fixed_literal_encoding: hc.LiteralEncoder, - fixed_distance_encoding: hc.DistanceEncoder, - huff_distance: hc.DistanceEncoder, - - pub fn init(writer: WriterType) Self { - return .{ - .bit_writer = BitWriterType.init(writer), - .fixed_literal_encoding = hc.fixedLiteralEncoder(), - .fixed_distance_encoding = hc.fixedDistanceEncoder(), - .huff_distance = hc.huffmanDistanceEncoder(), - }; - } - - /// Flush intrenal bit buffer to the writer. - /// Should be called only when bit stream is at byte boundary. - /// - /// That is after final block; when last byte could be incomplete or - /// after stored block; which is aligned to the byte boundary (it has x - /// padding bits after first 3 bits). - pub fn flush(self: *Self) Error!void { - try self.bit_writer.flush(); - } - - pub fn setWriter(self: *Self, new_writer: WriterType) void { - self.bit_writer.setWriter(new_writer); - } - - fn writeCode(self: *Self, c: hc.HuffCode) Error!void { - try self.bit_writer.writeBits(c.code, c.len); - } - - // RFC 1951 3.2.7 specifies a special run-length encoding for specifying - // the literal and distance lengths arrays (which are concatenated into a single - // array). This method generates that run-length encoding. 
- // - // The result is written into the codegen array, and the frequencies - // of each code is written into the codegen_freq array. - // Codes 0-15 are single byte codes. Codes 16-18 are followed by additional - // information. Code bad_code is an end marker - // - // num_literals: The number of literals in literal_encoding - // num_distances: The number of distances in distance_encoding - // lit_enc: The literal encoder to use - // dist_enc: The distance encoder to use - fn generateCodegen( - self: *Self, - num_literals: u32, - num_distances: u32, - lit_enc: *hc.LiteralEncoder, - dist_enc: *hc.DistanceEncoder, - ) void { - for (self.codegen_freq, 0..) |_, i| { - self.codegen_freq[i] = 0; - } - - // Note that we are using codegen both as a temporary variable for holding - // a copy of the frequencies, and as the place where we put the result. - // This is fine because the output is always shorter than the input used - // so far. - var codegen = &self.codegen; // cache - // Copy the concatenated code sizes to codegen. Put a marker at the end. - var cgnl = codegen[0..num_literals]; - for (cgnl, 0..) |_, i| { - cgnl[i] = @as(u8, @intCast(lit_enc.codes[i].len)); - } - - cgnl = codegen[num_literals .. num_literals + num_distances]; - for (cgnl, 0..) |_, i| { - cgnl[i] = @as(u8, @intCast(dist_enc.codes[i].len)); - } - codegen[num_literals + num_distances] = end_code_mark; - - var size = codegen[0]; - var count: i32 = 1; - var out_index: u32 = 0; - var in_index: u32 = 1; - while (size != end_code_mark) : (in_index += 1) { - // INVARIANT: We have seen "count" copies of size that have not yet - // had output generated for them. - const next_size = codegen[in_index]; - if (next_size == size) { - count += 1; - continue; - } - // We need to generate codegen indicating "count" of size. 
- if (size != 0) { - codegen[out_index] = size; - out_index += 1; - self.codegen_freq[size] += 1; - count -= 1; - while (count >= 3) { - var n: i32 = 6; - if (n > count) { - n = count; - } - codegen[out_index] = 16; - out_index += 1; - codegen[out_index] = @as(u8, @intCast(n - 3)); - out_index += 1; - self.codegen_freq[16] += 1; - count -= n; - } - } else { - while (count >= 11) { - var n: i32 = 138; - if (n > count) { - n = count; - } - codegen[out_index] = 18; - out_index += 1; - codegen[out_index] = @as(u8, @intCast(n - 11)); - out_index += 1; - self.codegen_freq[18] += 1; - count -= n; - } - if (count >= 3) { - // 3 <= count <= 10 - codegen[out_index] = 17; - out_index += 1; - codegen[out_index] = @as(u8, @intCast(count - 3)); - out_index += 1; - self.codegen_freq[17] += 1; - count = 0; - } - } - count -= 1; - while (count >= 0) : (count -= 1) { - codegen[out_index] = size; - out_index += 1; - self.codegen_freq[size] += 1; - } - // Set up invariant for next time through the loop. - size = next_size; - count = 1; - } - // Marker indicating the end of the codegen. - codegen[out_index] = end_code_mark; - } - - const DynamicSize = struct { - size: u32, - num_codegens: u32, - }; - - // dynamicSize returns the size of dynamically encoded data in bits. 
- fn dynamicSize( - self: *Self, - lit_enc: *hc.LiteralEncoder, // literal encoder - dist_enc: *hc.DistanceEncoder, // distance encoder - extra_bits: u32, - ) DynamicSize { - var num_codegens = self.codegen_freq.len; - while (num_codegens > 4 and self.codegen_freq[codegen_order[num_codegens - 1]] == 0) { - num_codegens -= 1; - } - const header = 3 + 5 + 5 + 4 + (3 * num_codegens) + - self.codegen_encoding.bitLength(self.codegen_freq[0..]) + - self.codegen_freq[16] * 2 + - self.codegen_freq[17] * 3 + - self.codegen_freq[18] * 7; - const size = header + - lit_enc.bitLength(&self.literal_freq) + - dist_enc.bitLength(&self.distance_freq) + - extra_bits; - - return DynamicSize{ - .size = @as(u32, @intCast(size)), - .num_codegens = @as(u32, @intCast(num_codegens)), - }; - } - - // fixedSize returns the size of dynamically encoded data in bits. - fn fixedSize(self: *Self, extra_bits: u32) u32 { - return 3 + - self.fixed_literal_encoding.bitLength(&self.literal_freq) + - self.fixed_distance_encoding.bitLength(&self.distance_freq) + - extra_bits; - } - - const StoredSize = struct { - size: u32, - storable: bool, - }; - - // storedSizeFits calculates the stored size, including header. - // The function returns the size in bits and whether the block - // fits inside a single block. - fn storedSizeFits(in: ?[]const u8) StoredSize { - if (in == null) { - return .{ .size = 0, .storable = false }; - } - if (in.?.len <= consts.max_store_block_size) { - return .{ .size = @as(u32, @intCast((in.?.len + 5) * 8)), .storable = true }; - } - return .{ .size = 0, .storable = false }; - } - - // Write the header of a dynamic Huffman block to the output stream. - // - // num_literals: The number of literals specified in codegen - // num_distances: The number of distances specified in codegen - // num_codegens: The number of codegens used in codegen - // eof: Is it the end-of-file? 
(end of stream) - fn dynamicHeader( - self: *Self, - num_literals: u32, - num_distances: u32, - num_codegens: u32, - eof: bool, - ) Error!void { - const first_bits: u32 = if (eof) 5 else 4; - try self.bit_writer.writeBits(first_bits, 3); - try self.bit_writer.writeBits(num_literals - 257, 5); - try self.bit_writer.writeBits(num_distances - 1, 5); - try self.bit_writer.writeBits(num_codegens - 4, 4); - - var i: u32 = 0; - while (i < num_codegens) : (i += 1) { - const value = self.codegen_encoding.codes[codegen_order[i]].len; - try self.bit_writer.writeBits(value, 3); - } - - i = 0; - while (true) { - const code_word: u32 = @as(u32, @intCast(self.codegen[i])); - i += 1; - if (code_word == end_code_mark) { - break; - } - try self.writeCode(self.codegen_encoding.codes[@as(u32, @intCast(code_word))]); - - switch (code_word) { - 16 => { - try self.bit_writer.writeBits(self.codegen[i], 2); - i += 1; - }, - 17 => { - try self.bit_writer.writeBits(self.codegen[i], 3); - i += 1; - }, - 18 => { - try self.bit_writer.writeBits(self.codegen[i], 7); - i += 1; - }, - else => {}, - } - } - } - - fn storedHeader(self: *Self, length: usize, eof: bool) Error!void { - assert(length <= 65535); - const flag: u32 = if (eof) 1 else 0; - try self.bit_writer.writeBits(flag, 3); - try self.flush(); - const l: u16 = @intCast(length); - try self.bit_writer.writeBits(l, 16); - try self.bit_writer.writeBits(~l, 16); - } - - fn fixedHeader(self: *Self, eof: bool) Error!void { - // Indicate that we are a fixed Huffman block - var value: u32 = 2; - if (eof) { - value = 3; - } - try self.bit_writer.writeBits(value, 3); - } - - // Write a block of tokens with the smallest encoding. Will choose block type. - // The original input can be supplied, and if the huffman encoded data - // is larger than the original bytes, the data will be written as a - // stored block. - // If the input is null, the tokens will always be Huffman encoded. 
- pub fn write(self: *Self, tokens: []const Token, eof: bool, input: ?[]const u8) Error!void { - const lit_and_dist = self.indexTokens(tokens); - const num_literals = lit_and_dist.num_literals; - const num_distances = lit_and_dist.num_distances; - - var extra_bits: u32 = 0; - const ret = storedSizeFits(input); - const stored_size = ret.size; - const storable = ret.storable; - - if (storable) { - // We only bother calculating the costs of the extra bits required by - // the length of distance fields (which will be the same for both fixed - // and dynamic encoding), if we need to compare those two encodings - // against stored encoding. - var length_code: u16 = Token.length_codes_start + 8; - while (length_code < num_literals) : (length_code += 1) { - // First eight length codes have extra size = 0. - extra_bits += @as(u32, @intCast(self.literal_freq[length_code])) * - @as(u32, @intCast(Token.lengthExtraBits(length_code))); - } - var distance_code: u16 = 4; - while (distance_code < num_distances) : (distance_code += 1) { - // First four distance codes have extra size = 0. - extra_bits += @as(u32, @intCast(self.distance_freq[distance_code])) * - @as(u32, @intCast(Token.distanceExtraBits(distance_code))); - } - } - - // Figure out smallest code. - // Fixed Huffman baseline. - var literal_encoding = &self.fixed_literal_encoding; - var distance_encoding = &self.fixed_distance_encoding; - var size = self.fixedSize(extra_bits); - - // Dynamic Huffman? - var num_codegens: u32 = 0; - - // Generate codegen and codegenFrequencies, which indicates how to encode - // the literal_encoding and the distance_encoding. 
- self.generateCodegen( - num_literals, - num_distances, - &self.literal_encoding, - &self.distance_encoding, - ); - self.codegen_encoding.generate(self.codegen_freq[0..], 7); - const dynamic_size = self.dynamicSize( - &self.literal_encoding, - &self.distance_encoding, - extra_bits, - ); - const dyn_size = dynamic_size.size; - num_codegens = dynamic_size.num_codegens; - - if (dyn_size < size) { - size = dyn_size; - literal_encoding = &self.literal_encoding; - distance_encoding = &self.distance_encoding; - } - - // Stored bytes? - if (storable and stored_size < size) { - try self.storedBlock(input.?, eof); - return; - } - - // Huffman. - if (@intFromPtr(literal_encoding) == @intFromPtr(&self.fixed_literal_encoding)) { - try self.fixedHeader(eof); - } else { - try self.dynamicHeader(num_literals, num_distances, num_codegens, eof); - } - - // Write the tokens. - try self.writeTokens(tokens, &literal_encoding.codes, &distance_encoding.codes); - } - - pub fn storedBlock(self: *Self, input: []const u8, eof: bool) Error!void { - try self.storedHeader(input.len, eof); - try self.bit_writer.writeBytes(input); - } - - // writeBlockDynamic encodes a block using a dynamic Huffman table. - // This should be used if the symbols used have a disproportionate - // histogram distribution. - // If input is supplied and the compression savings are below 1/16th of the - // input size the block is stored. - fn dynamicBlock( - self: *Self, - tokens: []const Token, - eof: bool, - input: ?[]const u8, - ) Error!void { - const total_tokens = self.indexTokens(tokens); - const num_literals = total_tokens.num_literals; - const num_distances = total_tokens.num_distances; - - // Generate codegen and codegenFrequencies, which indicates how to encode - // the literal_encoding and the distance_encoding. 
- self.generateCodegen( - num_literals, - num_distances, - &self.literal_encoding, - &self.distance_encoding, - ); - self.codegen_encoding.generate(self.codegen_freq[0..], 7); - const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.distance_encoding, 0); - const size = dynamic_size.size; - const num_codegens = dynamic_size.num_codegens; - - // Store bytes, if we don't get a reasonable improvement. - - const stored_size = storedSizeFits(input); - const ssize = stored_size.size; - const storable = stored_size.storable; - if (storable and ssize < (size + (size >> 4))) { - try self.storedBlock(input.?, eof); - return; - } - - // Write Huffman table. - try self.dynamicHeader(num_literals, num_distances, num_codegens, eof); - - // Write the tokens. - try self.writeTokens(tokens, &self.literal_encoding.codes, &self.distance_encoding.codes); - } - - const TotalIndexedTokens = struct { - num_literals: u32, - num_distances: u32, - }; - - // Indexes a slice of tokens followed by an end_block_marker, and updates - // literal_freq and distance_freq, and generates literal_encoding - // and distance_encoding. - // The number of literal and distance tokens is returned. - fn indexTokens(self: *Self, tokens: []const Token) TotalIndexedTokens { - var num_literals: u32 = 0; - var num_distances: u32 = 0; - - for (self.literal_freq, 0..) |_, i| { - self.literal_freq[i] = 0; - } - for (self.distance_freq, 0..) 
|_, i| { - self.distance_freq[i] = 0; - } - - for (tokens) |t| { - if (t.kind == Token.Kind.literal) { - self.literal_freq[t.literal()] += 1; - continue; - } - self.literal_freq[t.lengthCode()] += 1; - self.distance_freq[t.distanceCode()] += 1; - } - // add end_block_marker token at the end - self.literal_freq[consts.end_block_marker] += 1; - - // get the number of literals - num_literals = @as(u32, @intCast(self.literal_freq.len)); - while (self.literal_freq[num_literals - 1] == 0) { - num_literals -= 1; - } - // get the number of distances - num_distances = @as(u32, @intCast(self.distance_freq.len)); - while (num_distances > 0 and self.distance_freq[num_distances - 1] == 0) { - num_distances -= 1; - } - if (num_distances == 0) { - // We haven't found a single match. If we want to go with the dynamic encoding, - // we should count at least one distance to be sure that the distance huffman tree could be encoded. - self.distance_freq[0] = 1; - num_distances = 1; - } - self.literal_encoding.generate(&self.literal_freq, 15); - self.distance_encoding.generate(&self.distance_freq, 15); - return TotalIndexedTokens{ - .num_literals = num_literals, - .num_distances = num_distances, - }; - } - - // Writes a slice of tokens to the output followed by and end_block_marker. - // codes for literal and distance encoding must be supplied. 
- fn writeTokens( - self: *Self, - tokens: []const Token, - le_codes: []hc.HuffCode, - oe_codes: []hc.HuffCode, - ) Error!void { - for (tokens) |t| { - if (t.kind == Token.Kind.literal) { - try self.writeCode(le_codes[t.literal()]); - continue; - } - - // Write the length - const le = t.lengthEncoding(); - try self.writeCode(le_codes[le.code]); - if (le.extra_bits > 0) { - try self.bit_writer.writeBits(le.extra_length, le.extra_bits); - } - - // Write the distance - const oe = t.distanceEncoding(); - try self.writeCode(oe_codes[oe.code]); - if (oe.extra_bits > 0) { - try self.bit_writer.writeBits(oe.extra_distance, oe.extra_bits); - } - } - // add end_block_marker at the end - try self.writeCode(le_codes[consts.end_block_marker]); - } - - // Encodes a block of bytes as either Huffman encoded literals or uncompressed bytes - // if the results only gains very little from compression. - pub fn huffmanBlock(self: *Self, input: []const u8, eof: bool) Error!void { - // Add everything as literals - histogram(input, &self.literal_freq); - - self.literal_freq[consts.end_block_marker] = 1; - - const num_literals = consts.end_block_marker + 1; - self.distance_freq[0] = 1; - const num_distances = 1; - - self.literal_encoding.generate(&self.literal_freq, 15); - - // Figure out smallest code. - // Always use dynamic Huffman or Store - var num_codegens: u32 = 0; - - // Generate codegen and codegenFrequencies, which indicates how to encode - // the literal_encoding and the distance_encoding. - self.generateCodegen( - num_literals, - num_distances, - &self.literal_encoding, - &self.huff_distance, - ); - self.codegen_encoding.generate(self.codegen_freq[0..], 7); - const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.huff_distance, 0); - const size = dynamic_size.size; - num_codegens = dynamic_size.num_codegens; - - // Store bytes, if we don't get a reasonable improvement. 
- const stored_size_ret = storedSizeFits(input); - const ssize = stored_size_ret.size; - const storable = stored_size_ret.storable; - - if (storable and ssize < (size + (size >> 4))) { - try self.storedBlock(input, eof); - return; - } - - // Huffman. - try self.dynamicHeader(num_literals, num_distances, num_codegens, eof); - const encoding = self.literal_encoding.codes[0..257]; - - for (input) |t| { - const c = encoding[t]; - try self.bit_writer.writeBits(c.code, c.len); - } - try self.writeCode(encoding[consts.end_block_marker]); - } - - // histogram accumulates a histogram of b in h. - fn histogram(b: []const u8, h: *[286]u16) void { - // Clear histogram - for (h, 0..) |_, i| { - h[i] = 0; - } - - var lh = h.*[0..256]; - for (b) |t| { - lh[t] += 1; - } - } - }; -} - -// tests -const expect = std.testing.expect; -const fmt = std.fmt; -const testing = std.testing; -const ArrayList = std.ArrayList; - -const TestCase = @import("testdata/block_writer.zig").TestCase; -const testCases = @import("testdata/block_writer.zig").testCases; - -// tests if the writeBlock encoding has changed. -test "write" { - inline for (0..testCases.len) |i| { - try testBlock(testCases[i], .write_block); - } -} - -// tests if the writeBlockDynamic encoding has changed. 
-test "dynamicBlock" { - inline for (0..testCases.len) |i| { - try testBlock(testCases[i], .write_dyn_block); - } -} - -test "huffmanBlock" { - inline for (0..testCases.len) |i| { - try testBlock(testCases[i], .write_huffman_block); - } - try testBlock(.{ - .tokens = &[_]Token{}, - .input = "huffman-rand-max.input", - .want = "huffman-rand-max.{s}.expect", - }, .write_huffman_block); -} - -const TestFn = enum { - write_block, - write_dyn_block, // write dynamic block - write_huffman_block, - - fn to_s(self: TestFn) []const u8 { - return switch (self) { - .write_block => "wb", - .write_dyn_block => "dyn", - .write_huffman_block => "huff", - }; - } - - fn write( - comptime self: TestFn, - bw: anytype, - tok: []const Token, - input: ?[]const u8, - final: bool, - ) !void { - switch (self) { - .write_block => try bw.write(tok, final, input), - .write_dyn_block => try bw.dynamicBlock(tok, final, input), - .write_huffman_block => try bw.huffmanBlock(input.?, final), - } - try bw.flush(); - } -}; - -// testBlock tests a block against its references -// -// size -// 64K [file-name].input - input non compressed file -// 8.1K [file-name].golden - -// 78 [file-name].dyn.expect - output with writeBlockDynamic -// 78 [file-name].wb.expect - output with writeBlock -// 8.1K [file-name].huff.expect - output with writeBlockHuff -// 78 [file-name].dyn.expect-noinput - output with writeBlockDynamic when input is null -// 78 [file-name].wb.expect-noinput - output with writeBlock when input is null -// -// wb - writeBlock -// dyn - writeBlockDynamic -// huff - writeBlockHuff -// -fn testBlock(comptime tc: TestCase, comptime tfn: TestFn) !void { - if (tc.input.len != 0 and tc.want.len != 0) { - const want_name = comptime fmt.comptimePrint(tc.want, .{tfn.to_s()}); - const input = @embedFile("testdata/block_writer/" ++ tc.input); - const want = @embedFile("testdata/block_writer/" ++ want_name); - try testWriteBlock(tfn, input, want, tc.tokens); - } - - if (tfn == .write_huffman_block) { - 
return; - } - - const want_name_no_input = comptime fmt.comptimePrint(tc.want_no_input, .{tfn.to_s()}); - const want = @embedFile("testdata/block_writer/" ++ want_name_no_input); - try testWriteBlock(tfn, null, want, tc.tokens); -} - -// Uses writer function `tfn` to write `tokens`, tests that we got `want` as output. -fn testWriteBlock(comptime tfn: TestFn, input: ?[]const u8, want: []const u8, tokens: []const Token) !void { - var buf = ArrayList(u8).init(testing.allocator); - var bw = blockWriter(buf.writer()); - try tfn.write(&bw, tokens, input, false); - var got = buf.items; - try testing.expectEqualSlices(u8, want, got); // expect writeBlock to yield expected result - try expect(got[0] & 0b0000_0001 == 0); // bfinal is not set - // - // Test if the writer produces the same output after reset. - buf.deinit(); - buf = ArrayList(u8).init(testing.allocator); - defer buf.deinit(); - bw.setWriter(buf.writer()); - - try tfn.write(&bw, tokens, input, true); - try bw.flush(); - got = buf.items; - - try expect(got[0] & 1 == 1); // bfinal is set - buf.items[0] &= 0b1111_1110; // remove bfinal bit, so we can run test slices - try testing.expectEqualSlices(u8, want, got); // expect writeBlock to yield expected result -} diff --git a/lib/std/compress/flate/deflate.zig b/lib/std/compress/flate/deflate.zig index e953ecb354..e01d9ad647 100644 --- a/lib/std/compress/flate/deflate.zig +++ b/lib/std/compress/flate/deflate.zig @@ -7,7 +7,7 @@ const print = std.debug.print; const Token = @import("Token.zig"); const consts = @import("consts.zig"); -const BlockWriter = @import("block_writer.zig").BlockWriter; +const BlockWriter = @import("BlockWriter.zig"); const Container = @import("container.zig").Container; const SlidingWindow = @import("SlidingWindow.zig"); const Lookup = @import("Lookup.zig"); @@ -53,24 +53,20 @@ const LevelArgs = struct { }; /// Compress plain data from reader into compressed stream written to writer. 
-pub fn compress(comptime container: Container, reader: anytype, writer: anytype, options: Options) !void { - var c = try compressor(container, writer, options); +pub fn compress( + comptime container: Container, + reader: *std.io.BufferedReader, + writer: *std.io.BufferedWriter, + options: Options, +) !void { + var c = try Compressor(container).init(writer, options); try c.compress(reader); try c.finish(); } -/// Create compressor for writer type. -pub fn compressor(comptime container: Container, writer: anytype, options: Options) !Compressor( - container, - @TypeOf(writer), -) { - return try Compressor(container, @TypeOf(writer)).init(writer, options); -} - /// Compressor type. -pub fn Compressor(comptime container: Container, comptime WriterType: type) type { - const TokenWriterType = BlockWriter(WriterType); - return Deflate(container, WriterType, TokenWriterType); +pub fn Compressor(comptime container: Container) type { + return Deflate(container, BlockWriter); } /// Default compression algorithm. Has two steps: tokenization and token diff --git a/lib/std/compress/flate/inflate.zig b/lib/std/compress/flate/inflate.zig index bb4d158aca..fcc73c9878 100644 --- a/lib/std/compress/flate/inflate.zig +++ b/lib/std/compress/flate/inflate.zig @@ -11,22 +11,22 @@ const codegen_order = @import("consts.zig").huffman.codegen_order; /// Decompresses deflate bit stream `reader` and writes uncompressed data to the /// `writer` stream. -pub fn decompress(comptime container: Container, reader: anytype, writer: anytype) !void { +pub fn decompress(comptime container: Container, reader: *std.io.BufferedReader, writer: *std.io.BufferedWriter) !void { var d = decompressor(container, reader); try d.decompress(writer); } /// Inflate decompressor for the reader type.
-pub fn decompressor(comptime container: Container, reader: anytype) Decompressor(container, @TypeOf(reader)) { - return Decompressor(container, @TypeOf(reader)).init(reader); +pub fn decompressor(comptime container: Container, reader: *std.io.BufferedReader) Decompressor(container) { + return Decompressor(container).init(reader); } -pub fn Decompressor(comptime container: Container, comptime ReaderType: type) type { +pub fn Decompressor(comptime container: Container) type { // zlib has 4 bytes footer, lookahead of 4 bytes ensures that we will not overshoot. // gzip has 8 bytes footer so we will not overshoot even with 8 bytes of lookahead. // For raw deflate there is always possibility of overshot so we use 8 bytes lookahead. const lookahead: type = if (container == .zlib) u32 else u64; - return Inflate(container, lookahead, ReaderType); + return Inflate(container, lookahead); } /// Inflate decompresses deflate bit stream. Reads compressed data from reader @@ -48,15 +48,14 @@ pub fn Decompressor(comptime container: Container, comptime ReaderType: type) ty /// * 64K for history (CircularBuffer) /// * ~10K huffman decoders (Literal and DistanceDecoder) /// -pub fn Inflate(comptime container: Container, comptime LookaheadType: type, comptime ReaderType: type) type { +pub fn Inflate(comptime container: Container, comptime LookaheadType: type) type { assert(LookaheadType == u32 or LookaheadType == u64); - const BitReaderType = BitReader(LookaheadType, ReaderType); + const BitReaderType = BitReader(LookaheadType); return struct { - //const BitReaderType = BitReader(ReaderType); const F = BitReaderType.flag; - bits: BitReaderType = .{}, + bits: BitReaderType, hist: CircularBuffer = .{}, // Hashes, produces checkusm, of uncompressed data for gzip/zlib footer. 
hasher: container.Hasher() = .{}, @@ -88,8 +87,8 @@ pub fn Inflate(comptime container: Container, comptime LookaheadType: type, comp InvalidDynamicBlockHeader, }; - pub fn init(rt: ReaderType) Self { - return .{ .bits = BitReaderType.init(rt) }; + pub fn init(bw: *std.io.BufferedReader) Self { + return .{ .bits = BitReaderType.init(bw) }; } fn blockHeader(self: *Self) !void { @@ -289,7 +288,7 @@ pub fn Inflate(comptime container: Container, comptime LookaheadType: type, comp } /// Replaces the inner reader with new reader. - pub fn setReader(self: *Self, new_reader: ReaderType) void { + pub fn setReader(self: *Self, new_reader: *std.io.BufferedReader) void { self.bits.forward_reader = new_reader; if (self.state == .end or self.state == .protocol_footer) { self.state = .protocol_header; @@ -298,7 +297,7 @@ pub fn Inflate(comptime container: Container, comptime LookaheadType: type, comp // Reads all compressed data from the internal reader and outputs plain // (uncompressed) data to the provided writer. 
- pub fn decompress(self: *Self, writer: anytype) !void { + pub fn decompress(self: *Self, writer: *std.io.BufferedWriter) !void { while (try self.next()) |buf| { try writer.writeAll(buf); } diff --git a/lib/std/compress/lzma/decode.zig b/lib/std/compress/lzma/decode.zig index ac45eb52b3..37a3281314 100644 --- a/lib/std/compress/lzma/decode.zig +++ b/lib/std/compress/lzma/decode.zig @@ -4,14 +4,174 @@ const math = std.math; const Allocator = std.mem.Allocator; pub const lzbuffer = @import("decode/lzbuffer.zig"); -pub const rangecoder = @import("decode/rangecoder.zig"); const LzCircularBuffer = lzbuffer.LzCircularBuffer; -const BitTree = rangecoder.BitTree; -const LenDecoder = rangecoder.LenDecoder; -const RangeDecoder = rangecoder.RangeDecoder; const Vec2D = @import("vec2d.zig").Vec2D; +pub const RangeDecoder = struct { + range: u32, + code: u32, + + pub fn init(br: *std.io.BufferedReader) !RangeDecoder { + const reserved = try br.takeByte(); + if (reserved != 0) { + return error.CorruptInput; + } + return .{ + .range = 0xFFFF_FFFF, + .code = try br.takeInt(u32, .big), + }; + } + + pub inline fn isFinished(self: RangeDecoder) bool { + return self.code == 0; + } + + inline fn normalize(self: *RangeDecoder, br: *std.io.BufferedReader) !void { + if (self.range < 0x0100_0000) { + self.range <<= 8; + self.code = (self.code << 8) ^ @as(u32, try br.takeByte()); + } + } + + inline fn getBit(self: *RangeDecoder, br: *std.io.BufferedReader) !bool { + self.range >>= 1; + + const bit = self.code >= self.range; + if (bit) + self.code -= self.range; + + try self.normalize(br); + return bit; + } + + pub fn get(self: *RangeDecoder, br: *std.io.BufferedReader, count: usize) !u32 { + var result: u32 = 0; + var i: usize = 0; + while (i < count) : (i += 1) + result = (result << 1) ^ @intFromBool(try self.getBit(br)); + return result; + } + + pub inline fn decodeBit(self: *RangeDecoder, br: *std.io.BufferedReader, prob: *u16, update: bool) !bool { + const bound = (self.range >> 11) *
prob.*; + + if (self.code < bound) { + if (update) + prob.* += (0x800 - prob.*) >> 5; + self.range = bound; + + try self.normalize(br); + return false; + } else { + if (update) + prob.* -= prob.* >> 5; + self.code -= bound; + self.range -= bound; + + try self.normalize(br); + return true; + } + } + + fn parseBitTree( + self: *RangeDecoder, + br: *std.io.BufferedReader, + num_bits: u5, + probs: []u16, + update: bool, + ) !u32 { + var tmp: u32 = 1; + var i: @TypeOf(num_bits) = 0; + while (i < num_bits) : (i += 1) { + const bit = try self.decodeBit(br, &probs[tmp], update); + tmp = (tmp << 1) ^ @intFromBool(bit); + } + return tmp - (@as(u32, 1) << num_bits); + } + + pub fn parseReverseBitTree( + self: *RangeDecoder, + br: *std.io.BufferedReader, + num_bits: u5, + probs: []u16, + offset: usize, + update: bool, + ) !u32 { + var result: u32 = 0; + var tmp: usize = 1; + var i: @TypeOf(num_bits) = 0; + while (i < num_bits) : (i += 1) { + const bit = @intFromBool(try self.decodeBit(br, &probs[offset + tmp], update)); + tmp = (tmp << 1) ^ bit; + result ^= @as(u32, bit) << i; + } + return result; + } +}; + +pub fn BitTree(comptime num_bits: usize) type { + return struct { + probs: [1 << num_bits]u16 = @splat(0x400), + + const Self = @This(); + + pub fn parse( + self: *Self, + br: *std.io.BufferedReader, + decoder: *RangeDecoder, + update: bool, + ) !u32 { + return decoder.parseBitTree(br, num_bits, &self.probs, update); + } + + pub fn parseReverse( + self: *Self, + br: *std.io.BufferedReader, + decoder: *RangeDecoder, + update: bool, + ) !u32 { + return decoder.parseReverseBitTree(br, num_bits, &self.probs, 0, update); + } + + pub fn reset(self: *Self) void { + @memset(&self.probs, 0x400); + } + }; +} + +pub const LenDecoder = struct { + choice: u16 = 0x400, + choice2: u16 = 0x400, + low_coder: [16]BitTree(3) = @splat(.{}), + mid_coder: [16]BitTree(3) = @splat(.{}), + high_coder: BitTree(8) = .{}, + + pub fn decode( + self: *LenDecoder, + br: *std.io.BufferedReader, + 
decoder: *RangeDecoder, + pos_state: usize, + update: bool, + ) !usize { + if (!try decoder.decodeBit(br, &self.choice, update)) { + return @as(usize, try self.low_coder[pos_state].parse(br, decoder, update)); + } else if (!try decoder.decodeBit(br, &self.choice2, update)) { + return @as(usize, try self.mid_coder[pos_state].parse(br, decoder, update)) + 8; + } else { + return @as(usize, try self.high_coder.parse(br, decoder, update)) + 16; + } + } + + pub fn reset(self: *LenDecoder) void { + self.choice = 0x400; + self.choice2 = 0x400; + for (&self.low_coder) |*t| t.reset(); + for (&self.mid_coder) |*t| t.reset(); + self.high_coder.reset(); + } +}; + pub const Options = struct { unpacked_size: UnpackedSize = .read_from_header, memlimit: ?usize = null, diff --git a/lib/std/compress/lzma/decode/rangecoder.zig b/lib/std/compress/lzma/decode/rangecoder.zig deleted file mode 100644 index 01930884d7..0000000000 --- a/lib/std/compress/lzma/decode/rangecoder.zig +++ /dev/null @@ -1,181 +0,0 @@ -const std = @import("../../../std.zig"); -const mem = std.mem; - -pub const RangeDecoder = struct { - range: u32, - code: u32, - - pub fn init(reader: anytype) !RangeDecoder { - const reserved = try reader.readByte(); - if (reserved != 0) { - return error.CorruptInput; - } - return RangeDecoder{ - .range = 0xFFFF_FFFF, - .code = try reader.readInt(u32, .big), - }; - } - - pub fn fromParts( - range: u32, - code: u32, - ) RangeDecoder { - return .{ - .range = range, - .code = code, - }; - } - - pub fn set(self: *RangeDecoder, range: u32, code: u32) void { - self.range = range; - self.code = code; - } - - pub inline fn isFinished(self: RangeDecoder) bool { - return self.code == 0; - } - - inline fn normalize(self: *RangeDecoder, reader: anytype) !void { - if (self.range < 0x0100_0000) { - self.range <<= 8; - self.code = (self.code << 8) ^ @as(u32, try reader.readByte()); - } - } - - inline fn getBit(self: *RangeDecoder, reader: anytype) !bool { - self.range >>= 1; - - const bit = 
self.code >= self.range; - if (bit) - self.code -= self.range; - - try self.normalize(reader); - return bit; - } - - pub fn get(self: *RangeDecoder, reader: anytype, count: usize) !u32 { - var result: u32 = 0; - var i: usize = 0; - while (i < count) : (i += 1) - result = (result << 1) ^ @intFromBool(try self.getBit(reader)); - return result; - } - - pub inline fn decodeBit(self: *RangeDecoder, reader: anytype, prob: *u16, update: bool) !bool { - const bound = (self.range >> 11) * prob.*; - - if (self.code < bound) { - if (update) - prob.* += (0x800 - prob.*) >> 5; - self.range = bound; - - try self.normalize(reader); - return false; - } else { - if (update) - prob.* -= prob.* >> 5; - self.code -= bound; - self.range -= bound; - - try self.normalize(reader); - return true; - } - } - - fn parseBitTree( - self: *RangeDecoder, - reader: anytype, - num_bits: u5, - probs: []u16, - update: bool, - ) !u32 { - var tmp: u32 = 1; - var i: @TypeOf(num_bits) = 0; - while (i < num_bits) : (i += 1) { - const bit = try self.decodeBit(reader, &probs[tmp], update); - tmp = (tmp << 1) ^ @intFromBool(bit); - } - return tmp - (@as(u32, 1) << num_bits); - } - - pub fn parseReverseBitTree( - self: *RangeDecoder, - reader: anytype, - num_bits: u5, - probs: []u16, - offset: usize, - update: bool, - ) !u32 { - var result: u32 = 0; - var tmp: usize = 1; - var i: @TypeOf(num_bits) = 0; - while (i < num_bits) : (i += 1) { - const bit = @intFromBool(try self.decodeBit(reader, &probs[offset + tmp], update)); - tmp = (tmp << 1) ^ bit; - result ^= @as(u32, bit) << i; - } - return result; - } -}; - -pub fn BitTree(comptime num_bits: usize) type { - return struct { - probs: [1 << num_bits]u16 = @splat(0x400), - - const Self = @This(); - - pub fn parse( - self: *Self, - reader: anytype, - decoder: *RangeDecoder, - update: bool, - ) !u32 { - return decoder.parseBitTree(reader, num_bits, &self.probs, update); - } - - pub fn parseReverse( - self: *Self, - reader: anytype, - decoder: *RangeDecoder, - 
update: bool, - ) !u32 { - return decoder.parseReverseBitTree(reader, num_bits, &self.probs, 0, update); - } - - pub fn reset(self: *Self) void { - @memset(&self.probs, 0x400); - } - }; -} - -pub const LenDecoder = struct { - choice: u16 = 0x400, - choice2: u16 = 0x400, - low_coder: [16]BitTree(3) = @splat(.{}), - mid_coder: [16]BitTree(3) = @splat(.{}), - high_coder: BitTree(8) = .{}, - - pub fn decode( - self: *LenDecoder, - reader: anytype, - decoder: *RangeDecoder, - pos_state: usize, - update: bool, - ) !usize { - if (!try decoder.decodeBit(reader, &self.choice, update)) { - return @as(usize, try self.low_coder[pos_state].parse(reader, decoder, update)); - } else if (!try decoder.decodeBit(reader, &self.choice2, update)) { - return @as(usize, try self.mid_coder[pos_state].parse(reader, decoder, update)) + 8; - } else { - return @as(usize, try self.high_coder.parse(reader, decoder, update)) + 16; - } - } - - pub fn reset(self: *LenDecoder) void { - self.choice = 0x400; - self.choice2 = 0x400; - for (&self.low_coder) |*t| t.reset(); - for (&self.mid_coder) |*t| t.reset(); - self.high_coder.reset(); - } -}; diff --git a/lib/std/compress/lzma2.zig b/lib/std/compress/lzma2.zig index f48c875ee8..4306e79214 100644 --- a/lib/std/compress/lzma2.zig +++ b/lib/std/compress/lzma2.zig @@ -11,7 +11,7 @@ pub fn decompress(allocator: Allocator, reader: *std.io.BufferedReader, writer: test { const expected = "Hello\nWorld!\n"; - const compressed = &[_]u8{ + const compressed = [_]u8{ 0x01, 0x00, 0x05, 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x02, 0x00, 0x06, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x0A, 0x00, }; diff --git a/lib/std/compress/lzma2/decode.zig b/lib/std/compress/lzma2/decode.zig index 938c2d437b..cf37ec36ff 100644 --- a/lib/std/compress/lzma2/decode.zig +++ b/lib/std/compress/lzma2/decode.zig @@ -5,7 +5,7 @@ const lzma = @import("../lzma.zig"); const DecoderState = lzma.decode.DecoderState; const LzAccumBuffer = lzma.decode.lzbuffer.LzAccumBuffer; const Properties = 
lzma.decode.Properties; -const RangeDecoder = lzma.decode.rangecoder.RangeDecoder; +const RangeDecoder = lzma.decode.RangeDecoder; pub const Decoder = struct { lzma_state: DecoderState, @@ -32,14 +32,14 @@ pub const Decoder = struct { pub fn decompress( self: *Decoder, allocator: Allocator, - reader: anytype, - writer: anytype, + reader: *std.io.BufferedReader, + writer: *std.io.BufferedWriter, ) !void { var accum = LzAccumBuffer.init(std.math.maxInt(usize)); defer accum.deinit(allocator); while (true) { - const status = try reader.readByte(); + const status = try reader.takeByte(); switch (status) { 0 => break, @@ -55,8 +55,8 @@ pub const Decoder = struct { fn parseLzma( self: *Decoder, allocator: Allocator, - reader: anytype, - writer: anytype, + br: *std.io.BufferedReader, + writer: *std.io.BufferedWriter, accum: *LzAccumBuffer, status: u8, ) !void { @@ -97,12 +97,12 @@ pub const Decoder = struct { const unpacked_size = blk: { var tmp: u64 = status & 0x1F; tmp <<= 16; - tmp |= try reader.readInt(u16, .big); + tmp |= try br.takeInt(u16, .big); break :blk tmp + 1; }; const packed_size = blk: { - const tmp: u17 = try reader.readInt(u16, .big); + const tmp: u17 = try br.takeInt(u16, .big); break :blk tmp + 1; }; @@ -114,7 +114,7 @@ pub const Decoder = struct { var new_props = self.lzma_state.lzma_props; if (reset.props) { - var props = try reader.readByte(); + var props = try br.takeByte(); if (props >= 225) { return error.CorruptInput; } @@ -137,10 +137,10 @@ pub const Decoder = struct { self.lzma_state.unpacked_size = unpacked_size + accum.len; - var counter = std.io.countingReader(reader); - const counter_reader = counter.reader(); + var counter: std.io.CountingReader = .{ .child_reader = br.reader() }; + var counter_reader = counter.reader().unbuffered(); - var rangecoder = try RangeDecoder.init(counter_reader); + var rangecoder = try RangeDecoder.init(&counter_reader); while (try self.lzma_state.process(allocator, counter_reader, writer, accum, &rangecoder) == 
.continue_) {} if (counter.bytes_read != packed_size) { @@ -150,12 +150,12 @@ pub const Decoder = struct { fn parseUncompressed( allocator: Allocator, - reader: anytype, - writer: anytype, + reader: *std.io.BufferedReader, + writer: *std.io.BufferedWriter, accum: *LzAccumBuffer, reset_dict: bool, ) !void { - const unpacked_size = @as(u17, try reader.readInt(u16, .big)) + 1; + const unpacked_size = @as(u17, try reader.takeInt(u16, .big)) + 1; if (reset_dict) { try accum.reset(writer); @@ -163,7 +163,7 @@ pub const Decoder = struct { var i: @TypeOf(unpacked_size) = 0; while (i < unpacked_size) : (i += 1) { - try accum.appendByte(allocator, try reader.readByte()); + try accum.appendByte(allocator, try reader.takeByte()); } } }; diff --git a/lib/std/compress/zlib.zig b/lib/std/compress/zlib.zig index 554f6f894b..8b0d1d6122 100644 --- a/lib/std/compress/zlib.zig +++ b/lib/std/compress/zlib.zig @@ -1,73 +1,44 @@ +const std = @import("../std.zig"); const deflate = @import("flate/deflate.zig"); const inflate = @import("flate/inflate.zig"); /// Decompress compressed data from reader and write plain data to the writer. -pub fn decompress(reader: anytype, writer: anytype) !void { +pub fn decompress(reader: *std.io.BufferedReader, writer: *std.io.BufferedWriter) !void { try inflate.decompress(.zlib, reader, writer); } -/// Decompressor type -pub fn Decompressor(comptime ReaderType: type) type { - return inflate.Decompressor(.zlib, ReaderType); -} - -/// Create Decompressor which will read compressed data from reader. -pub fn decompressor(reader: anytype) Decompressor(@TypeOf(reader)) { - return inflate.decompressor(.zlib, reader); -} +pub const Decompressor = inflate.Decompressor(.zlib); /// Compression level, trades between speed and compression size. pub const Options = deflate.Options; /// Compress plain data from reader and write compressed data to the writer. 
-pub fn compress(reader: anytype, writer: anytype, options: Options) !void { +pub fn compress(reader: *std.io.BufferedReader, writer: *std.io.BufferedWriter, options: Options) !void { try deflate.compress(.zlib, reader, writer, options); } -/// Compressor type -pub fn Compressor(comptime WriterType: type) type { - return deflate.Compressor(.zlib, WriterType); -} - -/// Create Compressor which outputs compressed data to the writer. -pub fn compressor(writer: anytype, options: Options) !Compressor(@TypeOf(writer)) { - return try deflate.compressor(.zlib, writer, options); -} +pub const Compressor = deflate.Compressor(.zlib); /// Huffman only compression. Without Lempel-Ziv match searching. Faster /// compression, less memory requirements but bigger compressed sizes. pub const huffman = struct { - pub fn compress(reader: anytype, writer: anytype) !void { + pub fn compress(reader: *std.io.BufferedReader, writer: *std.io.BufferedWriter) !void { try deflate.huffman.compress(.zlib, reader, writer); } - pub fn Compressor(comptime WriterType: type) type { - return deflate.huffman.Compressor(.zlib, WriterType); - } - - pub fn compressor(writer: anytype) !huffman.Compressor(@TypeOf(writer)) { - return deflate.huffman.compressor(.zlib, writer); - } + pub const Compressor = deflate.huffman.Compressor(.zlib); }; // No compression store only. Compressed size is slightly bigger than plain. 
pub const store = struct { - pub fn compress(reader: anytype, writer: anytype) !void { + pub fn compress(reader: *std.io.BufferedReader, writer: *std.io.BufferedWriter) !void { try deflate.store.compress(.zlib, reader, writer); } - pub fn Compressor(comptime WriterType: type) type { - return deflate.store.Compressor(.zlib, WriterType); - } - - pub fn compressor(writer: anytype) !store.Compressor(@TypeOf(writer)) { - return deflate.store.compressor(.zlib, writer); - } + pub const Compressor = deflate.store.Compressor(.zlib); }; test "should not overshoot" { - const std = @import("std"); - // Compressed zlib data with extra 4 bytes at the end. const data = [_]u8{ 0x78, 0x9c, 0x73, 0xce, 0x2f, 0xa8, 0x2c, 0xca, 0x4c, 0xcf, 0x28, 0x51, 0x08, 0xcf, 0xcc, 0xc9, @@ -79,7 +50,7 @@ test "should not overshoot" { var stream = std.io.fixedBufferStream(data[0..]); const reader = stream.reader(); - var dcp = decompressor(reader); + var dcp = Decompressor.init(reader); var out: [128]u8 = undefined; // Decompress diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 4b25546eb2..b4a287df9f 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -2241,7 +2241,7 @@ pub const ElfModule = struct { if (chdr.ch_type != .ZLIB) continue; const ch_size = chdr.ch_size; - var zlib_stream = std.compress.zlib.decompressor(&section_reader); + var zlib_stream: std.compress.zlib.Decompressor = .init(&section_reader); const decompressed_section = try gpa.alloc(u8, ch_size); errdefer gpa.free(decompressed_section); diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 8e54dfae19..97b861e516 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -2027,8 +2027,10 @@ pub const VirtualMachine = struct { var prev_row: Row = self.current_row; - var cie_stream: std.io.FixedBufferStream = .{ .buffer = cie.initial_instructions }; - var fde_stream: std.io.FixedBufferStream = .{ .buffer = fde.instructions }; + var cie_stream: std.io.BufferedReader 
= undefined; + cie_stream.initFixed(&cie.initial_instructions); + var fde_stream: std.io.BufferedReader = undefined; + fde_stream.initFixed(&fde.instructions); const streams: [2]*std.io.FixedBufferStream = .{ &cie_stream, &fde_stream }; for (&streams, 0..) |stream, i| { diff --git a/lib/std/fs/File.zig b/lib/std/fs/File.zig index 5692d39ce7..5056f30f8b 100644 --- a/lib/std/fs/File.zig +++ b/lib/std/fs/File.zig @@ -1613,11 +1613,11 @@ pub fn writer(file: File) std.io.Writer { const max_buffers_len = 16; pub fn reader_posRead( - context: *anyopaque, + context: ?*anyopaque, bw: *std.io.BufferedWriter, limit: std.io.Reader.Limit, offset: u64, -) anyerror!usize { +) std.io.Reader.Result { const file = opaqueToHandle(context); const len: std.io.Writer.Len = if (limit.unwrap()) |l| .init(l) else .entire_file; return writer.writeFile(bw, file, .init(offset), len, &.{}, 0); diff --git a/lib/std/io/bit_reader.zig b/lib/std/io/bit_reader.zig index 7823e47d43..22b25cabda 100644 --- a/lib/std/io/bit_reader.zig +++ b/lib/std/io/bit_reader.zig @@ -13,9 +13,9 @@ const std = @import("../std.zig"); // of the byte. /// Creates a bit reader which allows for reading bits from an underlying standard reader -pub fn BitReader(comptime endian: std.builtin.Endian, comptime Reader: type) type { +pub fn BitReader(comptime endian: std.builtin.Endian) type { return struct { - reader: Reader, + reader: *std.io.BufferedReader, bits: u8 = 0, count: u4 = 0, @@ -157,7 +157,7 @@ pub fn BitReader(comptime endian: std.builtin.Endian, comptime Reader: type) typ }; } -pub fn bitReader(comptime endian: std.builtin.Endian, reader: anytype) BitReader(endian, @TypeOf(reader)) { +pub fn bitReader(comptime endian: std.builtin.Endian, reader: *std.io.BufferedReader) BitReader(endian) { return .{ .reader = reader }; }