mirror of
https://github.com/ziglang/zig.git
synced 2026-02-12 20:37:54 +00:00
update some more std lib API to new Reader/Writer
std.compress needs an audit, I see some problems
This commit is contained in:
parent
31e0b5c3c7
commit
6c48aad991
93
lib/std/compress/flate/BitWriter.zig
Normal file
93
lib/std/compress/flate/BitWriter.zig
Normal file
@ -0,0 +1,93 @@
|
||||
//! Bit writer for use in deflate (compression).
//!
//! Has internal bits buffer of 64 bits and internal bytes buffer of 248 bytes.
//! When we accumulate 48 bits 6 bytes are moved to the bytes buffer. When we
//! accumulate 240 bytes they are flushed to the underlying inner_writer.

const std = @import("std");
const assert = std.debug.assert;
const BitWriter = @This();

// buffer_flush_size indicates the buffer size
// after which bytes are flushed to the writer.
// Should preferably be a multiple of 6, since
// we accumulate 6 bytes between writes to the buffer.
const buffer_flush_size = 240;

// buffer_size is the actual output byte buffer size.
// It must have additional headroom for a flush
// which can contain up to 8 bytes.
const buffer_size = buffer_flush_size + 8;

inner_writer: *std.io.BufferedWriter,

// Data waiting to be written is bytes[0 .. nbytes]
// and then the low nbits of bits. Data is always written
// sequentially into the bytes array.
bits: u64 = 0,
nbits: u32 = 0, // number of bits buffered in `bits`
bytes: [buffer_size]u8 = undefined,
nbytes: u32 = 0, // number of bytes buffered in `bytes`

const Self = @This();

pub fn init(bw: *std.io.BufferedWriter) Self {
    return .{ .inner_writer = bw };
}

/// Redirect subsequent output to `new_writer`.
/// Already-buffered bits/bytes are NOT flushed first; call `flush`
/// beforehand if they must go to the old writer.
pub fn setWriter(self: *Self, new_writer: *std.io.BufferedWriter) void {
    self.inner_writer = new_writer;
}

/// Drain both internal buffers to the inner writer. Trailing incomplete
/// bits are emitted zero-padded up to a byte boundary.
pub fn flush(self: *Self) anyerror!void {
    var n = self.nbytes;
    while (self.nbits != 0) {
        self.bytes[n] = @as(u8, @truncate(self.bits));
        self.bits >>= 8;
        if (self.nbits > 8) { // Avoid underflow
            self.nbits -= 8;
        } else {
            self.nbits = 0;
        }
        n += 1;
    }
    self.bits = 0;
    // writeAll, not write: a bare `write` may accept fewer bytes than
    // given, which would silently drop compressed data here.
    // NOTE(review): assumes the buffered writer exposes writeAll — confirm
    // against the new std Reader/Writer API this commit is migrating to.
    try self.inner_writer.writeAll(self.bytes[0..n]);
    self.nbytes = 0;
}

/// Append the low `nb` bits of `b` to the bit stream.
/// The u6 cast on `nbits` relies on the invariant that fewer than 48 bits
/// are buffered on entry, which the drain below maintains.
pub fn writeBits(self: *Self, b: u32, nb: u32) anyerror!void {
    self.bits |= @as(u64, @intCast(b)) << @as(u6, @intCast(self.nbits));
    self.nbits += nb;
    if (self.nbits < 48)
        return;

    var n = self.nbytes;
    // Write all 8 bytes of the bit buffer (buffer_size leaves 8 bytes of
    // headroom for this), but only advance over the 6 complete ones.
    std.mem.writeInt(u64, self.bytes[n..][0..8], self.bits, .little);
    n += 6;
    if (n >= buffer_flush_size) {
        try self.inner_writer.writeAll(self.bytes[0..n]);
        n = 0;
    }
    self.nbytes = n;
    self.bits >>= 48;
    self.nbits -= 48;
}

/// Write raw bytes directly. The bit stream must be at a byte boundary;
/// otherwise error.UnfinishedBits is returned.
pub fn writeBytes(self: *Self, bytes: []const u8) anyerror!void {
    var n = self.nbytes;
    if (self.nbits & 7 != 0) {
        return error.UnfinishedBits;
    }
    while (self.nbits != 0) {
        self.bytes[n] = @as(u8, @truncate(self.bits));
        self.bits >>= 8;
        self.nbits -= 8;
        n += 1;
    }
    if (n != 0) {
        try self.inner_writer.writeAll(self.bytes[0..n]);
    }
    self.nbytes = 0;
    try self.inner_writer.writeAll(bytes);
}
|
||||
696
lib/std/compress/flate/BlockWriter.zig
Normal file
696
lib/std/compress/flate/BlockWriter.zig
Normal file
@ -0,0 +1,696 @@
|
||||
//! Accepts list of tokens, decides what is best block type to write. What block
//! type will provide best compression. Writes header and body of the block.
const std = @import("std");
const io = std.io;
const assert = std.debug.assert;

const hc = @import("huffman_encoder.zig");
const consts = @import("consts.zig").huffman;
const Token = @import("Token.zig");
const BitWriter = @import("BitWriter.zig");
const BlockWriter = @This();

const codegen_order = consts.codegen_order;
const end_code_mark = 255;
const Self = @This();

bit_writer: BitWriter,

// Frequency counters and scratch buffers used while choosing an encoding.
codegen_freq: [consts.codegen_code_count]u16 = undefined,
literal_freq: [consts.max_num_lit]u16 = undefined,
distance_freq: [consts.distance_code_count]u16 = undefined,
codegen: [consts.max_num_lit + consts.distance_code_count + 1]u8 = undefined,
literal_encoding: hc.LiteralEncoder = .{},
distance_encoding: hc.DistanceEncoder = .{},
codegen_encoding: hc.CodegenEncoder = .{},
fixed_literal_encoding: hc.LiteralEncoder,
fixed_distance_encoding: hc.DistanceEncoder,
huff_distance: hc.DistanceEncoder,
/// Creates a block writer that emits its output through `writer`.
pub fn init(writer: *std.io.BufferedWriter) Self {
    return .{
        .bit_writer = BitWriter.init(writer),
        .fixed_literal_encoding = hc.fixedLiteralEncoder(),
        .fixed_distance_encoding = hc.fixedDistanceEncoder(),
        .huff_distance = hc.huffmanDistanceEncoder(),
    };
}
|
||||
|
||||
/// Flush internal bit buffer to the writer.
/// Should be called only when the bit stream is at a byte boundary:
/// after the final block (where the last byte may be incomplete), or
/// after a stored block (which is byte-aligned, padded after its first
/// 3 header bits).
pub fn flush(self: *Self) anyerror!void {
    try self.bit_writer.flush();
}
|
||||
|
||||
/// Redirect subsequent output to `new_writer`.
pub fn setWriter(self: *Self, new_writer: *std.io.BufferedWriter) void {
    self.bit_writer.setWriter(new_writer);
}
|
||||
|
||||
// Emit a single Huffman code (its bits and bit length) to the bit stream.
fn writeCode(self: *Self, c: hc.HuffCode) anyerror!void {
    try self.bit_writer.writeBits(c.code, c.len);
}
|
||||
|
||||
// RFC 1951 3.2.7 specifies a special run-length encoding for specifying
// the literal and distance lengths arrays (which are concatenated into a single
// array). This method generates that run-length encoding.
//
// The result is written into the codegen array, and the frequencies
// of each code are written into the codegen_freq array.
// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
// information. Code bad_code is an end marker.
//
// num_literals: The number of literals in literal_encoding
// num_distances: The number of distances in distance_encoding
// lit_enc: The literal encoder to use
// dist_enc: The distance encoder to use
fn generateCodegen(
    self: *Self,
    num_literals: u32,
    num_distances: u32,
    lit_enc: *hc.LiteralEncoder,
    dist_enc: *hc.DistanceEncoder,
) void {
    @memset(self.codegen_freq[0..], 0);

    // codegen serves double duty: first it holds a copy of the concatenated
    // code lengths, then the run-length-encoded result is written over it.
    // This is safe because the output never outruns the input consumed so far.
    const codegen = &self.codegen;
    for (codegen[0..num_literals], lit_enc.codes[0..num_literals]) |*dst, c| {
        dst.* = @as(u8, @intCast(c.len));
    }
    for (codegen[num_literals..][0..num_distances], dist_enc.codes[0..num_distances]) |*dst, c| {
        dst.* = @as(u8, @intCast(c.len));
    }
    codegen[num_literals + num_distances] = end_code_mark;

    var size = codegen[0];
    var count: i32 = 1;
    var out_index: u32 = 0;
    var in_index: u32 = 1;
    while (size != end_code_mark) : (in_index += 1) {
        // INVARIANT: `count` copies of `size` have been seen but not yet
        // had output generated for them.
        const next_size = codegen[in_index];
        if (next_size == size) {
            count += 1;
            continue;
        }
        if (size != 0) {
            // Emit one verbatim copy, then "repeat previous" (code 16) runs.
            codegen[out_index] = size;
            out_index += 1;
            self.codegen_freq[size] += 1;
            count -= 1;
            while (count >= 3) {
                const n: i32 = @min(6, count);
                codegen[out_index] = 16;
                out_index += 1;
                codegen[out_index] = @as(u8, @intCast(n - 3));
                out_index += 1;
                self.codegen_freq[16] += 1;
                count -= n;
            }
        } else {
            // Zero runs: code 18 covers lengths 11-138, code 17 covers 3-10.
            while (count >= 11) {
                const n: i32 = @min(138, count);
                codegen[out_index] = 18;
                out_index += 1;
                codegen[out_index] = @as(u8, @intCast(n - 11));
                out_index += 1;
                self.codegen_freq[18] += 1;
                count -= n;
            }
            if (count >= 3) {
                // 3 <= count <= 10
                codegen[out_index] = 17;
                out_index += 1;
                codegen[out_index] = @as(u8, @intCast(count - 3));
                out_index += 1;
                self.codegen_freq[17] += 1;
                count = 0;
            }
        }
        // Whatever remains (shorter than the minimum run) is emitted verbatim.
        count -= 1;
        while (count >= 0) : (count -= 1) {
            codegen[out_index] = size;
            out_index += 1;
            self.codegen_freq[size] += 1;
        }
        // Re-establish the invariant for the next iteration.
        size = next_size;
        count = 1;
    }
    // Marker indicating the end of the codegen.
    codegen[out_index] = end_code_mark;
}
|
||||
|
||||
const DynamicSize = struct {
    size: u32,
    num_codegens: u32,
};

// dynamicSize returns the size of dynamically encoded data in bits.
fn dynamicSize(
    self: *Self,
    lit_enc: *hc.LiteralEncoder, // literal encoder
    dist_enc: *hc.DistanceEncoder, // distance encoder
    extra_bits: u32,
) DynamicSize {
    // Trim trailing unused codegen codes; at least 4 must remain.
    var num_codegens = self.codegen_freq.len;
    while (num_codegens > 4 and self.codegen_freq[codegen_order[num_codegens - 1]] == 0) {
        num_codegens -= 1;
    }
    // Header: 3 (block) + 5 (HLIT) + 5 (HDIST) + 4 (HCLEN) + 3 bits per
    // codegen code length, plus the RLE-encoded lengths themselves and the
    // extra repeat bits for codes 16/17/18.
    const header = 3 + 5 + 5 + 4 + (3 * num_codegens) +
        self.codegen_encoding.bitLength(self.codegen_freq[0..]) +
        self.codegen_freq[16] * 2 +
        self.codegen_freq[17] * 3 +
        self.codegen_freq[18] * 7;
    const size = header +
        lit_enc.bitLength(&self.literal_freq) +
        dist_enc.bitLength(&self.distance_freq) +
        extra_bits;

    return .{
        .size = @as(u32, @intCast(size)),
        .num_codegens = @as(u32, @intCast(num_codegens)),
    };
}
|
||||
|
||||
// fixedSize returns the size of fixed-Huffman encoded data in bits
// (3-bit block header plus the coded literals, distances, and extra bits).
fn fixedSize(self: *Self, extra_bits: u32) u32 {
    const header_bits = 3;
    return header_bits +
        self.fixed_literal_encoding.bitLength(&self.literal_freq) +
        self.fixed_distance_encoding.bitLength(&self.distance_freq) +
        extra_bits;
}
|
||||
|
||||
const StoredSize = struct {
    size: u32,
    storable: bool,
};

// storedSizeFits calculates the stored size, including header.
// Returns the size in bits and whether the data fits in a single
// stored block.
fn storedSizeFits(in: ?[]const u8) StoredSize {
    const data = in orelse return .{ .size = 0, .storable = false };
    if (data.len > consts.max_store_block_size)
        return .{ .size = 0, .storable = false };
    // 5 header bytes (3-bit header padded to a byte + LEN + NLEN) plus payload.
    return .{ .size = @as(u32, @intCast((data.len + 5) * 8)), .storable = true };
}
|
||||
|
||||
// Write the header of a dynamic Huffman block to the output stream.
//
// num_literals: The number of literals specified in codegen
// num_distances: The number of distances specified in codegen
// num_codegens: The number of codegens used in codegen
// eof: Is it the end-of-file? (end of stream)
fn dynamicHeader(
    self: *Self,
    num_literals: u32,
    num_distances: u32,
    num_codegens: u32,
    eof: bool,
) anyerror!void {
    // 3-bit block header: BFINAL bit plus BTYPE=10 (dynamic Huffman).
    const first_bits: u32 = if (eof) 5 else 4;
    try self.bit_writer.writeBits(first_bits, 3);
    try self.bit_writer.writeBits(num_literals - 257, 5);
    try self.bit_writer.writeBits(num_distances - 1, 5);
    try self.bit_writer.writeBits(num_codegens - 4, 4);

    // Code lengths of the codegen alphabet, in the RFC-prescribed order.
    var i: u32 = 0;
    while (i < num_codegens) : (i += 1) {
        const value = self.codegen_encoding.codes[codegen_order[i]].len;
        try self.bit_writer.writeBits(value, 3);
    }

    // The run-length-encoded code lengths themselves.
    i = 0;
    while (true) {
        const code_word: u32 = @as(u32, @intCast(self.codegen[i]));
        i += 1;
        if (code_word == end_code_mark) break;
        try self.writeCode(self.codegen_encoding.codes[@as(u32, @intCast(code_word))]);

        // Codes 16/17/18 carry an extra repeat count of 2/3/7 bits.
        const extra_len: u32 = switch (code_word) {
            16 => 2,
            17 => 3,
            18 => 7,
            else => 0,
        };
        if (extra_len != 0) {
            try self.bit_writer.writeBits(self.codegen[i], extra_len);
            i += 1;
        }
    }
}
|
||||
|
||||
// Write the header of a stored block: 3-bit block header, flush to the
// byte boundary, then LEN and its one's complement NLEN (RFC 1951 3.2.4).
fn storedHeader(self: *Self, length: usize, eof: bool) anyerror!void {
    assert(length <= 65535);
    const flag: u32 = if (eof) 1 else 0; // BFINAL bit; BTYPE=00 (stored)
    try self.bit_writer.writeBits(flag, 3);
    try self.flush();
    const l: u16 = @intCast(length);
    try self.bit_writer.writeBits(l, 16);
    try self.bit_writer.writeBits(~l, 16);
}
|
||||
|
||||
// Write the 3-bit header of a fixed Huffman block:
// BFINAL bit plus BTYPE=01.
fn fixedHeader(self: *Self, eof: bool) anyerror!void {
    const value: u32 = if (eof) 3 else 2;
    try self.bit_writer.writeBits(value, 3);
}
|
||||
|
||||
// Write a block of tokens with the smallest encoding. Will choose block type.
// The original input can be supplied, and if the Huffman encoded data
// is larger than the original bytes, the data will be written as a
// stored block.
// If the input is null, the tokens will always be Huffman encoded.
pub fn write(self: *Self, tokens: []const Token, eof: bool, input: ?[]const u8) anyerror!void {
    const indexed = self.indexTokens(tokens);
    const num_literals = indexed.num_literals;
    const num_distances = indexed.num_distances;

    var extra_bits: u32 = 0;
    const stored = storedSizeFits(input);

    if (stored.storable) {
        // The extra-bit cost of length/distance fields is identical for fixed
        // and dynamic encoding, so it only matters when comparing either of
        // them against stored encoding.
        var length_code: u16 = Token.length_codes_start + 8;
        while (length_code < num_literals) : (length_code += 1) {
            // First eight length codes have extra size = 0.
            extra_bits += @as(u32, @intCast(self.literal_freq[length_code])) *
                @as(u32, @intCast(Token.lengthExtraBits(length_code)));
        }
        var distance_code: u16 = 4;
        while (distance_code < num_distances) : (distance_code += 1) {
            // First four distance codes have extra size = 0.
            extra_bits += @as(u32, @intCast(self.distance_freq[distance_code])) *
                @as(u32, @intCast(Token.distanceExtraBits(distance_code)));
        }
    }

    // Figure out smallest code. Fixed Huffman is the baseline.
    var literal_encoding = &self.fixed_literal_encoding;
    var distance_encoding = &self.fixed_distance_encoding;
    var size = self.fixedSize(extra_bits);

    // Dynamic Huffman candidate: generate codegen and its frequencies, which
    // describe how to encode literal_encoding and distance_encoding.
    self.generateCodegen(
        num_literals,
        num_distances,
        &self.literal_encoding,
        &self.distance_encoding,
    );
    self.codegen_encoding.generate(self.codegen_freq[0..], 7);
    const dynamic_size = self.dynamicSize(
        &self.literal_encoding,
        &self.distance_encoding,
        extra_bits,
    );
    const num_codegens = dynamic_size.num_codegens;

    if (dynamic_size.size < size) {
        size = dynamic_size.size;
        literal_encoding = &self.literal_encoding;
        distance_encoding = &self.distance_encoding;
    }

    // Stored bytes beat both Huffman variants?
    if (stored.storable and stored.size < size) {
        try self.storedBlock(input.?, eof);
        return;
    }

    // Huffman: pointer identity tells us which variant won above.
    if (@intFromPtr(literal_encoding) == @intFromPtr(&self.fixed_literal_encoding)) {
        try self.fixedHeader(eof);
    } else {
        try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);
    }

    // Write the tokens.
    try self.writeTokens(tokens, &literal_encoding.codes, &distance_encoding.codes);
}
|
||||
|
||||
// Write `input` as a stored (uncompressed) block: header, then raw bytes.
pub fn storedBlock(self: *Self, input: []const u8, eof: bool) anyerror!void {
    try self.storedHeader(input.len, eof);
    try self.bit_writer.writeBytes(input);
}
|
||||
|
||||
// Encodes a block using a dynamic Huffman table.
// This should be used if the symbols used have a disproportionate
// histogram distribution.
// If input is supplied and the compression savings are below 1/16th of the
// input size the block is stored.
fn dynamicBlock(
    self: *Self,
    tokens: []const Token,
    eof: bool,
    input: ?[]const u8,
) anyerror!void {
    const indexed = self.indexTokens(tokens);
    const num_literals = indexed.num_literals;
    const num_distances = indexed.num_distances;

    // Generate codegen and its frequencies, which describe how to encode
    // literal_encoding and distance_encoding.
    self.generateCodegen(
        num_literals,
        num_distances,
        &self.literal_encoding,
        &self.distance_encoding,
    );
    self.codegen_encoding.generate(self.codegen_freq[0..], 7);
    const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.distance_encoding, 0);
    const size = dynamic_size.size;
    const num_codegens = dynamic_size.num_codegens;

    // Store bytes if we don't gain at least ~1/16th over stored encoding.
    const stored = storedSizeFits(input);
    if (stored.storable and stored.size < (size + (size >> 4))) {
        try self.storedBlock(input.?, eof);
        return;
    }

    // Write Huffman table.
    try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);

    // Write the tokens.
    try self.writeTokens(tokens, &self.literal_encoding.codes, &self.distance_encoding.codes);
}
|
||||
|
||||
const TotalIndexedTokens = struct {
    num_literals: u32,
    num_distances: u32,
};

// Indexes a slice of tokens followed by an end_block_marker, and updates
// literal_freq and distance_freq, and generates literal_encoding
// and distance_encoding.
// The number of literal and distance tokens is returned.
fn indexTokens(self: *Self, tokens: []const Token) TotalIndexedTokens {
    @memset(self.literal_freq[0..], 0);
    @memset(self.distance_freq[0..], 0);

    for (tokens) |t| {
        if (t.kind == Token.Kind.literal) {
            self.literal_freq[t.literal()] += 1;
            continue;
        }
        self.literal_freq[t.lengthCode()] += 1;
        self.distance_freq[t.distanceCode()] += 1;
    }
    // add end_block_marker token at the end
    self.literal_freq[consts.end_block_marker] += 1;

    // Trim trailing zero frequencies to get the literal count.
    var num_literals: u32 = @as(u32, @intCast(self.literal_freq.len));
    while (self.literal_freq[num_literals - 1] == 0) {
        num_literals -= 1;
    }
    // Same for distances.
    var num_distances: u32 = @as(u32, @intCast(self.distance_freq.len));
    while (num_distances > 0 and self.distance_freq[num_distances - 1] == 0) {
        num_distances -= 1;
    }
    if (num_distances == 0) {
        // We haven't found a single match. If we want to go with the dynamic
        // encoding, count at least one distance so the distance Huffman tree
        // can still be encoded.
        self.distance_freq[0] = 1;
        num_distances = 1;
    }
    self.literal_encoding.generate(&self.literal_freq, 15);
    self.distance_encoding.generate(&self.distance_freq, 15);
    return .{
        .num_literals = num_literals,
        .num_distances = num_distances,
    };
}
|
||||
|
||||
// Writes a slice of tokens to the output followed by an end_block_marker.
// Codes for literal and distance encoding must be supplied.
fn writeTokens(
    self: *Self,
    tokens: []const Token,
    le_codes: []hc.HuffCode,
    oe_codes: []hc.HuffCode,
) anyerror!void {
    for (tokens) |t| {
        if (t.kind == Token.Kind.literal) {
            try self.writeCode(le_codes[t.literal()]);
            continue;
        }

        // Length code plus its extra bits.
        const le = t.lengthEncoding();
        try self.writeCode(le_codes[le.code]);
        if (le.extra_bits > 0) {
            try self.bit_writer.writeBits(le.extra_length, le.extra_bits);
        }

        // Distance code plus its extra bits.
        const oe = t.distanceEncoding();
        try self.writeCode(oe_codes[oe.code]);
        if (oe.extra_bits > 0) {
            try self.bit_writer.writeBits(oe.extra_distance, oe.extra_bits);
        }
    }
    // add end_block_marker at the end
    try self.writeCode(le_codes[consts.end_block_marker]);
}
|
||||
|
||||
// Encodes a block of bytes as either Huffman encoded literals or uncompressed
// bytes if the result gains very little from compression.
pub fn huffmanBlock(self: *Self, input: []const u8, eof: bool) anyerror!void {
    // Add everything as literals.
    histogram(input, &self.literal_freq);

    self.literal_freq[consts.end_block_marker] = 1;

    const num_literals = consts.end_block_marker + 1;
    self.distance_freq[0] = 1;
    const num_distances = 1;

    self.literal_encoding.generate(&self.literal_freq, 15);

    // Figure out smallest code: always dynamic Huffman or stored here.
    // Generate codegen and its frequencies, which describe how to encode
    // literal_encoding and the (trivial) distance encoding.
    self.generateCodegen(
        num_literals,
        num_distances,
        &self.literal_encoding,
        &self.huff_distance,
    );
    self.codegen_encoding.generate(self.codegen_freq[0..], 7);
    const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.huff_distance, 0);
    const size = dynamic_size.size;
    const num_codegens = dynamic_size.num_codegens;

    // Store bytes if we don't gain at least ~1/16th over stored encoding.
    const stored = storedSizeFits(input);
    if (stored.storable and stored.size < (size + (size >> 4))) {
        try self.storedBlock(input, eof);
        return;
    }

    // Huffman.
    try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);
    const encoding = self.literal_encoding.codes[0..257];

    for (input) |t| {
        const c = encoding[t];
        try self.bit_writer.writeBits(c.code, c.len);
    }
    try self.writeCode(encoding[consts.end_block_marker]);
}
|
||||
|
||||
// histogram accumulates a histogram of b in h.
// Only the first 256 buckets can ever be incremented (b holds bytes);
// the remaining buckets are just cleared.
fn histogram(b: []const u8, h: *[286]u16) void {
    @memset(h, 0);
    const low = h[0..256];
    for (b) |byte| {
        low[byte] += 1;
    }
}
|
||||
|
||||
// tests
const expect = std.testing.expect;
const fmt = std.fmt;
const testing = std.testing;
const ArrayList = std.ArrayList;

const TestCase = @import("testdata/block_writer.zig").TestCase;
const testCases = @import("testdata/block_writer.zig").testCases;

// Tests that the `write` (auto block type) encoding has not changed.
test "write" {
    inline for (0..testCases.len) |case_idx| {
        try testBlock(testCases[case_idx], .write_block);
    }
}

// Tests that the dynamicBlock encoding has not changed.
test "dynamicBlock" {
    inline for (0..testCases.len) |case_idx| {
        try testBlock(testCases[case_idx], .write_dyn_block);
    }
}

test "huffmanBlock" {
    inline for (0..testCases.len) |case_idx| {
        try testBlock(testCases[case_idx], .write_huffman_block);
    }
    try testBlock(.{
        .tokens = &[_]Token{},
        .input = "huffman-rand-max.input",
        .want = "huffman-rand-max.{s}.expect",
    }, .write_huffman_block);
}
|
||||
|
||||
// Selects which BlockWriter entry point a test exercises, and maps it to
// the suffix used in the expectation file names.
const TestFn = enum {
    write_block,
    write_dyn_block, // write dynamic block
    write_huffman_block,

    fn to_s(self: TestFn) []const u8 {
        return switch (self) {
            .write_block => "wb",
            .write_dyn_block => "dyn",
            .write_huffman_block => "huff",
        };
    }

    fn write(
        comptime self: TestFn,
        bw: anytype,
        tok: []const Token,
        input: ?[]const u8,
        final: bool,
    ) !void {
        switch (self) {
            .write_block => try bw.write(tok, final, input),
            .write_dyn_block => try bw.dynamicBlock(tok, final, input),
            .write_huffman_block => try bw.huffmanBlock(input.?, final),
        }
        try bw.flush();
    }
};
|
||||
|
||||
// testBlock tests a block against its references
//
// size
//   64K  [file-name].input              - input non compressed file
//  8.1K  [file-name].golden             -
//    78  [file-name].dyn.expect         - output with writeBlockDynamic
//    78  [file-name].wb.expect          - output with writeBlock
//  8.1K  [file-name].huff.expect        - output with writeBlockHuff
//    78  [file-name].dyn.expect-noinput - output with writeBlockDynamic when input is null
//    78  [file-name].wb.expect-noinput  - output with writeBlock when input is null
//
//  wb   - writeBlock
//  dyn  - writeBlockDynamic
//  huff - writeBlockHuff
//
fn testBlock(comptime tc: TestCase, comptime tfn: TestFn) !void {
    if (tc.input.len != 0 and tc.want.len != 0) {
        const want_name = comptime fmt.comptimePrint(tc.want, .{tfn.to_s()});
        const input = @embedFile("testdata/block_writer/" ++ tc.input);
        const want = @embedFile("testdata/block_writer/" ++ want_name);
        try testWriteBlock(tfn, input, want, tc.tokens);
    }

    // huffmanBlock always needs input; there is no "noinput" expectation.
    if (tfn == .write_huffman_block) {
        return;
    }

    const want_name_no_input = comptime fmt.comptimePrint(tc.want_no_input, .{tfn.to_s()});
    const want = @embedFile("testdata/block_writer/" ++ want_name_no_input);
    try testWriteBlock(tfn, null, want, tc.tokens);
}
|
||||
|
||||
// Uses writer function `tfn` to write `tokens`, tests that we got `want` as output.
//
// Runs twice: once with bfinal unset, then again after a reset with bfinal
// set, to verify that the writer produces identical output after reset.
fn testWriteBlock(comptime tfn: TestFn, input: ?[]const u8, want: []const u8, tokens: []const Token) !void {
    var buf = ArrayList(u8).init(testing.allocator);
    // Single defer covers all exit paths. The previous version manually
    // deinit-ed and re-init-ed a second list, which leaked the first one
    // whenever an expectation below failed before the manual deinit.
    defer buf.deinit();

    var bw: BlockWriter = .init(buf.writer());
    try tfn.write(&bw, tokens, input, false);
    var got = buf.items;
    try testing.expectEqualSlices(u8, want, got); // expect writeBlock to yield expected result
    try expect(got[0] & 0b0000_0001 == 0); // bfinal is not set

    // Test if the writer produces the same output after reset.
    buf.clearRetainingCapacity();
    bw.setWriter(buf.writer());

    try tfn.write(&bw, tokens, input, true);
    try bw.flush();
    got = buf.items;

    try expect(got[0] & 1 == 1); // bfinal is set
    buf.items[0] &= 0b1111_1110; // remove bfinal bit, so we can compare slices
    try testing.expectEqualSlices(u8, want, got); // expect writeBlock to yield expected result
}
|
||||
@ -1,99 +0,0 @@
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
|
||||
/// Bit writer for use in deflate (compression).
|
||||
///
|
||||
/// Has internal bits buffer of 64 bits and internal bytes buffer of 248 bytes.
|
||||
/// When we accumulate 48 bits 6 bytes are moved to the bytes buffer. When we
|
||||
/// accumulate 240 bytes they are flushed to the underlying inner_writer.
|
||||
///
|
||||
pub fn BitWriter(comptime WriterType: type) type {
|
||||
// buffer_flush_size indicates the buffer size
|
||||
// after which bytes are flushed to the writer.
|
||||
// Should preferably be a multiple of 6, since
|
||||
// we accumulate 6 bytes between writes to the buffer.
|
||||
const buffer_flush_size = 240;
|
||||
|
||||
// buffer_size is the actual output byte buffer size.
|
||||
// It must have additional headroom for a flush
|
||||
// which can contain up to 8 bytes.
|
||||
const buffer_size = buffer_flush_size + 8;
|
||||
|
||||
return struct {
|
||||
inner_writer: WriterType,
|
||||
|
||||
// Data waiting to be written is bytes[0 .. nbytes]
|
||||
// and then the low nbits of bits. Data is always written
|
||||
// sequentially into the bytes array.
|
||||
bits: u64 = 0,
|
||||
nbits: u32 = 0, // number of bits
|
||||
bytes: [buffer_size]u8 = undefined,
|
||||
nbytes: u32 = 0, // number of bytes
|
||||
|
||||
const Self = @This();
|
||||
|
||||
pub const Error = WriterType.Error || error{UnfinishedBits};
|
||||
|
||||
pub fn init(writer: WriterType) Self {
|
||||
return .{ .inner_writer = writer };
|
||||
}
|
||||
|
||||
pub fn setWriter(self: *Self, new_writer: WriterType) void {
|
||||
//assert(self.bits == 0 and self.nbits == 0 and self.nbytes == 0);
|
||||
self.inner_writer = new_writer;
|
||||
}
|
||||
|
||||
pub fn flush(self: *Self) Error!void {
|
||||
var n = self.nbytes;
|
||||
while (self.nbits != 0) {
|
||||
self.bytes[n] = @as(u8, @truncate(self.bits));
|
||||
self.bits >>= 8;
|
||||
if (self.nbits > 8) { // Avoid underflow
|
||||
self.nbits -= 8;
|
||||
} else {
|
||||
self.nbits = 0;
|
||||
}
|
||||
n += 1;
|
||||
}
|
||||
self.bits = 0;
|
||||
_ = try self.inner_writer.write(self.bytes[0..n]);
|
||||
self.nbytes = 0;
|
||||
}
|
||||
|
||||
pub fn writeBits(self: *Self, b: u32, nb: u32) Error!void {
|
||||
self.bits |= @as(u64, @intCast(b)) << @as(u6, @intCast(self.nbits));
|
||||
self.nbits += nb;
|
||||
if (self.nbits < 48)
|
||||
return;
|
||||
|
||||
var n = self.nbytes;
|
||||
std.mem.writeInt(u64, self.bytes[n..][0..8], self.bits, .little);
|
||||
n += 6;
|
||||
if (n >= buffer_flush_size) {
|
||||
_ = try self.inner_writer.write(self.bytes[0..n]);
|
||||
n = 0;
|
||||
}
|
||||
self.nbytes = n;
|
||||
self.bits >>= 48;
|
||||
self.nbits -= 48;
|
||||
}
|
||||
|
||||
pub fn writeBytes(self: *Self, bytes: []const u8) Error!void {
|
||||
var n = self.nbytes;
|
||||
if (self.nbits & 7 != 0) {
|
||||
return error.UnfinishedBits;
|
||||
}
|
||||
while (self.nbits != 0) {
|
||||
self.bytes[n] = @as(u8, @truncate(self.bits));
|
||||
self.bits >>= 8;
|
||||
self.nbits -= 8;
|
||||
n += 1;
|
||||
}
|
||||
if (n != 0) {
|
||||
_ = try self.inner_writer.write(self.bytes[0..n]);
|
||||
}
|
||||
self.nbytes = 0;
|
||||
_ = try self.inner_writer.write(bytes);
|
||||
}
|
||||
};
|
||||
}
|
||||
@ -1,706 +0,0 @@
|
||||
const std = @import("std");
|
||||
const io = std.io;
|
||||
const assert = std.debug.assert;
|
||||
|
||||
const hc = @import("huffman_encoder.zig");
|
||||
const consts = @import("consts.zig").huffman;
|
||||
const Token = @import("Token.zig");
|
||||
const BitWriter = @import("bit_writer.zig").BitWriter;
|
||||
|
||||
pub fn blockWriter(writer: anytype) BlockWriter(@TypeOf(writer)) {
|
||||
return BlockWriter(@TypeOf(writer)).init(writer);
|
||||
}
|
||||
|
||||
/// Accepts list of tokens, decides what is best block type to write. What block
|
||||
/// type will provide best compression. Writes header and body of the block.
|
||||
///
|
||||
pub fn BlockWriter(comptime WriterType: type) type {
|
||||
const BitWriterType = BitWriter(WriterType);
|
||||
return struct {
|
||||
const codegen_order = consts.codegen_order;
|
||||
const end_code_mark = 255;
|
||||
const Self = @This();
|
||||
|
||||
pub const Error = BitWriterType.Error;
|
||||
bit_writer: BitWriterType,
|
||||
|
||||
codegen_freq: [consts.codegen_code_count]u16 = undefined,
|
||||
literal_freq: [consts.max_num_lit]u16 = undefined,
|
||||
distance_freq: [consts.distance_code_count]u16 = undefined,
|
||||
codegen: [consts.max_num_lit + consts.distance_code_count + 1]u8 = undefined,
|
||||
literal_encoding: hc.LiteralEncoder = .{},
|
||||
distance_encoding: hc.DistanceEncoder = .{},
|
||||
codegen_encoding: hc.CodegenEncoder = .{},
|
||||
fixed_literal_encoding: hc.LiteralEncoder,
|
||||
fixed_distance_encoding: hc.DistanceEncoder,
|
||||
huff_distance: hc.DistanceEncoder,
|
||||
|
||||
pub fn init(writer: WriterType) Self {
|
||||
return .{
|
||||
.bit_writer = BitWriterType.init(writer),
|
||||
.fixed_literal_encoding = hc.fixedLiteralEncoder(),
|
||||
.fixed_distance_encoding = hc.fixedDistanceEncoder(),
|
||||
.huff_distance = hc.huffmanDistanceEncoder(),
|
||||
};
|
||||
}
|
||||
|
||||
/// Flush intrenal bit buffer to the writer.
|
||||
/// Should be called only when bit stream is at byte boundary.
|
||||
///
|
||||
/// That is after final block; when last byte could be incomplete or
|
||||
/// after stored block; which is aligned to the byte boundary (it has x
|
||||
/// padding bits after first 3 bits).
|
||||
pub fn flush(self: *Self) Error!void {
|
||||
try self.bit_writer.flush();
|
||||
}
|
||||
|
||||
pub fn setWriter(self: *Self, new_writer: WriterType) void {
|
||||
self.bit_writer.setWriter(new_writer);
|
||||
}
|
||||
|
||||
fn writeCode(self: *Self, c: hc.HuffCode) Error!void {
|
||||
try self.bit_writer.writeBits(c.code, c.len);
|
||||
}
|
||||
|
||||
// RFC 1951 3.2.7 specifies a special run-length encoding for specifying
|
||||
// the literal and distance lengths arrays (which are concatenated into a single
|
||||
// array). This method generates that run-length encoding.
|
||||
//
|
||||
// The result is written into the codegen array, and the frequencies
|
||||
// of each code is written into the codegen_freq array.
|
||||
// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
|
||||
// information. Code bad_code is an end marker
|
||||
//
|
||||
// num_literals: The number of literals in literal_encoding
|
||||
// num_distances: The number of distances in distance_encoding
|
||||
// lit_enc: The literal encoder to use
|
||||
// dist_enc: The distance encoder to use
|
||||
fn generateCodegen(
|
||||
self: *Self,
|
||||
num_literals: u32,
|
||||
num_distances: u32,
|
||||
lit_enc: *hc.LiteralEncoder,
|
||||
dist_enc: *hc.DistanceEncoder,
|
||||
) void {
|
||||
for (self.codegen_freq, 0..) |_, i| {
|
||||
self.codegen_freq[i] = 0;
|
||||
}
|
||||
|
||||
// Note that we are using codegen both as a temporary variable for holding
|
||||
// a copy of the frequencies, and as the place where we put the result.
|
||||
// This is fine because the output is always shorter than the input used
|
||||
// so far.
|
||||
var codegen = &self.codegen; // cache
|
||||
// Copy the concatenated code sizes to codegen. Put a marker at the end.
|
||||
var cgnl = codegen[0..num_literals];
|
||||
for (cgnl, 0..) |_, i| {
|
||||
cgnl[i] = @as(u8, @intCast(lit_enc.codes[i].len));
|
||||
}
|
||||
|
||||
cgnl = codegen[num_literals .. num_literals + num_distances];
|
||||
for (cgnl, 0..) |_, i| {
|
||||
cgnl[i] = @as(u8, @intCast(dist_enc.codes[i].len));
|
||||
}
|
||||
codegen[num_literals + num_distances] = end_code_mark;
|
||||
|
||||
var size = codegen[0];
|
||||
var count: i32 = 1;
|
||||
var out_index: u32 = 0;
|
||||
var in_index: u32 = 1;
|
||||
while (size != end_code_mark) : (in_index += 1) {
|
||||
// INVARIANT: We have seen "count" copies of size that have not yet
|
||||
// had output generated for them.
|
||||
const next_size = codegen[in_index];
|
||||
if (next_size == size) {
|
||||
count += 1;
|
||||
continue;
|
||||
}
|
||||
// We need to generate codegen indicating "count" of size.
|
||||
if (size != 0) {
|
||||
codegen[out_index] = size;
|
||||
out_index += 1;
|
||||
self.codegen_freq[size] += 1;
|
||||
count -= 1;
|
||||
while (count >= 3) {
|
||||
var n: i32 = 6;
|
||||
if (n > count) {
|
||||
n = count;
|
||||
}
|
||||
codegen[out_index] = 16;
|
||||
out_index += 1;
|
||||
codegen[out_index] = @as(u8, @intCast(n - 3));
|
||||
out_index += 1;
|
||||
self.codegen_freq[16] += 1;
|
||||
count -= n;
|
||||
}
|
||||
} else {
|
||||
while (count >= 11) {
|
||||
var n: i32 = 138;
|
||||
if (n > count) {
|
||||
n = count;
|
||||
}
|
||||
codegen[out_index] = 18;
|
||||
out_index += 1;
|
||||
codegen[out_index] = @as(u8, @intCast(n - 11));
|
||||
out_index += 1;
|
||||
self.codegen_freq[18] += 1;
|
||||
count -= n;
|
||||
}
|
||||
if (count >= 3) {
|
||||
// 3 <= count <= 10
|
||||
codegen[out_index] = 17;
|
||||
out_index += 1;
|
||||
codegen[out_index] = @as(u8, @intCast(count - 3));
|
||||
out_index += 1;
|
||||
self.codegen_freq[17] += 1;
|
||||
count = 0;
|
||||
}
|
||||
}
|
||||
count -= 1;
|
||||
while (count >= 0) : (count -= 1) {
|
||||
codegen[out_index] = size;
|
||||
out_index += 1;
|
||||
self.codegen_freq[size] += 1;
|
||||
}
|
||||
// Set up invariant for next time through the loop.
|
||||
size = next_size;
|
||||
count = 1;
|
||||
}
|
||||
// Marker indicating the end of the codegen.
|
||||
codegen[out_index] = end_code_mark;
|
||||
}
|
||||
|
||||
const DynamicSize = struct {
|
||||
size: u32,
|
||||
num_codegens: u32,
|
||||
};
|
||||
|
||||
// dynamicSize returns the size of dynamically encoded data in bits.
|
||||
fn dynamicSize(
|
||||
self: *Self,
|
||||
lit_enc: *hc.LiteralEncoder, // literal encoder
|
||||
dist_enc: *hc.DistanceEncoder, // distance encoder
|
||||
extra_bits: u32,
|
||||
) DynamicSize {
|
||||
var num_codegens = self.codegen_freq.len;
|
||||
while (num_codegens > 4 and self.codegen_freq[codegen_order[num_codegens - 1]] == 0) {
|
||||
num_codegens -= 1;
|
||||
}
|
||||
const header = 3 + 5 + 5 + 4 + (3 * num_codegens) +
|
||||
self.codegen_encoding.bitLength(self.codegen_freq[0..]) +
|
||||
self.codegen_freq[16] * 2 +
|
||||
self.codegen_freq[17] * 3 +
|
||||
self.codegen_freq[18] * 7;
|
||||
const size = header +
|
||||
lit_enc.bitLength(&self.literal_freq) +
|
||||
dist_enc.bitLength(&self.distance_freq) +
|
||||
extra_bits;
|
||||
|
||||
return DynamicSize{
|
||||
.size = @as(u32, @intCast(size)),
|
||||
.num_codegens = @as(u32, @intCast(num_codegens)),
|
||||
};
|
||||
}
|
||||
|
||||
// fixedSize returns the size of dynamically encoded data in bits.
|
||||
fn fixedSize(self: *Self, extra_bits: u32) u32 {
|
||||
return 3 +
|
||||
self.fixed_literal_encoding.bitLength(&self.literal_freq) +
|
||||
self.fixed_distance_encoding.bitLength(&self.distance_freq) +
|
||||
extra_bits;
|
||||
}
|
||||
|
||||
const StoredSize = struct {
|
||||
size: u32,
|
||||
storable: bool,
|
||||
};
|
||||
|
||||
// storedSizeFits calculates the stored size, including header.
|
||||
// The function returns the size in bits and whether the block
|
||||
// fits inside a single block.
|
||||
fn storedSizeFits(in: ?[]const u8) StoredSize {
|
||||
if (in == null) {
|
||||
return .{ .size = 0, .storable = false };
|
||||
}
|
||||
if (in.?.len <= consts.max_store_block_size) {
|
||||
return .{ .size = @as(u32, @intCast((in.?.len + 5) * 8)), .storable = true };
|
||||
}
|
||||
return .{ .size = 0, .storable = false };
|
||||
}
|
||||
|
||||
// Write the header of a dynamic Huffman block to the output stream.
|
||||
//
|
||||
// num_literals: The number of literals specified in codegen
|
||||
// num_distances: The number of distances specified in codegen
|
||||
// num_codegens: The number of codegens used in codegen
|
||||
// eof: Is it the end-of-file? (end of stream)
|
||||
fn dynamicHeader(
|
||||
self: *Self,
|
||||
num_literals: u32,
|
||||
num_distances: u32,
|
||||
num_codegens: u32,
|
||||
eof: bool,
|
||||
) Error!void {
|
||||
const first_bits: u32 = if (eof) 5 else 4;
|
||||
try self.bit_writer.writeBits(first_bits, 3);
|
||||
try self.bit_writer.writeBits(num_literals - 257, 5);
|
||||
try self.bit_writer.writeBits(num_distances - 1, 5);
|
||||
try self.bit_writer.writeBits(num_codegens - 4, 4);
|
||||
|
||||
var i: u32 = 0;
|
||||
while (i < num_codegens) : (i += 1) {
|
||||
const value = self.codegen_encoding.codes[codegen_order[i]].len;
|
||||
try self.bit_writer.writeBits(value, 3);
|
||||
}
|
||||
|
||||
i = 0;
|
||||
while (true) {
|
||||
const code_word: u32 = @as(u32, @intCast(self.codegen[i]));
|
||||
i += 1;
|
||||
if (code_word == end_code_mark) {
|
||||
break;
|
||||
}
|
||||
try self.writeCode(self.codegen_encoding.codes[@as(u32, @intCast(code_word))]);
|
||||
|
||||
switch (code_word) {
|
||||
16 => {
|
||||
try self.bit_writer.writeBits(self.codegen[i], 2);
|
||||
i += 1;
|
||||
},
|
||||
17 => {
|
||||
try self.bit_writer.writeBits(self.codegen[i], 3);
|
||||
i += 1;
|
||||
},
|
||||
18 => {
|
||||
try self.bit_writer.writeBits(self.codegen[i], 7);
|
||||
i += 1;
|
||||
},
|
||||
else => {},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn storedHeader(self: *Self, length: usize, eof: bool) Error!void {
|
||||
assert(length <= 65535);
|
||||
const flag: u32 = if (eof) 1 else 0;
|
||||
try self.bit_writer.writeBits(flag, 3);
|
||||
try self.flush();
|
||||
const l: u16 = @intCast(length);
|
||||
try self.bit_writer.writeBits(l, 16);
|
||||
try self.bit_writer.writeBits(~l, 16);
|
||||
}
|
||||
|
||||
fn fixedHeader(self: *Self, eof: bool) Error!void {
|
||||
// Indicate that we are a fixed Huffman block
|
||||
var value: u32 = 2;
|
||||
if (eof) {
|
||||
value = 3;
|
||||
}
|
||||
try self.bit_writer.writeBits(value, 3);
|
||||
}
|
||||
|
||||
// Write a block of tokens with the smallest encoding. Will choose block type.
|
||||
// The original input can be supplied, and if the huffman encoded data
|
||||
// is larger than the original bytes, the data will be written as a
|
||||
// stored block.
|
||||
// If the input is null, the tokens will always be Huffman encoded.
|
||||
pub fn write(self: *Self, tokens: []const Token, eof: bool, input: ?[]const u8) Error!void {
|
||||
const lit_and_dist = self.indexTokens(tokens);
|
||||
const num_literals = lit_and_dist.num_literals;
|
||||
const num_distances = lit_and_dist.num_distances;
|
||||
|
||||
var extra_bits: u32 = 0;
|
||||
const ret = storedSizeFits(input);
|
||||
const stored_size = ret.size;
|
||||
const storable = ret.storable;
|
||||
|
||||
if (storable) {
|
||||
// We only bother calculating the costs of the extra bits required by
|
||||
// the length of distance fields (which will be the same for both fixed
|
||||
// and dynamic encoding), if we need to compare those two encodings
|
||||
// against stored encoding.
|
||||
var length_code: u16 = Token.length_codes_start + 8;
|
||||
while (length_code < num_literals) : (length_code += 1) {
|
||||
// First eight length codes have extra size = 0.
|
||||
extra_bits += @as(u32, @intCast(self.literal_freq[length_code])) *
|
||||
@as(u32, @intCast(Token.lengthExtraBits(length_code)));
|
||||
}
|
||||
var distance_code: u16 = 4;
|
||||
while (distance_code < num_distances) : (distance_code += 1) {
|
||||
// First four distance codes have extra size = 0.
|
||||
extra_bits += @as(u32, @intCast(self.distance_freq[distance_code])) *
|
||||
@as(u32, @intCast(Token.distanceExtraBits(distance_code)));
|
||||
}
|
||||
}
|
||||
|
||||
// Figure out smallest code.
|
||||
// Fixed Huffman baseline.
|
||||
var literal_encoding = &self.fixed_literal_encoding;
|
||||
var distance_encoding = &self.fixed_distance_encoding;
|
||||
var size = self.fixedSize(extra_bits);
|
||||
|
||||
// Dynamic Huffman?
|
||||
var num_codegens: u32 = 0;
|
||||
|
||||
// Generate codegen and codegenFrequencies, which indicates how to encode
|
||||
// the literal_encoding and the distance_encoding.
|
||||
self.generateCodegen(
|
||||
num_literals,
|
||||
num_distances,
|
||||
&self.literal_encoding,
|
||||
&self.distance_encoding,
|
||||
);
|
||||
self.codegen_encoding.generate(self.codegen_freq[0..], 7);
|
||||
const dynamic_size = self.dynamicSize(
|
||||
&self.literal_encoding,
|
||||
&self.distance_encoding,
|
||||
extra_bits,
|
||||
);
|
||||
const dyn_size = dynamic_size.size;
|
||||
num_codegens = dynamic_size.num_codegens;
|
||||
|
||||
if (dyn_size < size) {
|
||||
size = dyn_size;
|
||||
literal_encoding = &self.literal_encoding;
|
||||
distance_encoding = &self.distance_encoding;
|
||||
}
|
||||
|
||||
// Stored bytes?
|
||||
if (storable and stored_size < size) {
|
||||
try self.storedBlock(input.?, eof);
|
||||
return;
|
||||
}
|
||||
|
||||
// Huffman.
|
||||
if (@intFromPtr(literal_encoding) == @intFromPtr(&self.fixed_literal_encoding)) {
|
||||
try self.fixedHeader(eof);
|
||||
} else {
|
||||
try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);
|
||||
}
|
||||
|
||||
// Write the tokens.
|
||||
try self.writeTokens(tokens, &literal_encoding.codes, &distance_encoding.codes);
|
||||
}
|
||||
|
||||
pub fn storedBlock(self: *Self, input: []const u8, eof: bool) Error!void {
|
||||
try self.storedHeader(input.len, eof);
|
||||
try self.bit_writer.writeBytes(input);
|
||||
}
|
||||
|
||||
// writeBlockDynamic encodes a block using a dynamic Huffman table.
|
||||
// This should be used if the symbols used have a disproportionate
|
||||
// histogram distribution.
|
||||
// If input is supplied and the compression savings are below 1/16th of the
|
||||
// input size the block is stored.
|
||||
fn dynamicBlock(
|
||||
self: *Self,
|
||||
tokens: []const Token,
|
||||
eof: bool,
|
||||
input: ?[]const u8,
|
||||
) Error!void {
|
||||
const total_tokens = self.indexTokens(tokens);
|
||||
const num_literals = total_tokens.num_literals;
|
||||
const num_distances = total_tokens.num_distances;
|
||||
|
||||
// Generate codegen and codegenFrequencies, which indicates how to encode
|
||||
// the literal_encoding and the distance_encoding.
|
||||
self.generateCodegen(
|
||||
num_literals,
|
||||
num_distances,
|
||||
&self.literal_encoding,
|
||||
&self.distance_encoding,
|
||||
);
|
||||
self.codegen_encoding.generate(self.codegen_freq[0..], 7);
|
||||
const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.distance_encoding, 0);
|
||||
const size = dynamic_size.size;
|
||||
const num_codegens = dynamic_size.num_codegens;
|
||||
|
||||
// Store bytes, if we don't get a reasonable improvement.
|
||||
|
||||
const stored_size = storedSizeFits(input);
|
||||
const ssize = stored_size.size;
|
||||
const storable = stored_size.storable;
|
||||
if (storable and ssize < (size + (size >> 4))) {
|
||||
try self.storedBlock(input.?, eof);
|
||||
return;
|
||||
}
|
||||
|
||||
// Write Huffman table.
|
||||
try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);
|
||||
|
||||
// Write the tokens.
|
||||
try self.writeTokens(tokens, &self.literal_encoding.codes, &self.distance_encoding.codes);
|
||||
}
|
||||
|
||||
const TotalIndexedTokens = struct {
|
||||
num_literals: u32,
|
||||
num_distances: u32,
|
||||
};
|
||||
|
||||
// Indexes a slice of tokens followed by an end_block_marker, and updates
|
||||
// literal_freq and distance_freq, and generates literal_encoding
|
||||
// and distance_encoding.
|
||||
// The number of literal and distance tokens is returned.
|
||||
fn indexTokens(self: *Self, tokens: []const Token) TotalIndexedTokens {
|
||||
var num_literals: u32 = 0;
|
||||
var num_distances: u32 = 0;
|
||||
|
||||
for (self.literal_freq, 0..) |_, i| {
|
||||
self.literal_freq[i] = 0;
|
||||
}
|
||||
for (self.distance_freq, 0..) |_, i| {
|
||||
self.distance_freq[i] = 0;
|
||||
}
|
||||
|
||||
for (tokens) |t| {
|
||||
if (t.kind == Token.Kind.literal) {
|
||||
self.literal_freq[t.literal()] += 1;
|
||||
continue;
|
||||
}
|
||||
self.literal_freq[t.lengthCode()] += 1;
|
||||
self.distance_freq[t.distanceCode()] += 1;
|
||||
}
|
||||
// add end_block_marker token at the end
|
||||
self.literal_freq[consts.end_block_marker] += 1;
|
||||
|
||||
// get the number of literals
|
||||
num_literals = @as(u32, @intCast(self.literal_freq.len));
|
||||
while (self.literal_freq[num_literals - 1] == 0) {
|
||||
num_literals -= 1;
|
||||
}
|
||||
// get the number of distances
|
||||
num_distances = @as(u32, @intCast(self.distance_freq.len));
|
||||
while (num_distances > 0 and self.distance_freq[num_distances - 1] == 0) {
|
||||
num_distances -= 1;
|
||||
}
|
||||
if (num_distances == 0) {
|
||||
// We haven't found a single match. If we want to go with the dynamic encoding,
|
||||
// we should count at least one distance to be sure that the distance huffman tree could be encoded.
|
||||
self.distance_freq[0] = 1;
|
||||
num_distances = 1;
|
||||
}
|
||||
self.literal_encoding.generate(&self.literal_freq, 15);
|
||||
self.distance_encoding.generate(&self.distance_freq, 15);
|
||||
return TotalIndexedTokens{
|
||||
.num_literals = num_literals,
|
||||
.num_distances = num_distances,
|
||||
};
|
||||
}
|
||||
|
||||
// Writes a slice of tokens to the output followed by and end_block_marker.
|
||||
// codes for literal and distance encoding must be supplied.
|
||||
fn writeTokens(
|
||||
self: *Self,
|
||||
tokens: []const Token,
|
||||
le_codes: []hc.HuffCode,
|
||||
oe_codes: []hc.HuffCode,
|
||||
) Error!void {
|
||||
for (tokens) |t| {
|
||||
if (t.kind == Token.Kind.literal) {
|
||||
try self.writeCode(le_codes[t.literal()]);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Write the length
|
||||
const le = t.lengthEncoding();
|
||||
try self.writeCode(le_codes[le.code]);
|
||||
if (le.extra_bits > 0) {
|
||||
try self.bit_writer.writeBits(le.extra_length, le.extra_bits);
|
||||
}
|
||||
|
||||
// Write the distance
|
||||
const oe = t.distanceEncoding();
|
||||
try self.writeCode(oe_codes[oe.code]);
|
||||
if (oe.extra_bits > 0) {
|
||||
try self.bit_writer.writeBits(oe.extra_distance, oe.extra_bits);
|
||||
}
|
||||
}
|
||||
// add end_block_marker at the end
|
||||
try self.writeCode(le_codes[consts.end_block_marker]);
|
||||
}
|
||||
|
||||
// Encodes a block of bytes as either Huffman encoded literals or uncompressed bytes
|
||||
// if the results only gains very little from compression.
|
||||
pub fn huffmanBlock(self: *Self, input: []const u8, eof: bool) Error!void {
|
||||
// Add everything as literals
|
||||
histogram(input, &self.literal_freq);
|
||||
|
||||
self.literal_freq[consts.end_block_marker] = 1;
|
||||
|
||||
const num_literals = consts.end_block_marker + 1;
|
||||
self.distance_freq[0] = 1;
|
||||
const num_distances = 1;
|
||||
|
||||
self.literal_encoding.generate(&self.literal_freq, 15);
|
||||
|
||||
// Figure out smallest code.
|
||||
// Always use dynamic Huffman or Store
|
||||
var num_codegens: u32 = 0;
|
||||
|
||||
// Generate codegen and codegenFrequencies, which indicates how to encode
|
||||
// the literal_encoding and the distance_encoding.
|
||||
self.generateCodegen(
|
||||
num_literals,
|
||||
num_distances,
|
||||
&self.literal_encoding,
|
||||
&self.huff_distance,
|
||||
);
|
||||
self.codegen_encoding.generate(self.codegen_freq[0..], 7);
|
||||
const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.huff_distance, 0);
|
||||
const size = dynamic_size.size;
|
||||
num_codegens = dynamic_size.num_codegens;
|
||||
|
||||
// Store bytes, if we don't get a reasonable improvement.
|
||||
const stored_size_ret = storedSizeFits(input);
|
||||
const ssize = stored_size_ret.size;
|
||||
const storable = stored_size_ret.storable;
|
||||
|
||||
if (storable and ssize < (size + (size >> 4))) {
|
||||
try self.storedBlock(input, eof);
|
||||
return;
|
||||
}
|
||||
|
||||
// Huffman.
|
||||
try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);
|
||||
const encoding = self.literal_encoding.codes[0..257];
|
||||
|
||||
for (input) |t| {
|
||||
const c = encoding[t];
|
||||
try self.bit_writer.writeBits(c.code, c.len);
|
||||
}
|
||||
try self.writeCode(encoding[consts.end_block_marker]);
|
||||
}
|
||||
|
||||
// histogram accumulates a histogram of b in h.
|
||||
fn histogram(b: []const u8, h: *[286]u16) void {
|
||||
// Clear histogram
|
||||
for (h, 0..) |_, i| {
|
||||
h[i] = 0;
|
||||
}
|
||||
|
||||
var lh = h.*[0..256];
|
||||
for (b) |t| {
|
||||
lh[t] += 1;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// tests
|
||||
const expect = std.testing.expect;
|
||||
const fmt = std.fmt;
|
||||
const testing = std.testing;
|
||||
const ArrayList = std.ArrayList;
|
||||
|
||||
const TestCase = @import("testdata/block_writer.zig").TestCase;
|
||||
const testCases = @import("testdata/block_writer.zig").testCases;
|
||||
|
||||
// tests if the writeBlock encoding has changed.
|
||||
test "write" {
|
||||
inline for (0..testCases.len) |i| {
|
||||
try testBlock(testCases[i], .write_block);
|
||||
}
|
||||
}
|
||||
|
||||
// tests if the writeBlockDynamic encoding has changed.
|
||||
test "dynamicBlock" {
|
||||
inline for (0..testCases.len) |i| {
|
||||
try testBlock(testCases[i], .write_dyn_block);
|
||||
}
|
||||
}
|
||||
|
||||
test "huffmanBlock" {
|
||||
inline for (0..testCases.len) |i| {
|
||||
try testBlock(testCases[i], .write_huffman_block);
|
||||
}
|
||||
try testBlock(.{
|
||||
.tokens = &[_]Token{},
|
||||
.input = "huffman-rand-max.input",
|
||||
.want = "huffman-rand-max.{s}.expect",
|
||||
}, .write_huffman_block);
|
||||
}
|
||||
|
||||
const TestFn = enum {
|
||||
write_block,
|
||||
write_dyn_block, // write dynamic block
|
||||
write_huffman_block,
|
||||
|
||||
fn to_s(self: TestFn) []const u8 {
|
||||
return switch (self) {
|
||||
.write_block => "wb",
|
||||
.write_dyn_block => "dyn",
|
||||
.write_huffman_block => "huff",
|
||||
};
|
||||
}
|
||||
|
||||
fn write(
|
||||
comptime self: TestFn,
|
||||
bw: anytype,
|
||||
tok: []const Token,
|
||||
input: ?[]const u8,
|
||||
final: bool,
|
||||
) !void {
|
||||
switch (self) {
|
||||
.write_block => try bw.write(tok, final, input),
|
||||
.write_dyn_block => try bw.dynamicBlock(tok, final, input),
|
||||
.write_huffman_block => try bw.huffmanBlock(input.?, final),
|
||||
}
|
||||
try bw.flush();
|
||||
}
|
||||
};
|
||||
|
||||
// testBlock tests a block against its references
|
||||
//
|
||||
// size
|
||||
// 64K [file-name].input - input non compressed file
|
||||
// 8.1K [file-name].golden -
|
||||
// 78 [file-name].dyn.expect - output with writeBlockDynamic
|
||||
// 78 [file-name].wb.expect - output with writeBlock
|
||||
// 8.1K [file-name].huff.expect - output with writeBlockHuff
|
||||
// 78 [file-name].dyn.expect-noinput - output with writeBlockDynamic when input is null
|
||||
// 78 [file-name].wb.expect-noinput - output with writeBlock when input is null
|
||||
//
|
||||
// wb - writeBlock
|
||||
// dyn - writeBlockDynamic
|
||||
// huff - writeBlockHuff
|
||||
//
|
||||
fn testBlock(comptime tc: TestCase, comptime tfn: TestFn) !void {
|
||||
if (tc.input.len != 0 and tc.want.len != 0) {
|
||||
const want_name = comptime fmt.comptimePrint(tc.want, .{tfn.to_s()});
|
||||
const input = @embedFile("testdata/block_writer/" ++ tc.input);
|
||||
const want = @embedFile("testdata/block_writer/" ++ want_name);
|
||||
try testWriteBlock(tfn, input, want, tc.tokens);
|
||||
}
|
||||
|
||||
if (tfn == .write_huffman_block) {
|
||||
return;
|
||||
}
|
||||
|
||||
const want_name_no_input = comptime fmt.comptimePrint(tc.want_no_input, .{tfn.to_s()});
|
||||
const want = @embedFile("testdata/block_writer/" ++ want_name_no_input);
|
||||
try testWriteBlock(tfn, null, want, tc.tokens);
|
||||
}
|
||||
|
||||
// Uses writer function `tfn` to write `tokens`, tests that we got `want` as output.
|
||||
fn testWriteBlock(comptime tfn: TestFn, input: ?[]const u8, want: []const u8, tokens: []const Token) !void {
|
||||
var buf = ArrayList(u8).init(testing.allocator);
|
||||
var bw = blockWriter(buf.writer());
|
||||
try tfn.write(&bw, tokens, input, false);
|
||||
var got = buf.items;
|
||||
try testing.expectEqualSlices(u8, want, got); // expect writeBlock to yield expected result
|
||||
try expect(got[0] & 0b0000_0001 == 0); // bfinal is not set
|
||||
//
|
||||
// Test if the writer produces the same output after reset.
|
||||
buf.deinit();
|
||||
buf = ArrayList(u8).init(testing.allocator);
|
||||
defer buf.deinit();
|
||||
bw.setWriter(buf.writer());
|
||||
|
||||
try tfn.write(&bw, tokens, input, true);
|
||||
try bw.flush();
|
||||
got = buf.items;
|
||||
|
||||
try expect(got[0] & 1 == 1); // bfinal is set
|
||||
buf.items[0] &= 0b1111_1110; // remove bfinal bit, so we can run test slices
|
||||
try testing.expectEqualSlices(u8, want, got); // expect writeBlock to yield expected result
|
||||
}
|
||||
@ -7,7 +7,7 @@ const print = std.debug.print;
|
||||
|
||||
const Token = @import("Token.zig");
|
||||
const consts = @import("consts.zig");
|
||||
const BlockWriter = @import("block_writer.zig").BlockWriter;
|
||||
const BlockWriter = @import("BlockWriter.zig");
|
||||
const Container = @import("container.zig").Container;
|
||||
const SlidingWindow = @import("SlidingWindow.zig");
|
||||
const Lookup = @import("Lookup.zig");
|
||||
@ -53,24 +53,20 @@ const LevelArgs = struct {
|
||||
};
|
||||
|
||||
/// Compress plain data from reader into compressed stream written to writer.
|
||||
pub fn compress(comptime container: Container, reader: anytype, writer: anytype, options: Options) !void {
|
||||
var c = try compressor(container, writer, options);
|
||||
pub fn compress(
|
||||
comptime container: Container,
|
||||
reader: *std.io.BufferedReader,
|
||||
writer: *std.io.BufferedWriter,
|
||||
options: Options,
|
||||
) !void {
|
||||
var c = try Compressor.init(container, writer, options);
|
||||
try c.compress(reader);
|
||||
try c.finish();
|
||||
}
|
||||
|
||||
/// Create compressor for writer type.
|
||||
pub fn compressor(comptime container: Container, writer: anytype, options: Options) !Compressor(
|
||||
container,
|
||||
@TypeOf(writer),
|
||||
) {
|
||||
return try Compressor(container, @TypeOf(writer)).init(writer, options);
|
||||
}
|
||||
|
||||
/// Compressor type.
|
||||
pub fn Compressor(comptime container: Container, comptime WriterType: type) type {
|
||||
const TokenWriterType = BlockWriter(WriterType);
|
||||
return Deflate(container, WriterType, TokenWriterType);
|
||||
pub fn Compressor(comptime container: Container) type {
|
||||
return Deflate(container, BlockWriter);
|
||||
}
|
||||
|
||||
/// Default compression algorithm. Has two steps: tokenization and token
|
||||
|
||||
@ -11,22 +11,22 @@ const codegen_order = @import("consts.zig").huffman.codegen_order;
|
||||
|
||||
/// Decompresses deflate bit stream `reader` and writes uncompressed data to the
|
||||
/// `writer` stream.
|
||||
pub fn decompress(comptime container: Container, reader: anytype, writer: anytype) !void {
|
||||
pub fn decompress(comptime container: Container, reader: *std.io.BufferedReader, writer: *std.io.BufferedWriter) !void {
|
||||
var d = decompressor(container, reader);
|
||||
try d.decompress(writer);
|
||||
}
|
||||
|
||||
/// Inflate decompressor for the reader type.
|
||||
pub fn decompressor(comptime container: Container, reader: anytype) Decompressor(container, @TypeOf(reader)) {
|
||||
return Decompressor(container, @TypeOf(reader)).init(reader);
|
||||
pub fn decompressor(comptime container: Container, reader: *std.io.BufferedReader) Decompressor(container) {
|
||||
return Decompressor(container).init(reader);
|
||||
}
|
||||
|
||||
pub fn Decompressor(comptime container: Container, comptime ReaderType: type) type {
|
||||
pub fn Decompressor(comptime container: Container) type {
|
||||
// zlib has 4 bytes footer, lookahead of 4 bytes ensures that we will not overshoot.
|
||||
// gzip has 8 bytes footer so we will not overshoot even with 8 bytes of lookahead.
|
||||
// For raw deflate there is always possibility of overshot so we use 8 bytes lookahead.
|
||||
const lookahead: type = if (container == .zlib) u32 else u64;
|
||||
return Inflate(container, lookahead, ReaderType);
|
||||
return Inflate(container, lookahead);
|
||||
}
|
||||
|
||||
/// Inflate decompresses deflate bit stream. Reads compressed data from reader
|
||||
@ -48,15 +48,14 @@ pub fn Decompressor(comptime container: Container, comptime ReaderType: type) ty
|
||||
/// * 64K for history (CircularBuffer)
|
||||
/// * ~10K huffman decoders (Literal and DistanceDecoder)
|
||||
///
|
||||
pub fn Inflate(comptime container: Container, comptime LookaheadType: type, comptime ReaderType: type) type {
|
||||
pub fn Inflate(comptime container: Container, comptime LookaheadType: type) type {
|
||||
assert(LookaheadType == u32 or LookaheadType == u64);
|
||||
const BitReaderType = BitReader(LookaheadType, ReaderType);
|
||||
const BitReaderType = BitReader(LookaheadType);
|
||||
|
||||
return struct {
|
||||
//const BitReaderType = BitReader(ReaderType);
|
||||
const F = BitReaderType.flag;
|
||||
|
||||
bits: BitReaderType = .{},
|
||||
bits: BitReaderType,
|
||||
hist: CircularBuffer = .{},
|
||||
// Hashes, produces checkusm, of uncompressed data for gzip/zlib footer.
|
||||
hasher: container.Hasher() = .{},
|
||||
@ -88,8 +87,8 @@ pub fn Inflate(comptime container: Container, comptime LookaheadType: type, comp
|
||||
InvalidDynamicBlockHeader,
|
||||
};
|
||||
|
||||
pub fn init(rt: ReaderType) Self {
|
||||
return .{ .bits = BitReaderType.init(rt) };
|
||||
pub fn init(bw: *std.io.BufferedReader) Self {
|
||||
return .{ .bits = BitReaderType.init(bw) };
|
||||
}
|
||||
|
||||
fn blockHeader(self: *Self) !void {
|
||||
@ -289,7 +288,7 @@ pub fn Inflate(comptime container: Container, comptime LookaheadType: type, comp
|
||||
}
|
||||
|
||||
/// Replaces the inner reader with new reader.
|
||||
pub fn setReader(self: *Self, new_reader: ReaderType) void {
|
||||
pub fn setReader(self: *Self, new_reader: *std.io.BufferedReader) void {
|
||||
self.bits.forward_reader = new_reader;
|
||||
if (self.state == .end or self.state == .protocol_footer) {
|
||||
self.state = .protocol_header;
|
||||
@ -298,7 +297,7 @@ pub fn Inflate(comptime container: Container, comptime LookaheadType: type, comp
|
||||
|
||||
// Reads all compressed data from the internal reader and outputs plain
|
||||
// (uncompressed) data to the provided writer.
|
||||
pub fn decompress(self: *Self, writer: anytype) !void {
|
||||
pub fn decompress(self: *Self, writer: *std.io.BufferedWriter) !void {
|
||||
while (try self.next()) |buf| {
|
||||
try writer.writeAll(buf);
|
||||
}
|
||||
|
||||
@ -4,14 +4,174 @@ const math = std.math;
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
pub const lzbuffer = @import("decode/lzbuffer.zig");
|
||||
pub const rangecoder = @import("decode/rangecoder.zig");
|
||||
|
||||
const LzCircularBuffer = lzbuffer.LzCircularBuffer;
|
||||
const BitTree = rangecoder.BitTree;
|
||||
const LenDecoder = rangecoder.LenDecoder;
|
||||
const RangeDecoder = rangecoder.RangeDecoder;
|
||||
const Vec2D = @import("vec2d.zig").Vec2D;
|
||||
|
||||
pub const RangeDecoder = struct {
|
||||
range: u32,
|
||||
code: u32,
|
||||
|
||||
pub fn init(br: *std.io.BufferedReader) !RangeDecoder {
|
||||
const reserved = try br.takeByte();
|
||||
if (reserved != 0) {
|
||||
return error.CorruptInput;
|
||||
}
|
||||
return .{
|
||||
.range = 0xFFFF_FFFF,
|
||||
.code = try br.readInt(u32, .big),
|
||||
};
|
||||
}
|
||||
|
||||
pub inline fn isFinished(self: RangeDecoder) bool {
|
||||
return self.code == 0;
|
||||
}
|
||||
|
||||
inline fn normalize(self: *RangeDecoder, br: *std.io.BufferedReader) !void {
|
||||
if (self.range < 0x0100_0000) {
|
||||
self.range <<= 8;
|
||||
self.code = (self.code << 8) ^ @as(u32, try br.takeByte());
|
||||
}
|
||||
}
|
||||
|
||||
inline fn getBit(self: *RangeDecoder, br: *std.io.BufferedReader) !bool {
|
||||
self.range >>= 1;
|
||||
|
||||
const bit = self.code >= self.range;
|
||||
if (bit)
|
||||
self.code -= self.range;
|
||||
|
||||
try self.normalize(br);
|
||||
return bit;
|
||||
}
|
||||
|
||||
pub fn get(self: *RangeDecoder, br: *std.io.BufferedReader, count: usize) !u32 {
|
||||
var result: u32 = 0;
|
||||
var i: usize = 0;
|
||||
while (i < count) : (i += 1)
|
||||
result = (result << 1) ^ @intFromBool(try self.getBit(br));
|
||||
return result;
|
||||
}
|
||||
|
||||
pub inline fn decodeBit(self: *RangeDecoder, br: *std.io.BufferedReader, prob: *u16, update: bool) !bool {
|
||||
const bound = (self.range >> 11) * prob.*;
|
||||
|
||||
if (self.code < bound) {
|
||||
if (update)
|
||||
prob.* += (0x800 - prob.*) >> 5;
|
||||
self.range = bound;
|
||||
|
||||
try self.normalize(br);
|
||||
return false;
|
||||
} else {
|
||||
if (update)
|
||||
prob.* -= prob.* >> 5;
|
||||
self.code -= bound;
|
||||
self.range -= bound;
|
||||
|
||||
try self.normalize(br);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
fn parseBitTree(
|
||||
self: *RangeDecoder,
|
||||
br: *std.io.BufferedReader,
|
||||
num_bits: u5,
|
||||
probs: []u16,
|
||||
update: bool,
|
||||
) !u32 {
|
||||
var tmp: u32 = 1;
|
||||
var i: @TypeOf(num_bits) = 0;
|
||||
while (i < num_bits) : (i += 1) {
|
||||
const bit = try self.decodeBit(br, &probs[tmp], update);
|
||||
tmp = (tmp << 1) ^ @intFromBool(bit);
|
||||
}
|
||||
return tmp - (@as(u32, 1) << num_bits);
|
||||
}
|
||||
|
||||
pub fn parseReverseBitTree(
|
||||
self: *RangeDecoder,
|
||||
br: *std.io.BufferedReader,
|
||||
num_bits: u5,
|
||||
probs: []u16,
|
||||
offset: usize,
|
||||
update: bool,
|
||||
) !u32 {
|
||||
var result: u32 = 0;
|
||||
var tmp: usize = 1;
|
||||
var i: @TypeOf(num_bits) = 0;
|
||||
while (i < num_bits) : (i += 1) {
|
||||
const bit = @intFromBool(try self.decodeBit(br, &probs[offset + tmp], update));
|
||||
tmp = (tmp << 1) ^ bit;
|
||||
result ^= @as(u32, bit) << i;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
pub fn BitTree(comptime num_bits: usize) type {
|
||||
return struct {
|
||||
probs: [1 << num_bits]u16 = @splat(0x400),
|
||||
|
||||
const Self = @This();
|
||||
|
||||
pub fn parse(
|
||||
self: *Self,
|
||||
br: *std.io.BufferedReader,
|
||||
decoder: *RangeDecoder,
|
||||
update: bool,
|
||||
) !u32 {
|
||||
return decoder.parseBitTree(br, num_bits, &self.probs, update);
|
||||
}
|
||||
|
||||
pub fn parseReverse(
|
||||
self: *Self,
|
||||
br: *std.io.BufferedReader,
|
||||
decoder: *RangeDecoder,
|
||||
update: bool,
|
||||
) !u32 {
|
||||
return decoder.parseReverseBitTree(br, num_bits, &self.probs, 0, update);
|
||||
}
|
||||
|
||||
pub fn reset(self: *Self) void {
|
||||
@memset(&self.probs, 0x400);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
pub const LenDecoder = struct {
|
||||
choice: u16 = 0x400,
|
||||
choice2: u16 = 0x400,
|
||||
low_coder: [16]BitTree(3) = @splat(.{}),
|
||||
mid_coder: [16]BitTree(3) = @splat(.{}),
|
||||
high_coder: BitTree(8) = .{},
|
||||
|
||||
pub fn decode(
|
||||
self: *LenDecoder,
|
||||
br: *std.io.BufferedReader,
|
||||
decoder: *RangeDecoder,
|
||||
pos_state: usize,
|
||||
update: bool,
|
||||
) !usize {
|
||||
if (!try decoder.decodeBit(br, &self.choice, update)) {
|
||||
return @as(usize, try self.low_coder[pos_state].parse(br, decoder, update));
|
||||
} else if (!try decoder.decodeBit(br, &self.choice2, update)) {
|
||||
return @as(usize, try self.mid_coder[pos_state].parse(br, decoder, update)) + 8;
|
||||
} else {
|
||||
return @as(usize, try self.high_coder.parse(br, decoder, update)) + 16;
|
||||
}
|
||||
}
|
||||
|
||||
pub fn reset(self: *LenDecoder) void {
|
||||
self.choice = 0x400;
|
||||
self.choice2 = 0x400;
|
||||
for (&self.low_coder) |*t| t.reset();
|
||||
for (&self.mid_coder) |*t| t.reset();
|
||||
self.high_coder.reset();
|
||||
}
|
||||
};
|
||||
|
||||
pub const Options = struct {
|
||||
unpacked_size: UnpackedSize = .read_from_header,
|
||||
memlimit: ?usize = null,
|
||||
|
||||
@ -1,181 +0,0 @@
|
||||
const std = @import("../../../std.zig");
|
||||
const mem = std.mem;
|
||||
|
||||
pub const RangeDecoder = struct {
|
||||
range: u32,
|
||||
code: u32,
|
||||
|
||||
pub fn init(reader: anytype) !RangeDecoder {
|
||||
const reserved = try reader.readByte();
|
||||
if (reserved != 0) {
|
||||
return error.CorruptInput;
|
||||
}
|
||||
return RangeDecoder{
|
||||
.range = 0xFFFF_FFFF,
|
||||
.code = try reader.readInt(u32, .big),
|
||||
};
|
||||
}
|
||||
|
||||
pub fn fromParts(
|
||||
range: u32,
|
||||
code: u32,
|
||||
) RangeDecoder {
|
||||
return .{
|
||||
.range = range,
|
||||
.code = code,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn set(self: *RangeDecoder, range: u32, code: u32) void {
|
||||
self.range = range;
|
||||
self.code = code;
|
||||
}
|
||||
|
||||
pub inline fn isFinished(self: RangeDecoder) bool {
|
||||
return self.code == 0;
|
||||
}
|
||||
|
||||
inline fn normalize(self: *RangeDecoder, reader: anytype) !void {
|
||||
if (self.range < 0x0100_0000) {
|
||||
self.range <<= 8;
|
||||
self.code = (self.code << 8) ^ @as(u32, try reader.readByte());
|
||||
}
|
||||
}
|
||||
|
||||
inline fn getBit(self: *RangeDecoder, reader: anytype) !bool {
|
||||
self.range >>= 1;
|
||||
|
||||
const bit = self.code >= self.range;
|
||||
if (bit)
|
||||
self.code -= self.range;
|
||||
|
||||
try self.normalize(reader);
|
||||
return bit;
|
||||
}
|
||||
|
||||
pub fn get(self: *RangeDecoder, reader: anytype, count: usize) !u32 {
|
||||
var result: u32 = 0;
|
||||
var i: usize = 0;
|
||||
while (i < count) : (i += 1)
|
||||
result = (result << 1) ^ @intFromBool(try self.getBit(reader));
|
||||
return result;
|
||||
}
|
||||
|
||||
pub inline fn decodeBit(self: *RangeDecoder, reader: anytype, prob: *u16, update: bool) !bool {
|
||||
const bound = (self.range >> 11) * prob.*;
|
||||
|
||||
if (self.code < bound) {
|
||||
if (update)
|
||||
prob.* += (0x800 - prob.*) >> 5;
|
||||
self.range = bound;
|
||||
|
||||
try self.normalize(reader);
|
||||
return false;
|
||||
} else {
|
||||
if (update)
|
||||
prob.* -= prob.* >> 5;
|
||||
self.code -= bound;
|
||||
self.range -= bound;
|
||||
|
||||
try self.normalize(reader);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
fn parseBitTree(
|
||||
self: *RangeDecoder,
|
||||
reader: anytype,
|
||||
num_bits: u5,
|
||||
probs: []u16,
|
||||
update: bool,
|
||||
) !u32 {
|
||||
var tmp: u32 = 1;
|
||||
var i: @TypeOf(num_bits) = 0;
|
||||
while (i < num_bits) : (i += 1) {
|
||||
const bit = try self.decodeBit(reader, &probs[tmp], update);
|
||||
tmp = (tmp << 1) ^ @intFromBool(bit);
|
||||
}
|
||||
return tmp - (@as(u32, 1) << num_bits);
|
||||
}
|
||||
|
||||
pub fn parseReverseBitTree(
|
||||
self: *RangeDecoder,
|
||||
reader: anytype,
|
||||
num_bits: u5,
|
||||
probs: []u16,
|
||||
offset: usize,
|
||||
update: bool,
|
||||
) !u32 {
|
||||
var result: u32 = 0;
|
||||
var tmp: usize = 1;
|
||||
var i: @TypeOf(num_bits) = 0;
|
||||
while (i < num_bits) : (i += 1) {
|
||||
const bit = @intFromBool(try self.decodeBit(reader, &probs[offset + tmp], update));
|
||||
tmp = (tmp << 1) ^ bit;
|
||||
result ^= @as(u32, bit) << i;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
pub fn BitTree(comptime num_bits: usize) type {
|
||||
return struct {
|
||||
probs: [1 << num_bits]u16 = @splat(0x400),
|
||||
|
||||
const Self = @This();
|
||||
|
||||
pub fn parse(
|
||||
self: *Self,
|
||||
reader: anytype,
|
||||
decoder: *RangeDecoder,
|
||||
update: bool,
|
||||
) !u32 {
|
||||
return decoder.parseBitTree(reader, num_bits, &self.probs, update);
|
||||
}
|
||||
|
||||
pub fn parseReverse(
|
||||
self: *Self,
|
||||
reader: anytype,
|
||||
decoder: *RangeDecoder,
|
||||
update: bool,
|
||||
) !u32 {
|
||||
return decoder.parseReverseBitTree(reader, num_bits, &self.probs, 0, update);
|
||||
}
|
||||
|
||||
pub fn reset(self: *Self) void {
|
||||
@memset(&self.probs, 0x400);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
pub const LenDecoder = struct {
|
||||
choice: u16 = 0x400,
|
||||
choice2: u16 = 0x400,
|
||||
low_coder: [16]BitTree(3) = @splat(.{}),
|
||||
mid_coder: [16]BitTree(3) = @splat(.{}),
|
||||
high_coder: BitTree(8) = .{},
|
||||
|
||||
pub fn decode(
|
||||
self: *LenDecoder,
|
||||
reader: anytype,
|
||||
decoder: *RangeDecoder,
|
||||
pos_state: usize,
|
||||
update: bool,
|
||||
) !usize {
|
||||
if (!try decoder.decodeBit(reader, &self.choice, update)) {
|
||||
return @as(usize, try self.low_coder[pos_state].parse(reader, decoder, update));
|
||||
} else if (!try decoder.decodeBit(reader, &self.choice2, update)) {
|
||||
return @as(usize, try self.mid_coder[pos_state].parse(reader, decoder, update)) + 8;
|
||||
} else {
|
||||
return @as(usize, try self.high_coder.parse(reader, decoder, update)) + 16;
|
||||
}
|
||||
}
|
||||
|
||||
pub fn reset(self: *LenDecoder) void {
|
||||
self.choice = 0x400;
|
||||
self.choice2 = 0x400;
|
||||
for (&self.low_coder) |*t| t.reset();
|
||||
for (&self.mid_coder) |*t| t.reset();
|
||||
self.high_coder.reset();
|
||||
}
|
||||
};
|
||||
@ -11,7 +11,7 @@ pub fn decompress(allocator: Allocator, reader: *std.io.BufferedReader, writer:
|
||||
|
||||
test {
|
||||
const expected = "Hello\nWorld!\n";
|
||||
const compressed = &[_]u8{
|
||||
const compressed = [_]u8{
|
||||
0x01, 0x00, 0x05, 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x02,
|
||||
0x00, 0x06, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x0A, 0x00,
|
||||
};
|
||||
|
||||
@ -5,7 +5,7 @@ const lzma = @import("../lzma.zig");
|
||||
const DecoderState = lzma.decode.DecoderState;
|
||||
const LzAccumBuffer = lzma.decode.lzbuffer.LzAccumBuffer;
|
||||
const Properties = lzma.decode.Properties;
|
||||
const RangeDecoder = lzma.decode.rangecoder.RangeDecoder;
|
||||
const RangeDecoder = lzma.decode.RangeDecoder;
|
||||
|
||||
pub const Decoder = struct {
|
||||
lzma_state: DecoderState,
|
||||
@ -32,14 +32,14 @@ pub const Decoder = struct {
|
||||
pub fn decompress(
|
||||
self: *Decoder,
|
||||
allocator: Allocator,
|
||||
reader: anytype,
|
||||
writer: anytype,
|
||||
reader: *std.io.BufferedReader,
|
||||
writer: *std.io.BufferedWriter,
|
||||
) !void {
|
||||
var accum = LzAccumBuffer.init(std.math.maxInt(usize));
|
||||
defer accum.deinit(allocator);
|
||||
|
||||
while (true) {
|
||||
const status = try reader.readByte();
|
||||
const status = try reader.takeByte();
|
||||
|
||||
switch (status) {
|
||||
0 => break,
|
||||
@ -55,8 +55,8 @@ pub const Decoder = struct {
|
||||
fn parseLzma(
|
||||
self: *Decoder,
|
||||
allocator: Allocator,
|
||||
reader: anytype,
|
||||
writer: anytype,
|
||||
br: *std.io.BufferedReader,
|
||||
writer: *std.io.BufferedWriter,
|
||||
accum: *LzAccumBuffer,
|
||||
status: u8,
|
||||
) !void {
|
||||
@ -97,12 +97,12 @@ pub const Decoder = struct {
|
||||
const unpacked_size = blk: {
|
||||
var tmp: u64 = status & 0x1F;
|
||||
tmp <<= 16;
|
||||
tmp |= try reader.readInt(u16, .big);
|
||||
tmp |= try br.takeInt(u16, .big);
|
||||
break :blk tmp + 1;
|
||||
};
|
||||
|
||||
const packed_size = blk: {
|
||||
const tmp: u17 = try reader.readInt(u16, .big);
|
||||
const tmp: u17 = try br.takeInt(u16, .big);
|
||||
break :blk tmp + 1;
|
||||
};
|
||||
|
||||
@ -114,7 +114,7 @@ pub const Decoder = struct {
|
||||
var new_props = self.lzma_state.lzma_props;
|
||||
|
||||
if (reset.props) {
|
||||
var props = try reader.readByte();
|
||||
var props = try br.takeByte();
|
||||
if (props >= 225) {
|
||||
return error.CorruptInput;
|
||||
}
|
||||
@ -137,10 +137,10 @@ pub const Decoder = struct {
|
||||
|
||||
self.lzma_state.unpacked_size = unpacked_size + accum.len;
|
||||
|
||||
var counter = std.io.countingReader(reader);
|
||||
const counter_reader = counter.reader();
|
||||
var counter: std.io.CountingReader = .{ .child_reader = br.reader() };
|
||||
var counter_reader = counter.reader().unbuffered();
|
||||
|
||||
var rangecoder = try RangeDecoder.init(counter_reader);
|
||||
var rangecoder = try RangeDecoder.init(&counter_reader);
|
||||
while (try self.lzma_state.process(allocator, counter_reader, writer, accum, &rangecoder) == .continue_) {}
|
||||
|
||||
if (counter.bytes_read != packed_size) {
|
||||
@ -150,12 +150,12 @@ pub const Decoder = struct {
|
||||
|
||||
fn parseUncompressed(
|
||||
allocator: Allocator,
|
||||
reader: anytype,
|
||||
writer: anytype,
|
||||
reader: *std.io.BufferedReader,
|
||||
writer: *std.io.BufferedWriter,
|
||||
accum: *LzAccumBuffer,
|
||||
reset_dict: bool,
|
||||
) !void {
|
||||
const unpacked_size = @as(u17, try reader.readInt(u16, .big)) + 1;
|
||||
const unpacked_size = @as(u17, try reader.takeInt(u16, .big)) + 1;
|
||||
|
||||
if (reset_dict) {
|
||||
try accum.reset(writer);
|
||||
@ -163,7 +163,7 @@ pub const Decoder = struct {
|
||||
|
||||
var i: @TypeOf(unpacked_size) = 0;
|
||||
while (i < unpacked_size) : (i += 1) {
|
||||
try accum.appendByte(allocator, try reader.readByte());
|
||||
try accum.appendByte(allocator, try reader.takeByte());
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@ -1,73 +1,44 @@
|
||||
const std = @import("../std.zig");
|
||||
const deflate = @import("flate/deflate.zig");
|
||||
const inflate = @import("flate/inflate.zig");
|
||||
|
||||
/// Decompress compressed data from reader and write plain data to the writer.
|
||||
pub fn decompress(reader: anytype, writer: anytype) !void {
|
||||
pub fn decompress(reader: *std.io.BufferedReader, writer: *std.io.BufferedWriter) !void {
|
||||
try inflate.decompress(.zlib, reader, writer);
|
||||
}
|
||||
|
||||
/// Decompressor type
|
||||
pub fn Decompressor(comptime ReaderType: type) type {
|
||||
return inflate.Decompressor(.zlib, ReaderType);
|
||||
}
|
||||
|
||||
/// Create Decompressor which will read compressed data from reader.
|
||||
pub fn decompressor(reader: anytype) Decompressor(@TypeOf(reader)) {
|
||||
return inflate.decompressor(.zlib, reader);
|
||||
}
|
||||
pub const Decompressor = inflate.Decompressor(.zlib);
|
||||
|
||||
/// Compression level, trades between speed and compression size.
|
||||
pub const Options = deflate.Options;
|
||||
|
||||
/// Compress plain data from reader and write compressed data to the writer.
|
||||
pub fn compress(reader: anytype, writer: anytype, options: Options) !void {
|
||||
pub fn compress(reader: *std.io.BufferedReader, writer: *std.io.BufferedWriter, options: Options) !void {
|
||||
try deflate.compress(.zlib, reader, writer, options);
|
||||
}
|
||||
|
||||
/// Compressor type
|
||||
pub fn Compressor(comptime WriterType: type) type {
|
||||
return deflate.Compressor(.zlib, WriterType);
|
||||
}
|
||||
|
||||
/// Create Compressor which outputs compressed data to the writer.
|
||||
pub fn compressor(writer: anytype, options: Options) !Compressor(@TypeOf(writer)) {
|
||||
return try deflate.compressor(.zlib, writer, options);
|
||||
}
|
||||
pub const Compressor = deflate.Compressor(.zlib);
|
||||
|
||||
/// Huffman only compression. Without Lempel-Ziv match searching. Faster
|
||||
/// compression, less memory requirements but bigger compressed sizes.
|
||||
pub const huffman = struct {
|
||||
pub fn compress(reader: anytype, writer: anytype) !void {
|
||||
pub fn compress(reader: *std.io.BufferedReader, writer: *std.io.BufferedWriter) !void {
|
||||
try deflate.huffman.compress(.zlib, reader, writer);
|
||||
}
|
||||
|
||||
pub fn Compressor(comptime WriterType: type) type {
|
||||
return deflate.huffman.Compressor(.zlib, WriterType);
|
||||
}
|
||||
|
||||
pub fn compressor(writer: anytype) !huffman.Compressor(@TypeOf(writer)) {
|
||||
return deflate.huffman.compressor(.zlib, writer);
|
||||
}
|
||||
pub const Compressor = deflate.huffman.Compressor(.zlib);
|
||||
};
|
||||
|
||||
// No compression store only. Compressed size is slightly bigger than plain.
|
||||
pub const store = struct {
|
||||
pub fn compress(reader: anytype, writer: anytype) !void {
|
||||
pub fn compress(reader: *std.io.BufferedReader, writer: *std.io.BufferedWriter) !void {
|
||||
try deflate.store.compress(.zlib, reader, writer);
|
||||
}
|
||||
|
||||
pub fn Compressor(comptime WriterType: type) type {
|
||||
return deflate.store.Compressor(.zlib, WriterType);
|
||||
}
|
||||
|
||||
pub fn compressor(writer: anytype) !store.Compressor(@TypeOf(writer)) {
|
||||
return deflate.store.compressor(.zlib, writer);
|
||||
}
|
||||
pub const Compressor = deflate.store.Compressor(.zlib);
|
||||
};
|
||||
|
||||
test "should not overshoot" {
|
||||
const std = @import("std");
|
||||
|
||||
// Compressed zlib data with extra 4 bytes at the end.
|
||||
const data = [_]u8{
|
||||
0x78, 0x9c, 0x73, 0xce, 0x2f, 0xa8, 0x2c, 0xca, 0x4c, 0xcf, 0x28, 0x51, 0x08, 0xcf, 0xcc, 0xc9,
|
||||
@ -79,7 +50,7 @@ test "should not overshoot" {
|
||||
var stream = std.io.fixedBufferStream(data[0..]);
|
||||
const reader = stream.reader();
|
||||
|
||||
var dcp = decompressor(reader);
|
||||
var dcp = Decompressor.init(reader);
|
||||
var out: [128]u8 = undefined;
|
||||
|
||||
// Decompress
|
||||
|
||||
@ -2241,7 +2241,7 @@ pub const ElfModule = struct {
|
||||
if (chdr.ch_type != .ZLIB) continue;
|
||||
const ch_size = chdr.ch_size;
|
||||
|
||||
var zlib_stream = std.compress.zlib.decompressor(§ion_reader);
|
||||
var zlib_stream: std.compress.zlib.Decompressor = .init(§ion_reader);
|
||||
|
||||
const decompressed_section = try gpa.alloc(u8, ch_size);
|
||||
errdefer gpa.free(decompressed_section);
|
||||
|
||||
@ -2027,8 +2027,10 @@ pub const VirtualMachine = struct {
|
||||
|
||||
var prev_row: Row = self.current_row;
|
||||
|
||||
var cie_stream: std.io.FixedBufferStream = .{ .buffer = cie.initial_instructions };
|
||||
var fde_stream: std.io.FixedBufferStream = .{ .buffer = fde.instructions };
|
||||
var cie_stream: std.io.BufferedReader = undefined;
|
||||
cie_stream.initFixed(&cie.initial_instructions);
|
||||
var fde_stream: std.io.BufferedReader = undefined;
|
||||
fde_stream.initFixed(&fde.instructions);
|
||||
const streams: [2]*std.io.FixedBufferStream = .{ &cie_stream, &fde_stream };
|
||||
|
||||
for (&streams, 0..) |stream, i| {
|
||||
|
||||
@ -1613,11 +1613,11 @@ pub fn writer(file: File) std.io.Writer {
|
||||
const max_buffers_len = 16;
|
||||
|
||||
pub fn reader_posRead(
|
||||
context: *anyopaque,
|
||||
context: ?*anyopaque,
|
||||
bw: *std.io.BufferedWriter,
|
||||
limit: std.io.Reader.Limit,
|
||||
offset: u64,
|
||||
) anyerror!usize {
|
||||
) std.io.Reader.Result {
|
||||
const file = opaqueToHandle(context);
|
||||
const len: std.io.Writer.Len = if (limit.unwrap()) |l| .init(l) else .entire_file;
|
||||
return writer.writeFile(bw, file, .init(offset), len, &.{}, 0);
|
||||
|
||||
@ -13,9 +13,9 @@ const std = @import("../std.zig");
|
||||
// of the byte.
|
||||
|
||||
/// Creates a bit reader which allows for reading bits from an underlying standard reader
|
||||
pub fn BitReader(comptime endian: std.builtin.Endian, comptime Reader: type) type {
|
||||
pub fn BitReader(comptime endian: std.builtin.Endian) type {
|
||||
return struct {
|
||||
reader: Reader,
|
||||
reader: *std.io.BufferedReader,
|
||||
bits: u8 = 0,
|
||||
count: u4 = 0,
|
||||
|
||||
@ -157,7 +157,7 @@ pub fn BitReader(comptime endian: std.builtin.Endian, comptime Reader: type) typ
|
||||
};
|
||||
}
|
||||
|
||||
pub fn bitReader(comptime endian: std.builtin.Endian, reader: anytype) BitReader(endian, @TypeOf(reader)) {
|
||||
pub fn bitReader(comptime endian: std.builtin.Endian, reader: *std.io.BufferedReader) BitReader(endian) {
|
||||
return .{ .reader = reader };
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user